diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 15ef03642..606d9d074 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -17,7 +17,7 @@ on: jobs: analyze: name: Analyze - runs-on: ubuntu-20.04 + runs-on: ubuntu-24.04 strategy: fail-fast: false @@ -27,26 +27,17 @@ jobs: language: ['cpp'] # Supported version for OpenCV opencv: [ - 4.2.0 + 4.10.0 ] compiler: [g++] steps: - name: Checkout repository - uses: actions/checkout@v2 - with: - # We must fetch at least the immediate parents so that if this is - # a pull request then we can checkout the head. - fetch-depth: 2 - - # If this run was triggered by a pull request event, then checkout - # the head of the pull request instead of the merge commit. - - run: git checkout HEAD^2 - if: ${{ github.event_name == 'pull_request' }} + uses: actions/checkout@v3 # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@v1 + uses: github/codeql-action/init@v3 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -66,7 +57,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@v1 + uses: github/codeql-action/autobuild@v3 # ℹ️ Command-line programs to run using the OS shell. 
# 📚 https://git.io/JvXDl @@ -80,4 +71,4 @@ jobs: # make release - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v1 + uses: github/codeql-action/analyze@v3 diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml new file mode 100644 index 000000000..175aa01c6 --- /dev/null +++ b/.github/workflows/macos.yml @@ -0,0 +1,30 @@ +name: Macos C/C++ CI + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + +jobs: + build-macos: + runs-on: macos-latest + steps: + - uses: actions/checkout@v4 + + - name: Install dependencies + run: | + brew uninstall cmake + brew update + brew install opencv cmake + + - name: Configure CMake + run: | + mkdir -p build + cd build + cmake .. + + - name: Build + working-directory: build + run: make + diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml new file mode 100644 index 000000000..6e60e5663 --- /dev/null +++ b/.github/workflows/ubuntu.yml @@ -0,0 +1,29 @@ +name: Ubuntu C/C++ CI + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + +jobs: + build-linux: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y libopencv-dev cmake g++ + + - name: Configure CMake + run: | + mkdir -p build + cd build + cmake .. 
+ + - name: Build + working-directory: build + run: make + diff --git a/.gitignore b/.gitignore index 5eff17020..c25a85a74 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,8 @@ build CMakeLists.txt.user* CMakeFiles CMakeCache.txt +*.engine +*.onnx +*.weights +*.pt +*.pth diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 5f7bfcbb2..000000000 --- a/.travis.yml +++ /dev/null @@ -1,74 +0,0 @@ -language: - - cpp -dist: trusty -compiler: - - g++ - -matrix: - include: - - os: linux - addons: - apt: - sources: - - ubuntu-toolchain-r-test - packages: - - g++-7 - env: - - MATRIX_EVAL="CC=gcc-7 && CXX=g++-7" - -before_install: - - eval "${MATRIX_EVAL}" - - sudo apt-get update - - sudo apt-get update -qq - -install: - -# OpenCV v3.0.0 install code (modified from orignal source: https://github.com/jayrambhia/Install-OpenCV) - - # OpenCV dependencies - Details available at: http://docs.opencv.org/trunk/doc/tutorials/introduction/linux_install/linux_install.html - - sudo apt-get install -y --allow-unauthenticated build-essential - - sudo apt-get install -y --allow-unauthenticated cmake git libgtk2.0-dev pkg-config libavcodec-dev libavformat-dev libswscale-dev - - sudo apt-get install -y --allow-unauthenticated python-dev python-numpy libtbb2 libtbb-dev libjpeg-dev libpng-dev libtiff-dev libjasper-dev libdc1394-22-dev - - # Download v3.0.0 .zip file and extract. - - curl -sL https://github.com/opencv/opencv/archive/4.3.0.zip > opencv.zip - - unzip opencv.zip - - cd opencv-4.3.0 - - # Create a new 'build' folder. - - mkdir build - - cd build - - # Set build instructions for Ubuntu distro. 
- - cmake -D CMAKE_BUILD_TYPE=RELEASE \ - -D CMAKE_INSTALL_PREFIX=/usr/local \ - -D WITH_TBB=ON \ - -D WITH_V4L=ON \ - -D BUILD_JAVA=OFF -D BUILD_opencv_java_bindings_generator=OFF -D BUILD_opencv_js=OFF\ - -D INSTALL_C_EXAMPLES=OFF -D BUILD_EXAMPLES=OFF \ - -D BUILD_TESTS=OFF -D BUILD_PERF_TESTS=OFF -D INSTALL_TESTS=OFF \ - -D WITH_QT=OFF \ - -D WITH_OPENGL=OFF \ - -D BUILD_opencv_apps=OFF \ - -D BUILD_opencv_python3=OFF -D BUILD_opencv_python_tests=OFF -D INSTALL_PYTHON_EXAMPLES=OFF -D BUILD_NEW_PYTHON_SUPPORT=OFF -D BUILD_opencv_python_bindings_generator=OFF \ - -D CPU_BASELINE=AVX2 -D CPU_DISPATCH=AVX2 .. - - - # Run 'make' with four threads. - - make -j4 - - # Install to OS. - - sudo make install - - # Add configuration to OpenCV to tell it where the library files are located on the file system (/usr/local/lib) - - sudo sh -c 'echo "/usr/local/lib" > /etc/ld.so.conf.d/opencv.conf' - - - sudo ldconfig - - echo "OpenCV installed." - - # We need to return to the repo "root" folder, so we can then 'cd' into the C++ project folder. - - cd ../../ - -script: - - cmake . 
- - make -j4 diff --git a/CMakeLists.txt b/CMakeLists.txt index da4bbd81d..fed4b1aec 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,10 @@ cmake_minimum_required(VERSION 3.9) -project(MTTracking) +project(MTTracking VERSION 1.1.0) +set_property(GLOBAL PROPERTY USE_FOLDERS ON) + +unset(CMAKE_C_FLAGS CACHE) unset(CMAKE_CXX_FLAGS CACHE) unset(CMAKE_CXX_FLAGS_RELEASE CACHE) # unset(CMAKE_CXX_FLAGS_DEBUG CACHE) @@ -15,11 +18,12 @@ endif() set(CMAKE_CXX_STANDARD 17) if (CMAKE_COMPILER_IS_GNUCXX) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pedantic-errors" CACHE STRING COMPILE_FLAGS FORCE) - set(CMAKE_CXX_FLAGS_RELEASE "-O3 -g -march=native -mtune=native -funroll-loops -Wall -DNDEBUG -DBOOST_DISABLE_ASSERTS" CACHE STRING COMPILE_FLAGS FORCE) - set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -march=native -mtune=native -Wall -DDEBUG" CACHE STRING COMPILE_FLAGS FORCE) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC" CACHE STRING COMPILE_FLAGS FORCE) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pedantic-errors -fPIC" CACHE STRING COMPILE_FLAGS FORCE) + set(CMAKE_CXX_FLAGS_RELEASE "-O3 -g -march=native -mtune=native -funroll-loops -DNDEBUG -DBOOST_DISABLE_ASSERTS" CACHE STRING COMPILE_FLAGS FORCE) + set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -march=native -mtune=native -DDEBUG" CACHE STRING COMPILE_FLAGS FORCE) elseif (MSVC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc /W4 -DGTL_STATIC" CACHE STRING COMPILE_FLAGS FORCE) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc /W4 /utf-8" CACHE STRING COMPILE_FLAGS FORCE) set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /MD /Ox /Ob2 /Oi /Ot /arch:AVX2 /fp:fast /DNDEBUG" CACHE STRING COMPILE_FLAGS FORCE) # set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /MDd /Od /Ob0 /DDEBUG" CACHE STRING COMPILE_FLAGS FORCE) @@ -40,21 +44,26 @@ endif(SILENT_WORK) include(CheckIncludeFileCXX) check_include_file_cxx(filesystem HAVE_FILESYSTEM) if(HAVE_FILESYSTEM) - add_definitions(-DHAVE_FILESYSTEM) message("Founded filesystem 
header") else(HAVE_FILESYSTEM) + add_definitions(-DHAVE_EXPERIMENTAL_FILESYSTEM) message("Do not found filesystem header") endif(HAVE_FILESYSTEM) - option(BUILD_EXAMPLES "Should compiled examples (motion detection, pedestrians, faces, DNNs etc)?" ON) if (BUILD_EXAMPLES) add_subdirectory(example) endif(BUILD_EXAMPLES) + +option(USE_CLIP "Should be used RuCLIP|CLIP for objects classification?" OFF) +if (USE_CLIP) + add_definitions(-DUSE_CLIP) +endif(USE_CLIP) + option(BUILD_CARS_COUNTING "Should compiled Cars counting example?" OFF) if (BUILD_CARS_COUNTING) - add_subdirectory(cars_counting) + add_definitions(-DBUILD_CARS_COUNTING) endif(BUILD_CARS_COUNTING) option(BUILD_ASYNC_DETECTOR "Should compiled async example with low fps Detector?" OFF) @@ -62,39 +71,57 @@ if (BUILD_ASYNC_DETECTOR) add_subdirectory(async_detector) endif(BUILD_ASYNC_DETECTOR) -option(BUILD_YOLO_LIB "Should compiled standalone yolo_lib with original darknet?" OFF) -if (BUILD_YOLO_LIB) - add_subdirectory(src/Detector/darknet) - add_definitions(-DBUILD_YOLO_LIB) - -if (MSVC) - if("${CMAKE_SIZEOF_VOID_P}" STREQUAL "4") - set(BIT_SYSTEM x32) - else() - set(BIT_SYSTEM x64) - endif() - - set(LIB_PTHREAD pthreadVC2) -else() - set(LIB_PTHREAD pthread) +option(BUILD_ONNX_TENSORRT "Should compiled TensorRT binding for ONNX models?" OFF) +if (BUILD_ONNX_TENSORRT) + add_subdirectory(src/Detector/tensorrt_onnx) + add_definitions(-DBUILD_ONNX_TENSORRT) +endif(BUILD_ONNX_TENSORRT) + +option(MTRACKER_PYTHON "Build mtracking Python bindings?" 
OFF) +if(MTRACKER_PYTHON) + set(NUMPY_INCLUDE_DIR "" CACHE FILEPATH "Path to numpy header if cmake can't find them.") + if (NOT ${NUMPY_INCLUDE_DIR} STREQUAL "") + message( " *** NUMPY_INCLUDE_DIR : ${NUMPY_INCLUDE_DIR}" ) + if(NOT EXISTS ${NUMPY_INCLUDE_DIR}/numpy/ndarrayobject.h) + message(SEND_ERROR "Can't find numpy/ndarrayobject.h in ${NUMPY_INCLUDE_DIR}") + endif() + include_directories(${NUMPY_INCLUDE_DIR}) endif() -if (MSVC) - file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/src/Detector/darknet/3rdparty/dll/${BIT_SYSTEM}/pthreadVC2.dll DESTINATION ${CMAKE_BINARY_DIR}/Debug) - file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/src/Detector/darknet/3rdparty/dll/${BIT_SYSTEM}/pthreadVC2.dll DESTINATION ${CMAKE_BINARY_DIR}/Release) + set(PYBIND11_LTO_CXX_FLAGS "") + set(PYBIND11_PYTHON_VERSION 3) + add_subdirectory(thirdparty/pybind11) +endif(MTRACKER_PYTHON) - file(GLOB CUDNN_DLL ${CUDNN_DIR}/bin/*.dll) - file(COPY ${CUDNN_DLL} DESTINATION ${CMAKE_BINARY_DIR}/Release) - file(COPY ${CUDNN_DLL} DESTINATION ${CMAKE_BINARY_DIR}/Debug) -endif() - -endif(BUILD_YOLO_LIB) - -option(BUILD_YOLO_TENSORRT "Should compiled TensorRT binding for YOLO?" 
OFF) -if (BUILD_YOLO_TENSORRT) - add_subdirectory(src/Detector/tensorrt_yolo) - add_definitions(-DBUILD_YOLO_TENSORRT) -endif(BUILD_YOLO_TENSORRT) add_subdirectory(thirdparty) add_subdirectory(src) + + +# Create CMake config files for distribution +set(INCLUDE_INSTALL_DIR include/ ) +set(LIB_INSTALL_DIR lib/ ) + +install(EXPORT MTTrackingExports + FILE ${PROJECT_NAME}Targets.cmake + NAMESPACE ${PROJECT_NAME}:: + DESTINATION ${LIB_INSTALL_DIR}/${PROJECT_NAME}/cmake +) + +include(CMakePackageConfigHelpers) + +set(CONFIG_FILENAME ${PROJECT_NAME}Config.cmake) + +configure_package_config_file(${CONFIG_FILENAME}.in + ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_FILENAME} + INSTALL_DESTINATION ${LIB_INSTALL_DIR}/${PROJECT_NAME}/cmake + PATH_VARS INCLUDE_INSTALL_DIR) + +write_basic_package_version_file( + ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake + VERSION ${PROJECT_VERSION} + COMPATIBILITY SameMajorVersion ) + +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_FILENAME} + ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake + DESTINATION ${LIB_INSTALL_DIR}/${PROJECT_NAME}/cmake ) diff --git a/MTTrackingConfig.cmake.in b/MTTrackingConfig.cmake.in new file mode 100644 index 000000000..796457847 --- /dev/null +++ b/MTTrackingConfig.cmake.in @@ -0,0 +1,10 @@ +set(@PROJECT_NAME@_VERSION @PROJECT_VERSION@) + +@PACKAGE_INIT@ + +include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Targets.cmake") + +set_and_check(@PROJECT_NAME@_INCLUDE_DIR "@PACKAGE_INCLUDE_INSTALL_DIR@") +set_and_check(@PROJECT_NAME@_LIB_DIR "@PACKAGE_INCLUDE_INSTALL_DIR@../lib") + +check_required_components(@PROJECT_NAME@) diff --git a/README.md b/README.md index 8c5b5017d..a7850596d 100644 --- a/README.md +++ b/README.md @@ -1,209 +1,143 @@ -![travis ci:](https://travis-ci.org/Smorodov/Multitarget-tracker.svg?branch=master) 
-[![CodeQL](https://github.com/Smorodov/Multitarget-tracker/workflows/CodeQL/badge.svg?branch=master)](https://github.com/Smorodov/Multitarget-tracker/actions?query=workflow%3ACodeQL) - -# Last changes - -* New license Apache 2.0 instead GPLv3 - -* Added new parameter Batch size - simultaneous detection on several consecutive frames. it can increase processing speed on a powerful GPU. Works with Darknet and TensorRT backends but can add some latency - -# New videos! - -* Vehicles speed calculation with YOLO v4 (Thanks [Sam Blake for great idea!](https://medium.com/hal24k-techblog/how-to-track-objects-in-the-real-world-with-tensorflow-sort-and-opencv-a64d9564ccb1)) - -[![Vehicles speed:](https://img.youtube.com/vi/qOHYvDwpsO0/0.jpg)](https://youtu.be/qOHYvDwpsO0) - - -* First step to ADAS with YOLO v4 - -[![Simple ADAS:](https://img.youtube.com/vi/5cgg5fy90Xg/0.jpg)](https://youtu.be/5cgg5fy90Xg) - -# Multitarget (multiple objects) tracker - -#### 1. Objects detector can be created with function [CreateDetector](https://github.com/Smorodov/Multitarget-tracker/blob/master/src/Detector/BaseDetector.cpp) with different values of the detectorType: - -1.1. Based on background substraction: built-in Vibe (tracking::Motion_VIBE), SuBSENSE (tracking::Motion_SuBSENSE) and LOBSTER (tracking::Motion_LOBSTER); MOG2 (tracking::Motion_MOG2) from [opencv](https://github.com/opencv/opencv/blob/master/modules/video/include/opencv2/video/background_segm.hpp); MOG (tracking::Motion_MOG), GMG (tracking::Motion_GMG) and CNT (tracking::Motion_CNT) from [opencv_contrib](https://github.com/opencv/opencv_contrib/tree/master/modules/bgsegm). For foreground segmentation used contours from OpenCV with result as cv::RotatedRect - -1.2. Haar face detector from OpenCV (tracking::Face_HAAR) - -1.3. 
HOG pedestrian detector from OpenCV (tracking::Pedestrian_HOG) and C4 pedestrian detector from [sturkmen72](https://github.com/sturkmen72/C4-Real-time-pedestrian-detection) (tracking::Pedestrian_C4) - -1.4. Detector based on opencv_dnn (tracking::DNN_OCV) and pretrained models from [chuanqi305](https://github.com/chuanqi305/MobileNet-SSD) and [pjreddie](https://pjreddie.com/darknet/yolo/) - -1.5. YOLO detector (tracking::Yolo_Darknet) with darknet inference from [AlexeyAB](https://github.com/AlexeyAB/darknet) and pretrained models from [pjreddie](https://pjreddie.com/darknet/yolo/) - -1.6. YOLO detector (tracking::Yolo_TensorRT) with NVidia TensorRT inference from [enazoe](https://github.com/enazoe/yolo-tensorrt) and pretrained models from [pjreddie](https://pjreddie.com/darknet/yolo/) - -1.7. You can to use custom detector with bounding or rotated rectangle as output. - -#### 2. Matching or solve an [assignment problem](https://github.com/Smorodov/Multitarget-tracker/blob/master/src/Tracker/Ctracker.h): - -2.1. Hungrian algorithm (tracking::MatchHungrian) with cubic time O(N^3) where N is objects count - -2.2. Algorithm based on weighted bipartite graphs (tracking::MatchBipart) from [rdmpage](https://github.com/rdmpage/maximum-weighted-bipartite-matching) with time O(M * N^2) where N is objects count and M is connections count between detections on frame and tracking objects. It can be faster than Hungrian algorithm - -2.3. [Distance](https://github.com/Smorodov/Multitarget-tracker/blob/master/src/Tracker/Ctracker.h) from detections and objects: euclidean distance in pixels between centers (tracking::DistCenters), euclidean distance in pixels between rectangles (tracking::DistRects), Jaccard or IoU distance from 0 to 1 (tracking::DistJaccard) - -#### 3. [Smoothing trajectories and predict missed objects](https://github.com/Smorodov/Multitarget-tracker/blob/master/src/Tracker/Ctracker.h): - -3.1. Linear Kalman filter from OpenCV (tracking::KalmanLinear) - -3.2. 
Unscented Kalman filter from OpenCV (tracking::KalmanUnscented) with constant velocity or constant acceleration models - -3.3. [Kalman goal](https://github.com/Smorodov/Multitarget-tracker/blob/master/src/Tracker/Ctracker.h) is only coordinates (tracking::FilterCenter) or coordinates and size (tracking::FilterRect) - -3.4. Simple [Abandoned detector](https://github.com/Smorodov/Multitarget-tracker/blob/master/src/Tracker/Ctracker.h) - -3.5. [Line intersection](https://github.com/Smorodov/Multitarget-tracker/blob/master/cars_counting/CarsCounting.cpp) counting - -#### 4. [Advanced visual search](https://github.com/Smorodov/Multitarget-tracker/blob/master/src/Tracker/Ctracker.h) for objects if they have not been detected: - -4.1. No search (tracking::TrackNone) - -4.2. Built-in DAT (tracking::TrackDAT) from [foolwood](https://github.com/foolwood/DAT), STAPLE (tracking::TrackSTAPLE) from [xuduo35](https://github.com/xuduo35/STAPLE) or LDES (tracking::TrackLDES) from [yfji](https://github.com/yfji/LDESCpp); KCF (tracking::TrackKCF), MIL (tracking::TrackMIL), MedianFlow (tracking::TrackMedianFlow), GOTURN (tracking::TrackGOTURN), MOSSE (tracking::TrackMOSSE) or CSRT (tracking::TrackCSRT) from [opencv_contrib](https://github.com/opencv/opencv_contrib/tree/master/modules/tracking) - -With this option the tracking can work match slower but more accuracy. - -#### 5. Pipeline - -5.1. Syncronous [pipeline - SyncProcess](https://github.com/Smorodov/Multitarget-tracker/blob/master/example/VideoExample.h): -- get frame from capture device; -- decoding; -- objects detection (1); -- tracking (2-4); -- show result. - -This pipeline is good if all algorithms are fast and works faster than time between two frames (40 ms for device with 25 fps). Or it can be used if we have only 1 core for all (no parallelization). - -5.2. 
Pipeline with [2 threads - AsyncProcess](https://github.com/Smorodov/Multitarget-tracker/blob/master/example/VideoExample.h): -- 1th thread takes frame t and makes capture, decoding and objects detection; -- 2th thread takes frame t-1, results from first thread and makes tracking and results presentation (this is the Main read). - -So we have a latency on 1 frame but on two free CPU cores we can increase performance on 2 times. - -5.3. Fully [acynchronous pipeline](https://github.com/Smorodov/Multitarget-tracker/tree/master/async_detector) can be used if the objects detector works with low fps and we have a free 2 CPU cores. In this case we use 4 threads: -- 1th main thread is not busy and used for GUI and result presentation; -- 2th thread makes capture and decoding, puts frames in threadsafe queue; -- 3th thread is used for objects detection on the newest frame from the queue; -- 4th thread is used for objects tracking: waits the frame with detection from 3th tread and used advanced visual search (4) in intermediate frames from queue until it ges a frame with detections. - -This pipeline can used with slow but accuracy DNN and track objects in intermediate frame in realtime without latency. - -Also you can read [Wiki in Russian](https://github.com/Smorodov/Multitarget-tracker/wiki). - -#### Demo Videos - -* Mouse tracking: - -[![Tracking:](https://img.youtube.com/vi/2fW5TmAtAXM/0.jpg)](https://www.youtube.com/watch?v=2fW5TmAtAXM) - -* Motion Detection and tracking: - -[![Motion Detection and tracking:](https://img.youtube.com/vi/GjN8jOy4kVw/0.jpg)](https://www.youtube.com/watch?v=GjN8jOy4kVw) - -* Multiple Faces tracking: - -[![Multiple Faces tracking:](https://img.youtube.com/vi/j67CFwFtciU/0.jpg)](https://www.youtube.com/watch?v=j67CFwFtciU) - -* Simple Abandoned detector: - -[![Simple Abandoned detector:](https://img.youtube.com/vi/fpkHRsFzspA/0.jpg)](https://www.youtube.com/watch?v=fpkHRsFzspA) - -#### Tested Platforms -1. 
Ubuntu Linux 18.04 with x86 processors -2. Ubuntu Linux 18.04 with Nvidia Jetson Nano (YOLO + darknet on GPU works!) -3. Windows 10 (x64 and x32 builds) - -#### Build -1. Download project sources -2. Install CMake -3. Install OpenCV (https://github.com/opencv/opencv) and OpenCV contrib (https://github.com/opencv/opencv_contrib) repositories -4. Configure project CmakeLists.txt, set OpenCV_DIR (-DOpenCV_DIR=/path/to/opencv/build). -5. If opencv_contrib don't installed then disable options USE_OCV_BGFG=OFF, USE_OCV_KCF=OFF and USE_OCV_UKF=OFF -6. If you want to use native darknet YOLO detector with CUDA + cuDNN then set BUILD_YOLO_LIB=ON (Install first CUDA and cuDNN libraries from Nvidia) -7. If you want to use YOLO detector with TensorRT then set BUILD_YOLO_TENSORRT=ON (Install first TensorRT library from Nvidia) -8. For building example with low fps detector (now native darknet YOLO detector) and Tracker worked on each frame: BUILD_ASYNC_DETECTOR=ON -9. For building example with line crossing detection (cars counting): BUILD_CARS_COUNTING=ON -10. Go to the build directory and run make - -**Full build:** - - git clone https://github.com/Smorodov/Multitarget-tracker.git - cd Multitarget-tracker - mkdir build - cd build - cmake . .. -DUSE_OCV_BGFG=ON -DUSE_OCV_KCF=ON -DUSE_OCV_UKF=ON -DBUILD_YOLO_LIB=ON -DBUILD_YOLO_TENSORRT=ON -DBUILD_ASYNC_DETECTOR=ON -DBUILD_CARS_COUNTING=ON - make -j - -How to run cmake on Windows for Visual Studio 15 2017 Win64: [example](https://github.com/Smorodov/Multitarget-tracker/blob/master/data/cmake_vs2017.bat). You need to add directory with cmake.exe to PATH and change build params in cmake.bat - - -**Usage:** - - Usage: - ./MultitargetTracker [--example]= [--start_frame]= [--end_frame]= [--end_delay]= [--out]= [--show_logs]= [--gpu]= [--async]= [--res]= [--settings]= [--batch_size=] - ./MultitargetTracker ../data/atrium.avi -e=1 -o=../data/atrium_motion.avi - Press: - * 'm' key for change mode: play|pause. 
When video is paused you can press any key for get next frame. - * Press Esc to exit from video - - Params: - 1. Movie file, for example ../data/atrium.avi - 2. [Optional] Number of example: 0 - MouseTracking, 1 - MotionDetector, 2 - FaceDetector, 3 - PedestrianDetector, 4 - OpenCV dnn objects detector, 5 - Yolo Darknet detector, 6 - YOLO TensorRT Detector - -e=0 or --example=1 - 3. [Optional] Frame number to start a video from this position - -sf=0 or --start_frame==1500 - 4. [Optional] Play a video to this position (if 0 then played to the end of file) - -ef=0 or --end_frame==200 - 5. [Optional] Delay in milliseconds after video ending - -ed=0 or --end_delay=1000 - 6. [Optional] Name of result video file - -o=out.avi or --out=result.mp4 - 7. [Optional] Show Trackers logs in terminal - -sl=1 or --show_logs=0 - 8. [Optional] Use built-in OpenCL - -g=1 or --gpu=0 - 9. [Optional] Use 2 threads for processing pipeline - -a=1 or --async=0 - 10. [Optional] Path to the csv file with tracking result - -r=res.csv or --res=res.csv - 11. [Optional] Path to the ini file with tracker settings - -s=settings.ini or --settings=settings.ini - 12. [Optional] Batch size - simultaneous detection on several consecutive frames - -bs=2 or --batch_size=1 - -More details here: [How to run examples](https://github.com/Smorodov/Multitarget-tracker/wiki/Run-examples). 
- -#### Thirdparty libraries -* OpenCV (and contrib): https://github.com/opencv/opencv and https://github.com/opencv/opencv_contrib -* Vibe: https://github.com/BelBES/VIBE -* SuBSENSE and LOBSTER: https://github.com/ethereon/subsense -* GTL: https://github.com/rdmpage/graph-template-library -* MWBM: https://github.com/rdmpage/maximum-weighted-bipartite-matching -* Pedestrians detector: https://github.com/sturkmen72/C4-Real-time-pedestrian-detection -* Non Maximum Suppression: https://github.com/Nuzhny007/Non-Maximum-Suppression -* MobileNet SSD models: https://github.com/chuanqi305/MobileNet-SSD -* YOLO v3 models: https://pjreddie.com/darknet/yolo/ -* Darknet inference and YOLO v4 models: https://github.com/AlexeyAB/darknet -* NVidia TensorRT inference and YOLO v5 models: https://github.com/enazoe/yolo-tensorrt -* GOTURN models: https://github.com/opencv/opencv_extra/tree/c4219d5eb3105ed8e634278fad312a1a8d2c182d/testdata/tracking -* DAT tracker: https://github.com/foolwood/DAT -* STAPLE tracker: https://github.com/xuduo35/STAPLE -* LDES tracker: https://github.com/yfji/LDESCpp -* Ini file parser: https://github.com/benhoyt/inih - -#### License -Apache 2.0: [LICENSE text](https://github.com/Smorodov/Multitarget-tracker/blob/master/LICENSE) - -#### Project cititations +# Multitarget Tracker + +[![Build Ubuntu](https://github.com/Smorodov/Multitarget-tracker/actions/workflows/ubuntu.yml/badge.svg)](https://github.com/Smorodov/Multitarget-tracker/actions/workflows/ubuntu.yml) +[![Build MacOS](https://github.com/Smorodov/Multitarget-tracker/actions/workflows/macos.yml/badge.svg)](https://github.com/Smorodov/Multitarget-tracker/actions/workflows/macos.yml) +[![CodeQL](https://github.com/Smorodov/Multitarget-tracker/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/Smorodov/Multitarget-tracker/actions/workflows/codeql-analysis.yml) + +## Latest Features + +- Add new SOTA: YOLOv26, YOLOv26-obb and YOLOv26-seg models from 
[ultralytics/ultralytics](https://github.com/ultralytics/ultralytics) +- Add RT-DETRv4 (API similar D-FINE) detection model [RT-DETRs/RT-DETRv4](https://github.com/RT-DETRs/RT-DETRv4) +- Add D-FINE seg detection model [ArgoHA/D-FINE-seg](https://github.com/ArgoHA/D-FINE-seg) +- Add ByteTrack MOT algorithm based on [Vertical-Beach/ByteTrack-cpp](https://github.com/Vertical-Beach/ByteTrack-cpp) +- Big code cleanup from old style algorithms and detectors: some bgfg detectors, some VOT trackes, Face and Pedestrin detectors, Darknet based backend for old YOLO etc +- YOLOv13 detector works with TensorRT! Export pre-trained PyTorch models [here (iMoonLab/yolov13)](https://github.com/iMoonLab/yolov13) to ONNX format and run Multitarget-tracker with `-e=3` example +- Instance segmentation model from RF-DETR detector works with TensorRT! Export pre-trained PyTorch models [here (roboflow/rf-detr)](https://github.com/roboflow/rf-detr) to ONNX format and run Multitarget-tracker with `-e=3` example +- New linear assignment algorithm - [Jonker-Volgenant / LAPJV algorithm](https://github.com/yongyanghz/LAPJV-algorithm-c) used in [scipy](https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.linear_sum_assignment.html) as alternative for Hungarian allgorithm +- D-FINE detector works with TensorRT! Export pre-trained PyTorch models [here (Peterande/D-FINE)](https://github.com/Peterande/D-FINE) to ONNX format and run Multitarget-tracker with `-e=3` example +- RF-DETR detector works with TensorRT! Export pre-trained PyTorch models [here (roboflow/rf-detr)](https://github.com/roboflow/rf-detr) to ONNX format and run Multitarget-tracker with `-e=3` example +- YOLOv12 detector works with TensorRT! 
Export pre-trained PyTorch models [here (sunsmarterjie/yolov12)](https://github.com/sunsmarterjie/yolov12) to ONNX format and run Multitarget-tracker with `-e=3` example + +## Demo Videos + +### Detection & Tracking + +[![RF-DETR: detection vs instance segmentation](https://img.youtube.com/vi/oKy7jEKT83c/0.jpg)](https://youtu.be/oKy7jEKT83c) +[![Satellite planes detection and tracking with YOLOv11-obb](https://img.youtube.com/vi/gTpWnkMF7Lg/0.jpg)](https://youtu.be/gTpWnkMF7Lg) +[![4-in-1 latest SOTA detectors](https://img.youtube.com/vi/Pb_HnejRpY4/0.jpg)](https://youtu.be/Pb_HnejRpY4) +[![YOLOv8-obb detection with rotated boxes](https://img.youtube.com/vi/1e6ur57Fhzs/0.jpg)](https://youtu.be/1e6ur57Fhzs) +[![Very fast and small objects tracking](https://img.youtube.com/vi/PalIIAfgX88/0.jpg)](https://youtu.be/PalIIAfgX88) + +## Documentation + +### Core Components + +#### 1. Object Detectors +Available through `CreateDetector` function with different `detectorType`: +1. **Background Subtraction**: + - Built-in: VIBE (`tracking::Motion_VIBE`), SuBSENSE (`tracking::Motion_SuBSENSE`), LOBSTER (`tracking::Motion_LOBSTER`) + - OpenCV: MOG2 (`tracking::Motion_MOG2`) + - OpenCV Contrib: MOG (`tracking::Motion_MOG`), GMG (`tracking::Motion_GMG`), CNT (`tracking::Motion_CNT`) + - Foreground segmentation uses OpenCV contours producing `cv::RotatedRect` +2. **Deep Learning Models**: + - OpenCV DNN module (`tracking::DNN_OCV`) + - TensorRT-accelerated YOLO (`tracking::Yolo_TensorRT`) + +#### 2. Matching Algorithms +For solving assignment problems: +- **Hungarian Algorithm** (`tracking::MatchHungrian`) - O(N³) complexity +- **LAPJV** (`tracking::MatchBipart`) - O(M*N²) complexity +- **Distance Metrics**: + - Center distance (`tracking::DistCenters`) + - Bounding box distance (`tracking::DistRects`) + - Jaccard/IoU similarity (`tracking::DistJaccard`) + +#### 3. 
Trajectory Smoothing +- Kalman filters: Linear (`tracking::KalmanLinear`) and Unscented (`tracking::KalmanUnscented`) +- State models: Constant velocity and constant acceleration +- Tracking modes: Position-only (`tracking::FilterCenter`) and position+size (`tracking::FilterRect`) +- Specialized features: Abandoned object detection, line intersection counting + +#### 4. Visual Search +When targets disappear: +- KCF (`tracking::TrackKCF`) +- CSRT (`tracking::TrackCSRT`) +- DaSiamRPN (`tracking::TrackDaSiamRPN`) +- Vit (`tracking::TrackVit`) +- Nano (`tracking::TrackNano`) + +### Processing Pipelines +1. **Synchronous** (`SyncProcess`): Single-threaded processing +2. **Asynchronous (2 threads)** (`AsyncProcess`): Decouples detection and tracking +3. **Fully Asynchronous (4 threads)**: For low-FPS deep learning detectors + +### Installation & Building +```bash +git clone https://github.com/Smorodov/Multitarget-tracker.git +cd Multitarget-tracker +mkdir build && cd build +cmake . .. \ + -DUSE_OCV_BGFG=ON \ + -DUSE_OCV_KCF=ON \ + -DUSE_OCV_UKF=ON \ + -DBUILD_ONNX_TENSORRT=ON \ + -DBUILD_ASYNC_DETECTOR=ON \ + -DBUILD_CARS_COUNTING=ON +make -j +``` + +### Usage Guide +Basic command syntax: +```bash +./MultitargetTracker [--example=] [--start_frame=] + [--end_frame=] [--end_delay=] [--out=] + [--show_logs] [--gpu] [--async] [--res=] + [--settings=] [--batch_size=] +``` + +Example: +```bash +./MultitargetTracker ../data/atrium.avi -e=1 -o=../data/atrium_motion.avi +``` + +Keyboard Controls: +- `m`: Toggle play/pause +- Any key: Step forward when paused +- `Esc`: Exit + +### Integration as Library +```cpp +#include + +std::unique_ptr m_tracker; +TrackerSettings settings; +settings.SetDistance(tracking::DistJaccard); +m_tracker = BaseTracker::CreateTracker(settings); +``` + +### Third-party Dependencies + +- [OpenCV (and contrib)](https://github.com/opencv/opencv) +- [Vibe](https://github.com/BelBES/VIBE) +- [Non Maximum 
Suppression](https://github.com/Nuzhny007/Non-Maximum-Suppression) +- [Ini file parser](https://github.com/benhoyt/inih) +- [Circular Code](https://github.com/LiorKogan/Circular) + +### License +[Apache 2.0 License](https://github.com/Smorodov/Multitarget-tracker/blob/master/LICENSE) + +#### Project citations 1. Jeroen PROVOOST "Camera gebaseerde analysevan de verkeersstromen aaneen kruispunt", 2014 ( https://iiw.kuleuven.be/onderzoek/eavise/mastertheses/provoost.pdf ) 2. Roberto Ciano, Dimitrij Klesev "Autonome Roboterschwarme in geschlossenen Raumen", 2015 ( https://www.hs-furtwangen.de/fileadmin/user_upload/fak_IN/Dokumente/Forschung_InformatikJournal/informatikJournal_2016.pdf#page=18 ) 3. Wenda Qin, Tian Zhang, Junhe Chen "Traffic Monitoring By Video: Vehicles Tracking and Vehicle Data Analysing", 2016 ( http://cs-people.bu.edu/wdqin/FinalProject/CS585%20FinalProjectReport.html ) 4. Ipek BARIS "CLASSIFICATION AND TRACKING OF VEHICLES WITH HYBRID CAMERA SYSTEMS", 2016 ( http://cvrg.iyte.edu.tr/publications/IpekBaris_MScThesis.pdf ) 5. Cheng-Ta Lee, Albert Y. Chen, Cheng-Yi Chang "In-building Coverage of Automated External Defibrillators Considering Pedestrian Flow", 2016 ( http://www.see.eng.osaka-u.ac.jp/seeit/icccbe2016/Proceedings/Full_Papers/092-132.pdf ) 6. Roberto Ciano, Dimitrij Klesev "Autonome Roboterschwarme in geschlossenen Raumen" in "informatikJournal 2016/17", 2017 ( https://docplayer.org/124538994-2016-17-informatikjournal-2016-17-aktuelle-berichte-aus-forschung-und-lehre-der-fakultaet-informatik.html ) -7. Omid Noorshams "Automated systems to assess weights and activity in grouphoused mice", 2017 ( https://pdfs.semanticscholar.org/e5ff/f04b4200c149fb39d56f171ba7056ab798d3.pdf ) +7. Omid Noorshams "Automated systems to assess weights and activity in grouphoused mice", 2017 ( https://pdfs.semanticscholar.org/e5ff/f04b4200c149fb39d56f171ba7056ab798d3.pdf ) 8. 
RADEK VOPÁLENSKÝ "DETECTION,TRACKING AND CLASSIFICATION OF VEHICLES", 2018 ( https://www.vutbr.cz/www_base/zav_prace_soubor_verejne.php?file_id=181063 ) 9. Márk Rátosi, Gyula Simon "Real-Time Localization and Tracking using Visible Light Communication", 2018 ( https://ieeexplore.ieee.org/abstract/document/8533800 ) -10. Thi Nha Ngo, Kung-Chin Wu, En-Cheng Yang, Ta-Te Lin "Areal-time imaging system for multiple honey bee tracking and activity monitoring", 2019 ( https://www.sciencedirect.com/science/article/pii/S0168169919301498 ) -11. ROS, http://docs.ros.org/lunar/api/costmap_converter/html/Ctracker_8cpp_source.html +10. Thi Nha Ngo, Kung-Chin Wu, En-Cheng Yang, Ta-Te Lin "A real-time imaging system for multiple honey bee tracking and activity monitoring", 2019 ( https://www.sciencedirect.com/science/article/pii/S0168169919301498 ) +11. Tiago Miguel, Rodrigues de Almeida "Multi-Camera and Multi-Algorithm Architecture for VisualPerception onboard the ATLASCAR2", 2019 ( http://lars.mec.ua.pt/public/LAR%20Projects/Vision/2019_TiagoAlmeida/Thesis_Tiago_AlmeidaVF_26Jul2019.pdf ) +12. ROS, http://docs.ros.org/lunar/api/costmap_converter/html/Ctracker_8cpp_source.html +13. Sangeeth Kochanthara, Yanja Dajsuren, Loek Cleophas, Mark van den Brand "Painting the Landscape of Automotive Software in GitHub", 2022 ( https://arxiv.org/abs/2203.08936 ) +14. Fesus, A., Kovari, B., Becsi, T., Leginusz, L. "Dynamic Prompt-Based Approach for Open Vocabulary Multi-Object Tracking", 2025 ( https://link.springer.com/chapter/10.1007/978-3-031-81799-1_25 ) diff --git a/TODO b/TODO deleted file mode 100644 index 88bb8aace..000000000 --- a/TODO +++ /dev/null @@ -1,28 +0,0 @@ -Global data association for multi-object tracking using network flows: -1. https://github.com/nwojke/mcf -2. https://github.com/jutanke/cabbage -3. Tracking The Untrackable: Learning To Track Multiple Cues with Long-Term Dependencies -4. 
http://openaccess.thecvf.com/content_cvpr_2017/papers/Tang_Multiple_People_Tracking_CVPR_2017_paper.pdf -5. https://github.com/abhineet123/Deep-Learning-for-Tracking-and-Detection -6. https://arxiv.org/abs/1903.05625 -7. https://arxiv.org/abs/1907.03961 -8. http://www.cvlibs.net/projects/online_tracking/ -9. https://github.com/jwchoi384/Gaussian_YOLOv3 -10. muSSP: https://github.com/yu-lab-vt/muSSP -11. https://github.com/ifzhang/FairMOT -12. https://github.com/AndreaHor/LifT_Solver - -Deep SORT: -1. https://github.com/humoncy/YOLOv3-SORT-ReID -2. https://github.com/nwojke/deep_sort -3. https://github.com/bitzy/DeepSort -3. https://github.com/oylz/DS - -New: -1. https://github.com/ceccocats/tkDNN -2. dasiamrpn_tracker.py -> C++ - -Tests: -1. Quality tests -2. Performance tests - diff --git a/async_detector/AsyncDetector.cpp b/async_detector/AsyncDetector.cpp index b1e97b14e..58854ed2e 100644 --- a/async_detector/AsyncDetector.cpp +++ b/async_detector/AsyncDetector.cpp @@ -1,49 +1,14 @@ #include "AsyncDetector.h" -/// -/// \brief DrawFilledRect -/// -void DrawFilledRect(cv::Mat& frame, const cv::Rect& rect, cv::Scalar cl, int alpha) -{ - if (alpha) - { - const int alpha_1 = 255 - alpha; - const int nchans = frame.channels(); - int color[3] = { cv::saturate_cast(cl[0]), cv::saturate_cast(cl[1]), cv::saturate_cast(cl[2]) }; - for (int y = rect.y; y < rect.y + rect.height; ++y) - { - uchar* ptr = frame.ptr(y) + nchans * rect.x; - for (int x = rect.x; x < rect.x + rect.width; ++x) - { - for (int i = 0; i < nchans; ++i) - { - ptr[i] = cv::saturate_cast((alpha_1 * ptr[i] + alpha * color[i]) / 255); - } - ptr += nchans; - } - } - } - else - { - cv::rectangle(frame, rect, cl, cv::FILLED); - } -} - /// /// \brief AsyncDetector::AsyncDetector /// \param parser /// AsyncDetector::AsyncDetector(const cv::CommandLineParser& parser) - : - m_showLogs(true), - m_fps(25), - m_startFrame(0), - m_endFrame(0), - m_finishDelay(0) { m_inFile = parser.get(0); m_outFile = 
parser.get("out"); - m_showLogs = parser.get("show_logs") != 0; + m_showLogsLevel = parser.get("show_logs"); m_startFrame = parser.get("start_frame"); m_endFrame = parser.get("end_frame"); m_finishDelay = parser.get("end_delay"); @@ -57,13 +22,33 @@ AsyncDetector::AsyncDetector(const cv::CommandLineParser& parser) m_colors.emplace_back(255, 127, 255); m_colors.emplace_back(127, 0, 255); m_colors.emplace_back(127, 0, 127); -} -/// -/// \brief AsyncDetector::~AsyncDetector -/// -AsyncDetector::~AsyncDetector() -{ + // Create loggers + m_consoleSink = std::make_shared(); + m_consoleSink->set_level(spdlog::level::from_str(m_showLogsLevel)); + m_consoleSink->set_pattern("[%^%l%$] %v"); + + auto currentTime = std::chrono::system_clock::now(); + auto transformed = currentTime.time_since_epoch().count() / 1000000; + std::time_t tt = std::chrono::system_clock::to_time_t(currentTime); + char buffer[80]; +#ifdef WIN32 + tm timeInfo; + localtime_s(&timeInfo, &tt); + strftime(buffer, 80, "%G%m%d_%H%M%S", &timeInfo); +#else + auto timeInfo = localtime(&tt); + strftime(buffer, 80, "%G%m%d_%H%M%S", timeInfo); +#endif + + size_t max_size = 1024 * 1024 * 5; + size_t max_files = 3; + m_fileSink = std::make_shared("logs/" + std::string(buffer) + std::to_string(transformed % 1000) + ".txt", max_size, max_files); + m_fileSink->set_level(spdlog::level::from_str(m_showLogsLevel)); + + m_logger = std::shared_ptr(new spdlog::logger("traffic", { m_consoleSink, m_fileSink })); + m_logger->set_level(spdlog::level::from_str(m_showLogsLevel)); + m_logger->info("Start service"); } /// @@ -76,7 +61,7 @@ void AsyncDetector::Process() bool stopFlag = false; - std::thread thCapture(CaptureThread, m_inFile, m_startFrame, &m_fps, &m_framesQue, &stopFlag); + std::thread thCapture(CaptureThread, m_inFile, m_startFrame, &m_framesCount, &m_fps, &m_framesQue, &stopFlag); #ifndef SILENT_WORK cv::namedWindow("Video", cv::WINDOW_NORMAL | cv::WINDOW_KEEPRATIO); @@ -122,25 +107,25 @@ void 
AsyncDetector::Process() if (k == 27) break; #else - std::this_thread::sleep_for(std::chrono::milliseconds(1)); + //std::this_thread::sleep_for(std::chrono::milliseconds(1)); #endif ++framesCounter; if (m_endFrame && framesCounter > m_endFrame) { - std::cout << "Process: riched last " << m_endFrame << " frame" << std::endl; + m_logger->info("Process: riched last {} frame", m_endFrame); break; } } - std::cout << "Stopping threads..." << std::endl; + m_logger->info("Stopping threads..."); stopFlag = true; m_framesQue.SetBreak(true); if (thCapture.joinable()) thCapture.join(); - std::cout << "work time = " << (allTime / freq) << std::endl; + m_logger->info("work time = {}", allTime / freq); #ifndef SILENT_WORK cv::waitKey(m_finishDelay); #endif @@ -149,61 +134,50 @@ void AsyncDetector::Process() /// /// \brief AsyncDetector::DrawTrack /// \param frame -/// \param resizeCoeff /// \param track /// \param drawTrajectory /// \param isStatic /// void AsyncDetector::DrawTrack(cv::Mat frame, - int resizeCoeff, const TrackingObject& track, bool drawTrajectory) { - auto ResizeRect = [&](const cv::Rect& r) -> cv::Rect - { - return cv::Rect(resizeCoeff * r.x, resizeCoeff * r.y, resizeCoeff * r.width, resizeCoeff * r.height); - }; - auto ResizePoint = [&](const cv::Point& pt) -> cv::Point - { - return cv::Point(resizeCoeff * pt.x, resizeCoeff * pt.y); - }; - if (track.m_isStatic) { #if (CV_VERSION_MAJOR >= 4) - cv::rectangle(frame, ResizeRect(track.m_rrect.boundingRect()), cv::Scalar(255, 0, 255), 2, cv::LINE_AA); + cv::rectangle(frame, track.m_rrect.boundingRect(), cv::Scalar(255, 0, 255), 2, cv::LINE_AA); #else - cv::rectangle(frame, ResizeRect(track.m_rrect.boundingRect()), cv::Scalar(255, 0, 255), 2, CV_AA); + cv::rectangle(frame, track.m_rrect.boundingRect(), cv::Scalar(255, 0, 255), 2, CV_AA); #endif } else { #if (CV_VERSION_MAJOR >= 4) - cv::rectangle(frame, ResizeRect(track.m_rrect.boundingRect()), cv::Scalar(0, 255, 0), 1, cv::LINE_AA); + cv::rectangle(frame, 
track.m_rrect.boundingRect(), cv::Scalar(0, 255, 0), 1, cv::LINE_AA); #else - cv::rectangle(frame, ResizeRect(track.m_rrect.boundingRect()), cv::Scalar(0, 255, 0), 1, CV_AA); + cv::rectangle(frame, track.m_rrect.boundingRect(), cv::Scalar(0, 255, 0), 1, CV_AA); #endif } if (drawTrajectory) { - cv::Scalar cl = m_colors[track.m_ID % m_colors.size()]; + cv::Scalar cl = m_colors[track.m_ID.ID2Module(m_colors.size())]; for (size_t j = 0; j < track.m_trace.size() - 1; ++j) { const TrajectoryPoint& pt1 = track.m_trace.at(j); const TrajectoryPoint& pt2 = track.m_trace.at(j + 1); #if (CV_VERSION_MAJOR >= 4) - cv::line(frame, ResizePoint(pt1.m_prediction), ResizePoint(pt2.m_prediction), cl, 1, cv::LINE_AA); + cv::line(frame, pt1.m_prediction, pt2.m_prediction, cl, 1, cv::LINE_AA); #else - cv::line(frame, ResizePoint(pt1.m_prediction), ResizePoint(pt2.m_prediction), cl, 1, CV_AA); + cv::line(frame, pt1.m_prediction, pt2.m_prediction, cl, 1, CV_AA); #endif if (pt2.m_hasRaw) { #if (CV_VERSION_MAJOR >= 4) - cv::circle(frame, ResizePoint(pt2.m_prediction), 4, cl, 4, cv::LINE_AA); + cv::circle(frame, pt2.m_prediction, 4, cl, 4, cv::LINE_AA); #else - cv::circle(frame, ResizePoint(pt2.m_prediction), 4, cl, 4, CV_AA); + cv::circle(frame, pt2.m_prediction, 4, cl, 4, CV_AA); #endif } } @@ -216,20 +190,17 @@ void AsyncDetector::DrawTrack(cv::Mat frame, /// void AsyncDetector::DrawData(frame_ptr frameInfo, int framesCounter, int currTime) { - if (m_showLogs) - { - std::cout << "Frame " << framesCounter << ": "; - int id = frameInfo->m_inDetector.load(); - if (id != FrameInfo::StateNotProcessed && id != FrameInfo::StateSkipped) - std::cout << "(" << id << ") detects = " << frameInfo->m_regions.size() << ", "; - std::cout << "tracks = " << frameInfo->m_tracks.size() << ", time = " << currTime << std::endl; - } + int id = frameInfo->m_inDetector.load(); + if (id != FrameInfo::StateNotProcessed && id != FrameInfo::StateSkipped) + m_logger->info("Frame {0} ({1}): ({2}) detects= {3}, tracks = 
{4}, time = {5}", framesCounter, m_framesCount, id, frameInfo->m_regions.size(), frameInfo->m_tracks.size(), currTime); + else + m_logger->info("Frame {0} ({1}): tracks = {2}, time = {3}", framesCounter, m_framesCount, frameInfo->m_tracks.size(), currTime); for (const auto& track : frameInfo->m_tracks) { if (track.m_isStatic) { - DrawTrack(frameInfo->m_frame, 1, track, true); + DrawTrack(frameInfo->m_frame, track, true); } else { @@ -239,7 +210,7 @@ void AsyncDetector::DrawData(frame_ptr frameInfo, int framesCounter, int currTim { //std::cout << TypeConverter::Type2Str(track.m_type) << " - " << track.m_rect << std::endl; - DrawTrack(frameInfo->m_frame, 1, track, true); + DrawTrack(frameInfo->m_frame, track, true); std::string label = TypeConverter::Type2Str(track.m_type);// +": " + std::to_string(track.m_confidence); int baseLine = 0; @@ -279,7 +250,7 @@ void AsyncDetector::DrawData(frame_ptr frameInfo, int framesCounter, int currTim /// \param framesQue /// \param stopFlag /// -void AsyncDetector::CaptureThread(std::string fileName, int startFrame, float* fps, FramesQueue* framesQue, bool* stopFlag) +void AsyncDetector::CaptureThread(std::string fileName, int startFrame, int* framesCount, float* fps, FramesQueue* framesQue, bool* stopFlag) { cv::VideoCapture capture; if (fileName.size() == 1) @@ -293,8 +264,11 @@ void AsyncDetector::CaptureThread(std::string fileName, int startFrame, float* f std::cerr << "Can't open " << fileName << std::endl; return; } + *framesCount = cvRound(capture.get(cv::CAP_PROP_FRAME_COUNT)); capture.set(cv::CAP_PROP_POS_FRAMES, startFrame); + time_point_t startTimeStamp = std::chrono::system_clock::now(); + *fps = std::max(1.f, (float)capture.get(cv::CAP_PROP_FPS)); int frameHeight = cvRound(capture.get(cv::CAP_PROP_FRAME_HEIGHT)); @@ -319,14 +293,12 @@ void AsyncDetector::CaptureThread(std::string fileName, int startFrame, float* f detectorConfig.emplace("maxCropRatio", "3.0"); #if 1 - detectorConfig.emplace("white_list", 
std::to_string((objtype_t)ObjectTypes::obj_person)); - detectorConfig.emplace("white_list", std::to_string((objtype_t)ObjectTypes::obj_car)); - detectorConfig.emplace("white_list", std::to_string((objtype_t)ObjectTypes::obj_bicycle)); - detectorConfig.emplace("white_list", std::to_string((objtype_t)ObjectTypes::obj_motorbike)); - detectorConfig.emplace("white_list", std::to_string((objtype_t)ObjectTypes::obj_bus)); - detectorConfig.emplace("white_list", std::to_string((objtype_t)ObjectTypes::obj_truck)); - //detectorConfig.emplace("white_list", std::to_string((objtype_t)ObjectTypes::obj_traffic light)); - //detectorConfig.emplace("white_list", std::to_string((objtype_t)ObjectTypes::obj_stop sign)); + detectorConfig.emplace("white_list", "person"); + detectorConfig.emplace("white_list", "car"); + detectorConfig.emplace("white_list", "bicycle"); + detectorConfig.emplace("white_list", "motorbike"); + detectorConfig.emplace("white_list", "bus"); + detectorConfig.emplace("white_list", "truck"); #endif // Tracker @@ -348,13 +320,13 @@ void AsyncDetector::CaptureThread(std::string fileName, int startFrame, float* f { trackerSettings.m_minStaticTime = minStaticTime; trackerSettings.m_maxStaticTime = 60; - trackerSettings.m_maximumAllowedSkippedFrames = cvRound(trackerSettings.m_minStaticTime * (*fps)); // Maximum allowed skipped frames - trackerSettings.m_maxTraceLength = 2 * trackerSettings.m_maximumAllowedSkippedFrames; // Maximum trace length + trackerSettings.m_maximumAllowedLostTime = trackerSettings.m_minStaticTime; // Maximum allowed lost time + trackerSettings.m_maxTraceLength = 2 * trackerSettings.m_maximumAllowedLostTime; // Maximum trace length } else { - trackerSettings.m_maximumAllowedSkippedFrames = cvRound(2 * (*fps)); // Maximum allowed skipped frames - trackerSettings.m_maxTraceLength = cvRound(4 * (*fps)); // Maximum trace length + trackerSettings.m_maximumAllowedLostTime = 2.; // Maximum allowed lost time + trackerSettings.m_maxTraceLength = 4.; // 
Maximum trace length } // Capture the first frame @@ -364,13 +336,14 @@ void AsyncDetector::CaptureThread(std::string fileName, int startFrame, float* f ++frameInd; std::thread thDetection(DetectThread, detectorConfig, firstFrame, framesQue, stopFlag); - std::thread thTracking(TrackingThread, trackerSettings, framesQue, stopFlag); + std::thread thTracking(TrackingThread, trackerSettings, framesQue, *fps, stopFlag); // Capture frame for (; !(*stopFlag);) { frame_ptr frameInfo(new FrameInfo(frameInd)); frameInfo->m_dt = cv::getTickCount(); + frameInfo->m_frameTimeStamp = startTimeStamp + std::chrono::milliseconds(cvRound(frameInd * (1000.f / (*fps)))); capture >> frameInfo->m_frame; if (frameInfo->m_frame.empty()) { @@ -385,9 +358,9 @@ void AsyncDetector::CaptureThread(std::string fileName, int startFrame, float* f framesQue->AddNewFrame(frameInfo, 15); #if 1 - std::this_thread::sleep_for(std::chrono::milliseconds(1000 / cvRound(*fps) - 1)); + //std::this_thread::sleep_for(std::chrono::milliseconds(1000 / cvRound(*fps) - 1)); #else - std::this_thread::sleep_for(std::chrono::milliseconds(1)); + //std::this_thread::sleep_for(std::chrono::milliseconds(1)); #endif ++frameInd; @@ -411,8 +384,7 @@ void AsyncDetector::CaptureThread(std::string fileName, int startFrame, float* f void AsyncDetector::DetectThread(const config_t& config, cv::Mat firstFrame, FramesQueue* framesQue, bool* stopFlag) { cv::UMat ufirst = firstFrame.getUMat(cv::ACCESS_READ); - std::unique_ptr detector = std::unique_ptr(CreateDetector(tracking::Detectors::Yolo_Darknet, config, ufirst)); - detector->SetMinObjectSize(cv::Size(firstFrame.cols / 50, firstFrame.cols / 50)); + std::unique_ptr detector = BaseDetector::CreateDetector(tracking::Detectors::ONNX_TensorRT, config, ufirst); for (; !(*stopFlag);) { @@ -420,7 +392,6 @@ void AsyncDetector::DetectThread(const config_t& config, cv::Mat firstFrame, Fra if (frameInfo) { detector->Detect(frameInfo->m_clFrame); - 
//std::this_thread::sleep_for(std::chrono::milliseconds(500)); const regions_t& regions = detector->GetDetects(); frameInfo->m_regions.assign(regions.begin(), regions.end()); @@ -435,16 +406,16 @@ void AsyncDetector::DetectThread(const config_t& config, cv::Mat firstFrame, Fra /// \brief AsyncDetector::TrackingThread /// \param /// -void AsyncDetector::TrackingThread(const TrackerSettings& settings, FramesQueue* framesQue, bool* stopFlag) +void AsyncDetector::TrackingThread(const TrackerSettings& settings, FramesQueue* framesQue, float fps, bool* stopFlag) { - std::unique_ptr tracker = std::make_unique(settings); + std::unique_ptr tracker = BaseTracker::CreateTracker(settings, fps); for (; !(*stopFlag);) { frame_ptr frameInfo = framesQue->GetFirstDetectedFrame(); if (frameInfo) { - tracker->Update(frameInfo->m_regions, frameInfo->m_clFrame, frameInfo->m_fps); + tracker->Update(frameInfo->m_regions, frameInfo->m_clFrame, frameInfo->m_frameTimeStamp); tracker->GetTracks(frameInfo->m_tracks); frameInfo->m_inTracker.store(FrameInfo::StateCompleted); diff --git a/async_detector/AsyncDetector.h b/async_detector/AsyncDetector.h index d76928324..9a0a2a763 100644 --- a/async_detector/AsyncDetector.h +++ b/async_detector/AsyncDetector.h @@ -10,7 +10,13 @@ #include #include "BaseDetector.h" -#include "Ctracker.h" +#include "BaseTracker.h" + +#include "spdlog/spdlog.h" +#include "spdlog/async.h" +#include "spdlog/sinks/stdout_color_sinks.h" +#include "spdlog/sinks/basic_file_sink.h" +#include "spdlog/sinks/rotating_file_sink.h" // ---------------------------------------------------------------------- @@ -23,8 +29,8 @@ struct FrameInfo cv::UMat m_clFrame; regions_t m_regions; std::vector m_tracks; + time_point_t m_frameTimeStamp; int64 m_dt = 0; - float m_fps = 0; size_t m_frameInd = 0; static constexpr int StateNotProcessed = 0; @@ -52,28 +58,32 @@ class AsyncDetector { public: AsyncDetector(const cv::CommandLineParser& parser); - ~AsyncDetector(); + ~AsyncDetector() = 
default; void Process(); private: - bool m_showLogs = false; - float m_fps = 0; + std::string m_showLogsLevel = "info"; + float m_fps = 25; std::string m_inFile; std::string m_outFile; int m_startFrame = 0; int m_endFrame = 0; int m_finishDelay = 0; + int m_framesCount = 0; std::vector m_colors; FramesQueue m_framesQue; void DrawData(frame_ptr frameInfo, int framesCounter, int currTime); + void DrawTrack(cv::Mat frame, const TrackingObject& track, bool drawTrajectory = true); - void DrawTrack(cv::Mat frame, int resizeCoeff, const TrackingObject& track, bool drawTrajectory = true); - - static void CaptureThread(std::string fileName, int startFrame, float* fps, FramesQueue* framesQue, bool* stopFlag); + static void CaptureThread(std::string fileName, int startFrame, int* framesCount, float* fps, FramesQueue* framesQue, bool* stopFlag); static void DetectThread(const config_t& config, cv::Mat firstFrame, FramesQueue* framesQue, bool* stopFlag); - static void TrackingThread(const TrackerSettings& settings, FramesQueue* framesQue, bool* stopFlag); + static void TrackingThread(const TrackerSettings& settings, FramesQueue* framesQue, float fps, bool* stopFlag); + + std::shared_ptr m_consoleSink; + std::shared_ptr m_fileSink; + std::shared_ptr m_logger; }; diff --git a/async_detector/CMakeLists.txt b/async_detector/CMakeLists.txt index d03a46051..2a552a621 100644 --- a/async_detector/CMakeLists.txt +++ b/async_detector/CMakeLists.txt @@ -2,50 +2,30 @@ cmake_minimum_required (VERSION 3.5) project(AsyncDetector) -set(SOURCES - main.cpp - AsyncDetector.cpp -) +set(SOURCES main.cpp + AsyncDetector.cpp) -set(HEADERS - AsyncDetector.h - Queue.h -) +set(HEADERS AsyncDetector.h + Queue.h) # ---------------------------------------------------------------------------- # добавляем include директории # ---------------------------------------------------------------------------- -INCLUDE_DIRECTORIES( - ${PROJECT_SOURCE_DIR}/../src - ${PROJECT_SOURCE_DIR}/../src/common 
+INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/../src + ${PROJECT_SOURCE_DIR}/../src/mtracking ${PROJECT_SOURCE_DIR}/../src/Detector ${PROJECT_SOURCE_DIR}/../src/Detector/vibe_src ${PROJECT_SOURCE_DIR}/../src/Detector/Subsense ${PROJECT_SOURCE_DIR}/../src/Tracker ${PROJECT_SOURCE_DIR}/../src/Tracker/HungarianAlg -) + ${PROJECT_SOURCE_DIR}/../thirdparty/spdlog/include) -set(LIBS - ${OpenCV_LIBS} - mtracking - mdetection -) +set(LIBS ${OpenCV_LIBS} + mtracking + mdetection) -if (BUILD_YOLO_LIB) -if (MSVC) - if("${CMAKE_SIZEOF_VOID_P}" STREQUAL "4") - set(BIT_SYSTEM x32) - else() - set(BIT_SYSTEM x64) - endif() - - link_directories(${PROJECT_SOURCE_DIR}/../src/Detector/darknet/3rdparty/lib/${BIT_SYSTEM}) -endif(MSVC) - - add_definitions(-DBUILD_YOLO_LIB) -endif(BUILD_YOLO_LIB) - ADD_EXECUTABLE(${PROJECT_NAME} ${SOURCES} ${HEADERS}) +TARGET_LINK_LIBRARIES(${PROJECT_NAME} PRIVATE ${LIBS}) -TARGET_LINK_LIBRARIES(${PROJECT_NAME} ${LIBS}) +set_target_properties(${PROJECT_NAME} PROPERTIES FOLDER "apps") \ No newline at end of file diff --git a/async_detector/Queue.h b/async_detector/Queue.h index eb931979b..46aa468f0 100644 --- a/async_detector/Queue.h +++ b/async_detector/Queue.h @@ -221,7 +221,7 @@ class FramesQueue } if (!m_break.load()) { - frame_ptr frameInfo = m_que.front(); + frame_ptr frameInfo = std::move(m_que.front()); m_que.pop_front(); #if SHOW_QUE_LOG QUE_LOG << "GetFirstProcessedFrame end: " << frameInfo->m_dt << ", frameInd " << frameInfo->m_frameInd << std::endl; diff --git a/async_detector/main.cpp b/async_detector/main.cpp index 09d1c2356..27a422b8e 100644 --- a/async_detector/main.cpp +++ b/async_detector/main.cpp @@ -18,13 +18,14 @@ static void Help() const char* keys = { - "{ @1 |../data/atrium.avi | movie file | }" - "{ sf start_frame |0 | Start a video from this position | }" - "{ ef end_frame |0 | Play a video to this position (if 0 then played to the end of file) | }" - "{ ed end_delay |0 | Delay in milliseconds after video ending | }" - "{ o out | | 
Name of result video file | }" - "{ sl show_logs |1 | Show Trackers logs | }" - "{ g gpu |0 | Use OpenCL acceleration | }" + "{ @1 |../data/atrium.avi | movie file | }" + "{ sf start_frame |0 | Start a video from this position | }" + "{ ef end_frame |0 | Play a video to this position (if 0 then played to the end of file) | }" + "{ ed end_delay |0 | Delay in milliseconds after video ending | }" + "{ o out | | Name of result video file | }" + "{ show_logs |info | Show Trackers logs: trace, debug, info, warning, error, critical, off | }" + "{ g gpu |0 | Use OpenCL acceleration | }" + "{ contrast_adjustment |0 | Use contrast adjustment for frames before detection | }" }; // ---------------------------------------------------------------------- diff --git a/cars_counting/CMakeLists.txt b/cars_counting/CMakeLists.txt deleted file mode 100644 index 2b811ac89..000000000 --- a/cars_counting/CMakeLists.txt +++ /dev/null @@ -1,50 +0,0 @@ -cmake_minimum_required (VERSION 3.5) - -project(CarsCounting) - -set(SOURCES - main.cpp - CarsCounting.cpp -) - -set(HEADERS - CarsCounting.h -) - -# ---------------------------------------------------------------------------- -# добавляем include директории -# ---------------------------------------------------------------------------- -INCLUDE_DIRECTORIES( - ${PROJECT_SOURCE_DIR}/../src - ${PROJECT_SOURCE_DIR}/../src/common - ${PROJECT_SOURCE_DIR}/../src/Detector - ${PROJECT_SOURCE_DIR}/../src/Detector/vibe_src - ${PROJECT_SOURCE_DIR}/../src/Detector/Subsense - ${PROJECT_SOURCE_DIR}/../src/Tracker - ${PROJECT_SOURCE_DIR}/../src/Tracker/HungarianAlg -) - -set(LIBS - ${OpenCV_LIBS} - mtracking - mdetection -) - -if (BUILD_YOLO_LIB) -if (MSVC) - if("${CMAKE_SIZEOF_VOID_P}" STREQUAL "4") - set(BIT_SYSTEM x32) - else() - set(BIT_SYSTEM x64) - endif() - - link_directories(${PROJECT_SOURCE_DIR}/../src/Detector/darknet/3rdparty/lib/${BIT_SYSTEM}) -endif(MSVC) - - add_definitions(-DBUILD_YOLO_LIB) -endif(BUILD_YOLO_LIB) - 
-ADD_EXECUTABLE(${PROJECT_NAME} ${SOURCES} ${HEADERS}) - - -TARGET_LINK_LIBRARIES(${PROJECT_NAME} ${LIBS}) diff --git a/cars_counting/CarsCounting.cpp b/cars_counting/CarsCounting.cpp deleted file mode 100644 index 6e26fac11..000000000 --- a/cars_counting/CarsCounting.cpp +++ /dev/null @@ -1,587 +0,0 @@ -#include "CarsCounting.h" - -/// -/// \brief CarsCounting::CarsCounting -/// \param parser -/// -CarsCounting::CarsCounting(const cv::CommandLineParser& parser) - : - m_showLogs(true), - m_fps(25), - m_isTrackerInitialized(false), - m_startFrame(0), - m_endFrame(0), - m_finishDelay(0) -{ -#ifdef _WIN32 - std::string pathToModel = "../../data/"; -#else - std::string pathToModel = "../data/"; -#endif - - - m_inFile = parser.get(0); - m_outFile = parser.get("out"); - m_showLogs = parser.get("show_logs") != 0; - m_startFrame = parser.get("start_frame"); - m_endFrame = parser.get("end_frame"); - m_finishDelay = parser.get("end_delay"); - m_drawHeatMap = parser.get("heat_map") != 0; - - m_weightsFile = parser.get("weights"); - m_configFile = parser.get("config"); - m_namesFile = parser.get("names"); - if (m_weightsFile.empty() && m_configFile.empty()) - { - m_weightsFile = pathToModel + "yolov4.weights"; - m_configFile = pathToModel + "yolov4.cfg"; - } - if (m_namesFile.empty()) - m_namesFile = pathToModel + "coco.names"; - - std::map infMap; - infMap.emplace("darknet", tracking::Detectors::Yolo_Darknet); - infMap.emplace("ocvdnn", tracking::Detectors::DNN_OCV); - std::string inference = parser.get("inference"); - auto infType = infMap.find(inference); - if (infType != std::end(infMap)) - m_detectorType = infType->second; - else - m_detectorType = tracking::Detectors::Yolo_Darknet; - - std::cout << "Inference framework set " << inference << " used " << m_detectorType << ", weights: " << m_weightsFile << ", config: " << m_configFile << ", names: " << m_namesFile << std::endl; - - m_colors.emplace_back(255, 0, 0); - m_colors.emplace_back(0, 255, 0); - 
m_colors.emplace_back(0, 0, 255); - m_colors.emplace_back(255, 255, 0); - m_colors.emplace_back(0, 255, 255); - m_colors.emplace_back(255, 0, 255); - m_colors.emplace_back(255, 127, 255); - m_colors.emplace_back(127, 0, 255); - m_colors.emplace_back(127, 0, 127); -} - -/// -/// \brief CarsCounting::~CarsCounting -/// -CarsCounting::~CarsCounting() -{ - -} - -/// -/// \brief CarsCounting::Process -/// -void CarsCounting::Process() -{ - cv::VideoWriter writer; - -#ifndef SILENT_WORK - cv::namedWindow("Video", cv::WINDOW_NORMAL | cv::WINDOW_KEEPRATIO); -#endif - - int k = 0; - - double freq = cv::getTickFrequency(); - - int64 allTime = 0; - - bool manualMode = false; - int framesCounter = m_startFrame + 1; - - cv::VideoCapture capture; - if (m_inFile.size() == 1) - capture.open(atoi(m_inFile.c_str())); - else - capture.open(m_inFile); - - if (!capture.isOpened()) - { - std::cerr << "Can't open " << m_inFile << std::endl; - return; - } - capture.set(cv::CAP_PROP_POS_FRAMES, m_startFrame); - - m_fps = std::max(1.f, (float)capture.get(cv::CAP_PROP_FPS)); - - cv::Mat colorFrame; - capture >> colorFrame; - if (colorFrame.empty()) - { - std::cerr << "Frame is empty!" << std::endl; - return; - } - if (!m_isTrackerInitialized) - { - cv::UMat uframe = colorFrame.getUMat(cv::ACCESS_READ); - m_isTrackerInitialized = InitTracker(uframe); - if (!m_isTrackerInitialized) - { - std::cerr << "Tracker initialize error!!!" << std::endl; - return; - } - } - - for (;;) - { - capture >> colorFrame; - if (colorFrame.empty()) - { - std::cerr << "Frame is empty!" 
<< std::endl; - break; - } - - int64 t1 = cv::getTickCount(); - - cv::UMat uframe; - if (!m_detector->CanGrayProcessing() || m_tracker->CanColorFrameToTrack()) - uframe = colorFrame.getUMat(cv::ACCESS_READ); - else - cv::cvtColor(colorFrame, uframe, cv::COLOR_BGR2GRAY); - - m_detector->Detect(uframe); - - const regions_t& regions = m_detector->GetDetects(); - - m_tracker->Update(regions, uframe, m_fps); - - int64 t2 = cv::getTickCount(); - - allTime += t2 - t1; - int currTime = cvRound(1000 * (t2 - t1) / freq); - - DrawData(colorFrame, framesCounter, currTime); - -#ifndef SILENT_WORK - cv::imshow("Video", colorFrame); - - int waitTime = manualMode ? 0 : 1;// std::max(1, cvRound(1000 / m_fps - currTime)); - k = cv::waitKey(waitTime); - if (k == 'm' || k == 'M') - manualMode = !manualMode; - else if (k == 27) - break; - -#else - std::this_thread::sleep_for(std::chrono::milliseconds(1)); -#endif - - if (!m_outFile.empty() && !writer.isOpened()) - writer.open(m_outFile, cv::VideoWriter::fourcc('H', 'F', 'Y', 'U'), m_fps, colorFrame.size(), true); - if (writer.isOpened()) - writer << colorFrame; - - ++framesCounter; - if (m_endFrame && framesCounter > m_endFrame) - { - std::cout << "Process: riched last " << m_endFrame << " frame" << std::endl; - break; - } - } - - std::cout << "work time = " << (allTime / freq) << std::endl; -#ifndef SILENT_WORK - cv::waitKey(m_finishDelay); -#endif -} - -/// -/// \brief CarsCounting::DrawTrack -/// \param frame -/// \param resizeCoeff -/// \param track -/// \param drawTrajectory -/// -void CarsCounting::DrawTrack(cv::Mat frame, - int resizeCoeff, - const TrackingObject& track, - bool drawTrajectory - ) -{ - auto ResizeRect = [&](const cv::Rect& r) -> cv::Rect - { - return cv::Rect(resizeCoeff * r.x, resizeCoeff * r.y, resizeCoeff * r.width, resizeCoeff * r.height); - }; - auto ResizePoint = [&](const cv::Point& pt) -> cv::Point - { - return cv::Point(resizeCoeff * pt.x, resizeCoeff * pt.y); - }; - - if (track.m_isStatic) - { -#if 
(CV_VERSION_MAJOR >= 4) - cv::rectangle(frame, ResizeRect(track.m_rrect.boundingRect()), cv::Scalar(255, 0, 255), 2, cv::LINE_AA); -#else - cv::rectangle(frame, ResizeRect(track.m_rrect.boundingRect()), cv::Scalar(255, 0, 255), 2, CV_AA); -#endif - } - else - { -#if (CV_VERSION_MAJOR >= 4) - cv::rectangle(frame, ResizeRect(track.m_rrect.boundingRect()), cv::Scalar(0, 255, 0), 1, cv::LINE_AA); -#else - cv::rectangle(frame, ResizeRect(track.m_rrect.boundingRect()), cv::Scalar(0, 255, 0), 1, CV_AA); -#endif - - if (!m_geoParams.Empty()) - { - int traceSize = static_cast(track.m_trace.size()); - int period = std::min(2 * cvRound(m_fps), traceSize); - const auto& from = m_geoParams.Pix2Geo(track.m_trace[traceSize - period]); - const auto& to = m_geoParams.Pix2Geo(track.m_trace[traceSize - 1]); - auto dist = DistanceInMeters(from, to); - - std::stringstream label; - if (period >= cvRound(m_fps) / 4) - { - auto velocity = (3.6f * dist * m_fps) / period; - //std::cout << TypeConverter::Type2Str(track.m_type) << ": distance " << std::fixed << std::setw(2) << std::setprecision(2) << dist << " on time " << (period / m_fps) << " with velocity " << velocity << " km/h: " << track.m_confidence << std::endl; - if (velocity < 1.f || std::isnan(velocity)) - velocity = 0; - //label << TypeConverter::Type2Str(track.m_type) << " " << std::fixed << std::setw(2) << std::setprecision(2) << velocity << " km/h"; - label << TypeConverter::Type2Str(track.m_type) << " " << cvRound(velocity) << " km/h"; - - int baseLine = 0; - double fontScale = 0.5; - cv::Size labelSize = cv::getTextSize(label.str(), cv::FONT_HERSHEY_SIMPLEX, fontScale, 1, &baseLine); - - cv::Rect brect = track.m_rrect.boundingRect(); - if (brect.x < 0) - { - brect.width = std::min(brect.width, frame.cols - 1); - brect.x = 0; - } - else if (brect.x + brect.width >= frame.cols) - { - brect.x = std::max(0, frame.cols - brect.width - 1); - brect.width = std::min(brect.width, frame.cols - 1); - } - if (brect.y - labelSize.height < 
0) - { - brect.height = std::min(brect.height, frame.rows - 1); - brect.y = labelSize.height; - } - else if (brect.y + brect.height >= frame.rows) - { - brect.y = std::max(0, frame.rows - brect.height - 1); - brect.height = std::min(brect.height, frame.rows - 1); - } - cv::rectangle(frame, cv::Rect(cv::Point(brect.x, brect.y - labelSize.height), cv::Size(labelSize.width, labelSize.height + baseLine)), cv::Scalar(200, 200, 200), cv::FILLED); - cv::putText(frame, label.str(), brect.tl(), cv::FONT_HERSHEY_SIMPLEX, fontScale, cv::Scalar(0, 0, 0)); - - if (velocity > 3) - AddToHeatMap(brect); - } - } - } - - if (drawTrajectory) - { - cv::Scalar cl = m_colors[track.m_ID % m_colors.size()]; - - for (size_t j = 0; j < track.m_trace.size() - 1; ++j) - { - const TrajectoryPoint& pt1 = track.m_trace.at(j); - const TrajectoryPoint& pt2 = track.m_trace.at(j + 1); -#if (CV_VERSION_MAJOR >= 4) - cv::line(frame, ResizePoint(pt1.m_prediction), ResizePoint(pt2.m_prediction), cl, 1, cv::LINE_AA); -#else - cv::line(frame, ResizePoint(pt1.m_prediction), ResizePoint(pt2.m_prediction), cl, 1, CV_AA); -#endif - if (!pt2.m_hasRaw) - { -#if (CV_VERSION_MAJOR >= 4) - cv::circle(frame, ResizePoint(pt2.m_prediction), 4, cl, 1, cv::LINE_AA); -#else - cv::circle(frame, ResizePoint(pt2.m_prediction), 4, cl, 1, CV_AA); -#endif - } - } - } -} - -/// -/// \brief CarsCounting::InitTracker -/// \param grayFrame -/// -bool CarsCounting::InitTracker(cv::UMat frame) -{ - bool res = true; - - if (m_drawHeatMap) - { - if (frame.channels() == 3) - m_keyFrame = frame.getMat(cv::ACCESS_READ).clone(); - else - cv::cvtColor(frame, m_keyFrame, cv::COLOR_GRAY2BGR); - m_heatMap = cv::Mat(m_keyFrame.size(), CV_32FC1, cv::Scalar::all(0)); - } - - m_minObjWidth = frame.cols / 50; - - const int minStaticTime = 5; - - config_t config; - -#if 1 - switch (m_detectorType) - { - case tracking::Detectors::Yolo_Darknet: - break; - - case tracking::Detectors::DNN_OCV: -#if 1 - config.emplace("dnnTarget", "DNN_TARGET_CPU"); - 
config.emplace("dnnBackend", "DNN_BACKEND_OPENCV"); -#else - config.emplace("dnnTarget", "DNN_TARGET_CUDA"); - config.emplace("dnnBackend", "DNN_BACKEND_CUDA"); -#endif - break; - - default: - break; - } - - config.emplace("modelConfiguration", m_configFile); - config.emplace("modelBinary", m_weightsFile); - config.emplace("classNames", m_namesFile); - config.emplace("confidenceThreshold", "0.5"); - config.emplace("nmsThreshold", "0.4"); - config.emplace("swapRB", "0"); - config.emplace("maxCropRatio", "-1"); - - config.emplace("white_list", std::to_string((objtype_t)ObjectTypes::obj_person)); - config.emplace("white_list", std::to_string((objtype_t)ObjectTypes::obj_car)); - config.emplace("white_list", std::to_string((objtype_t)ObjectTypes::obj_bicycle)); - config.emplace("white_list", std::to_string((objtype_t)ObjectTypes::obj_motorbike)); - config.emplace("white_list", std::to_string((objtype_t)ObjectTypes::obj_bus)); - config.emplace("white_list", std::to_string((objtype_t)ObjectTypes::obj_truck)); - - m_detector = std::unique_ptr(CreateDetector(m_detectorType, config, frame)); - -#else // Background subtraction - -#if 1 - config.emplace("history", std::to_string(cvRound(10 * minStaticTime * m_fps))); - config.emplace("varThreshold", "16"); - config.emplace("detectShadows", "1"); - m_detector = std::unique_ptr(CreateDetector(tracking::Detectors::Motion_MOG2, config, frame)); -#else - config.emplace("minPixelStability", "15"); - config.emplace("maxPixelStability", "900"); - config.emplace("useHistory", "1"); - config.emplace("isParallel", "1"); - m_detector = std::unique_ptr(CreateDetector(tracking::Detectors::Motion_CNT, config, m_useLocalTracking, frame)); -#endif - -#endif - - if (m_detector.get()) - m_detector->SetMinObjectSize(cv::Size(m_minObjWidth, m_minObjWidth)); - else - res = false; - - if (res) - { - TrackerSettings settings; - settings.SetDistance(tracking::DistJaccard); - settings.m_kalmanType = tracking::KalmanLinear; - settings.m_filterGoal = 
tracking::FilterCenter; - settings.m_lostTrackType = tracking::TrackCSRT; // Use KCF tracker for collisions resolving. Used if m_filterGoal == tracking::FilterRect - settings.m_matchType = tracking::MatchHungrian; - settings.m_dt = 0.3f; // Delta time for Kalman filter - settings.m_accelNoiseMag = 0.2f; // Accel noise magnitude for Kalman filter - settings.m_distThres = 0.7f; // Distance threshold between region and object on two frames - settings.m_minAreaRadiusPix = frame.rows / 20.f; - settings.m_maximumAllowedSkippedFrames = cvRound(2 * m_fps); // Maximum allowed skipped frames - - settings.AddNearTypes(ObjectTypes::obj_car, ObjectTypes::obj_bus, false); - settings.AddNearTypes(ObjectTypes::obj_car, ObjectTypes::obj_truck, false); - settings.AddNearTypes(ObjectTypes::obj_person, ObjectTypes::obj_bicycle, true); - settings.AddNearTypes(ObjectTypes::obj_person, ObjectTypes::obj_motorbike, true); - - settings.m_useAbandonedDetection = false; - if (settings.m_useAbandonedDetection) - { - settings.m_minStaticTime = minStaticTime; - settings.m_maxStaticTime = 60; - settings.m_maximumAllowedSkippedFrames = cvRound(settings.m_minStaticTime * m_fps); // Maximum allowed skipped frames - settings.m_maxTraceLength = 2 * settings.m_maximumAllowedSkippedFrames; // Maximum trace length - } - else - { - settings.m_maximumAllowedSkippedFrames = cvRound(10 * m_fps); // Maximum allowed skipped frames - settings.m_maxTraceLength = cvRound(4 * m_fps); // Maximum trace length - } - - m_tracker = std::make_unique(settings); - } -#if 0 -#if 0 - std::vector framePoints{ cv::Point(420, 348), cv::Point(509, 283), cv::Point(731, 281), cv::Point(840, 343) }; - std::vector geoPoints{ cv::Point2f(45.526646, 5.974535), cv::Point2f(45.527566, 5.973849), cv::Point2f(45.527904, 5.974135), cv::Point2f(45.526867, 5.974826) }; -#else - std::vector framePoints{ cv::Point(1665, 746), cv::Point(246, 521), cv::Point(570, 282), cv::Point(1773, 378) }; - std::vector geoPoints{ cv::Point2f(30.258855, 
60.006536), cv::Point2f(30.258051, 60.006855), cv::Point2f(30.258080, 60.007414), cv::Point2f(30.259066, 60.007064) }; -#endif - m_geoParams.SetKeyPoints(framePoints, geoPoints); -#endif - return res; -} - -/// -/// \brief CarsCounting::DrawData -/// \param frame -/// -void CarsCounting::DrawData(cv::Mat frame, int framesCounter, int currTime) -{ - std::vector tracks; - m_tracker->GetTracks(tracks); - - if (m_showLogs) - std::cout << "Frame " << framesCounter << ": tracks = " << tracks.size() << ", time = " << currTime << std::endl; - - if (!m_geoParams.Empty()) - { - std::vector points = m_geoParams.GetFramePoints(); - for (size_t i = 0; i < points.size(); ++i) - { - cv::line(frame, points[i % points.size()], points[(i + 1) % points.size()], cv::Scalar(255, 255, 255), 1, cv::LINE_AA); - } - } - - for (const auto& track : tracks) - { - if (track.m_isStatic) - { - DrawTrack(frame, 1, track, true); - } - else - { - if (track.IsRobust(cvRound(m_fps / 4), // Minimal trajectory size - 0.8f, // Minimal ratio raw_trajectory_points / trajectory_lenght - cv::Size2f(0.1f, 8.0f)) // Min and max ratio: width / height - ) - { - DrawTrack(frame, 1, track, true); - - CheckLinesIntersection(track, static_cast(frame.cols), static_cast(frame.rows)); - } - } - } - //m_detector->CalcMotionMap(frame); - - for (const auto& rl : m_lines) - { - rl.Draw(frame); - } - - cv::Mat heatMap = DrawHeatMap(); - if (!heatMap.empty()) - cv::imshow("Heat map", heatMap); -} - -/// -/// \brief CarsCounting::AddLine -/// \param newLine -/// -void CarsCounting::AddLine(const RoadLine& newLine) -{ - m_lines.push_back(newLine); -} - -/// -/// \brief CarsCounting::GetLine -/// \param lineUid -/// \return -/// -bool CarsCounting::GetLine(unsigned int lineUid, RoadLine& line) -{ - for (const auto& rl : m_lines) - { - if (rl.m_uid == lineUid) - { - line = rl; - return true; - } - } - return false; -} - -/// -/// \brief CarsCounting::RemoveLine -/// \param lineUid -/// \return -/// -bool 
CarsCounting::RemoveLine(unsigned int lineUid) -{ - for (auto it = std::begin(m_lines); it != std::end(m_lines);) - { - if (it->m_uid == lineUid) - it = m_lines.erase(it); - else - ++it; - } - return false; -} - -/// -/// \brief CarsCounting::CheckLinesIntersection -/// \param track -/// -void CarsCounting::CheckLinesIntersection(const TrackingObject& track, float xMax, float yMax) -{ - auto Pti2f = [&](cv::Point pt) -> cv::Point2f - { - return cv::Point2f(pt.x / xMax, pt.y / yMax); - }; - - constexpr size_t minTrack = 5; - if (track.m_trace.size() >= minTrack) - { - for (auto& rl : m_lines) - { - if (m_lastIntersections.find(track.m_ID) == m_lastIntersections.end()) - { - if (rl.IsIntersect(Pti2f(track.m_trace[track.m_trace.size() - minTrack]), Pti2f(track.m_trace[track.m_trace.size() - 1]))) - m_lastIntersections.emplace(track.m_ID); - } - } - } -} - -/// -/// \brief CarsCounting::DrawHeatMap -/// -cv::Mat CarsCounting::DrawHeatMap() -{ - cv::Mat res; - if (!m_heatMap.empty()) - { - cv::normalize(m_heatMap, m_normHeatMap, 255, 0, cv::NORM_MINMAX, CV_8UC1); - cv::applyColorMap(m_normHeatMap, m_colorMap, cv::COLORMAP_HOT); - cv::bitwise_or(m_keyFrame, m_colorMap, res); - } - return res; -} - -/// -/// \brief CarsCounting::AddToHeatMap -/// -void CarsCounting::AddToHeatMap(const cv::Rect& rect) -{ - if (m_heatMap.empty()) - return; - - constexpr float w = 0.001f; - for (int y = 0; y < rect.height; ++y) - { - float* heatPtr = m_heatMap.ptr(rect.y + y) + rect.x; - for (int x = 0; x < rect.width; ++x) - { - heatPtr[x] += w; - } - } -} diff --git a/cars_counting/main.cpp b/cars_counting/main.cpp deleted file mode 100644 index 211f8ad6a..000000000 --- a/cars_counting/main.cpp +++ /dev/null @@ -1,59 +0,0 @@ -#include "CarsCounting.h" - -#include -#include - -// ---------------------------------------------------------------------- - -static void Help() -{ - printf("\nExamples of the CarsCounting\n" - "Usage: \n" - " ./CarsCounting [--start_frame]= [--end_frame]= 
[--end_delay]= [--out]= [--show_logs]= \n\n" - "Press:\n" - "\'m\' key for change mode: play|pause. When video is paused you can press any key for get next frame. \n\n" - "Press Esc to exit from video \n\n" - ); -} - -const char* keys = -{ - "{ @1 |../data/atrium.avi | movie file | }" - "{ inf inference |darknet | Type of inference framework: darknet, ocvdnn | }" - "{ w weights | | Weights of neural network: yolov4.weights | }" - "{ c config | | Config file of neural network: yolov4.cfg | }" - "{ n names | | File with classes names: coco.names | }" - "{ sf start_frame |0 | Start a video from this position | }" - "{ ef end_frame |0 | Play a video to this position (if 0 then played to the end of file) | }" - "{ ed end_delay |0 | Delay in milliseconds after video ending | }" - "{ o out | | Name of result video file | }" - "{ sl show_logs |1 | Show Trackers logs | }" - "{ g gpu |0 | Use OpenCL acceleration | }" - "{ hm heat_map |0 | Draw heat map | }" -}; - -// ---------------------------------------------------------------------- - -int main(int argc, char** argv) -{ - Help(); - - cv::CommandLineParser parser(argc, argv, keys); - - bool useOCL = parser.get("gpu") ? 1 : 0; - cv::ocl::setUseOpenCL(useOCL); - std::cout << (cv::ocl::useOpenCL() ? 
"OpenCL is enabled" : "OpenCL not used") << std::endl; - - CarsCounting cars_counting(parser); - - cars_counting.AddLine(RoadLine(cv::Point2f(0.1f, 0.7f), cv::Point2f(0.47f, 0.7f), 0)); - cars_counting.AddLine(RoadLine(cv::Point2f(0.52f, 0.6f), cv::Point2f(0.8f, 0.6f), 1)); - - cars_counting.Process(); - -#ifndef SILENT_WORK - cv::destroyAllWindows(); -#endif - - return 0; -} diff --git a/data/MobileNetSSD_deploy.caffemodel b/data/MobileNetSSD_deploy.caffemodel deleted file mode 100644 index 7104f06cd..000000000 Binary files a/data/MobileNetSSD_deploy.caffemodel and /dev/null differ diff --git a/data/MobileNetSSD_deploy.prototxt b/data/MobileNetSSD_deploy.prototxt deleted file mode 100644 index fdc812628..000000000 --- a/data/MobileNetSSD_deploy.prototxt +++ /dev/null @@ -1,1912 +0,0 @@ -name: "MobileNet-SSD" -input: "data" -input_shape { - dim: 1 - dim: 3 - dim: 300 - dim: 300 -} -layer { - name: "conv0" - type: "Convolution" - bottom: "data" - top: "conv0" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 32 - pad: 1 - kernel_size: 3 - stride: 2 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv0/relu" - type: "ReLU" - bottom: "conv0" - top: "conv0" -} -layer { - name: "conv1/dw" - type: "Convolution" - bottom: "conv0" - top: "conv1/dw" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 32 - pad: 1 - kernel_size: 3 - group: 32 - engine: CAFFE - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv1/dw/relu" - type: "ReLU" - bottom: "conv1/dw" - top: "conv1/dw" -} -layer { - name: "conv1" - type: "Convolution" - bottom: "conv1/dw" - top: "conv1" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 64 - 
kernel_size: 1 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv1/relu" - type: "ReLU" - bottom: "conv1" - top: "conv1" -} -layer { - name: "conv2/dw" - type: "Convolution" - bottom: "conv1" - top: "conv2/dw" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 64 - pad: 1 - kernel_size: 3 - stride: 2 - group: 64 - engine: CAFFE - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv2/dw/relu" - type: "ReLU" - bottom: "conv2/dw" - top: "conv2/dw" -} -layer { - name: "conv2" - type: "Convolution" - bottom: "conv2/dw" - top: "conv2" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 128 - kernel_size: 1 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv2/relu" - type: "ReLU" - bottom: "conv2" - top: "conv2" -} -layer { - name: "conv3/dw" - type: "Convolution" - bottom: "conv2" - top: "conv3/dw" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 128 - pad: 1 - kernel_size: 3 - group: 128 - engine: CAFFE - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv3/dw/relu" - type: "ReLU" - bottom: "conv3/dw" - top: "conv3/dw" -} -layer { - name: "conv3" - type: "Convolution" - bottom: "conv3/dw" - top: "conv3" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 128 - kernel_size: 1 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv3/relu" - type: "ReLU" - bottom: "conv3" - top: "conv3" -} -layer { - name: "conv4/dw" - type: "Convolution" - 
bottom: "conv3" - top: "conv4/dw" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 128 - pad: 1 - kernel_size: 3 - stride: 2 - group: 128 - engine: CAFFE - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv4/dw/relu" - type: "ReLU" - bottom: "conv4/dw" - top: "conv4/dw" -} -layer { - name: "conv4" - type: "Convolution" - bottom: "conv4/dw" - top: "conv4" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 256 - kernel_size: 1 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv4/relu" - type: "ReLU" - bottom: "conv4" - top: "conv4" -} -layer { - name: "conv5/dw" - type: "Convolution" - bottom: "conv4" - top: "conv5/dw" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 256 - pad: 1 - kernel_size: 3 - group: 256 - engine: CAFFE - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv5/dw/relu" - type: "ReLU" - bottom: "conv5/dw" - top: "conv5/dw" -} -layer { - name: "conv5" - type: "Convolution" - bottom: "conv5/dw" - top: "conv5" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 256 - kernel_size: 1 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv5/relu" - type: "ReLU" - bottom: "conv5" - top: "conv5" -} -layer { - name: "conv6/dw" - type: "Convolution" - bottom: "conv5" - top: "conv6/dw" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 256 - pad: 1 - kernel_size: 3 - stride: 2 - group: 256 - engine: CAFFE - 
weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv6/dw/relu" - type: "ReLU" - bottom: "conv6/dw" - top: "conv6/dw" -} -layer { - name: "conv6" - type: "Convolution" - bottom: "conv6/dw" - top: "conv6" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 512 - kernel_size: 1 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv6/relu" - type: "ReLU" - bottom: "conv6" - top: "conv6" -} -layer { - name: "conv7/dw" - type: "Convolution" - bottom: "conv6" - top: "conv7/dw" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 512 - pad: 1 - kernel_size: 3 - group: 512 - engine: CAFFE - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv7/dw/relu" - type: "ReLU" - bottom: "conv7/dw" - top: "conv7/dw" -} -layer { - name: "conv7" - type: "Convolution" - bottom: "conv7/dw" - top: "conv7" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 512 - kernel_size: 1 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv7/relu" - type: "ReLU" - bottom: "conv7" - top: "conv7" -} -layer { - name: "conv8/dw" - type: "Convolution" - bottom: "conv7" - top: "conv8/dw" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 512 - pad: 1 - kernel_size: 3 - group: 512 - engine: CAFFE - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv8/dw/relu" - type: "ReLU" - bottom: "conv8/dw" - top: "conv8/dw" -} -layer { - name: "conv8" - type: "Convolution" - bottom: "conv8/dw" - 
top: "conv8" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 512 - kernel_size: 1 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv8/relu" - type: "ReLU" - bottom: "conv8" - top: "conv8" -} -layer { - name: "conv9/dw" - type: "Convolution" - bottom: "conv8" - top: "conv9/dw" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 512 - pad: 1 - kernel_size: 3 - group: 512 - engine: CAFFE - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv9/dw/relu" - type: "ReLU" - bottom: "conv9/dw" - top: "conv9/dw" -} -layer { - name: "conv9" - type: "Convolution" - bottom: "conv9/dw" - top: "conv9" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 512 - kernel_size: 1 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv9/relu" - type: "ReLU" - bottom: "conv9" - top: "conv9" -} -layer { - name: "conv10/dw" - type: "Convolution" - bottom: "conv9" - top: "conv10/dw" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 512 - pad: 1 - kernel_size: 3 - group: 512 - engine: CAFFE - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv10/dw/relu" - type: "ReLU" - bottom: "conv10/dw" - top: "conv10/dw" -} -layer { - name: "conv10" - type: "Convolution" - bottom: "conv10/dw" - top: "conv10" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 512 - kernel_size: 1 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 
0.0 - } - } -} -layer { - name: "conv10/relu" - type: "ReLU" - bottom: "conv10" - top: "conv10" -} -layer { - name: "conv11/dw" - type: "Convolution" - bottom: "conv10" - top: "conv11/dw" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 512 - pad: 1 - kernel_size: 3 - group: 512 - engine: CAFFE - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv11/dw/relu" - type: "ReLU" - bottom: "conv11/dw" - top: "conv11/dw" -} -layer { - name: "conv11" - type: "Convolution" - bottom: "conv11/dw" - top: "conv11" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 512 - kernel_size: 1 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv11/relu" - type: "ReLU" - bottom: "conv11" - top: "conv11" -} -layer { - name: "conv12/dw" - type: "Convolution" - bottom: "conv11" - top: "conv12/dw" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 512 - pad: 1 - kernel_size: 3 - stride: 2 - group: 512 - engine: CAFFE - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv12/dw/relu" - type: "ReLU" - bottom: "conv12/dw" - top: "conv12/dw" -} -layer { - name: "conv12" - type: "Convolution" - bottom: "conv12/dw" - top: "conv12" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 1024 - kernel_size: 1 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv12/relu" - type: "ReLU" - bottom: "conv12" - top: "conv12" -} -layer { - name: "conv13/dw" - type: "Convolution" - bottom: "conv12" - top: "conv13/dw" - param { - lr_mult: 1.0 - 
decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 1024 - pad: 1 - kernel_size: 3 - group: 1024 - engine: CAFFE - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv13/dw/relu" - type: "ReLU" - bottom: "conv13/dw" - top: "conv13/dw" -} -layer { - name: "conv13" - type: "Convolution" - bottom: "conv13/dw" - top: "conv13" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 1024 - kernel_size: 1 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv13/relu" - type: "ReLU" - bottom: "conv13" - top: "conv13" -} -layer { - name: "conv14_1" - type: "Convolution" - bottom: "conv13" - top: "conv14_1" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 256 - kernel_size: 1 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv14_1/relu" - type: "ReLU" - bottom: "conv14_1" - top: "conv14_1" -} -layer { - name: "conv14_2" - type: "Convolution" - bottom: "conv14_1" - top: "conv14_2" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 512 - pad: 1 - kernel_size: 3 - stride: 2 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv14_2/relu" - type: "ReLU" - bottom: "conv14_2" - top: "conv14_2" -} -layer { - name: "conv15_1" - type: "Convolution" - bottom: "conv14_2" - top: "conv15_1" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 128 - kernel_size: 1 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: 
"conv15_1/relu" - type: "ReLU" - bottom: "conv15_1" - top: "conv15_1" -} -layer { - name: "conv15_2" - type: "Convolution" - bottom: "conv15_1" - top: "conv15_2" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 256 - pad: 1 - kernel_size: 3 - stride: 2 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv15_2/relu" - type: "ReLU" - bottom: "conv15_2" - top: "conv15_2" -} -layer { - name: "conv16_1" - type: "Convolution" - bottom: "conv15_2" - top: "conv16_1" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 128 - kernel_size: 1 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv16_1/relu" - type: "ReLU" - bottom: "conv16_1" - top: "conv16_1" -} -layer { - name: "conv16_2" - type: "Convolution" - bottom: "conv16_1" - top: "conv16_2" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 256 - pad: 1 - kernel_size: 3 - stride: 2 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv16_2/relu" - type: "ReLU" - bottom: "conv16_2" - top: "conv16_2" -} -layer { - name: "conv17_1" - type: "Convolution" - bottom: "conv16_2" - top: "conv17_1" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 64 - kernel_size: 1 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv17_1/relu" - type: "ReLU" - bottom: "conv17_1" - top: "conv17_1" -} -layer { - name: "conv17_2" - type: "Convolution" - bottom: "conv17_1" - top: "conv17_2" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - 
} - convolution_param { - num_output: 128 - pad: 1 - kernel_size: 3 - stride: 2 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv17_2/relu" - type: "ReLU" - bottom: "conv17_2" - top: "conv17_2" -} -layer { - name: "conv11_mbox_loc" - type: "Convolution" - bottom: "conv11" - top: "conv11_mbox_loc" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 12 - kernel_size: 1 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv11_mbox_loc_perm" - type: "Permute" - bottom: "conv11_mbox_loc" - top: "conv11_mbox_loc_perm" - permute_param { - order: 0 - order: 2 - order: 3 - order: 1 - } -} -layer { - name: "conv11_mbox_loc_flat" - type: "Flatten" - bottom: "conv11_mbox_loc_perm" - top: "conv11_mbox_loc_flat" - flatten_param { - axis: 1 - } -} -layer { - name: "conv11_mbox_conf" - type: "Convolution" - bottom: "conv11" - top: "conv11_mbox_conf" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 63 - kernel_size: 1 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv11_mbox_conf_perm" - type: "Permute" - bottom: "conv11_mbox_conf" - top: "conv11_mbox_conf_perm" - permute_param { - order: 0 - order: 2 - order: 3 - order: 1 - } -} -layer { - name: "conv11_mbox_conf_flat" - type: "Flatten" - bottom: "conv11_mbox_conf_perm" - top: "conv11_mbox_conf_flat" - flatten_param { - axis: 1 - } -} -layer { - name: "conv11_mbox_priorbox" - type: "PriorBox" - bottom: "conv11" - bottom: "data" - top: "conv11_mbox_priorbox" - prior_box_param { - min_size: 60.0 - aspect_ratio: 2.0 - flip: true - clip: false - variance: 0.1 - variance: 0.1 - variance: 0.2 - variance: 0.2 - offset: 0.5 - } -} -layer { - name: "conv13_mbox_loc" - type: 
"Convolution" - bottom: "conv13" - top: "conv13_mbox_loc" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 24 - kernel_size: 1 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv13_mbox_loc_perm" - type: "Permute" - bottom: "conv13_mbox_loc" - top: "conv13_mbox_loc_perm" - permute_param { - order: 0 - order: 2 - order: 3 - order: 1 - } -} -layer { - name: "conv13_mbox_loc_flat" - type: "Flatten" - bottom: "conv13_mbox_loc_perm" - top: "conv13_mbox_loc_flat" - flatten_param { - axis: 1 - } -} -layer { - name: "conv13_mbox_conf" - type: "Convolution" - bottom: "conv13" - top: "conv13_mbox_conf" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 126 - kernel_size: 1 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv13_mbox_conf_perm" - type: "Permute" - bottom: "conv13_mbox_conf" - top: "conv13_mbox_conf_perm" - permute_param { - order: 0 - order: 2 - order: 3 - order: 1 - } -} -layer { - name: "conv13_mbox_conf_flat" - type: "Flatten" - bottom: "conv13_mbox_conf_perm" - top: "conv13_mbox_conf_flat" - flatten_param { - axis: 1 - } -} -layer { - name: "conv13_mbox_priorbox" - type: "PriorBox" - bottom: "conv13" - bottom: "data" - top: "conv13_mbox_priorbox" - prior_box_param { - min_size: 105.0 - max_size: 150.0 - aspect_ratio: 2.0 - aspect_ratio: 3.0 - flip: true - clip: false - variance: 0.1 - variance: 0.1 - variance: 0.2 - variance: 0.2 - offset: 0.5 - } -} -layer { - name: "conv14_2_mbox_loc" - type: "Convolution" - bottom: "conv14_2" - top: "conv14_2_mbox_loc" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 24 - kernel_size: 1 - weight_filler { - type: "msra" - } - bias_filler { - type: 
"constant" - value: 0.0 - } - } -} -layer { - name: "conv14_2_mbox_loc_perm" - type: "Permute" - bottom: "conv14_2_mbox_loc" - top: "conv14_2_mbox_loc_perm" - permute_param { - order: 0 - order: 2 - order: 3 - order: 1 - } -} -layer { - name: "conv14_2_mbox_loc_flat" - type: "Flatten" - bottom: "conv14_2_mbox_loc_perm" - top: "conv14_2_mbox_loc_flat" - flatten_param { - axis: 1 - } -} -layer { - name: "conv14_2_mbox_conf" - type: "Convolution" - bottom: "conv14_2" - top: "conv14_2_mbox_conf" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 126 - kernel_size: 1 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv14_2_mbox_conf_perm" - type: "Permute" - bottom: "conv14_2_mbox_conf" - top: "conv14_2_mbox_conf_perm" - permute_param { - order: 0 - order: 2 - order: 3 - order: 1 - } -} -layer { - name: "conv14_2_mbox_conf_flat" - type: "Flatten" - bottom: "conv14_2_mbox_conf_perm" - top: "conv14_2_mbox_conf_flat" - flatten_param { - axis: 1 - } -} -layer { - name: "conv14_2_mbox_priorbox" - type: "PriorBox" - bottom: "conv14_2" - bottom: "data" - top: "conv14_2_mbox_priorbox" - prior_box_param { - min_size: 150.0 - max_size: 195.0 - aspect_ratio: 2.0 - aspect_ratio: 3.0 - flip: true - clip: false - variance: 0.1 - variance: 0.1 - variance: 0.2 - variance: 0.2 - offset: 0.5 - } -} -layer { - name: "conv15_2_mbox_loc" - type: "Convolution" - bottom: "conv15_2" - top: "conv15_2_mbox_loc" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 24 - kernel_size: 1 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv15_2_mbox_loc_perm" - type: "Permute" - bottom: "conv15_2_mbox_loc" - top: "conv15_2_mbox_loc_perm" - permute_param { - order: 0 - order: 2 - order: 3 - order: 1 - } -} -layer { - 
name: "conv15_2_mbox_loc_flat" - type: "Flatten" - bottom: "conv15_2_mbox_loc_perm" - top: "conv15_2_mbox_loc_flat" - flatten_param { - axis: 1 - } -} -layer { - name: "conv15_2_mbox_conf" - type: "Convolution" - bottom: "conv15_2" - top: "conv15_2_mbox_conf" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 126 - kernel_size: 1 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv15_2_mbox_conf_perm" - type: "Permute" - bottom: "conv15_2_mbox_conf" - top: "conv15_2_mbox_conf_perm" - permute_param { - order: 0 - order: 2 - order: 3 - order: 1 - } -} -layer { - name: "conv15_2_mbox_conf_flat" - type: "Flatten" - bottom: "conv15_2_mbox_conf_perm" - top: "conv15_2_mbox_conf_flat" - flatten_param { - axis: 1 - } -} -layer { - name: "conv15_2_mbox_priorbox" - type: "PriorBox" - bottom: "conv15_2" - bottom: "data" - top: "conv15_2_mbox_priorbox" - prior_box_param { - min_size: 195.0 - max_size: 240.0 - aspect_ratio: 2.0 - aspect_ratio: 3.0 - flip: true - clip: false - variance: 0.1 - variance: 0.1 - variance: 0.2 - variance: 0.2 - offset: 0.5 - } -} -layer { - name: "conv16_2_mbox_loc" - type: "Convolution" - bottom: "conv16_2" - top: "conv16_2_mbox_loc" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 24 - kernel_size: 1 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv16_2_mbox_loc_perm" - type: "Permute" - bottom: "conv16_2_mbox_loc" - top: "conv16_2_mbox_loc_perm" - permute_param { - order: 0 - order: 2 - order: 3 - order: 1 - } -} -layer { - name: "conv16_2_mbox_loc_flat" - type: "Flatten" - bottom: "conv16_2_mbox_loc_perm" - top: "conv16_2_mbox_loc_flat" - flatten_param { - axis: 1 - } -} -layer { - name: "conv16_2_mbox_conf" - type: "Convolution" - bottom: "conv16_2" - top: 
"conv16_2_mbox_conf" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 126 - kernel_size: 1 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv16_2_mbox_conf_perm" - type: "Permute" - bottom: "conv16_2_mbox_conf" - top: "conv16_2_mbox_conf_perm" - permute_param { - order: 0 - order: 2 - order: 3 - order: 1 - } -} -layer { - name: "conv16_2_mbox_conf_flat" - type: "Flatten" - bottom: "conv16_2_mbox_conf_perm" - top: "conv16_2_mbox_conf_flat" - flatten_param { - axis: 1 - } -} -layer { - name: "conv16_2_mbox_priorbox" - type: "PriorBox" - bottom: "conv16_2" - bottom: "data" - top: "conv16_2_mbox_priorbox" - prior_box_param { - min_size: 240.0 - max_size: 285.0 - aspect_ratio: 2.0 - aspect_ratio: 3.0 - flip: true - clip: false - variance: 0.1 - variance: 0.1 - variance: 0.2 - variance: 0.2 - offset: 0.5 - } -} -layer { - name: "conv17_2_mbox_loc" - type: "Convolution" - bottom: "conv17_2" - top: "conv17_2_mbox_loc" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 24 - kernel_size: 1 - weight_filler { - type: "msra" - } - bias_filler { - type: "constant" - value: 0.0 - } - } -} -layer { - name: "conv17_2_mbox_loc_perm" - type: "Permute" - bottom: "conv17_2_mbox_loc" - top: "conv17_2_mbox_loc_perm" - permute_param { - order: 0 - order: 2 - order: 3 - order: 1 - } -} -layer { - name: "conv17_2_mbox_loc_flat" - type: "Flatten" - bottom: "conv17_2_mbox_loc_perm" - top: "conv17_2_mbox_loc_flat" - flatten_param { - axis: 1 - } -} -layer { - name: "conv17_2_mbox_conf" - type: "Convolution" - bottom: "conv17_2" - top: "conv17_2_mbox_conf" - param { - lr_mult: 1.0 - decay_mult: 1.0 - } - param { - lr_mult: 2.0 - decay_mult: 0.0 - } - convolution_param { - num_output: 126 - kernel_size: 1 - weight_filler { - type: "msra" - } - bias_filler { - type: 
"constant" - value: 0.0 - } - } -} -layer { - name: "conv17_2_mbox_conf_perm" - type: "Permute" - bottom: "conv17_2_mbox_conf" - top: "conv17_2_mbox_conf_perm" - permute_param { - order: 0 - order: 2 - order: 3 - order: 1 - } -} -layer { - name: "conv17_2_mbox_conf_flat" - type: "Flatten" - bottom: "conv17_2_mbox_conf_perm" - top: "conv17_2_mbox_conf_flat" - flatten_param { - axis: 1 - } -} -layer { - name: "conv17_2_mbox_priorbox" - type: "PriorBox" - bottom: "conv17_2" - bottom: "data" - top: "conv17_2_mbox_priorbox" - prior_box_param { - min_size: 285.0 - max_size: 300.0 - aspect_ratio: 2.0 - aspect_ratio: 3.0 - flip: true - clip: false - variance: 0.1 - variance: 0.1 - variance: 0.2 - variance: 0.2 - offset: 0.5 - } -} -layer { - name: "mbox_loc" - type: "Concat" - bottom: "conv11_mbox_loc_flat" - bottom: "conv13_mbox_loc_flat" - bottom: "conv14_2_mbox_loc_flat" - bottom: "conv15_2_mbox_loc_flat" - bottom: "conv16_2_mbox_loc_flat" - bottom: "conv17_2_mbox_loc_flat" - top: "mbox_loc" - concat_param { - axis: 1 - } -} -layer { - name: "mbox_conf" - type: "Concat" - bottom: "conv11_mbox_conf_flat" - bottom: "conv13_mbox_conf_flat" - bottom: "conv14_2_mbox_conf_flat" - bottom: "conv15_2_mbox_conf_flat" - bottom: "conv16_2_mbox_conf_flat" - bottom: "conv17_2_mbox_conf_flat" - top: "mbox_conf" - concat_param { - axis: 1 - } -} -layer { - name: "mbox_priorbox" - type: "Concat" - bottom: "conv11_mbox_priorbox" - bottom: "conv13_mbox_priorbox" - bottom: "conv14_2_mbox_priorbox" - bottom: "conv15_2_mbox_priorbox" - bottom: "conv16_2_mbox_priorbox" - bottom: "conv17_2_mbox_priorbox" - top: "mbox_priorbox" - concat_param { - axis: 2 - } -} -layer { - name: "mbox_conf_reshape" - type: "Reshape" - bottom: "mbox_conf" - top: "mbox_conf_reshape" - reshape_param { - shape { - dim: 0 - dim: -1 - dim: 21 - } - } -} -layer { - name: "mbox_conf_softmax" - type: "Softmax" - bottom: "mbox_conf_reshape" - top: "mbox_conf_softmax" - softmax_param { - axis: 2 - } -} -layer { - name: 
"mbox_conf_flatten" - type: "Flatten" - bottom: "mbox_conf_softmax" - top: "mbox_conf_flatten" - flatten_param { - axis: 1 - } -} -layer { - name: "detection_out" - type: "DetectionOutput" - bottom: "mbox_loc" - bottom: "mbox_conf_flatten" - bottom: "mbox_priorbox" - top: "detection_out" - include { - phase: TEST - } - detection_output_param { - num_classes: 21 - share_location: true - background_label_id: 0 - nms_param { - nms_threshold: 0.45 - top_k: 100 - } - code_type: CENTER_SIZE - keep_top_k: 100 - confidence_threshold: 0.25 - } -} diff --git a/data/cmake_vs2017.bat b/data/cmake_vs2017.bat index f5061fe1e..810a1c36d 100644 --- a/data/cmake_vs2017.bat +++ b/data/cmake_vs2017.bat @@ -11,9 +11,9 @@ cmake.exe . .. -G "Visual Studio 15 2017 Win64" ^ -DBUILD_ASYNC_DETECTOR=ON ^ -DBUILD_CARS_COUNTING=ON ^ -DBUILD_YOLO_LIB=ON ^ - -DCUDNN_INCLUDE_DIR=C:/cudnn-10.0-windows10-x64-v7.6.5.32/cuda/include ^ - -DCUDNN_LIBRARY=C:/cudnn-10.0-windows10-x64-v7.6.5.32/cuda/lib/x64/cudnn.lib ^ + -DCUDNN_INCLUDE_DIR=C:/cudnn-11.1-windows-x64-v8.0.5.39/cuda/include ^ + -DCUDNN_LIBRARY=C:/cudnn-11.1-windows-x64-v8.0.5.39/cuda/lib/x64/cudnn.lib ^ -DBUILD_YOLO_TENSORRT=ON ^ - -DTensorRT_LIBRARY=C:/TensorRT-5.1.5.0/lib/*.lib ^ - -DTensorRT_INCLUDE_DIR=C:/TensorRT-5.1.5.0/include + -DTensorRT_LIBRARY=C:/TensorRT-7.2.3.4/lib/*.lib ^ + -DTensorRT_INCLUDE_DIR=C:/TensorRT-7.2.3.4/include cmake.exe --build . -j 6 --config Release diff --git a/data/cmake_vs2022.bat b/data/cmake_vs2022.bat new file mode 100644 index 000000000..f554e4ba9 --- /dev/null +++ b/data/cmake_vs2022.bat @@ -0,0 +1,19 @@ +cd .. +md build +cd build +cmake.exe . .. 
-G "Visual Studio 17 2022" -A "x64" ^ + -DOpenCV_DIR=C:/work/libraries/opencv/opencv_64_2022 ^ + -DUSE_OCV_BGFG=ON ^ + -DUSE_OCV_KCF=ON ^ + -DUSE_OCV_UKF=ON ^ + -DSILENT_WORK=OFF ^ + -DBUILD_EXAMPLES=ON ^ + -DBUILD_ASYNC_DETECTOR=ON ^ + -DBUILD_CARS_COUNTING=ON ^ + -DBUILD_YOLO_LIB=ON ^ + -DCUDNN_INCLUDE_DIR=C:/cuda/cudnn-windows-x86_64-8.6.0.163_cuda11/include ^ + -DCUDNN_LIBRARY=C:/cuda/cudnn-windows-x86_64-8.6.0.163_cuda11/lib/x64/cudnn.lib ^ + -DBUILD_YOLO_TENSORRT=ON ^ + -DTensorRT_LIBRARY=C:/cuda/TensorRT-8.4.3.1/lib/*.lib ^ + -DTensorRT_INCLUDE_DIR=C:/cuda/TensorRT-8.4.3.1/include +cmake.exe --build . -j 6 --config Release diff --git a/data/coco.data b/data/coco.data deleted file mode 100644 index 30038417c..000000000 --- a/data/coco.data +++ /dev/null @@ -1,8 +0,0 @@ -classes= 80 -train = /home/pjreddie/data/coco/trainvalno5k.txt -valid = coco_testdev -#valid = data/coco_val_5k.list -names = data/coco.names -backup = /home/pjreddie/backup/ -eval=coco - diff --git a/data/coco.names b/data/coco/coco.names similarity index 100% rename from data/coco.names rename to data/coco/coco.names diff --git a/data/coco/coco_91.names b/data/coco/coco_91.names new file mode 100644 index 000000000..c5b8f040d --- /dev/null +++ b/data/coco/coco_91.names @@ -0,0 +1,91 @@ +person +bicycle +car +motorbike +aeroplane +bus +train +truck +boat +trafficlight +firehydrant +streetsign +stopsign +parkingmeter +bench +bird +cat +dog +horse +sheep +cow +elephant +bear +zebra +giraffe +hat +backpack +umbrella +shoe +eyeglasses +handbag +tie +suitcase +frisbee +skis +snowboard +sportsball +kite +baseballbat +baseballglove +skateboard +surfboard +tennisracket +bottle +plate +wineglass +cup +fork +knife +spoon +bowl +banana +apple +sandwich +orange +broccoli +carrot +hotdog +pizza +donut +cake +chair +sofa +pottedplant +bed +mirror +diningtable +window +desk +toilet +door +tvmonitor +laptop +mouse +remote +keyboard +cellphone +microwave +oven +toaster +sink +refrigerator +blender +book 
+clock +vase +scissors +teddybear +hairdrier +toothbrush +hairbrush diff --git a/data/coco/full.names b/data/coco/full.names new file mode 100644 index 000000000..ca76c80b5 --- /dev/null +++ b/data/coco/full.names @@ -0,0 +1,80 @@ +person +bicycle +car +motorbike +aeroplane +bus +train +truck +boat +traffic light +fire hydrant +stop sign +parking meter +bench +bird +cat +dog +horse +sheep +cow +elephant +bear +zebra +giraffe +backpack +umbrella +handbag +tie +suitcase +frisbee +skis +snowboard +sports ball +kite +baseball bat +baseball glove +skateboard +surfboard +tennis racket +bottle +wine glass +cup +fork +knife +spoon +bowl +banana +apple +sandwich +orange +broccoli +carrot +hot dog +pizza +donut +cake +chair +sofa +pottedplant +bed +diningtable +toilet +tvmonitor +laptop +mouse +remote +keyboard +cell phone +microwave +oven +toaster +sink +refrigerator +book +clock +vase +scissors +teddy bear +hair drier +toothbrush diff --git a/data/coco/white_full.names b/data/coco/white_full.names new file mode 100644 index 000000000..4e53804f6 --- /dev/null +++ b/data/coco/white_full.names @@ -0,0 +1,9 @@ +person +bicycle +car +motorbike +aeroplane +bus +train +truck +boat diff --git a/data/combined.txt.model b/data/combined.txt.model deleted file mode 100644 index d76ef96cd..000000000 --- a/data/combined.txt.model +++ /dev/null @@ -1,6151 +0,0 @@ -solver_type L2R_L2LOSS_SVC -nr_class 2 -label 1 -1 -nr_feature 6144 -bias 1 -w --0.005003520347911358 -0.002069803013184546 -0.005174648715386205 -0.005770423962997794 --0.01448714985262205 --0.0367621344258923 --0.02794459664664868 -0.01101089457859012 -0.008666665134766542 --0.02012648943416123 -0.06575279828180118 -0.02424015059444452 --0.02078629004738948 --0.0174857471188182 -0.01500355425557434 -0.00630631014205099 -0.01284952839634534 --0.01440907292094286 -0.03312943265799884 --0.01037956465816266 -0.01091253397466537 --0.008546722578458163 --0.000137242882255898 --0.006039288632080092 -0.02785929565990963 
-0.01875779505813649 -0.001523177575111411 -0.001975857592573251 -0.0355570028413526 -0.00222207838522583 -0.01676492228306924 --0.014860750409855 --0.0007474025127886761 -0.001263892979786343 -0.01315113524375824 --0.004714853506517448 --0.02282390016881265 --0.005469724342330157 --0.02510834369198174 -0.01764526328517421 --0.007660696427159833 -0.004981475262397511 --0.01690123403309758 -0.006578004626564695 --0.01275568672542528 --0.002289109263499082 --0.0003020207915855601 --0.0247449283333634 --0.004337326818695084 --0.02127970823354044 --0.00205813933788758 --0.00697602903805972 -0.0378438461173811 -0.0271264404242818 --0.01749792865161751 --0.02820862901581239 --0.02182719361448771 -0.03875054149879911 -0.02987474445180364 -0.008319553173496204 -0.01863953592520786 --0.01377758079542864 -0.02447160859508605 --0.006161226087019822 -0.002664889266725591 --0.004195764438625247 -0.0258392781694716 -0.03597235631195608 --0.02312395751737616 --0.0264391761526756 -0.008879937882809405 -0.01317848370247718 -0.01442150630026301 -0.01147596267241498 -0.008140992211158772 -0.005028185738844742 -0.006646845288083352 --0.03718286608957103 --0.003187494927532327 -0.02737291596187517 --0.001307662724359647 -0.01299300062055115 --0.0077356816468095 --0.02314412706978508 -0.007754396219211935 -0.00237656846180913 --0.001021865536112009 --0.01633714512603848 -0.03010638215034521 -0.001959273739762304 --0.01388451168609694 --0.02902924355619753 --0.006209452098316521 --0.04394066992596347 -0.005438391961759498 -0.0004430470439009493 -0.01839911411320389 -0.01312083096711165 --0.003596098504759031 -0.008205299317585775 -0.01158208655470198 -0.01143715477397159 --0.01337154284471177 -0.01332920143227819 --0.01079045366246222 --0.002444117903920587 -0.0004225568703275601 -0.008454014893288557 -0.01382371036393016 --0.0008277937384174614 --0.003095762379452143 -0.005920876257174643 --0.018435862770501 --0.01325746795130324 -0.01572515069785909 --0.03636954142028007 
-0.03327623206298854 --0.008945717976554125 --0.01512813884792744 -0.002350702483139244 -0.01876030251118957 -0.01426424964277364 --0.0005273014953322304 --0.02554912327528925 -0.02471023092682494 -0.0005748998346132857 --0.001425466379099617 -0.001345201563785435 -0.009799302154985012 --0.001669371383296941 --0.03301632929190409 -0.001847080476171332 -0.0001326086229401944 --0.0209294907115855 --0.006623246612253285 -0.004417154127878315 -0.01134309840262906 --0.02975562097965451 -0.0137113098352971 --0.01336515888658423 --0.03056310050626673 --0.01643979100209925 --0.02140371227986727 --0.003964933677113916 --0.01220623609634657 -0.001639161831019996 --0.02070818633068418 -0.01311884189460462 -0.005854753472316884 -0.01308041798150547 -0.002198357793834611 --0.0007248654118830881 -0.04799990767843172 -0.003443565906509785 --0.0172558476936482 -0.0325125542177515 -0.0360486890477587 -0.01615102901872734 -0.02015870522725214 -0.0005302504072158193 --0.01491375362800119 --0.03574601936142056 --0.03892317418311501 --0.009719590114107404 --0.01384706348878529 --0.02032916461597326 --0.007949499630951016 -0.03510776808035251 --0.0007874502885196378 -0.006597192831704409 --0.00103788408425645 --0.03949375140812197 -0.01678745005331527 -0.009661148644304823 -0.0008120819316022267 --0.01360737543568321 -0.002153041211443719 --0.03278667140273392 --0.03419092177714698 -0.009065814936435299 -0.01915344068421205 --0.0166592764738116 --0.04608819472223481 -0.006810744023721218 -0.006806002439833975 --0.02293703473477056 --0.05606649769217235 -0.005418833639128474 -0.009339001173000717 -0.003557382462747092 --0.0003072634595633458 -0.01103428562528969 --0.001007442607148272 --0.01986254205070798 --0.003417074690970731 --0.02089191748796974 -0.01755733343424117 --0.01421880445163473 --0.007706256573348884 --0.0316305685923691 --0.02001652813974759 -0.02608050258069597 --0.03436624696618167 -0.02159538092131812 -0.00708380668103448 --0.003111134841378749 --0.004926318633533562 
--0.001885235455617492 --0.01859751732172839 --0.01414900801814637 --0.02612153641307975 --0.009910123218352357 --0.006365778234346128 --0.01733177591856793 --0.006387543032471968 -0.008249748548657456 -0.01882365990003728 -0.004518595720905678 -0.004560404907278093 -0.02327821450917032 --0.002912346258285851 -0.02390941125930645 --0.03844018508339368 -0.0007930765426637359 -0.008025583670277952 -0.03943723601097654 -0.01167127417601206 -0.01772513051309598 -0.03317502203206862 -0.000893028963210232 --0.002394382684041357 -0.005016197319593135 --0.005279746775419308 --0.01388213471146204 -0.02621994621239625 -0.01302632594667858 -0.01185686121428527 -0.009362310203846862 -9.299257314664079e-05 -0.0004835098562204914 --0.002713304851430683 --0.01226718338014476 --0.009877020734323479 --0.0002919310650716624 -0.005194238050333014 --0.001662500000528359 --0.01011829167243831 -0.00773409574225973 --0.01235530121162729 --0.001547045998920955 --0.03075499845710892 -0.005740627724918379 -0.006992768992037683 -0.01175696745869032 --0.0007052100534328659 -0.0004624023950462225 -0.009228664121560546 -0.01924883493209572 --0.008364770638044009 -0.01567359364829157 -0.02287365117026793 -0.01211287886899546 -0.02567785299236859 -0.01503671634458731 -0.00189163873778995 -0.01758876251815896 -0.03250564528844271 -0.006482028534461236 -0.02290977956631286 -0.01272808235187591 -0.01443941206680375 -0.0002526422229615169 -0.003539317372202528 -0.01253998372839131 -0.02069401285759962 --0.002050895412099214 -0.01399534715079634 -0.01687501104736621 -0.002147809845668891 --3.142267451271448e-06 --0.03082919024973758 -0.00409833618300046 -0.01764242996451706 --0.01824280153221269 --0.006756723434835348 --0.0001450205760352501 --0.00533948906003771 --0.0266457012220864 -0.001344069557904624 --0.02128193239408986 --0.003659783389711483 --0.001852477576654513 --0.005583923348945666 --0.01929853851723307 -0.005032591258722955 -0.01916912290010374 -0.0308524055529778 -0.01373768182407962 
-0.04879855877438867 -0.002400002027748995 -0.01194276900965131 -0.02472784420662469 -0.0152947015615006 -0.008229897751963602 --0.008467217513181489 --0.02719336608141725 -0.002982316793370578 --0.005297610306972945 --0.01178221269438317 -0.00291670535831723 -0.01514055354402407 -0.000843004470314921 -0.03418930834618266 --0.007025330524047327 --0.006446516301463833 -0.02243084941706928 -0.01535965071131457 --0.01781964505478506 --0.005839037552966624 --0.004932980815182085 -0.003013755445519076 --0.006938248188829542 --0.0162717211528717 --0.01798497036975532 --0.004645095001116832 -0.01089958322771388 --0.02477933155243444 -0.009662665748671747 --0.006078020463963898 --0.01345160543476964 --0.01660954500628031 -0.002251851517249183 --0.01416684620464115 --0.02017698268253854 --0.008873431760135699 --0.02034543497200495 -0.0006466372085425197 --0.04878086408083279 --0.0027483181373843 -0.01151986776061373 --0.0162265344855075 --0.003256801104442416 --0.0005921090590034859 --0.03103687301830104 -0.02003929866102026 -0.002993458840512375 --0.01774967334631447 --0.01129472265352868 -0.003989231828537564 --0.01614046959996082 --0.007969116104857841 --0.02862444942950542 -0.009149897923654771 --0.003472215813725276 -0.00824400324360478 --0.00514985193198036 -0.0300742921766338 -0.002550702409033196 -0.02522315171355281 -0.002239783524537956 --0.004458744012660044 --0.002685748035980243 --0.02003084152623159 --0.01394261241327988 --0.007585878529702793 --0.009753075559757908 -0.01003755003767885 --0.03275255100672173 -0.04517121808063266 -0.002434769598134361 -0.008076037975557804 --0.01300263749545634 -0.06302110617897295 -0.01687153647848769 --0.00930962876055192 --0.02893428004246289 -0.02770691707775359 --0.005809321332112434 --0.01989749364275967 --0.02537833095641314 -0.014181271454644 -0.004880702816569228 --0.002332279482607529 -2.360044861530883e-05 --0.0009735975942982823 --0.0006798436192469192 -0.01153117588693445 -0.01366220762121318 --0.01213616902933886 
--0.03932064342835775 -0.01955031541064437 -0.02112332835847193 --0.01045117123023636 -0.0092375852883199 -0.005412830881953227 --0.009652129185600017 --0.03190723978774343 --0.004397134614096571 -0.0004653366920910375 -0.001801215448612087 -0.03659409852610278 -0.02164238473311287 -0.02952981655803447 -0.02760507272583731 -0.004868429239963265 -0.008575218199526917 -0.007607863558952699 -0.001140099589671244 -0.0359315056212372 -0.005974921143204162 --0.02207612606506396 -0.02224194803133473 --9.614643673055539e-05 --0.01235900634964228 -0.004810958582936387 -0.0005553201699475764 --0.04959617981761505 --0.02175683843999135 --0.002460682721387544 --0.04291025402634877 --0.007117533720152886 -0.009538461446059016 --0.02533512467883599 -0.0108719727858132 -0.01030006497856292 -0.00534026568387216 --0.02907301857171546 -0.009029428125775235 -0.003677969233221066 --0.00835826756718748 --0.006584188295177362 --0.01725879084872028 -0.01481922594341135 --0.007829060961651034 --0.02239608867235906 --0.007239649108550513 --0.02079908011132775 --0.0122402558111806 -0.02223178833145775 --0.02642044855104548 -0.02460513396431681 -0.01025987325311695 --0.007924876597065754 --0.008469098669326826 -0.01162427789506362 -0.00757125579340331 --0.006651687432548528 --2.011760012418087e-05 -0.007571021043648778 -0.004030482263946447 -0.03764152288361701 -0.01113734846155956 -0.00762682292082857 --0.03114581685183132 --0.01292423174770759 --0.006479712360404123 -0.02762814909895847 -0.06441575785681913 --0.01090172949626452 --0.007697231186210299 --0.004743469016488679 --0.005243778891252429 --0.008732594053048595 --0.01768043245652324 -0.005280437836049898 -0.0006756851338277547 --0.002040749662085092 -0.01905870498060573 --0.003946572948967477 --0.0008642364785054555 -0.001916326215645003 --0.01981899711425225 -0.02575605458110392 -0.003919275294279793 --0.03415197974394633 --0.02038051887277834 --0.01947517087181787 -0.00232395253007428 --0.005005491814130655 --0.01574473896058118 
--0.02108647113005644 --0.0390933560020202 --0.03381029152027456 --0.01374129627045977 --0.02156975197338225 --0.01742708450368665 --0.02264858345077536 --0.004319312911059957 -0.003887998324021627 -0.0232337907043668 --0.01624569974857062 --0.01125570695150681 --0.003702716727484901 --0.01400482921065592 -0.007011594349336642 --0.007573526736130579 --0.001032097377261878 --0.001047323103030321 --0.01016048814324655 --0.003540457030422892 -0.02083373788634737 --0.004967312684087704 --0.01222224714773651 --0.01253554718983541 -0.03627487978079577 --0.009200758609774632 --0.007995646261622871 --0.00661199873420455 -0.006537820686641866 --0.016958763151676 --0.007092649599914428 --0.007604167826467551 --0.001179703955100132 --0.004741446528877903 -0.01117942094133146 --0.01394367513870646 -0.006387018723445646 --0.01684775476809702 --0.008020723392915732 -0.02508636717351514 -0.01386299878517946 -0.0006163434569274912 -0.0226836256010943 --0.01192388276706972 --0.0145837783431355 --0.005101115192672873 -0.001440175842190362 --0.005931771432837823 -0.00751238104121529 --0.01367162748473635 -0.05651837815750417 -0.01617179191117598 --0.007314792298486002 --0.01149634251780092 -0.02047849803547178 -0.001023744937027721 -0.01471597875686846 -0.022454024687387 -0.0005124787304554669 -0.01797356687499928 -0.02827670547412039 -0.003611171801694697 -0.0153327716641086 --0.01084053129238595 -0.01120063482298114 -0.01146654149954871 --0.02186676349862713 --0.00878975941847282 -0.008550492104857975 --0.02310384955313644 --0.02413534865119355 --0.008913855602135069 --0.01039153630834005 -0.01066052334187154 --0.03595931444715042 --0.0008432535929275898 --0.008362226615794064 -0.012899442247164 -0.02930312588308863 --0.0009787165783535431 -0.00892214047220974 --0.02385351670709739 -0.002751156663230699 --0.05194250425000137 -0.005817084444285272 --0.006364928017669904 -0.006554827842347569 --0.008228184021934926 -0.02026067920090452 -0.0215193956363785 --0.0008296911817124732 
-0.01498914366173797 --0.01072137031975259 --0.01324846683708499 -0.02690443982661931 -0.01217516804592704 -0.009245736318703481 --0.008722076304629822 -0.02223697697300151 -0.0006550776533903614 -0.008829673087587061 --0.009493019881536384 -0.02427893935681946 -0.01138262875558655 -0.008640065790915905 -0.008527334397447163 --0.0098517077633191 --0.001196774832557466 -0.01245135534212849 --0.01246543959827981 --0.01467837007481979 -0.005603671578263095 -0.01812599547963147 --0.003369978597163579 -0.02449992214460639 --0.01115553459047753 -0.005743490644701667 --0.01878079907846746 -0.014432070644365 -0.01323665835919854 -0.0231617086915668 -0.007303629654081573 --0.01427799433969487 --0.00831587623286807 -0.01432656833254217 --0.05565068721077037 --0.01649008086536423 -0.003193506865209827 -0.007043880530607463 -0.001381125822896695 --0.01284515838845966 -0.006167126542678558 --0.04102781952506392 --0.01144135058687889 --0.0171272919308507 --0.02751586690268498 --0.008662432431961922 --0.006198087795207047 --0.02528455231929987 --0.007763599861541351 --0.02216969816341033 --0.008917916822090642 -0.01167172747087434 --0.00395815915473401 --0.01948004640889512 -0.0256515703053247 --0.04185080715448322 --0.007234759438274927 -0.03046349003474088 -0.004402390410109493 -0.0188800543541134 -0.006061779987044675 -0.005257153766757948 -0.01501098576484898 --0.003452513228169648 --0.009840069829909743 -0.01654368552238866 -0.01980687921490583 -0.01317655728945977 -0.005416029144507892 -0.005951078966694971 --0.0132302571559345 -0.008730753821912659 --0.00626315087461618 --0.01274903461132639 --0.01032682640154472 -0.004530181275655699 -0.002186481569415141 -0.001848766116629715 -0.01948645099304751 --0.00984287213593949 --0.0180245006576852 --0.03178474172238394 -0.01668474916730084 --0.03386605864417302 --0.0209899028109063 --0.01516451159349272 --0.01415197055902736 -0.004454583359133935 --0.002347227621329079 -0.007593957870811625 -0.01094008076835564 
-0.002658616813757957 --0.01021824349388779 --0.01473303058960936 -0.01503165577939028 -0.001971848067470669 -0.005478605934674059 -0.03879781096731207 --0.01418999381390136 --0.02919782949085926 -0.002659307296285537 --0.004162860232794933 --0.02452273251202593 --0.02897421148975042 --0.03982240308212088 --0.01756816048193873 --0.02231481586771881 --0.01283711115525081 -0.009687819609816677 -0.0003102851276981148 -0.004850545105721929 --0.02300374335417232 --0.03357373683186745 --0.007839363000441121 --0.01276449801594297 -0.001456108171957251 -0.01902255297265176 -0.004446782695133483 -0.02133695901712592 --0.01976270669646911 --0.009685266451037178 -0.01117016334713575 -0.001370637539317458 --0.04889195949797102 --0.01341348733981518 -0.008660924479100355 --0.006907570434681563 --0.05135507268549426 --0.006779827440184504 -0.0008447743350087972 -0.0003712496640469208 --0.0003096522608354965 -0.01243270782590681 -0.01482536527371885 -0.02348964808340296 -0.005496272695601416 --0.0114049635202043 -0.01840097419337173 -0.006809039144939852 --0.0007937697922170802 -0.01955452591090784 --0.01876173414526512 -0.02710441349839449 --0.01917579124605914 -0.008709505240794215 --0.03285763002067066 -0.008752924583107179 --0.009943121157616173 -0.0154154708695081 --0.004067415511192045 --0.01662204632073492 -0.007713758102871562 --0.02813539294527369 --0.007732075358615671 --0.009669999234922904 -0.003334904816078541 -0.006649279863757005 -0.01590848455206553 -0.01644953937864884 --0.00417288889175121 --0.002969069162019801 -0.002023646471877018 -0.007582525973530554 --0.007257317751561066 -0.008973606018068452 -0.01348266443738668 -0.02305226829085775 -0.01666651816093993 -0.005733506299952687 -0.02159362222256716 -0.02116430086849335 -0.02082676800221974 -0.01677199759208592 --0.003676742192525374 -0.0133594044288274 --0.006966099325457501 --0.01086624358270451 --0.01750917256243403 -0.003772616822794145 -0.01445849007631814 -0.01008133637260704 --0.005569721859292821 
-0.008549287445647404 -0.02281150273017499 --0.001416827691289391 --0.0150205897591585 -0.009187222437584734 -0.01236589259930272 -0.001630923087993068 --0.008152626816441411 -0.00252836853997357 --0.03222698840858164 --0.004781627978895055 --0.01246961236144454 -0.007181335714098646 -0.002028017078066252 --0.000931230815235418 --0.004526607560690233 --0.004996772754726691 --0.01279344662708906 -0.009563394319059554 -0.02632692231095652 --0.01028169608130237 -0.01228557262409598 --0.006016241022531138 --0.005900086155128476 -0.004077276432807645 --0.003347002165918217 --0.007247045554594302 --0.01728603566366559 --0.007859542706558123 -0.008038538291788092 --0.002645537288856739 -5.631471902796162e-05 -0.02961785542620612 --0.007282038702990256 -0.009838163956636713 --0.0004541778055609444 --0.018851081439879 --0.007379994526259774 -0.00434105343449142 --0.0261505533204982 --0.01014342479063603 --0.013312296220665 -0.02535063133725272 --0.008604934926263814 -0.004249021610531419 -0.03842678837784637 -0.01633589451662827 -0.02331866322491402 --0.01513965575352876 --0.0006038281169199562 -0.01997283243793812 --0.03909843620979078 --0.03575608791174428 --0.007581068743560676 -0.0134128227318906 --0.01239968046154283 --0.01261845051053615 -0.03049350370135833 --0.007226058769735589 --0.03017111294837905 -0.003929833351086065 -0.02064870228684692 -0.01667975044362359 --0.005677985457621018 -0.01203943098849442 --0.006498714389489587 --0.008899659825761869 --0.03245627621117831 -0.002287396896372641 --0.01747357375295201 --0.005263202761156655 --0.002032089753542359 --0.02790047025537045 -0.01059833973557582 -0.01282794753260645 --0.004232534790009283 -0.0009357382639767375 --0.03576428937547799 --0.00083780002237321 -0.004978412793922927 -0.01258086485260979 --0.02632671321711023 -0.002238249644627302 -0.009422383756532184 --0.03012115733417143 -0.02005045674263074 --0.02794243219462856 -0.007944221635426341 -0.04687920120304011 -0.01769916917983936 -0.01151348530964719 
-0.004522737294149561 --0.02845976288069472 -0.0294433539432547 --0.0005098037823555218 -0.01334523409664714 --0.02082987004203069 --0.01235735391652316 --0.03065008463184635 -0.01668406439250987 -0.001161124906661016 --0.006925260487972621 -0.004003793255223161 -0.006773856522758004 -0.006686522031750745 --0.002928694763745174 --0.03515073814306881 -0.02232431930467069 --0.01810688808631062 -0.01101243690552882 -0.006149247655117805 -0.006563621638512924 -0.002742267343763326 --0.002934133638926621 -0.006218681022348667 --0.01299984475141811 --0.009197691693698922 --0.01707199630179087 -0.0003562807362521091 -0.00942523639502338 -0.00534091608854026 --0.003072840871387063 -0.005975332047230792 -0.003410723032808418 --0.004858688991411602 -0.008732599815901198 --0.006263204051530742 -0.01976643804093755 --0.004093722954093836 --0.02379095106889214 --0.009397992598695693 -0.01512927032265278 --0.01912868562825778 -0.0022241066256846 --0.02146993368663007 --0.0160001202402291 -0.008397155968020901 --0.002147075557762623 -0.0286505559232231 --0.006493694282837291 --0.00555377503013729 --0.03541788382866402 --0.00971100561622322 -0.0106989109655744 -0.003088634382949682 -0.01342848260381314 -0.01738877692590295 --0.0004090858687376276 -0.01080847082271384 -0.02624079697899995 --0.01006288148859815 --0.001314596113568419 -0.002427969831360457 -0.01759485420134566 -0.00335404119796532 --0.0006235249319439905 -0.01576131784070835 -0.01024487503860761 --0.01323614327231774 -0.01347837951322855 --0.008469883535618087 -0.0234011066639342 -0.0245682063330038 --0.004141336085151608 -0.007272684160143496 --0.003025842895219159 -0.01215033041189319 -0.009843505777525033 -0.003392946616741205 --0.0008123250732940032 -0.005735360954720714 --0.02214883681806597 -0.01152493861337968 -0.005657061300064797 -0.0214609622218948 -0.02191934252453447 --0.01181257471694092 --0.02185621762028104 --0.005980574085987012 --0.001250567183323252 --0.01352735566891516 --0.01324867112556336 
--0.007003203601505609 -0.04801858977417573 --0.007267288733386458 --0.01192539853984659 -0.01126640007483809 -0.003475505967396253 --0.02071941632647525 --0.0005726632876119915 --0.02124756696850617 -0.009165408753565928 --0.01596268692181218 --0.005410651845821386 -0.03260214469903287 -0.01180725287964493 --0.0172347020873347 --0.02303862219300323 -0.007084016282822183 -0.009034675608347871 -0.02071271363538896 --0.009337486351723779 --0.002787646393651261 --0.007540650869381602 -0.001861903136428323 -0.008547987580841467 --0.006074118708461347 -0.004796385069469019 -0.036937313265285 --0.007751895864964408 --0.01396263115572428 --0.0116446885137104 -0.004208290438050806 --0.003508682408418307 --0.009086530015347198 -0.007974846659870112 -0.01356496285280434 -0.003528156114944038 --0.01425837980743495 --0.02639577591607816 --0.01147037072516825 -0.008995922258968891 --0.005476601754817025 -0.00701839078881001 --0.01197198408600142 -0.03583687709242954 --0.004955060770351197 --0.003485903071434123 --0.005966957995723868 --0.00239923975529647 --0.02938000831829269 -0.01724271618774327 -0.007881455062929622 --0.002376120846033699 --0.02201814907702552 --0.01516904893084525 -0.001345944917531528 --0.01581797713778328 -0.02007238195262654 --0.02152801725351697 --0.003499986557644242 -0.002047534776443752 --0.01703357104186727 --0.001862878534423749 --0.0006485279936755399 -0.0157709023893298 --0.02129401144776363 --7.010851507036585e-05 -0.009858520788405455 -0.002171557588504047 -0.01372494912786795 --0.01131839846222653 --0.002573610063288637 -0.004621947287358354 -0.004480559250421569 -0.008404828524134737 --0.005233266333193224 -0.01015328308671275 --0.02031568488211503 -0.01613797689501945 -0.01087596118627642 -0.001261085050951664 --0.004333542983386869 --0.001655390807485342 --0.02829844289512774 --0.02780752210416536 --0.01149875156822533 -0.009339410166709276 --0.004295733687255748 --0.001730982684304364 --0.0005518559355724889 --0.0006390047475683922 
--0.006176003354337999 -0.001652431106243968 -0.009975680609600053 --0.02797716032439142 --0.02796078978464962 -0.01293655799452167 -0.009462661686558181 -0.001720462174180446 -0.00538565413143505 --0.03982530334741089 --0.0230692181112452 --0.02775607670932757 --0.01867121876882981 -0.006124590369981068 --0.0115123357938056 --0.003292444294299765 --0.04226045495291274 --0.04357481183566375 -0.02271855345833626 --0.00368781224889539 --0.02605808767965867 --0.0207169378045665 --0.01040852873734821 -0.002794919733733894 --0.01272359864278693 --0.01715868726144977 --0.0226767421324506 --0.01394224301366201 --0.03070126894664099 --0.02572050885216095 --0.002008218893069433 --0.004650434812568835 --0.019088732402207 --0.01858335187924904 -0.01147255610535523 -0.02778753112518982 -0.01532477851527851 -0.02947139164742346 -0.0006836624135609246 -0.02169082365026173 -0.007002409027876449 --0.02969237553403356 --0.004677918740103713 --0.03079164869444623 -0.005921347620402013 -0.01535746133649107 -0.02607031986513803 -0.0009086930421562581 --0.01339850803104417 -0.001333154985832634 -4.523194894381093e-05 --0.003980997228864974 -0.007425556043199984 --0.01715065179755247 --0.009572315371755902 -0.01939270169416925 --0.006169037978444524 -0.002081107773190845 -0.02022986357913467 --0.0296829271989799 --0.01265391942337506 -0.002324521523848259 --0.007833253639127683 --0.01065668480385865 --0.007275690202289063 --0.03829107984712437 --0.001788424778669945 -0.01740012438947449 --0.02592312340793472 --0.005363133735930519 -0.008910702131600497 -0.0268562611766555 -0.004729785523850201 --0.02132410325556956 -0.003506265723436317 --0.004407086210031078 --0.002573578308815765 -0.01639184693073134 --0.02121863359582059 -0.03216023953632659 --0.01966522325521147 --0.008384905317312516 -0.007667680613204453 -0.006738477741570359 -0.02110768068472282 -0.01416261736187769 --0.002783537962769521 -0.01350074401990306 --0.01238523711923566 --0.00836844333622231 --0.0001439869586081977 
--0.01188376881311585 -0.01436928598772737 -0.003701623706305913 --0.0001570848046949758 --0.01819086682984613 -0.003876201604736075 --0.02628373458248906 -0.01532336312094816 --0.002162087531885517 --0.003681890038165341 --0.02158096626403967 --0.008089374395960726 -0.01871300583129416 -0.00171283457770458 -0.01472417724262834 --0.006057375363335498 -0.01507825626174323 --0.0285148420666857 --0.002616051751570292 -0.03035710440489543 -0.01991031087699378 -0.006601068137507364 -0.006701670317906971 -0.006709548563959679 --0.05549094661077508 -0.00846851981125136 --0.001310453723899519 -0.0093500523387716 -0.04403808006011854 -0.03026800906903658 -0.00354281819681558 -0.02073404252501665 -0.01229763550669773 -0.003145973733199772 --0.006398797982830115 -0.002429985726766671 -0.006928799740781565 -0.02301236623245129 --0.01999113303747349 -0.02447745467679782 -0.02432190174233914 -0.0006711037648610372 -0.0002625404952274126 -0.01644475727207527 -0.01381828021931978 --0.006573518789668793 --0.001156057956530029 --0.02613865519501167 -0.02361864292094831 -0.001538780202158474 --0.02099547098618208 -0.01180313448441145 -0.02124335357395203 -0.01069889427636538 --0.03134015728741897 -0.01124294269619394 -0.00526642481142349 -0.0009161121776108466 --0.01239099961886825 -0.0324917634048297 -0.004414170836474048 --0.02711929184169255 --0.0025413945878939 --0.009420688242076257 --0.01465679300880252 -0.002994704691665026 -0.01303448781022158 --0.008555980553149619 -0.03758171739578691 -0.01369238095434317 -0.03102939622740859 --0.02139151757909076 -0.01996929817050929 -0.006100769717538641 --0.01907228158195173 --0.004594001391666422 --0.01526195415261814 -6.404736927731995e-05 -0.007539458885203562 --0.003758019633489102 --0.0129643274399308 --0.0317117822959932 -0.01430740619264509 -0.01694960457866395 --0.006149883023033967 --0.006285779399859284 -0.01300576890431393 --0.001299501562367687 --0.00831666041744163 --0.03836364702808483 --0.0002810624539674921 
-0.03089618347328809 -0.00912228057212104 -0.01045996785905204 -0.002976913329276479 --0.003029722063112611 -0.01883520752600152 -0.0001731651016554949 -0.00180922617372372 --0.007927668771411461 --0.01684815360985548 --0.009873785166053379 --0.03507024190894514 --0.01450769214639179 --0.001469580626923542 --0.004086785500224501 -0.009139807157084722 --0.003662090009687014 -0.01857811816775874 --0.05140866349543106 --0.00891822066339383 -0.009491031535568597 --0.005602467195330627 --0.0006649467196109227 --8.829420871987943e-05 -0.001953480971514434 -0.01842889028580623 -0.01055343116676067 -0.03310209584083586 -0.01316171414399559 -0.0006077435305859221 -0.0074151944193758 --0.01070742607748996 -0.04039232784923243 -0.04351448887082744 -0.01554534296542149 --0.002623518226829833 --0.01291027010589196 -0.02102299872240167 -0.003941404585195916 -0.01065377304111713 --0.002679951433724593 --0.002197120987992306 -0.0004573578802347325 --0.04940684894304934 --0.01257021805803177 --0.004517346375347697 -0.02969722084853378 -0.01062016921093883 --0.01679256910348336 -0.002506829076528198 --0.02793814340952067 -0.008155525009253166 --0.002999720645911231 --0.004507767137964646 --0.008437836693765133 -0.01350066833075329 -0.0009649577049395295 -0.004327225998526005 --0.03894084336514281 -0.03011161810407604 --0.005866895842685601 --0.02962246632526599 --0.01681067036895511 --0.005837722828876494 -0.001806177792797483 -0.02239029701557491 -0.01567381931622651 -0.0161521231265102 -0.005420737670518611 -0.01430669777088642 -0.0001729845813669936 --0.006186082840723674 --0.001544224393595961 --0.01365928360859867 -0.01762679295692892 --0.01484575650890804 -0.02162990788304155 --0.001737349184228652 --0.008102272154194312 -0.01417889801209776 --0.005233640200117716 --0.009979657204175272 -0.001468139079119028 -0.006999075277513353 --0.01070522731509243 -0.01776039103677174 --0.002902028748232633 -0.002690456055858746 --0.005389317738992409 --0.001106849239182241 
--0.002280029606724315 --0.0041617960039521 --0.00271124534171149 --0.002274521205382005 --0.001199122806967336 --0.003381413462688697 --0.002069650024562991 --0.02045259891373353 --0.004691797159059354 --0.01208489563332363 -0.02952037745740787 -0.01863784978971038 --0.0008226739496181532 -0.0281790813136313 -0.01162368607078986 -0.01057071758529727 --0.01141344276211927 -0.02485476244422052 -0.04142366328399727 --0.01840940780746401 -0.007121708177212917 -0.01465907309643502 -0.001962801902275464 -0.01929200351396545 --0.01321126031683692 -0.03201451917980419 --0.004374109292591829 -0.01376705380192939 --0.01392858697730624 -0.02131172669155383 -0.01361921131946055 --0.0007643569560068617 --0.008327947797862457 -0.02493391528440564 --0.009019696476093448 --0.01193179010597067 --0.009205421559163807 --0.008873285819021167 -0.004123744671031855 -0.0151181856841698 --0.0257053897731305 -0.006408013558245591 -0.006055883490354082 -0.003394584847893433 -0.01048456725519591 --0.002901671959328282 -0.0177621789019933 --0.005446654006457716 -0.008041762327690672 --0.02755412078947034 -0.009040012365746202 -0.02055796391038681 --0.01432827622475474 --0.006200933929039688 --0.02595742122388938 -0.01195999481460946 -0.02078173343079194 --0.02717770899474817 -0.0121787165001857 --0.009760118634534383 --0.01693100754318937 --0.0191939037441852 -0.002165468622998385 -0.007951599055087752 -0.01197262017914202 -0.02903205101763904 -0.00510062691965784 -0.03955743110441621 --0.03802025548196284 -0.02341398939831249 --0.005006010168649757 --0.002596679966943346 -0.02580468541147135 --0.005439449416049159 -0.00242446870376706 --0.003027646069901763 --0.003832517352031034 --0.02071579284597203 -0.02475879872796235 --0.0001523846707009587 --0.01717768808928039 -0.0006527949369039565 -0.002764497900412905 -0.04319860048639278 --0.0003247530481430921 --0.004274968872881717 --0.0161417394934138 -0.01056885762068592 --0.002245443626152647 -0.02357311089687288 -0.01071281725118027 
--0.01525025907003227 -0.02029888360897047 -0.001712451028408189 -0.001235863575940385 -0.009623287200969353 --0.01190860524543881 --0.0002081385004862613 --0.004649755010509842 -0.02583741107595951 --0.006368630882671673 --0.01131622678774626 --0.01795852580958378 --0.008555671738609756 --0.02929152273554363 --0.000709073223111646 -0.01646982830366987 -0.009003043399942207 --0.01250697082509796 --0.0143529999347808 -0.005058496428771095 -0.009903129116738418 --0.01387846206508754 -0.01033258842816794 --0.02241265100679323 --0.01049867657282457 --0.03472017120212344 -0.01140018640965411 -0.006353605583671204 -0.007044850867990847 --0.03334084130889706 --0.01815536714042336 --0.006597813653152828 --0.002944114564730585 --0.003151301080311825 --0.0175489836338453 --0.00292277431506027 --0.01269710773226217 --0.01817665904715475 -0.02891782468591024 -0.02363387193656099 --0.002334970686911417 -0.005075179967321053 --0.004293165263022737 -0.01338752888530809 --0.008189891283890605 --0.02888004328256247 -0.005976171943321576 --0.03054265593550055 -0.00409360046619447 -0.00700114704571883 -0.02040441627155631 --0.007255636770380255 -0.02471835355939004 --0.005770396316131142 --0.009481817421261014 --0.02537280598799997 --0.01616607340333887 --0.01166795471462989 --0.01030717362160463 --0.01020563128249139 -0.01204355715910457 -0.00107665412671168 --0.01105781610676676 --0.009829295582062216 --0.02579946510737528 --0.008813465255136351 -0.02744419233645272 -0.003154862143748513 -0.0306946417970386 --0.01923205519884618 --0.003731101453174634 -0.007690085875117345 --0.01173853945392214 --0.01176456165208266 -0.007089938013491295 --0.006279176213257642 -0.01609908110490792 --0.007092850346320017 -0.01161369460883531 --0.009005255962950274 --0.009507957296795625 -0.001804834127237599 --0.01478675733682424 --0.006397884495899582 --0.005405912722089408 -0.002908319350599251 --0.01114769937290839 --0.01938655970709622 -0.009852432980088754 --0.005054752670656166 
-0.007016266710337288 --0.006703192086792324 -0.01871598645536511 -0.01492743141270434 --0.03745023957899398 --0.008485888380715899 --0.004038292982514992 --0.01198558971160972 --0.02375426326151019 --0.002903958307600663 -0.01133342954193736 --0.01683629094173543 --0.01223375044813632 --0.01739028583023601 -0.01037516025504453 --0.003107623465307698 -0.0002396046402810485 --0.02212865045010241 --0.004616187445806634 --0.02056273883767412 --0.01937964917474239 --0.01201589248589414 -0.008737530601627855 --0.01635095025773224 --0.003659369417039584 --0.01000491752906364 --0.01853179667369896 -0.01416680610967523 --0.00482065920008995 -0.004412067732489665 -0.005343602111942241 --0.004125881152733555 --0.01227852810373454 --0.01698938299633002 -0.0101925541242239 -0.007192541702351497 -0.003587128149857245 -0.004254068054333355 -0.02453396486988631 -0.003291990695359868 --0.01150989698542116 --0.007292218301004141 -0.006416084845809224 -0.004905692259397617 -0.005119232514710382 -0.003315245338359686 -0.007894083650786334 --0.003514580909909075 -0.008650577108185733 -0.005749784513878927 --0.004622953060973246 -0.01551305274611372 --0.00561225874185945 -0.01675692317561542 --0.00150730086269186 -0.0005249396477207156 --0.009371017454593199 -0.006201585857213717 --0.008948643361457943 --0.009399518899456512 -0.01320490256867866 -0.01140117538422683 --0.0217766939943769 -0.005361646334480745 -0.02220180489809371 -0.008501566039285405 -0.006953726085302888 -0.008704864669685175 --0.006441751769055778 --0.01381542177323393 --0.005944034331242305 --0.01218475348220682 -0.003922472310812214 --0.00602366271385793 --0.00213509669400386 --0.001033230842284614 -0.04120285166456205 --0.002188274964100489 --0.03098711780986696 --0.01049611767253735 -0.01124084396989755 --0.01111288004194584 --0.01014125674386878 -0.01778457458737278 -0.003281076382235856 -0.005805308666886533 --0.01742328898907234 -0.0009151763155163628 -0.01070431167493432 --0.01131637486733626 
--0.006217205649200906 -0.000762881670301675 -0.01416269392415296 --0.009981211427369267 --0.01444880317085256 --0.01272026328173392 --0.02540926451129358 --0.0110940278000756 -0.01028752658432173 -0.01645240640409192 -0.02051555290889537 -0.00587454949398061 --0.003231574489585367 --0.03068125216141897 -0.01120657639672886 -0.008330686250254356 --0.009493475668337539 --0.01368142099305075 --0.01400800180051238 -0.01697814095090428 --0.01229647990724075 -0.01934460456855885 -0.001551965902217507 --0.009596304714087308 --0.01050409980890137 --0.01452079698399888 -0.01115842872397633 -0.01563242221630854 -0.007327640732617182 -0.00579267812636958 -0.001650448967289342 -0.009306936907413816 -0.007439166366189124 --0.01018925317033319 -0.00969163873057791 --0.01175712955749901 --0.007964891666828764 --0.006324480316764834 --0.002676458230663862 --0.01830728065071105 -0.0004147968191985238 --0.004296348619290511 --0.008092367990933611 --0.022057375682272 --0.0124513396451638 -0.0176467554648664 -0.01998212856241889 --0.01206368036008044 -0.001550206286200484 -0.007952215907272707 -0.003401219608152595 --0.01441786192752461 -0.02077645225973389 --0.03084454894573975 -0.0363784128960089 -0.02018201823968818 -0.01364249708937373 -0.00617436671021727 --0.002228029271523364 --0.03883719842159266 -0.01478723499798924 --0.01409578532458002 -0.009263019766992578 --0.02527446887922895 -0.001067351001919242 --0.003147110161307728 -0.0218906940497717 --0.03186871323025285 -0.00206437100102532 --0.0101308930174031 -0.0253970890795401 --0.02081592442687558 --0.0004463766730193994 --0.01028576381632534 -0.002770651697642995 --0.01424911978077413 --0.008075192534093672 --0.00623227572630522 --0.01474215367962464 --0.005506982026165508 --0.01674136864098932 -0.01020920282104112 --0.008875559230809248 --0.007370471144570866 -0.01253139647828153 -0.002695980001513695 --0.0006757054769491802 -0.004964574977104456 --0.01408464144738169 -4.952085915429198e-05 --0.009006168754575231 
--0.002701926070958608 --0.03652006717782153 -0.00352810499820388 --0.03176354969543605 --0.006602237593244335 --0.008497205511120157 --0.02910878561275678 --0.005132216800594898 --0.01483773259105742 -0.001634658535703456 -0.00443287022948464 -0.01489563581253084 --0.003697830224897859 --0.01795687552894865 -0.01100850139522968 -0.01926404402098334 -0.01338736178114266 --0.001893031564882353 --0.003607267989229011 --0.009045911040115959 -0.0001338731318121252 --0.0005282693316574292 -0.001023639970315132 -0.01392749750217165 -0.01033420203439172 -0.005417066174624127 -0.0007537700818801907 -0.004432917114122274 --0.005334675763975693 --0.01565126773071325 --0.02731810662705819 -0.00291330405708042 -0.02421584151004475 --0.005249598455701276 --0.01226079962354958 -0.008546356057942133 --0.009246286439145173 --0.004109753261901383 --0.0024803664445579 --0.01844028177342592 --0.02206502169415311 --0.01501350500373293 --0.008289712471360374 -0.02888004605814559 --0.02365459934338274 -0.02534272022897188 -0.02061740623520734 -0.007184995361966554 --0.01013127271287851 --0.02192806487418398 -0.01498574666009387 --0.01514422347708445 -0.002152694875570596 --0.0005536661968098869 --0.02192714860170471 -0.01126830570741071 --0.01171952232304339 -0.009814866225505788 --0.001308025889672113 -0.02976895882458623 --0.02468174944609434 --0.0163768052140718 -0.00219581912761048 -0.02975168356536123 -0.02554194968222021 --0.004868350948090618 -0.004045195907803895 -0.01043056049576934 --0.000926534232991738 -0.004354649707789 -0.001226545839203135 -0.01560319963806674 -0.001905144611664917 --0.006326733869281486 -0.006569985477555002 -0.01157829013057778 -0.01957635534935027 --0.003321375280374849 -0.008416917690371759 --0.01003682501591048 --0.02776346586252576 --0.01152164576963457 --0.009744685466522959 -0.002071920485550573 --0.002979984701550322 --0.005479651753659253 --0.01364616209480277 -0.005988516978321402 --0.01538178215270365 -0.01758859567402726 
--0.007290021252996312 --0.0106408859464586 -0.006720670206756741 -0.01431381187586868 -0.008090427295510915 -0.002720632529275261 --0.008710135586119407 -0.01636155692790863 -0.02049819960779948 -0.006206926831703229 --0.01508181677025995 -0.02031963757079138 --0.00648705750455322 --0.001608184223053293 -0.003556902509380737 --0.0082359740514686 -0.00212032554698236 -0.01652830061843327 --0.0002580693519802035 --0.0004127808258744676 -0.01613807090585076 --0.0246365408723604 -0.002117096890481068 --0.01063398364787518 -0.01759033003990421 --0.01240796838544492 -0.008783998410565892 --0.005568712386780652 -0.02764591914784978 --0.01108040913587555 -0.006797108011556103 --0.001327266061640904 -0.01205256256503283 -0.0139698404902154 -0.01516507736363942 --0.006898339858677156 -0.0212124928519463 --0.000148445693949533 --0.001895614077862456 -0.007632918467954866 --0.0004807396485253313 -0.0103066822307925 -0.003916236093863992 --0.009276712758796796 -0.00720428133283992 -0.0003994497827654003 --0.01667932575420029 -0.005148294190774466 --0.01406916688560416 --0.007982165164102308 --0.004712129143940626 --0.001515507636347674 -0.004540869651158856 -0.02274961027329835 -0.002268591438389791 --0.01079677399552864 --0.005959473329797133 -0.01319665351608812 --0.003658540995209761 --0.005158410699229659 --0.0008904921892527685 -0.01120458585360172 -0.009013221956260373 -0.002634157991232431 -0.0127132795061336 -0.004015897215154004 --0.001097823140122553 -0.009089997065789109 --0.005479713173394797 --0.01370333603244495 --0.006419213778338505 --0.006295421397127702 -0.00385608027579798 --0.01424023232732749 -0.0003384557981018936 --0.01006837056229665 --0.008717334337668801 -0.002936647617698697 -0.001239956879051464 --0.01746058354760543 --0.01871604598745298 --0.007320731364577541 -0.007779248230564683 --0.002467991322152353 --0.008672840961702172 -0.009631914420758723 -0.004896726634768131 --0.01576727848157885 --0.003573193571067725 --0.04192338953196287 
--0.01527484967141659 -0.002072438515832884 -0.003424345102880843 -0.001994470215648774 -0.01466509385798341 --0.004415147584698019 --0.003145019022950545 -0.002095842840257558 -0.0133516797018277 -0.001493321829783797 -0.003069021280439011 -0.01714674764595616 -0.01353612282972051 --0.005082987813406201 --0.02672833333266446 --0.008488830239328666 -0.005640163781592418 -0.004109339175898203 -0.002481593591264732 -0.007660783872947382 --0.02313767904605801 --0.008787522946113163 -0.001583216750771078 --0.02147026299250385 --0.007796454695051316 --0.02037903170225247 --0.005047036581321928 --0.006620068624280741 --0.01242103455226167 -0.008387904339089664 -0.03371612071087385 -0.01176073241466659 -0.003636957329555025 --0.01972449564460617 --0.00322155193966083 --0.009339150369214354 -0.0005341079914695783 -0.005236183094495943 -0.005967084342315498 --0.002666981256748724 -0.01532543628646216 -0.010622684972155 --0.02175488913460132 -0.0009980325881792627 -0.005448783860387378 --0.01742789152095397 -0.04272284150790095 -0.005218702343931118 -0.01031743025332115 -0.0004207338245167571 -0.005953948859266752 -0.01116728542750235 --0.007658999606593614 --0.03799733527470842 --0.004217275268056678 --0.02327466359328111 --0.003540099153485855 --0.01355939863373944 --0.006081097456144878 --0.008182434877587901 -0.00288968943246931 --0.02879825862531147 -0.007406723912783224 --0.01397462016687308 --0.0198093785865078 -0.01383852211211272 -0.005208812763819525 -0.008619897198737169 -0.03086765013553723 --0.0006232975408835002 --0.009190095984739119 --0.001703885701654368 -0.007822859004768382 --0.01414732369915412 --0.007004530865133945 -0.01297750573911034 -0.002383433752758294 -0.01882511659263764 -0.02113437827974255 -0.001837737216603922 -0.03802063019626707 --0.004471401036174494 --0.02922211819090566 -0.001729673334381762 --0.006929139894733195 --0.004668969225044269 -0.0001286217071147185 --0.0002413885637529605 --0.003331467266312645 -0.004202900577206381 
--0.02981849785327401 -0.008493475006931095 --0.01231716500203459 -0.0196896631964582 -0.002970411328770317 -0.01795934445082835 -0.01214209041458672 --0.01276243229978402 --0.0159461186524732 -0.007208098642611125 --0.03674408387931711 --0.003145622692913188 --0.01275860825691216 -0.008936599217483573 --0.0180704963035322 --0.02086921240022233 -0.002973712992810211 -0.01467132235696355 --0.02380029879126567 -0.00513482249627457 --0.02399715324972109 -0.01534672681064307 --0.003128532518688702 --0.007377468881844495 --0.0127545989370851 -0.005256913081541892 --0.004244761040882263 --0.004747073092273296 -0.01203554954684451 -0.006623621211960685 --0.04140957275759271 --0.0145725606288553 --0.01635225794479575 -0.01460143669157566 -0.009470800817233819 --0.02248572013527337 --0.02397058337937009 --0.0008486383658285704 -0.001024265886541806 -0.002143125438135531 --0.01062443874511401 --0.0009706296318111102 -0.004753473842249189 --0.0008420742830849998 --0.02022177760170625 --0.01931781893121642 --0.006529523714771198 --0.001884601106828844 --0.01943815748134014 -0.02886135044550207 --0.003305818953922596 --0.00272159641495474 --0.008870605242995076 -0.01362004369209151 -0.02043286039385004 --0.002175003186002545 --0.006954305622970268 --0.0005679007296741869 -0.00139035663462976 -0.005714924887406452 -0.001938363463719039 --0.0002415994783482895 -0.007000962088272354 -0.01123427563609114 -0.00287038214416711 --0.01798911025299659 -0.00370779670711086 --0.01084514546214282 --0.004012904287551436 -0.02873892138717055 -0.007137954627948636 --0.003694132171820698 --0.03835879195254621 --0.005902904884355925 -0.01205224112261373 --0.00974347336546042 -0.01001140888025003 -0.01432189225272032 --0.0139857508016259 -0.002438549314738905 -0.02809062366245842 -0.01235049867233432 -0.009663211759146596 -0.001410053869368667 -0.009973731580857582 -0.008742875282310604 -0.02041620113860615 --0.01064458245250706 -0.001514612074283982 -0.001365081156937389 -0.005933235598170941 
--0.002691460823918697 -0.0370619414005915 --0.003854611648674897 --0.01553803661692626 -0.0007097145667704935 -0.004063903429233576 -0.02584296083319736 --0.004871916226947137 -0.01469468647295661 --0.0009054671156323789 -0.005135410684124127 --0.0113408257339239 --0.003796779329546391 -0.02440607481538242 -0.03401237765906474 -0.0103505746824679 -0.004260912786952374 -0.00577738114309503 --0.008000564086254861 --0.008689354599246638 --0.001704719191427385 -0.002522142952408068 -0.01252347922889033 -0.01037006316462945 --0.008133283249006716 -0.01849096318793482 -0.01279757683894496 --0.0002704084718143122 --0.01120057811504341 --0.01787424597016895 -0.01869016827400498 -0.01234369184454925 --0.002879643894441999 --0.01258183881033513 --0.002365820803913704 -0.002509934654712851 -0.001436991636734602 --0.02568024372509321 -0.00589528438123128 -0.01545851285764664 --0.00523896531027182 --0.03234347961174518 -0.007674725815789449 --0.009964328240874839 -0.007004386712421436 --0.01286763108334246 -0.0077721110681705 --0.004028054843890901 --0.002454777369002381 -0.009104579021518823 --0.01082336420502796 --0.0002165925288060717 --0.01844996898769538 --0.02792388284432292 --0.002975364873161067 --0.01037771469820443 -0.03359172335176542 --0.001311061554399674 -0.001743989928088901 --0.006306447835338422 --0.007961936756730558 -0.002065816620806474 -0.002647816307532489 --0.00276987187710808 --0.03278090589524882 --0.01490168637781322 --0.001515279675662869 --0.003049714925619508 --0.009284162714922474 -0.002878156169090059 -0.02084557097978784 -0.0003488664199960495 -0.0001635344928407963 --0.002844925069185727 -0.002069375412611047 --0.002465447579877238 --0.0002159478585181012 --0.007040160067226106 -0.01506935382181274 --0.02219040644910598 --0.004310026809657723 --0.001781670803243691 -0.003122239159630988 --0.005315849392103794 --0.006532599706156394 --0.001715932076123513 --0.002989108350223651 -0.02248270114473491 --0.006556181485085518 -0.004763862999797742 
-0.02302189563103465 --0.02257955159853654 -0.004689299277101635 -0.0131821245905344 -0.003571745364035062 -0.01670166832866063 --0.0130637249930093 --0.008195505799059753 --0.007353624524849719 --0.01435806091140487 --0.005137334649216573 -0.02349783639382982 --0.006275925886844006 -0.01680073955923369 -0.008266812278978172 -0.004092963157955242 --0.01694838256425278 -0.03141164399251254 -0.01142088548669545 --0.01487351305868063 -0.001363795308014595 -0.005752476200893491 --0.01690086877198604 -0.003812042980651998 --0.01804164739435991 --0.02732359173193372 --0.0009347281646605042 -0.003713435883542896 -0.005064994812885675 --0.002106702516469806 --0.003349116128602967 -0.009566176231663646 --0.03354338041739061 --0.02090600631328638 -0.02128738093029466 --0.02786290924500091 -0.02347670925869287 -0.0176447655789603 -0.0005314377759297441 -0.003637824055762669 -0.00144811417567656 -0.02332289574525618 --0.001327568899650631 -0.0185338738268935 -0.01684697013622281 -0.00179317680439744 --0.01136914971660081 --0.008941787952923025 --0.01458483182229048 --0.0004492068077432991 -0.001495671322347571 --0.02059100197001574 -0.001083798881128133 --0.006659824070091525 -0.02488264676319637 --0.02021072513467227 --0.01989333499382044 --0.004279845029171672 --0.006086477171847801 -0.03113427268186063 -0.001172737935519379 -0.01874969540991632 -0.009014176341452485 -0.02444500859854239 --0.008700978998431089 --0.0115791402119921 --0.0108153142388073 --0.01912867813771941 -0.01018198323463959 --0.03393925330587132 --0.003924252520350437 --0.01034969958728167 -0.0009883829025064195 --0.01326250910790931 --0.005621689071267985 --0.009609510787098128 -0.007688016945296672 -0.004227157306997512 --0.02859549725762468 -0.01489192943066743 -0.02355293658754843 --0.02291419801680763 -0.01822321108532695 --0.01312570986555983 -0.01604906574506344 --0.0004595975531300679 -0.01899561490727598 --0.01462427248272488 -0.001494705441768974 -0.001905964683011385 --0.009799758105250618 
-0.005547047278518652 -0.01382905931200318 -0.02092439496440041 -0.01936652207303139 -0.01550535030417885 -0.02674497747620155 -0.00576166763560173 --0.0115034015636466 --0.02112856402008893 --0.01482567728319138 -0.017082803519069 --0.01142404230521733 -0.004433560589238124 --0.004890778947616842 --0.03614331575040152 --0.001341824267321704 -0.001255218186316121 -0.01671012672273671 -0.01692900460602901 --0.02414695624971315 --0.005743197880363125 --0.0002696316502599029 --0.00450614971519081 -0.01831264206976355 --0.02556687993608042 --0.01770022394029044 -0.001507431986734305 --0.0004980745879944637 -0.01960154327174893 --0.004138551468099692 -0.001042239161567194 -4.909094144944038e-05 -0.006556915547313093 -0.01921241259466239 -0.01932421529795036 -0.001468164299560825 -0.01202916679468099 -0.01704819829638292 --0.0005507839172033823 -0.00381351811638083 -0.01118379326287371 -0.0315133145196759 -0.006386302086683933 -0.01407903384944601 --0.01081601079837569 --0.02129290823586634 --0.03664599377734008 --0.02667161218242034 --0.0007517421980233682 --0.02463666520626446 --0.006943864820063279 -0.007209044052348279 -0.0137080993207799 -0.01607735220295615 -0.01113602924837804 --0.006511092572930015 -0.003684141681970768 -0.001743380799568595 --0.007586067955615654 --0.009986408993960114 --0.01669451928853517 --0.02106918531968227 --0.008536992795825487 --0.004945685919596834 --0.01298872537500636 --0.02125024060177954 --0.006973566227285793 --0.01316817465346896 -0.003647933498417234 --0.01837289002544488 -0.001039968298474646 -0.02537865064065581 --0.01151045353743365 --0.008962012292139632 -0.02208625915567814 -0.02077239022858686 --0.01247060708682805 --0.007375083252304015 -0.01019843303946165 --0.009851862438267881 -0.000642808463346124 -0.0144460312764683 -0.005277248176850801 -0.001361616457871382 -0.01316088269705898 --0.00381459564002949 -0.01049048807091046 --0.003074273827120378 --0.01430489427954377 -0.001467040414201362 --0.005113496867326737 
-0.0162397340962202 -0.02002709024923622 -0.01218471109199535 -0.01178019565942995 --0.01113540820875802 --0.003424481377394616 --0.0003671240081975226 -0.0004908632740117324 -0.002877624349510244 --0.00110181654274505 -0.003298956723275216 --0.0005889029162330185 --0.005032345698888414 -0.00191515320573489 --0.003198517958139457 --0.005703876267251325 -0.004371478694364967 --0.001911275604830361 --0.004036154421795037 -0.00535327169693817 --0.003423116494465768 --0.003803607899312536 -0.001550362546360488 -0.001621908303313092 --0.01070987768074877 -0.01142652274928564 -0.00988786219077363 --0.004655446689595956 --0.004913188418852635 --0.0120095913903339 --0.01335663139272611 --0.006026432003878769 -0.01131679237011094 --0.000444383493994854 --0.006418259543750929 --0.000392158564922129 --0.0172140138063681 -0.02020775528350636 -0.01470010924276834 -0.02792409533037354 -0.01528434207411887 -0.001330581468934865 --0.003442839349525545 -0.002911491826165634 -0.01689270901883589 --0.02864694838057828 -0.005697941117327089 -0.01725514154576722 --0.01994990488241509 --0.00692013997374557 -0.00944546517676562 --0.01652806061813834 -0.001746650279986182 -0.01190569412286772 -0.0006285132602118713 --0.004245878672251296 -0.003667277194884902 -0.01761684798198465 --0.004465864511546299 --0.00669851043596429 --0.02088558533532674 --0.005031008291812541 --0.020970298515525 -0.004675627562426433 --0.004281105090615566 --0.02024099186944913 --0.0209487674779752 --0.0005513734193942432 -0.02015265789923825 -0.01676482523689774 --0.01275346754330884 -0.02048546144781847 -0.009429682315738862 --0.008250894424783745 -0.02804283978831124 --0.02148751097779556 -0.006426382591364275 --0.005522955270290042 --0.0183250066012254 -0.01607746427600082 -0.01151173227955941 --0.02331610971392275 -0.02250211941681595 --0.01336649583204941 -0.0183500590129814 -0.02698680465312318 -0.00382174566798805 --0.003662191859213944 -0.0002390458208330781 --0.01652459740186936 --0.009364726479549161 
--0.004274920298827001 -0.02534583897073507 -0.003265016781284869 -0.02497132380111935 -0.01612273873124122 -0.01095409046093517 --0.007928166644929851 --0.007249353817561233 -0.01260615720364627 --0.004932984372119232 -0.006796271592265942 --0.01100581095961108 --0.03936977704377553 --0.03020498529667122 --0.001125999161456072 --0.01506575858722887 --0.006704391195025735 --0.005842307861477143 -0.02658941391225035 -0.01624030078807339 -0.0002577229843212264 --0.01141418194098192 --0.009459311024563842 -0.0228155209868675 --0.01682692921879555 --0.004669168446513064 -0.002147956248980857 --0.009373288602506503 --0.0050570706205701 --0.01512786462362374 -0.002538098226317524 -0.02562269625152127 --0.006477718818795826 -0.03428520743774296 -0.003682234075161793 -0.004235193288878322 -0.007700114136755443 --0.01644923922407888 --0.01423582528584173 --0.0005338099463999129 -0.00301266958065962 -0.02277146963689098 --0.01397974234436716 --0.01431385347125994 -0.006079978950344602 -0.006532303026167034 -0.01361059425985696 --0.01296307116666921 -0.004676795868620871 -0.007564666580555421 --0.0146067307154655 -0.01352904425861247 --0.01632135002848036 --0.01149095548675444 --0.02965070117058712 --0.006913962784835835 -0.007919949437343057 -0.0002065301072555029 --0.01079563904850628 -0.01050195489500416 --0.01610434409957887 --0.01310679571637003 --0.02108069010253484 -0.007440627853076287 --0.001895800912861425 -0.003244746412517607 --0.00527074935348076 -0.02070222376186465 -0.03157770167961717 -0.01257504255850474 --0.03833464348626855 --0.01179891071944893 -0.00834370985776867 --0.0102066009217699 --0.01836784677466994 --0.007333155314888711 --0.01033989419356877 --0.01415856161195885 -0.04020795633287545 -0.01173107530763114 -0.01257040458696674 --0.01474852413533504 --0.01857237816250542 --0.01238911148251291 --0.01976794537252115 --0.02818178317146671 --0.004388787103571509 --0.01452629266449142 --0.03236570441782424 --0.0003941767401386946 -0.001719612835356063 
--0.005894317606336506 --0.003810321954825299 -0.0005270276250813289 -0.007071992595765255 -0.01268028427941534 -0.01647945268154318 --0.01234377636327738 --0.007301472587575512 -0.0134270428797852 -0.01099398280772294 -0.01337798898201532 -0.02261132292152309 --0.01981982720803956 -0.00911311948697417 --0.00488981582279104 --0.02459900028421479 --0.01162682738004301 -0.01905702570511612 -0.00132393551515848 --0.01852679635813066 --0.01313961727286282 -0.001058958596005187 -0.01654553861510839 --0.0105108483148453 --0.003746959666241295 --0.0009723873728074475 -0.01762208224252926 -0.02584669267620573 -0.01335083561993331 --0.01082044217718478 -0.006492036234966164 -0.0006907878440447861 --0.009804570661036108 -0.01120854162861286 --0.001606561870553593 -0.02216596997103981 --0.009884699179391988 --0.01528922406773776 -0.008842022145278718 -0.01692303892803204 -0.02759571569424609 -0.0101765816686874 --0.01265316152996029 --0.0171890437074447 -0.009033905980518489 --0.004304614271996819 --0.00826650627476927 -0.006071479791476846 --0.01711988614818794 --0.03087169602841707 -0.01960013349083772 -0.02114130624773448 --0.003698143477789018 -0.01665571582999152 -0.006419193632957428 --0.01276808744431999 --0.01603943710194419 -0.01643459129875542 -0.003922889186219592 --0.01584155772301158 -0.02311905166087386 -0.0109385645740239 --0.005376403351910834 -0.009109733875422256 --0.0157785477878026 --0.007099614080983856 -0.01439959140438157 --0.002298661294363577 -0.02349746992508149 -0.003629503400692517 --0.01003649050670035 --0.01468978140989673 --0.009341402997386252 -0.01539382858813153 -0.007056184629012798 --0.02424052449330511 --0.01013438349156317 --0.0161106189463998 --0.008968145175644135 --0.008580799396574867 -0.007217677857083818 --0.001505020796593376 --0.009252048759125216 -0.0006322572879615543 --0.01362873965177152 --0.0006068447899512543 --0.02218425759213502 --0.03344566508308651 --0.01639386391818827 -0.001962087768299741 --0.03526650794905884 
--0.02716860274747279 --7.252873791058854e-06 --0.03158285291418419 --0.001623340225867352 --0.01104157106194296 --0.005447367061230906 --0.02012298785820303 --0.01959336549695371 -0.01271556607725358 -0.007037157897222413 --0.002965922500334253 -0.00590758125794512 --0.01364028263341433 --0.02010108074600562 -0.009709222349498857 --0.01423346720587442 -0.009726284718385703 --0.01668415456711171 -0.006154823850582735 --0.02534602587405186 -0.01703970992267478 --0.0002098898215236721 -0.01301996193443654 -0.006127027443035371 -0.01244127461428899 --0.01432956237763387 -0.01395119683912871 --0.01568464085482351 -0.0001176104337549285 --0.02421691145033501 -0.000133025446500447 --0.01005712408048885 --0.01507405279576225 --0.001758876654180785 -0.007298171501762781 --0.003581791363545802 --0.02358688361607457 --0.002590266760278887 --0.01509102745872397 -0.03346706732342922 --0.01504549584126259 --0.001771791367793212 -0.002671613222433593 --0.0003709529221979855 -0.008728933944250549 -0.01391004449228192 -0.0075713781798582 -0.001815263478631578 --0.019966802603665 --0.01691576503225619 --0.01226879362884551 -0.02739827640152937 -0.008247124735032448 -0.02076070437183108 -0.0005848872978915303 --0.009779930206619349 -0.005675720692849909 --0.01951816439149086 --0.01056015423804004 --0.02534574544743919 --0.01925142490658781 -0.009922212784173618 --0.007176253680480551 -0.005451578879695868 -0.009918531347128976 --0.0006720105208877023 -0.0105588960696659 -0.02066111321455487 -0.008288703841042079 -0.005288264043898389 --0.01387705399840257 --0.0009930899310949576 --0.01056924902037246 --0.01238941323100972 -0.001870119050933335 -0.02686588041397876 --0.002065211177161746 --0.01278292221842857 --0.005142943319099935 -0.02377251988108977 --0.01159082718201512 --0.03379098646124078 -0.008318353679237245 -0.0114529071095676 --0.01645100703285619 --0.003181099628004045 --0.006916245828306857 --0.01278138467840095 -0.02146752707008293 -0.007600035285103075 
-0.0526377486572026 --0.008154331199861043 --0.003390502996875069 -0.002062008044862195 --0.01517865902805408 --0.01389728504977737 -0.01137834139620722 -0.02036474884454893 -0.01386567122466357 -0.01310006832275501 --0.0004114574493389831 -0.01493565630573209 -0.00266181772025316 --0.003002376039593435 --0.02235784650481129 -0.01080210963443454 -0.02893646779230268 -0.02079502145671575 -0.01696665427415874 --0.00821198743310983 -0.03252951745743831 --0.0002119602316261224 --0.009151221812743944 -0.04070257868877229 -0.02967613461586078 --0.0009392779784078375 -0.008278877278034502 --0.003997352635048516 -0.003299601677460417 -0.002812284173243165 --0.01667055712361534 -0.01004118163730036 -0.003220893810245169 --0.02054611297250312 -0.005300592867177189 --0.0276434851611295 -0.01770331833063773 --0.01266179054227942 --0.02204107898268882 --0.002784037837270497 -0.004173796282149081 --0.008632584948110981 --0.001095515482764154 -0.02128074900090392 --0.001254958849304849 --0.004419810348378892 --0.005555478186945212 -0.01846776530131816 -0.001540673626872334 -0.002921914811082011 -0.005206986742777751 --0.002498625282395165 -0.01568001231641996 --0.02064791536508081 --0.009384359278438937 --0.01846263293103973 --0.004279961640969861 -0.0310155959502296 -0.02278584020863179 -0.004456847712152289 --0.0248668651197845 -0.006045076994333196 --0.01990997783962895 --0.01395455659290044 --0.006382026172753417 --0.003684809762670747 --0.01466019534038032 -0.01176884283707358 -0.01028352761420503 --0.01709943596191365 --0.004433992865450464 -0.007714383803794786 --0.03251271274410604 --0.02218674442464503 --0.0199451163926457 -0.01156248548342475 -0.003535659786422155 -0.00833831371179951 -0.0007474594260389695 -0.02810276146438476 -0.005727505750275822 --0.02458364128489382 --0.01062632241752211 -0.01141852620411286 -0.02165817996009011 --0.002370292424771842 --0.00243632094707594 -0.01249269639012427 -0.02516895808140605 --0.003309003623402556 -0.001559284947320723 
--0.00194578392965551 --0.009251044517080588 --0.009362909904812894 --0.02417237177206746 --0.001254690935436669 --0.02288845194202567 --0.005734752371341872 --0.01427563410971964 --0.006363852293372386 --0.001562287672018273 --0.003412395608508961 -0.01296654411079786 --0.02383050116092834 -0.01944708601240925 -0.004642619424617453 --0.002601398101052739 --0.008971464113034624 -0.03676503580484968 -0.01423068213421022 --0.01773655798797057 -0.03871908578672768 -0.03964360236200849 -0.004463109524221587 --0.0002361359174218638 -0.005107668418078351 -0.003323590770881154 --0.005589388807401902 --0.02238946932039637 --0.003899800171947515 -0.01524521454188645 -0.01218266976100981 --0.01792749546122066 --0.01503548223806147 -0.002024560014768462 -0.01831192662299935 --0.005966062397026387 --0.009829197509904162 --0.004129005126445169 --0.0005858952377448667 -0.02452327675567211 -0.01329272263209953 --0.005371391037680929 -0.01818144228686881 -0.005330406867795841 -0.001644018928360227 --0.01326050890070832 -0.004065850555653653 --0.02741948362604604 -0.008848844008885852 -0.009685013981666823 -0.02299485400376819 --0.02062581336312209 -0.003570478582776307 -0.005109054188545755 -0.01525225932418012 -0.003300509886728114 --0.01039101723639359 -0.001478894165331377 --0.01944028958477118 --0.01697254376310092 --0.002141629122872058 -0.003301696199506949 --0.01434591013438162 --0.005107999497058222 -0.02256102406857514 -0.005657765061814224 -0.008780011300034423 --0.02241746509261875 --0.002272780186270637 --0.008657542563293784 -0.0021575973694357 -0.01336349975417421 --0.005981873037479388 -0.003790736331877263 -0.004140872679474091 --6.670914454933108e-05 --0.007315457382532402 -0.01215815364411283 -0.0008535743517559006 -0.0103060191291738 --0.003171904507291982 --0.001799830570586872 --0.01009750820301736 --0.003234315392605725 --0.01667326546333653 -0.001985844093464942 -0.0008755570258095779 --0.03324142561190395 --0.00513346728883581 -0.01349198768168489 
--0.001884629120528639 --0.01067455799128833 --0.009052577826730036 --0.00963686447285496 --0.008426317920287769 -0.00222868039115213 -0.01425161561675471 -0.001152318369255607 -0.001019207509525064 --0.01823198609418282 --0.006527777887195718 --0.00230102788369858 -0.007513348468336135 --0.003576762697547948 --0.0006925910562061161 --0.006151036374173463 --0.02967808628872888 --0.01495870810410679 --0.02829215203691276 --0.01307077424018101 -0.004145194466820171 -0.01301554679763758 -0.01204314662106529 --0.007896881182351928 --0.002936518892835794 --0.01584137234858014 -0.03248689768717838 --0.01298404267246572 -0.0136470362798298 --0.003320805140709715 -0.01048840042848568 -0.007979589599270533 --0.00992977835896376 --0.005357187132347961 --0.01968606290952043 --0.01365581478873764 --0.01653047617420842 -0.008186062101038196 -0.001422792317902872 -0.003430948564170666 --0.006445923005789379 -0.02519639187680115 --0.00269855712628907 -0.01446977037462762 -0.002906802859013142 -0.01169415983924786 --0.01991106777674354 -0.02295310523935646 --0.005966728869365576 --0.009527416101862083 -0.02032987404184939 --0.02727924303001762 -0.03562935648936395 -0.01580005948870444 -0.02457018169518309 -0.01736100079211771 --0.0001498850191639035 -0.002718131521107321 -0.0002293909632381129 -0.01533726591844334 --0.0002921174741558058 --0.008789265837034863 --0.01214631799276147 --0.005771828433829178 -0.04187522792145641 --0.01012376786305625 -0.01861931501416082 --0.01438323107707888 --0.01342155661166992 --0.03357526386382455 --0.006826237914237056 --0.0006356986310774942 -0.007961888727519739 --0.01119856504631344 --0.01322733321158712 --0.004877102552970699 -0.00346087396311695 -0.03108907365246578 -0.01786536291866745 -0.009064996929934961 --0.01782528745427776 -0.02346156672659736 -0.01800870908899703 --0.0103503940354268 --0.001120251800766291 --0.0114589689458703 -0.04093179967132143 --0.002716911827795133 -0.0006807472810140623 --0.02841189917233079 
--0.006436509718522184 -0.009020525383371787 -0.0112531976885015 --0.002530892831712752 --0.01497003470457219 --0.008304741511837474 -0.008732684668980653 -0.002339282367122902 -0.01081516002532559 -0.006101295963644212 --0.0006083981545125659 -0.0111925919478086 --0.01817106018404243 -0.006247508107651983 --0.0005560937016598665 -0.01593001697870242 --0.01399127181184955 -0.007103197798536229 -0.01590357633458265 --0.02817126311954438 --0.001907328250532535 --0.007421590158099848 --0.03286112748140223 -0.002060921833398145 --0.00550142042672082 --0.00807426328750442 --0.02792579795212676 -0.01743142669567604 --0.003818186494500163 --0.003613771667183527 --0.01449346711149582 --0.01230035645351027 --0.007890448296507955 --0.006916513211149906 -0.0005896081008539403 -0.006745808773798426 -0.0162682831645454 --0.003785729549068591 -0.008153100619575486 -0.006161850662829519 --0.004112142720619922 -0.0102169740638249 -0.03163672955152776 -0.01095923541816692 --0.01306363175868779 -0.01841526260512721 -0.02318835426168499 --0.008247187269686236 -0.01611268793272442 --0.009982654485706281 --0.01965346314290377 --0.04392394587071819 --0.01543615116627523 -0.00619010939060881 --0.01661978469937312 -0.002749501975780859 -0.01034152522149821 --0.01621423901008867 -0.002852204862551527 -0.001788157176179739 -0.01035698672559023 --0.02437197578331224 -0.007842483333716017 -0.009730880603382702 -0.00310695488466267 --0.007923273442411087 --0.01852459513463494 -0.001205385575680988 --0.006926671146404534 -0.01285900954481521 -0.006028116230868197 -0.01022213514487542 --0.01968123654387025 -0.007467184794166561 --0.001513862107688405 -9.275203202469809e-05 --0.02160593420112384 --0.003037808331528349 --0.00463388021826113 -0.0009072008471823982 -0.01209908437075692 --0.006763663457900014 --0.001432962532834532 --0.02637618313141538 -0.02145385516254548 -0.007823962878214012 -0.01297784112901031 --0.01681282127108752 --0.01081137084825162 --0.03057891160642245 
-0.02703611698715908 --0.0004326449471321721 --0.0009016799908266695 -0.02096619150384676 -0.002862586767818165 --0.008249325788232904 -0.04259944984787371 -0.0128484749708137 -0.006286325770660806 --0.02675904010794916 -0.006881454635124842 -0.01763054342473057 -0.007041496630679498 --0.003859050967826861 -0.0003920201767116419 --0.01725842822724766 --0.004114684348636946 -0.00827885858990788 --0.01260824093519398 --0.007796541933741968 -0.002328399831708919 --0.0008823971650326642 -0.008185900051901292 --0.0004760619302936898 -0.006146148117364696 -0.020578570662521 --6.06603232904655e-06 --0.002161307033339793 --0.0007347499353844648 --0.01535776236513778 -0.02115795781593732 --0.004727285679441861 --3.076978446484957e-05 -0.01466515273507737 --0.007337214381672696 --0.005955293505640252 --0.009211815153339794 --0.001013830480485472 -0.003165817197710146 --0.02194133528525962 --0.0123339807218651 -0.0008037768341064717 --0.0181025967329025 -0.009644795261822022 -0.005900597676539191 --0.002868240527846985 -0.005740122848112462 --0.003141714655261093 --0.01328320993236815 --0.007019741866511205 -0.00131086464379034 -0.003449584783044305 --0.01108276947437757 -0.0004426494666975864 --0.01419997745867885 --0.001921288544665169 --0.00769478223964514 --0.004233784825981601 -0.01264627952885882 --0.0193665875821791 --0.004125892601370766 -0.01541290020980129 -0.008922721224138303 -0.0103141522715134 -0.002178373620125354 -0.01437701846676927 -0.03763652844020955 --0.005601920961801258 -0.002958666561788732 -0.00740438301661268 -0.003610226248119495 --0.007494497981039089 --0.0001244141391152115 -0.02262157542949992 -0.04267373566853217 -0.01047898258888232 --0.03100259525061034 --0.01330382121075038 --0.009646182802211604 --0.008507711599268539 -0.003047233632048879 -0.009104007732643219 -0.02704983410222673 -0.0139480306243001 -0.0008023522213313902 -0.01342774015015171 -0.04680392766486464 -0.001977258586009847 --0.01731072095950852 --0.0008673468987175747 
-0.01024074315277151 -0.0220290881744565 --0.05461936562487078 --0.01671330146150685 --0.008816271755744957 -0.01875693996529748 --0.001970044567113991 -0.009553301991468191 -0.01405532688010382 -0.004772411399556049 --0.0300463536860414 -0.01515476181038647 -0.0161372997685075 -0.02163034003237584 -0.02347144630322073 --0.01676585880107048 -0.009572572885280394 --0.02296600424521639 --0.03113955917342857 --0.009365906472692302 --0.0002414886769392478 -0.01408412000351894 -0.007212212726331374 -0.02293836666086397 -0.004255111232987584 -0.02233744968861413 --0.01139204486677259 --0.003624043687740727 --0.0259464460035085 -0.0101768713611255 -0.008540364807789969 -0.006309910032365329 --0.004563537595672934 --0.04162241540206459 --0.001550925795703522 --0.01212730648130652 -0.001191433310164514 --0.006663741736751594 -0.01515059071752698 --0.007381250912364885 --0.002563720727718177 -0.02743224106869607 -0.01753686274227351 -0.001125710999688163 --0.02145905499244218 --0.01175833854148734 --0.003451049855824337 -0.001609729619164475 --7.249892792516692e-06 -0.01800621050569225 --0.004535380153641234 --0.02370092072119814 -0.004645685978706206 -0.01213633954531436 --0.02335332813744484 --0.001299381309864154 -0.00822616576228976 --0.009813973890719092 -0.01815210347300254 -0.01693559653205544 --0.001925541532559558 --0.02973922734869056 -0.02047630700354801 --0.006405079313002251 -0.007180420361159274 --0.01481911587333832 --0.02518710764752834 --0.007727895804268055 --0.002302905369232538 -0.01787154743983526 --0.005083889956903552 -0.0004807035131491862 --0.02367260047144851 --0.01148439108201316 --0.0004393562687057712 --0.02914766188081922 --0.01461389976664723 -0.001686048624952834 --0.003363138925213664 --0.02468060677073512 --0.002926810475046166 --0.003614934843680914 --0.02106436685202944 -0.01293251507906198 --0.005788114416657888 -0.007664502480287444 -0.006752197222882973 -0.01188513116288301 --0.00390192259270895 -0.02870095252493179 
-0.002242517502101011 --0.0008154859464253555 -0.006271615538385315 --0.01364760442718507 --0.01422010075670395 -0.0001550506524105195 --0.01475633963484422 -0.0008581519030434652 --0.01655298564424954 --0.01291285653545685 --0.006600723488608474 -0.0006973498150875454 -0.001734889691953508 --0.02211454170288848 --0.007935948464175037 --0.009471362064735549 --0.00687801906152408 --0.002653541828557804 --0.02376741796206714 --0.001105150099200283 -0.005154064259733975 --0.0211389747315035 -0.01286504133121527 -0.006289622872198105 --0.002084011605783891 -0.02556985365021749 -0.01709252360864669 -0.00899275825243172 --0.009646607879679289 --0.021292184559867 --0.01125586041827413 --0.003274819727009699 --0.007649013867020772 --0.01888188905379376 -0.0002759683653543373 --0.008804193348626088 --0.01622478875444812 --0.01369160134034805 --0.01467412908456944 -0.009722947259323964 --0.02001782820039825 --0.003887019984632322 --0.04530632592307829 --0.007589493706426229 --0.004452622527134434 --0.01366356670992284 --0.0002516596596313096 --0.007881804552822187 --0.02320950635931417 --0.03198442479497952 -0.005096871985900889 -0.02774636879513193 --0.01742563267868617 -0.008826341668986208 -0.002801899089232056 --0.0121109875560514 -0.01281107657366471 -0.02722660206699005 --0.0002022642756702222 --0.003287871939784436 -0.003197845880977589 -0.007504731957759122 --0.01436268216965977 --0.01570014644383539 --0.00531481962300456 -0.007394798354655848 -0.007660398940911749 --0.002518748256518284 -0.002158094551403613 -0.001927191062784752 -0.01587267449450228 --0.02147868260069313 --0.01182043690984726 --0.000755724298548532 -0.01129132224356749 --0.007111148048983773 -0.0001758388475596971 -0.01152460459957085 --0.0006074430644429766 --0.006179619010560642 --0.01284124050767895 -0.01989345910197729 -0.01216053407628957 --0.008723770548105952 -0.003302631478968715 --0.005851925788608017 -0.004012092778994189 --0.03144821740774612 -0.01260273039001092 --0.0018103057931378 
--0.003654124763568644 -0.02204958846307291 --0.005232378583426781 -0.01578578188444911 --0.001609639660602506 -0.005581637100705215 -0.01075980082732734 -0.004830613046387652 -0.002698585502203707 -0.0007461177046031344 --0.001466751889450543 -0.006153517336300792 -0.01329167176568332 -0.01450319940031771 -0.009234553114465877 -0.002810021747574173 --0.01697727141816338 -0.01123665760246998 -0.002124541022159736 -0.01390262972590621 --0.008649847934861591 -0.006162335713658087 --0.01941795756677263 --0.00580267354868882 -0.008216971894694714 -0.00442225946396887 -0.00133411786699842 -0.009998450828092406 -0.003232814125072982 --0.01004382385357007 --0.004005857570312589 --0.02379997202040748 -0.02119102728938049 -0.01622496974620829 -0.001558310490635856 -0.02363735930049632 --0.01517977909569882 -0.01442111699139319 -0.01054632306282705 --0.000900653503022452 --0.004289166731162807 --0.004706816699441552 --0.02273224442566374 --0.01347603156775597 --0.009755575988894637 --0.001127324985399928 --0.01684842293377311 --0.01588462186460805 --0.004123798643170593 --0.0186785359848796 --0.01132920220753525 --0.007452140775776656 -0.01731083234666636 -0.008636518802689895 --0.02666957377180495 --0.002043238853550058 -0.001232595802033744 -0.003183197424533352 --0.01780330883560586 --0.006888540169121596 -0.003640440225692836 -0.01249085235475565 -0.003573853789403236 -0.007649679457840198 -0.01694682211513468 --0.001076736228272598 -0.006806709232433727 --0.02516933522587362 -0.02571300823666357 -0.006469843965520342 --0.005217354467404563 --0.0007524223755534171 -0.01914259072937686 -0.0001635210972976692 --0.01886973417647719 --0.03283691601227646 --0.01021106291512091 --0.01039358306730022 --0.003844912336835711 -0.01505341621310096 --0.003733100581145783 --0.02908710666976869 --0.02079821849208126 --0.01128264392467088 -0.001484439146734521 -0.005376665337948546 --0.004552458380303557 -0.02505142806555408 --0.01791762140020132 --0.02816102674309326 
-0.004765186734834283 --0.02377897226450733 -0.01124661755027366 --0.02291496848647871 --0.006378570573125721 --0.004212955056218151 --0.005172464100505938 -0.01951956329371419 -0.001067032040447101 --0.0212337355673045 -0.0104500378355902 --0.002750313408159631 -0.01838138546231393 -0.01103588447240928 -0.00294278862716998 --0.001496456938573537 --0.01235825304694326 --0.004784973133995887 --0.0145304553450953 --0.020140106162011 --0.00649299751854693 --0.0152953012393497 -0.004514143495064866 -0.0109016072304059 --0.03960501905355521 -0.01371169420850173 --0.001320239186767634 --0.01484213184204995 -0.0327029092061576 --0.0141335010090685 --0.0210443609576398 -0.007665197539278155 --0.01264553457679247 -0.02006843789864211 -0.005441667170899345 --0.01744461041429433 -1.212419434708521e-05 --0.01368567040520507 --0.009016701264038045 --0.003186756251476529 --0.02006309634706837 -0.007404114542599849 -0.002352436630585836 --0.0005338109721588705 -0.01167361284963523 -0.0421586983484245 -0.01003739211331729 --0.007773518035069656 --0.01152677320402499 --0.006671082036985199 --0.01759431528179283 -0.01360307666384341 --0.0140734535058952 -0.014236486843532 -0.009841414804657682 --0.003375232676774457 --0.01019207015367098 --0.005440720867744793 -0.02433834590656121 -0.00990296526148814 --0.01271482717545408 -0.005362931905873316 -0.01261851604816073 --0.009574888468903956 --0.01769048824441234 -0.01336024193461339 -0.004748150017263107 -0.01330626206249999 -0.002034255390311251 -0.004672331522973912 --0.02156332159651577 --0.0153669229298173 -0.01527014545682585 -0.02363608432652282 -0.001384455399370721 -0.0240363662792935 -0.02552393067600009 -0.02709365475569762 -0.02132684391082558 -0.004483419169985984 --0.01649861493134095 --0.0092342473152426 --0.01319064630755653 --0.003643793071693718 --0.0197475437691385 --0.003994801452147038 --0.002990360912840994 --0.0192888727616264 --0.001339941451199551 --0.01642196751266858 -0.01420953146836242 -0.008974932029288436 
--0.02989795692915964 -0.008872894513016804 --0.006297590180550504 -0.02321590822087837 --0.01665498703114366 --0.005698120037312774 -0.007280931316962126 -0.003080904727680463 --0.00557740634035294 --0.00104246952627834 -0.005454012809754877 --0.01002492099601569 -0.01161941809066553 -0.02508609347111236 -0.005223515892492482 --0.00565139552149169 --0.01872817280569484 -0.009067802423220996 --0.006998996227259443 -0.008089984496130858 -0.02991334290998401 --0.03548166835152131 -0.01230134601422771 --0.01737064963703465 --0.02903886569650859 --0.0111393877203405 --0.02943279990176062 -0.03418693352944115 -0.003478428452752899 -0.004683991658266416 --0.008906813376685654 -0.006422704118738241 --0.009676341937862504 --0.02280210770554284 -0.005542359814264647 --0.01127976221935154 -0.02272517058639634 --0.01269946835307127 -0.01411583442236518 --0.0173203586910601 -0.003339116194443834 --0.01861329775144972 -0.02212966193136476 --0.02781329139154201 --0.0218128275638751 --0.003932053941037814 -0.008630890620953497 -0.01254089155308593 -0.006213317909976061 -0.01787667051215017 -0.02332843671447739 -0.01243436196543425 --0.01087863795629823 --0.01393953653606182 --0.009671820651194133 -5.478519393532387e-05 --0.02210024177452707 --0.001677496270470337 -0.0001319310649757755 --0.001108146910662826 --0.004201542780059761 --0.01429468455378747 -0.001875512661577212 --0.01976629591679434 -0.007199624538408438 --0.005513546013732728 -0.009493122547765961 --0.02931475960931559 --0.006557188016138213 --0.01624226463318285 -0.02012401505150916 -0.002224455263804069 --0.004055509930986369 -0.01669411745556907 --0.0008404923721063164 --0.00876071204872711 -0.02300731209100981 -0.02201575999693554 --0.008442528219364627 -0.01964005993379015 -0.0245259293798195 -0.02799786667924047 -0.04034048816718881 -0.002514677989239008 -0.003631306723336363 -0.01333130541788209 -0.005180309066577605 -0.006955112522422015 --0.002695423313747646 --0.0006143999353419546 --0.01183784297265191 
--0.0221996880760404 --0.001661762667092851 -0.01620421957371512 -0.02058047856314743 -0.030121781673893 -0.01047755222995705 -0.004972500150327923 -0.006228687546233684 --1.831830180524466e-06 --0.004762067055073417 --0.001387774325787077 -0.02173331418537729 -0.01450610332824094 -0.01825236961214413 -0.01902660218284419 -0.01466500002943218 --0.01454984103556262 --0.02807121070706109 --0.008205880419726067 --0.02584023988698189 --0.005786006907455564 --0.006783572237874262 -0.018241250159699 -0.008978942917308512 -0.007576223407926604 --0.01600248188990005 --0.01105825845290509 -0.01612700074643601 -0.01125907293498918 --0.009110189134051008 -0.008700281467609835 --0.009502920889178647 -0.006117562896580875 --0.0006047785607954177 --0.02669981570177031 -0.02434079283924822 --0.005506521563884174 -0.02686315485546403 --0.0152260676896741 -0.006281130847753235 --0.01525627770873395 -0.002957363135672856 -0.008850973430756822 -0.029219269716664 --0.009433200414891815 -0.008761774574679807 --0.006912381117449397 --0.007585344647579727 -0.002493671630715789 --0.0008302299480667347 --0.01227293371713308 -0.02197281511938133 -0.008025919878023631 -0.02317062505097985 -0.01060004539960217 -0.02509688296574898 -0.01199354119820134 --0.007935417009972547 --0.01199306432812687 --0.00845302805208145 --0.0193208375784711 --0.01248185619191264 --0.0007155242533027175 -0.0007975345939231534 --0.02153598688329926 -0.002616140522354241 --0.002535882972465923 --0.00493178622533052 -0.01009581914072191 --0.01524790727401618 -0.02019304186943155 -0.00185997119107519 -0.03316813552620992 -0.01457439505917512 -0.004702630442664113 --0.01091187629139023 --0.03084893234114522 --0.009449966937694232 -0.008612008022292332 --0.01818182241816406 --0.000235980792110004 --0.005215899153847526 --0.003967029025564525 --0.009299318893942161 --0.002992213855126377 --0.008762814327358856 --0.01266358097640896 -0.01790981181088667 --0.02301842801614264 -0.00282181826052129 -0.007767915818340168 
--0.002783168328430195 -0.007593271724295097 --0.007063224755874797 --0.007070914542405755 --0.01889520980484604 --0.0135606436224545 --0.006020044478371998 -0.002016663023160911 --0.0005593598848843508 -0.01715254605549726 -0.007520546776152653 --0.01048236268201505 -0.01662582819107709 --0.001240268176235298 -0.01369983726345297 --0.007764922224304761 -0.004756785630950981 --0.009559163166284973 --0.008521872296596622 --0.002001679010345239 -0.03532387456455704 -0.002980804622552097 --0.02234976350513048 -0.0007075110002783091 -0.0004313369817162948 -0.007654225103055262 -0.002241243007702808 --0.01120319555780888 -0.01644861239972022 --0.02280169648725505 --0.0004781473630653622 --0.01939394294887693 -0.02122874721007592 -0.009466348344938489 -0.01784227081438027 -0.006808143404756192 -0.003071810821036979 -0.0126904977064881 -0.0004121479581607766 -0.01098106728195575 -0.003284240020346807 --0.003065731711405691 -0.001013642973547149 --0.002350337867709835 --0.01158058981202177 -0.006006171849806169 -0.008467614363005631 --0.005834008160083576 -0.003572628868997989 -0.01067747594807222 -0.007703974237387842 -0.008326276449521991 --0.02139878594709162 -0.005913546278723272 --0.01015850345672509 --0.01228838108842892 -0.003964836139591737 --0.01131810730877458 -0.003350255335217148 -0.01378071730595013 -0.01582855502367351 -0.002270173149936549 --0.02753099762346006 --0.05458642059081979 -0.02096032105811956 --0.001335484491201703 -0.01423010597891592 --0.02489862457580549 -0.01558284862868952 -0.02820669000635973 -0.01158249832417735 --0.01860121434540492 --0.0006092550771201546 --0.01747960795757261 --0.0005409875725104656 -0.0136153622100675 --0.02373777414829764 --0.0008395239705694436 -0.00385574667561625 --0.008115589329708314 -0.001348610617313697 -0.001566003639002294 -0.01011761843041649 --0.0006608921203804334 --0.002575911857168541 -0.01815302480945557 --0.02072552125018492 --0.0262641385888572 --0.00442329267669539 -0.003623058058477043 
-0.01736958407660402 -0.01185339056892922 -0.01491089713277747 -0.003033433952060501 --0.03566189430730117 --0.03240925406491323 --0.0239731766402716 --0.01742046142700509 --0.01467664260449347 -0.0008938419992220022 -0.01209376378045557 -0.02441103157422871 --0.01066050878697695 -0.006635337459409193 -0.001579031591803346 -0.0007379584919571555 -0.01442697709865477 --0.0009201054111061487 -0.006343522036326627 --0.008046016315949046 --0.006297029607812271 --0.0172546954889419 -0.006460912369517464 --0.002329618393333419 --0.007863998213912805 --0.02000387018598969 --0.01260559724438364 -0.01921327072230643 --0.01463553140805201 -0.002326277422815354 --0.02020158891088118 -0.01269230619097776 -0.02476304290947576 --0.00517040377964231 --0.002465953442932902 -0.001409118310045361 -0.01324025677522835 --0.03281199315770576 --0.0003423238073269405 --0.001246177163486365 --0.02748829694390831 --0.03015794723933947 --0.01924801816383726 -0.01222660916097782 --0.02055139124712056 --0.0008745854164056718 --0.005685244557756954 -0.003017674225079332 --0.01568443385868898 -0.01706211403336673 -0.01712415562152072 -0.01905680635399063 --0.002342804049826326 -0.002391042279268207 --0.01842818413682125 --0.02642391961824708 --0.001360081998066609 --0.01204509975338573 --0.01797442524509399 -0.00650415321361745 --0.008096629155731604 -0.001851714696659468 -0.007486337247737841 -0.01441049324431237 -0.00387699768868964 -0.006844537297344711 -0.02736233468481648 -0.005215242408168589 -0.001455948881156338 -0.01745843090745991 -0.000599199008415539 --0.02575695642395088 -0.009299947670920622 -0.003162467775286378 -0.02242416348286005 --0.001568095452381308 -0.0002971744930163637 --0.001554683234122159 -0.01059006605789259 --0.006310478860653798 --0.00161541665782159 --0.03068336323756268 --0.02630841528345237 -0.0126561169042398 -0.006227509667878234 --0.02257396240385347 --0.002011813484823319 -0.01180103911245726 -0.01581621539440026 --0.01823070604310181 --0.00362716938467956 
-0.007187084995237632 --0.005141851659345853 -0.03045397549861853 --0.006370914531422539 -0.00143230184826514 -0.01771523209263294 -0.001877022396101989 -0.01812318513711229 --0.001567799541643825 --0.009594502271964017 --0.01065181315969197 --0.009723825357177508 -0.0068462349771462 -0.002674864853343341 -0.01675687399552197 --0.002999728454018915 -0.0003320632504074073 -0.0089567877274079 -0.006995749589962106 --0.01751345653549044 --0.0122589339318042 --0.02114035527388586 --0.0234242870460309 -0.005067914168254901 --0.002015454890077364 -0.001570858608644338 --0.01440096517550899 --0.0002569490148673067 -0.002590762395233006 -0.003113197084896254 -0.008888832357408378 --0.007309055974670796 -0.008546472979519104 --0.007487408644024259 -0.01468467143550598 -0.02721778477071241 --0.003169289676179141 --0.009660119064975006 -0.005542655758731487 -0.003041099659027408 --0.01387152297798735 -0.01154306930722966 --0.01624432639396148 --0.005179414522033523 --0.01296095317884778 -0.001035686504454403 --0.01458542986173656 --0.005672881234826287 -0.005871984424042715 -0.01222314070247131 -0.01065564032360763 -0.002873094970990704 --0.003544858241490187 --0.01986275069875775 -0.01770282214844496 -0.01518346418469071 -0.003598029123158551 --0.009484972074418648 --0.01980104566195168 --0.01864140429493547 --0.006366600431551619 --0.003590152107738395 --0.01108666093078196 --0.02363230447683965 --0.02810397154050185 -0.04358161260104806 -0.02965577143514629 -0.01631307634090015 -0.004265813328068092 --0.008949948667113976 --0.01540487984287483 --0.02324373613255554 --0.01294605666121041 --0.004757469994201534 --0.008748337048483219 --0.009029178788397516 --0.01940666705744033 -0.007194241069759199 -0.01336974079073218 --0.0173986693048557 --0.001462381680950147 -0.02418683813024882 -0.01007430439882672 -0.004293174649509116 -3.279015482495487e-05 -0.006536096835981952 -0.01661222391274818 --0.01218046237060773 --0.0008894670166556978 -0.02865334347744229 
--0.002510828997796783 --0.02014045359714705 -0.0105902378128561 -0.006907431795473882 --0.001386094893788052 -0.0107862469842047 -0.02556772409882847 --0.01940556284588086 -0.01055544497566266 -0.01529272022451759 -0.01632223204583959 --0.003723701382305042 -0.005292373282768331 -0.01074647097007686 --0.001390334696726014 --0.005000550204930666 --0.0005861474952635248 -0.02356111316306296 -0.004516693101181987 -0.01189796409315707 --0.02393858303140722 --0.01005755066991063 --0.01592947074190488 --0.009134093320879519 -0.00399485919732284 --0.02000856405988809 --0.01631490561543362 --0.01711089392143549 -0.007982513069088572 --0.005608303054401505 --0.001226671687094277 -0.02425915836168496 -0.02189034170776282 -0.01369454436504142 -0.0002951253392119872 --0.006798025116779333 -0.007069106856879686 -0.003445530439520182 -0.02315414291550566 --0.03782637388154714 -0.02758922392078583 --0.004608301044793327 --0.01393077428919685 --0.02632151122798797 --0.01337028202518651 --0.01237240072137181 -0.007479449823903507 --0.01702435150545808 --0.03433728306255194 -0.002677277160745685 --0.00775994661867491 -0.007685157309950867 --0.01448886736735199 -0.03646831627829175 -0.02412834577327096 --0.00186146673470772 --0.01432617387416049 -0.004667873461486125 --0.01588261038629788 -0.01245513660661845 -0.01007868826382511 --0.005797379878496465 -0.007947415650676722 --0.01753389710271067 --0.0220967068975579 -0.01538763476728527 -0.009218232705024164 -0.0008123266863471926 --0.02827824033239022 --0.008422050289485233 -0.008962310576136234 -0.01472770980815467 --0.03380801125873714 --0.009181217601688781 --0.007142760118595578 --0.01070595596621364 --0.03629006350402003 -0.005854884624560524 --0.003434943149016071 --0.02495769520534488 -0.01806811216454718 --0.04743243101334736 -0.02633535657875922 --0.01054902999928291 -0.00187995928445413 --0.01367431870181536 -0.005509459995039222 --0.002760966171878608 -0.03524267084662847 -0.0001756655692120215 --0.01530821546171491 
--0.002619454016890088 --0.01545706825884323 -0.00244194615771597 --0.02878119278148453 -0.0005359223490676475 -0.002632718091934924 --0.004484495000386859 --0.007731445899600694 -0.01714452039528974 -0.01311772616138143 --0.01239191064266938 --0.02193009895020458 -0.006765129257295823 --0.008511224096674316 -0.01258806145873188 -2.953615389715363e-05 -0.005391084544388754 --0.01324152141232201 --0.00851487749723602 -0.0002372532523585972 -0.01030696936019442 -0.001968084975612464 --0.009312935994278413 -0.011102823379852 -0.02196030089626533 --0.008711335438126414 -0.01209803597862764 -0.01430330899640266 -0.02233083074595976 -0.001386279969065225 --0.001458734387335184 -0.0133535942670621 --0.007584654867509442 -0.01773134799349694 -0.02446612947189461 -0.01506638421894652 --0.007371600543009406 --0.01342256893681092 -0.01985098440127005 --0.006546874618847636 --0.005446174244860969 --0.006875102331625598 -0.003389365849001732 -0.002199948611427924 -0.01459801647749705 -0.01810308200644735 -0.01117981661778893 --0.01268673001858681 --0.008473161743006311 --0.01374317576208295 -0.00163481134757355 -0.005278705326246254 -0.008968380644583222 --0.0009769150019168244 --0.007505584793028684 --0.01117219315462986 -0.01541792336983751 -0.02710125843852925 -0.02269429583280697 --0.008660284125027055 -0.00483634511207379 -0.008010010174108883 -0.004919172184792992 --0.005033643658121242 --0.009361103140145611 -0.01039952920978106 -0.0168728134498484 --0.002210214303858658 -0.001644286017232677 -0.0016473883694918 --0.02544694756751488 -0.0002627462135695244 --0.01250130016965456 --0.01509447931709746 --0.002253091948850594 -0.002679284372520129 --0.02236065939064861 --0.006587697464090558 -0.01956082453616368 -0.003462133301381847 --0.02279282515307312 -0.0008983647168733348 --0.00214573760881331 -0.01174230450208774 --0.005147334127329778 -0.01145394383815482 --0.0008223154777248628 --0.002003896027831352 --0.00429962105385158 --0.003645408838351193 -0.01003791709976675 
-0.003755589830439826 --0.009057414797964661 -0.00108378385016625 --0.001485256100679439 -0.004725659052103081 --0.01725916348334064 --0.01465034209488935 -0.002365079669621194 --0.008163719172141724 --0.001075605502041322 --0.01354100851725906 --0.02337594152975484 --0.01339871784440387 --0.003773593071622333 --0.01505060873464348 --0.003026768159962125 --0.01258011873354536 --0.02214482680948268 --0.01574664087772463 -0.03217252443288242 --0.01298603111777673 --0.005925822839875057 --0.001644087676671262 --0.01903739968696957 -0.02133855878579407 --0.004167567535977313 --0.01629138495582334 --0.001457283029492609 --0.03232177514073072 --0.007571611519078271 --0.009901430500913444 --0.01316175359418615 --0.0008684598040789766 --0.005883452455566981 --0.006442656982238817 --0.01622374076624674 --0.004937040691055238 --0.006790018834121446 -0.005477333952238251 --0.01422204337088316 --0.0105162417604342 --0.01730872575917793 --0.001177343139364629 --0.01643161994344187 -0.02232543151929379 -0.004856239290661274 --0.004775805247646656 --0.01241966970821021 -0.001683854318048276 -0.01341142940483655 -0.01608338676543095 --0.01598837972712713 -0.006783714825382198 --0.01528601319027678 -0.0243970769979488 -0.03443117578194481 --0.009821883161118695 --0.005093588488883336 --0.008870239263593394 --0.006677370027695895 -0.009707389695374172 --0.01299853433879688 --0.01176897173115015 -0.01711791015402429 -0.005432066804389119 -0.0005383346873025084 -0.01867950508493068 -0.001138793808832343 --0.005788785401621202 -0.01779241122026541 --0.00426308758147475 -0.01206538862516047 -0.006870106993930029 --0.01377999600842015 --0.003745437525637571 -0.007999014320935204 -0.01050614247376416 -0.01843881599892026 --0.01717895313996071 -0.006596635273447197 -0.003579198577964199 --0.02543225406576396 --0.001557530080308451 -0.01674135968000739 -0.002447897979049682 -0.007542423975165838 --0.01611164034333274 -0.005487527810549575 --0.001741674390096337 -0.01203154859980562 
-0.01594675409258664 -0.04287854779555862 --0.01031677214531021 -0.01525435020605757 --0.004472114677029778 --0.04314596815680957 --0.01878009034727854 -0.00842015633794331 --0.008560327238365216 -0.002417888044225988 -0.003141360129199047 --0.01327622919922524 -0.00240739148345492 -0.02110023806144208 -0.003981906198952117 --0.003498445488061459 -0.02833365247793092 --0.001051424494949802 --0.02430548692214859 -0.02727205402480347 --0.01205563256421332 --0.01131241017155675 -0.001442875242569484 -0.008945422549528429 -0.009785759157804294 --0.001988460840637024 -0.01321148050316419 -0.01086849952997028 -0.01556297420780715 -0.02806224765511247 -0.003984350793121086 --0.01035712191752304 --0.006517141308520551 --0.005397465057372651 --0.00713524924877088 -0.003705964327725333 -0.005001421080033161 --0.01339723674419992 --0.01515586829955307 --0.005718337347237418 -0.01100869280278039 --0.01053084979813007 -0.0004294393407962753 --0.01299248100449674 --0.02709292963770401 -0.02548358539644635 --0.02968152304922021 --0.0074779339971157 --0.004378392225775813 --0.00230931464809077 --0.02377674929568305 --0.007371895750751153 -0.01116199168577324 --0.006658593477453835 --0.02530835134582443 -0.02460182261648258 --0.02714180321875245 -0.01142929171019894 -0.02958991753091236 --0.003056752253358676 -0.03056121573801894 -0.009450681122954743 --0.003845920806718005 --0.01976281598517505 --0.01877940179195196 --0.009351956540127023 -0.00161613606744749 --0.01075110375017815 --0.002365227170694966 -0.006997163081335574 -0.02220070813143225 -0.003159546728129675 --0.02534661535995247 --0.005655320880152411 -0.001804959452269903 --0.03184139848659729 -0.01764925509598555 -0.003985458763753926 -0.009448410760337179 --0.01364018830925454 --0.001730736116933022 --0.01392567461441758 --0.009002500126537333 --0.007791264554680338 --0.002270507235609968 -0.004737090658872144 -0.01138363638236161 -0.00063287890843606 -0.008233619457178074 --0.02122013366262418 -0.01035269895196205 
--0.003612956386870847 -0.005872440514353084 -0.02214910001406518 --0.004505210134343432 -0.007176145536187845 -0.03207861596905075 -0.03260694570918068 --0.02295940899589659 --0.0008044939896385729 --0.004902899406882458 --0.001803609086011628 --0.02547534369046859 -0.008339031455697046 -0.02896388500687931 --0.003480909616696143 --0.005124301767374085 -0.02158182571432071 --0.006246488857733734 --0.01051900619283845 -0.005503990374951596 --0.004782502640102333 --0.01363926271891486 -0.00216777907938039 -0.04326847417740708 --0.004524733702412898 --0.01456007053785066 --0.002449332959988761 --0.004036706496715323 --0.002183515300142529 -0.04280082678802109 -0.004082295245692164 -0.001265718213655355 -0.002079810562432237 -0.02691187644898443 -0.01944588618029608 -0.002007001024370076 --0.01697505232735191 --0.01989281786023239 --0.01109682663608693 --0.008696623144786482 --0.002413476953728136 --0.01258725869977341 --0.0007869444007620255 --0.02136730067890838 --0.0005813431717016943 --0.001041637439004884 -0.003695688302787591 -0.003799527982220035 -0.005864772640687918 --0.01250458247284068 -0.000882340298624268 -0.001590549631966471 --0.001807562204734891 --0.00242225688851789 -0.0161680297029911 --0.0003227593881921463 -0.009027993200080435 -0.002663507334251682 -0.01013530935833316 --0.001757586973210711 --0.005071872074905203 -0.009646760942940284 --0.005844214226176702 -0.02072028883714406 -0.004387147163223604 --0.001035153166440642 -0.009895163793190817 --0.01542652712175189 --0.00573718883872852 -0.003910540166469667 --0.01076148726055582 --0.005529488557247855 --0.005426325425868439 -0.00937907442924997 -0.002067897034571891 --0.004744997394110944 -0.002482888192322983 -0.01192812133997755 -0.007083367042630865 --0.02237825349496664 --0.01508654185335042 --0.004005457554853573 --0.02358368343891624 -0.01108524725337704 -0.006116881774818506 --0.009398793642311838 --0.01018379395836166 --0.000678896746564428 --0.001592775653576762 --0.01417088081526732 
--0.01426928793113908 --0.003338173046620963 --0.01061186951088806 -0.01329776926039383 -0.01745906839500359 --0.01404291805352927 -0.01169437928079268 -0.001476824819444713 -0.03492567432903454 --0.008694675538107566 --0.005812747141314726 -0.006613300432141644 -0.01955078728853462 -0.03943271650417808 --0.02194225877669898 --0.01379058451679611 --0.002262888437192467 -0.01233424019896446 --0.01110852723794907 -0.008627369668247765 -0.0007782601368858096 --0.008054627048642932 --0.01774333590391123 --0.02643061485626071 -0.02592073180573467 --0.008426225971986683 --0.01297293591143285 -0.0004450720069415193 --0.008466169550768312 --0.0008731601680611137 --0.01938335746283689 -0.02876193742223214 --0.004524913700150957 --0.007260165219141731 --0.003262254581556876 --0.006295607198167523 --0.02871747077627873 --0.01710499229869638 --0.02109506399447368 --0.03614772513035996 --0.02329646209318838 -0.01153965983578244 --0.00158295408790223 -0.01470146677673169 -0.00794657514368333 --0.003101550423983673 -0.01651225477300172 -0.002014460695806933 --0.02098140841733181 --0.01973511613420898 -0.01476854480204782 --0.004416668526445178 --0.01543455572510525 -0.004693658975196252 -0.02717703528680578 -0.005441847157597294 -0.008941613860259212 --0.0001016496505559675 -0.02404397915330471 --0.01462995627876619 -0.01562843130123212 -0.006711945995703162 -0.01083955876779178 --0.002344214847598842 --0.01053824849008605 -0.01672011915740813 --0.007123418455373212 --0.01050276814996309 -0.01181375034352936 --0.003692388209212168 --0.006197181110515579 --0.004990649119706141 -0.02763406435365599 -0.008232879264365207 --7.635851920595113e-05 --0.01647957879344179 -0.004970544262147969 -0.02077511132500649 -0.02080228248128793 --0.02063231699555177 -0.03271939607800944 -0.01852565967928637 --0.01205664064847238 --0.002116742984091494 -0.01648815822592242 -0.006502062292723928 -0.004528820298251461 --0.02329025006564391 --0.002621260927044112 -0.008036775972731546 
-0.003957689350239761 --0.01223085875126413 --0.004807480994937861 --0.00937288435718546 -0.009426976729854139 --0.004073623135211561 -0.01394869170258077 --0.003032907588016949 -0.01419920849776457 -0.003981445505857536 -0.03149207149963506 -0.02697533682541672 -0.0196505493889447 -0.003886927523359138 --0.0226550509588688 --0.001765852859099732 -0.02130096097811054 --0.01301965201122001 -0.006647781978185874 --0.004788550144350071 --0.02032918057752187 --0.02555113320794481 --0.006146132438767107 --0.03168984208800225 -0.01050926998081637 --0.01585244611803002 --0.01209930568144516 --0.01840396673303841 --0.01999030789127674 --0.007683178144557741 -0.005653841389420187 -0.00236062896936699 -0.005920526889319284 --0.004126061987838854 -0.008224602789655505 -0.003248361158424754 -0.009314744093099998 -0.02531155387347262 -0.001531360989604527 -0.004463419424068961 -0.01416045722933502 --0.006762527169707457 -0.008424776039206612 --0.01683109910378435 -0.03131674407955989 --0.0130605616982668 -0.009390660446481751 --0.003684966548829124 --0.03608880972776977 --0.03041779807728703 --0.0260813638574111 --0.01675681278014084 -0.02551667361835429 --3.307672422964662e-05 -0.04072899607500657 --0.004531213531825409 --0.003722029193317969 -0.0154853732717035 --0.009592785684314202 --0.006183420058898514 --0.01329990856859712 -0.01466750377679854 -0.02399693781801437 --0.0239621108032971 --0.004792716694845249 -0.01718289590602544 --0.01796587828695473 -0.00708122170691673 -0.006075358410119343 --0.01460363626244955 --0.02748912592714141 --0.03287920546697236 --0.002065223209649254 --0.004585338533037592 -0.01561895620428921 --0.01666166601362881 --0.008200336685154779 --0.01462885583440839 --0.01096387838293024 --0.004602835535997034 --0.001689067646043055 -0.009666802210291965 -0.03131502598052375 --0.02044321346140698 --0.001179923980752694 --0.01003304782932002 -0.03387629132001316 --0.009114524827415087 -0.02215591348240966 --0.003821432102033054 --0.00943066274026 
-0.001087823722885975 -0.02175491923844605 -0.02227818411209811 -0.006847173497328751 -0.01838326704215108 --0.009626729277977619 --0.008736830558426214 --0.01289355891377251 -0.02107202375374004 -0.003024271435519285 --0.004782145792186998 --0.01090849840996144 -0.003576632097989232 --0.03271904084894355 --0.0313139365512832 -0.002646443621973686 --0.01755448948010632 -0.03310408083293312 --0.01471532685340964 --0.01941310925610028 --0.003047302499689319 -0.01367455474940252 --0.002630247304288531 --0.01889738361337345 -0.01092461422459755 -0.01378318728680117 --0.00653632733768279 -0.006088416912183969 -0.01632965665578229 -0.0009866326553690226 --0.02332315402704107 -0.01864112773492806 -0.007866838427917895 --0.003652488995947925 --0.00880963610108703 --0.00848970771541353 --0.003937148817801966 --0.0004248936383170823 -0.009166496794338596 -0.005525471080202236 -0.01163784259895482 -0.0003348339349303169 -0.007355973953225249 -0.0003971880607078939 --0.001721615327122501 --0.007567700675988665 --0.004511033218695818 --0.002928729319244283 -0.00236767622222564 --0.0009532957669714733 --0.002138588304012223 --0.003093597316895234 -0.003311886601406318 --0.001982453113180975 --0.000329968500908672 -0.0091042366072486 --0.003635810031537688 -0.003004991136891852 --0.001994148201600247 -0.01859402779087027 -0.01594096662220327 -0.006878030954645049 -0.004852622790295839 -0.01994901004475198 --0.01818286534983383 --0.006689212813708048 --0.002044798789832125 -0.03396037549765166 --0.000602469054994144 -0.003228718150871455 -0.005952119832391481 -0.006888000529981863 -0.00526410760350293 --0.006629374501131399 -0.01236076768832963 -0.00702823308362652 --0.00874362741435118 --0.005358545752747608 -0.03076653403219983 -0.0002369909265770311 -0.02838557326916515 --0.006665819007913285 -0.006317726449988297 -0.01960441897107203 -0.004993839926528477 --0.002889967825507399 -0.02230006788489563 --0.0009233096934082925 --0.01119466785607565 -0.00280794739247922 
--0.01902413311459814 --0.02527669742720819 -0.002432645124033397 --0.01636941269283271 -0.0257974572286534 -0.01518472597594466 -0.01070374598739892 --0.009891866339335222 -0.04255417860420893 -0.02599083373080074 --0.0004157584544795538 --0.0128130626293219 -0.007653001052072896 --0.0150933395864396 -0.007619158139307343 --0.01913140679717263 -0.0009898521820700466 --0.008415772150386566 -0.004416067517567056 --0.008669761904759293 -0.023336874350295 -0.00575133334216248 -0.0310844029079565 --0.006442081159825758 --0.03252429226292493 -0.01055111653330694 --0.01180807811106285 -0.006522368417279286 -0.01511683051685856 --0.02091707201178297 --0.005873488721834425 -0.008505664137965467 --0.01946698883408938 --0.01176387850802354 --0.004836880973877548 --0.005845131634876481 -0.00165331302136902 --0.01412895836607164 --0.002181823201348984 -0.01145087120642928 --0.002202471684578441 -0.009869959778725517 --0.01769314022173792 -0.01251498185552966 -0.004251982453711679 --0.02344066626918898 -0.01990935720259963 -0.01467414885566977 -0.007499117997281543 -0.01115831595653478 -0.01125447223768158 --0.002132607272848243 --0.002151235416793215 --0.001386119560109006 --0.00435548160591074 -0.004902627347076239 -0.01662244889469332 -0.0182680604713647 --0.02257500506198227 -0.005955829773188407 --0.000448391655007923 --0.0246226618708224 -0.02595620539088966 -0.002626608588674199 --0.02991068494682555 --0.02110726292343404 --0.00447906804660547 --0.01446562822060928 --0.004355788986039104 --0.0161426678213275 --0.02482698838603063 --0.01531017461922219 -0.01234987262140467 --0.001575369163503677 --0.01393896567663532 -0.01579347583076785 --0.01195873615966028 -0.005356924621168618 -0.001391912030398659 --0.0004860039571481506 --0.002503961399176677 --0.002582375058873727 -0.0004347066589999512 --0.01575358655589022 -0.00837336221465078 -0.008463652655045579 --0.001022982430272171 -0.00520501696632679 --0.002986895043934904 -0.002267202740410702 --0.000432410593425114 
-0.006481056569952309 --0.04800938712283203 -0.01219744499076798 --0.02286896996470082 --0.01295446358153783 --0.00544114002278997 --0.0001237358077866703 -0.002240164344840785 --0.0101150514317291 --0.0137522725123128 -0.004994765815622544 --0.01677559357707318 --0.01760740783336645 --0.003318770647864493 -0.006712631032641739 --0.004255388525951637 --0.004520004778856928 --0.02519599531229698 -0.00780982026567117 --0.01910927649965499 -0.004117824410622747 -0.008841719538471255 -0.00726362410036519 --0.004976155618507604 --0.03155080029538329 -0.009227914946593412 --0.01856140545211758 --0.02448406066441343 --0.004746170577440117 --0.001214472009549104 -0.0535408578276222 --0.008528038111435741 --0.03816144774023091 --0.01649557904580179 --0.02873313963071926 -0.01500359724942046 --0.01411336024963264 --0.005592538642348038 --0.009835830861223027 -0.03528496331221245 -0.02976552324709967 -0.01156566379187163 --0.0111850286336193 -0.001463332609201945 --0.009075223628426341 -0.0009346363071058567 --0.01234165908453529 -0.002968012104723452 --0.005533038806710751 --0.02688319393112949 --0.001325419476030691 -0.0330680224329899 -0.02629958558733903 --0.008662718845278909 --0.003625173293204199 -0.01425165352679195 -0.001228533672495057 --0.02565148184069205 --0.007080142837340462 -0.0004223911227202333 --0.003211110733183318 -0.008968612154802437 --0.01479846927038774 --0.008247930482367419 -0.002771963800329455 -0.01534408484507895 --0.01710026840391086 -0.01171027959991574 -0.02402212406967619 -0.00309137136966967 --0.002117538536186037 --0.00911457082057051 --0.009459703268057902 --0.00152315496863822 -0.01888945130102366 --0.01160647459368317 --0.01100378495199468 --0.02095568323355821 -0.006988652683639423 --0.01162141982520174 --0.01173569026504158 --0.007727105389662799 --0.008184212682712695 --0.0004380835180553959 -0.01326999051884771 -0.01728955878481326 --0.01452513473934265 --0.004537318772464566 -0.03443853622286151 --0.008004819950932249 
-0.002580874765181795 --0.01721603326898202 --0.005988701343349047 -0.01356391900301972 --0.01899990136776278 --0.004660266012002088 --0.0001799670267742414 --0.008337724791892032 --0.02093198881690757 -0.00497643027829724 --0.004655166729874216 -0.008374116030766564 -0.01708520938317977 -0.00127074445223101 --0.007500741631608927 -0.01104679027628023 --0.0103847644255375 --0.004404265213231994 --0.005093248577204034 -0.01522986715527878 --0.001245939435174563 -0.000137615131160662 --0.000901876929547232 -0.01132340024071861 --0.004626502286594386 -0.01099961713290055 -0.01042898780140086 -0.01447321282904225 -0.01318539289919886 --0.0007025928114086782 -0.00592733002265691 --0.005005985573309787 --0.01841661831486491 --0.01090873985961376 -0.002457154252750759 --0.006182142241504851 --0.00593633374450929 --0.008204129405485656 --0.0005513062353826733 --0.0003262053347149774 --0.001484593273448234 -0.0002399278071498377 --0.004181326911945497 -0.02086970560028008 --0.001363182231838604 -0.01285457275618442 --0.01314161227296116 -0.005123954462881402 -0.01174799808671217 -0.008254679318684049 --0.0003914701295366052 --0.01448323127632071 -0.004590350196914486 -0.007429014750571237 --0.01791216865507756 -0.0155450637293779 --0.008386293179615549 -0.01389566374229496 --0.0005751971093523717 -0.001119223160355214 -0.006774709885436895 -0.004472819097868453 --0.02334784004813516 --0.02504836807289502 --0.009107378114486461 --0.002536504695090088 --0.02249429671421361 -0.02445384844701983 --0.009281353515978785 --0.01388931651242649 -0.005935502043677677 -0.01376845454522151 -0.005748472504045479 -0.01241346713199773 -0.007028821055044452 --0.0113789323718302 --0.01712669390800247 --0.02199995653215517 --0.002026079142969788 --0.006815008606571565 -0.03019077003408841 -0.005111824232890477 -0.0001240762395036108 -0.01747625822036971 -0.01709098435014246 -0.0006495272773432004 -0.02355623116398139 --0.01006215624589642 --0.0006836519330520807 --0.00664700668111633 
-0.01335862796887182 --0.0071346556844671 --0.02733985624559693 --0.003758098950371275 -0.005455161445328613 -0.03297825037012105 --0.009420977780484646 --0.007229741488679471 --0.02141001566652911 -0.01145420506435428 --0.0224997140962453 --0.01104964908943273 -0.0027379471540166 -0.00170041668178301 --0.01090790133826591 -0.01409202623647622 --0.009751673412586452 --0.01223534940035067 -0.02305520640284937 --0.01524771581617245 --0.005324409813668551 -0.005939939252526662 --0.02414897280790787 --0.004958173675856237 -0.001229356921646565 --0.01438180073891094 -0.01628735103828915 -0.005323494255030514 --0.00390812841417758 -0.001609492620279846 --0.01996789587248918 -0.01219210112435778 --0.003871994096287004 --0.02889881872572414 -0.002482564292332902 --0.006975013135280087 --0.04634255691284854 --0.008395110552666082 -0.002831146147286676 --0.006162989723940918 --0.002421235527046069 -0.004394359287582313 -0.00433073776109897 -0.008412281327709688 -0.004734982682449173 --0.01660159032085854 -0.005488952525395874 --0.001791074044562602 --0.01946984491131826 --0.01652989235304309 --0.04273075468829583 -0.01055478124066546 --0.01166553623699939 -0.008995214246888105 --0.01706998461498297 -0.004376089678065407 --0.01543549035866707 -0.0008877688943729031 --0.01145316127346453 -0.002464920648666707 -0.001817785128128876 -0.03177326911342972 -0.00880651483187408 --0.006200743706968137 -0.01837332126698483 -0.00882725848335923 -0.004308607847147712 --0.00842877843306402 -0.02836981078349971 -0.00967868298921829 --0.006026203216130596 --0.002261321533324721 -0.004599563773545781 -0.01068700777622773 -0.008045841156592263 -0.01573259378650451 --0.005199937854318985 -0.008360536610402224 -0.002906926106021127 --0.007616712177906346 -0.001145348909207857 --0.005693724542258696 -0.003292062325396654 --0.01315248213028009 -0.02408815247506037 --0.009160904044120528 -0.01122126414768435 --0.02941265319724936 --0.01148169994412605 --0.02853228928863357 -0.000673452068606376 
--0.01169284783012576 -0.005560227317419792 -0.005210142034995962 -0.01079073695426922 -0.001901969110087884 --0.005147450736950603 -0.02222156551038382 -0.01025434227612661 -0.002524536626112206 --0.005679254439708144 --0.02572794438290521 -0.01031382728177885 --0.01470150749362793 -0.03543497475775316 -0.005689245719326667 --0.01067330551251063 -0.0009548414424256995 --0.03095938558167714 --0.01635261658949197 --0.02308912696253445 -0.004674143378091358 --0.007621953397951528 --0.01230600455385311 -0.01936752839456654 -0.04728568573305469 --0.001895846479870579 -0.006036893799525046 --0.004638500528102567 -0.002640158175632735 --0.01767818562214534 -0.02752327226573516 -0.007193348964005182 -0.00489373436986091 --0.02426392040323166 --0.006217780241019454 -0.0008083012162578878 -0.01572494836014236 -0.0006097627854191757 -0.01936893065581738 --0.01218275403279498 -0.0165999979175938 -0.005329791590545539 -0.03533524460165303 --0.02272071365749606 -0.003316187937271407 -0.03236487003305031 -0.02143594909215428 --0.01218811504635551 -0.01877557477323152 -0.0042731381318537 --0.01955734781922343 -0.002925113006907796 --0.02323024950985484 --0.01345202502011725 --0.003304058746681566 --0.008684056156435452 --0.002163862574983402 --0.01643966156834209 -0.02586055388112574 -0.01407859308697153 --0.01337850116415801 -0.02324349341892464 --0.003671188223633858 --0.0103631765367267 --0.01555510748278809 -0.002404649550303531 --0.0163750566359102 -0.001903041260215059 -0.00722207833076819 -0.007501112406203057 -0.003803629459414235 -0.005714272791965208 --0.009968392639959861 -0.03006014022281655 --0.009070210931690463 --0.00814483621414277 -0.01770472018781694 -0.01367959534308426 -0.003604521091145355 -0.001271993741419705 -0.007133286519030656 --0.01315810057552531 --0.0155367824108035 --0.02106282389481737 --0.0244814282829759 -0.002209596062493842 --0.004584221608802652 --0.009438099820647092 -0.02583005464061603 --0.009909457090528598 -0.007113654579060055 
-0.007227665055871958 --0.006484712434597325 --0.01189933289208502 -0.005590310231099386 -0.007397985380134291 -0.005407842712607397 --0.01920591786135048 --0.01171918300484283 -0.0007105742507445786 -0.006260336976728494 -0.006950419449576898 -0.002058232310930714 --0.0363614566676785 -0.00455735504751413 --0.006400843946043929 -0.01036767868636081 --0.0194957407002138 --0.007482184923385937 -0.008179379923398717 -0.01091666165169481 --0.01124579885012563 -0.006574801684584704 --0.001619829484791423 -0.003114017131408277 -0.01431117041012382 -0.03253441069271527 -0.006997404858053531 --0.01988037024675691 --0.001231390228950152 -0.001929071545877891 -0.005649390895935678 --0.02401916653708915 -0.009234650316530748 -0.01202399533808105 -0.0007777404826719666 --0.004244523168833211 -0.01542517568911507 --0.007607221462152986 -0.006415157018443484 -0.006113978962449352 -0.01767650506593157 -0.0280726424721713 -0.006039569056600746 -0.003089935519962547 --0.003097886759974776 -0.006985924919270068 --0.01964202528004993 --0.00404512804437 -0.004926470494239515 -0.002955021776779445 --0.01208045608229583 --0.0008870961228251745 -0.002872188482398446 -0.004583935690550723 --0.003858163788996736 --0.008541089535762022 -0.01232799719527246 -0.01198496114626416 --0.0376487804658333 --0.01239740283997382 --0.001438273787931985 -0.0130484721531434 -0.00392882918265642 -0.0004624613723549903 -0.00988422733329984 -0.01432758687115645 --0.01295527112561802 -0.01071001769917615 --0.01760074102594351 -0.01025089082363851 --0.02234255213784205 --0.02173085608298476 --0.00360001702717 -0.007860628749591106 --0.03032262509308284 --0.01032386846149099 -0.00864622049949451 --0.01486429206589223 --0.02464569526697027 --1.356897650041551e-05 --0.0249802268672495 -0.02073118207115261 -0.007877261053166643 -0.004734312137613272 -0.00281568530988095 --0.003626684377528272 -0.006663645507143537 --0.002417990464944067 --0.04259101424467835 --0.007744765423826181 -0.007762924695421096 
--0.02871628090518566 -0.0004086141444095896 --0.005958034132690692 --0.005117810226449309 -0.00676070352035596 --0.007756069061809423 -0.004817211425157856 --0.008848955771684316 -0.002933174450045206 -0.007958756317302619 --0.00951207819635976 --0.01412364017349722 -0.004052790938513488 --0.006021536665858019 -0.01417334775672426 --0.005440701623951991 --0.01212959036890946 --0.01123115931482615 --0.02153961841801609 --0.0007934487767917474 --0.01056882475536789 -0.003049040040623498 -0.01078974910962729 --0.02259810865999345 -0.01180398841472394 -0.01907616534136599 --0.01337347782632039 -0.01352076439601524 -0.03841108368346392 --0.04055373316825232 -0.02833531234145752 -0.01652709019531644 --0.02033993480304984 -0.003655060252318861 -0.000786771673395233 --0.008266414290187582 -0.01362906475767354 --0.007772942977004248 --0.009794519653819026 --0.01081830980746562 -0.01778198066175413 --6.337367060934039e-05 --0.0003400641738031175 --0.006781242610329956 --0.004072983842581091 --0.001164080705672999 -0.006908706997562371 --0.004993150015659275 --0.002177302292586101 --0.0069359300816773 -0.008593972743989814 -0.02775783108677 -0.008631506504358042 -0.00413099363578831 --0.004145777660538971 --0.02612339911197775 -0.02261321343630619 --0.01097753142264771 --0.006255561837063394 --0.001702905788777154 --0.01414948821366367 -0.01492285574237289 --0.003126796773281403 -0.01731028146504614 --0.01007660250033313 -0.005940126251586111 -0.02830321696088971 --0.01657982983551986 --0.03820714029272418 --0.008129723342072553 --0.0171744786367911 -0.003839906906576135 --0.01716711541643853 --0.009369421885099248 -0.01876499888644826 --0.006730907102688891 --0.02222333069040138 -0.01602411831999552 -0.007773288259687514 -0.03012405961732158 -0.0199150608714509 -0.001048116846537224 -0.004190428790468613 -0.006068646508991573 --0.006948025164089164 -0.008448157587918515 -0.0005037567595575689 -0.01941094340009932 --0.01558262823935828 -0.02441847459204936 
--0.01034157579081853 --0.01439937828896192 --0.02047233347858545 --0.01527119985291525 --0.02420322278461007 --0.003134143733260424 --0.01572790692753894 -0.0244147616010543 --0.005072830696996081 --0.01764259376087315 --0.002444796668407607 -0.02337880541947749 -0.008268183448937707 --0.0472520816506342 --0.006198419892204437 -0.01533382492127159 -0.03157494244398008 -0.02066852643023912 -0.0005212499234999194 --0.02832043108514444 --0.003024176325080968 -0.01826422943706914 -0.02325718918458554 --0.003337195695177221 -0.008751006585410372 --0.01109095382046884 --0.004918274026477905 -0.02318414903935491 --0.01577811276111911 --0.009481758579210874 --0.003271246810777286 -0.005759151212969622 --0.004913793396815016 -0.007348137944217296 --0.002091022978430097 -0.01574210259428629 --0.009943784653053926 -0.008028408046474443 -0.009954774769305598 -0.0282150308612504 -0.02308067747978576 -0.005954108039567689 -0.005619699463107813 --0.01249247942439769 -0.01290003048818869 --0.0003265508120333372 --0.01469157886755291 --0.006378406991335836 -0.008942414711110406 --0.01630974723025268 --0.01142928556562286 --0.006460002000714391 --0.01338443104625616 --0.005828552013528361 -0.01026697958827046 --0.02132481693647283 --0.007603400019158524 -0.005358107271568497 --0.01139980533626015 -0.008149538920396923 -0.0008710539203635368 -0.006156161791800552 --0.01961807017007714 --0.009556160215132187 -0.0002905488407600563 -0.002685614344771768 --0.00453110306326527 -0.001141345204497309 -0.02929899422818204 --0.000146685257744896 --0.01917343489512589 --0.01344873978246047 --0.002472150161016636 --0.004213550411267318 -0.02462283972800554 --0.01367361473040458 --0.008504043237079829 -0.01705807719727934 -0.00983585751175377 -0.01124340753305263 --0.001757970096786753 --0.0001334327687674705 -0.009646428776346975 -0.005015378952103714 -0.005265539624014826 -0.009735755869555203 -0.0026793006113741 -0.007716179608036353 -0.002667543106616402 -0.00196322262018457 
--0.01755616568027096 --0.03105504367818732 --0.01852146545840607 --0.007892178156758462 --0.006344020793244809 -0.001736492655039864 -0.0005654292706995542 --0.01171120395627112 --0.01190598522173246 --0.009074451498291242 --0.007594742078175147 --0.008955932701465347 -0.002909371466195072 --0.02191025028141997 --0.02194379277787434 -0.000192683116072489 --0.009148711567351076 --0.001862945744971001 --0.002179597969434772 --0.01273896768328057 -0.008431554601103078 -0.00106024741333531 --0.005934655764505666 --0.002196371321038518 --0.0100894212552339 -0.02700045869582356 -0.007334900402754162 --0.009897847296833684 -0.00715976102733476 --0.01991733939380202 --0.007357571038926293 --0.001319399661219862 -0.04652879838361375 -0.009973162015194711 -0.009618742335764058 -0.03057005098961838 --0.02818024699769371 -0.003189392647853853 -0.009030429141936552 -0.00251109153778454 -0.001308665377700117 -0.01606308181099008 -0.01438997033976475 --0.01463184399033823 --0.0009535991470952108 --0.02583281586964435 --0.01412254762049034 -0.01390209474623605 -0.004950827196089225 -0.03497512079680302 --0.01123471153187777 --0.006679336715318984 -0.004201188760536267 -0.02052605940156248 --0.003058915191952805 --0.0001681188276387851 --0.005376602520693313 --0.005257042703876781 -0.01975915040983996 -0.003320443697274082 -0.004696593320418577 --0.0403446072861945 -0.0008715451809642304 -0.02230239133717548 -0.01365825094038805 -0.01799221302687173 -0.01512857695194226 --0.004925035054470859 --0.01665001713451024 -0.01754550906569217 --0.008585444873006637 --0.007689722259475283 -0.03039968227985917 -0.03850410626190468 --0.00519157452897538 --0.01204823750018192 -0.02725250362549569 --0.006654209129966883 --0.01412857624485538 -0.02578844478895474 -0.00996881675076023 --0.007549656552828025 -0.03038992015884526 -0.02038503412032708 -0.003779836143993172 --0.01123360077986335 --0.002467890119529495 -0.01531501741348034 --0.007316801130663386 --0.009613193308891771 
-0.003157725172311934 -0.01483373140697417 -0.007592517011473918 --0.01704261468175717 -0.02214190083770232 --0.004406747703956672 --0.0207663346317325 --0.003782296682800211 --0.007051686339933606 --0.005914969893877447 -0.03373725520739661 -0.002425119865916307 --0.00309265302130277 --0.009216635366219115 -0.002746039378600673 --0.00888515005715354 --0.01054683990350574 -0.003287695026993249 --0.01217806469614054 -0.006502264552732562 -0.02714031616947617 -0.01828334470794161 --0.004619290241341466 --0.0006677405313025577 -0.01077791261163933 --0.03120473967703216 --0.03683242118198414 --0.01832041251490778 --0.0003497210665655151 -0.01020025716042055 --0.01524897691181655 --0.01005985079402065 --0.01900942416893886 -0.02834803241375867 -0.01551767911573025 --0.02833643852533742 -0.009895908852247073 -0.004952556944618854 --0.02180069944933652 -0.008523656075957786 --0.01223104310666341 -0.02357472449337427 --0.007848588429341781 -0.01007407812277661 --0.01049423319536491 --0.003692773467291717 --0.04100564678520459 -0.000269864666563532 -0.02038056679634657 -0.01114612403695811 --0.008490506292959653 -0.008225535377320379 -0.02192906253175392 -0.00240048131858734 --0.00402537600825677 -0.01547775852489192 -0.001991643746644618 --0.009955977451476189 --0.01752304483853321 --0.0004880264940159554 -0.00184148271379681 -0.0107713907643284 --0.02565781489879055 -0.003523787900757492 -0.03150256804863619 -0.01440471598178603 -0.01188344506108061 --0.007597927891781663 --0.005688475332986122 -0.008368961302056166 --0.03406343617413088 -0.007651773068034115 --0.002810614992210151 --0.01442355023193317 --0.00930942517908496 --0.01738687851762602 --0.01496750270809569 --0.001880131533996531 --0.02150940258425937 -0.01891878116566312 -0.02535369682026802 --0.008001460217392713 -0.01457954926397509 --0.005996851935756954 --0.02645879905527309 --0.01925532918353922 --0.03035450009468155 --0.004498623530743501 --0.03764008086239421 --0.004510430910161323 -0.002778516395608248 
--0.004863356269083184 --0.002861916086107435 --0.03010558923798578 --0.009612790808077698 -0.006870374459038819 -0.007183679358267206 --0.01056143179122944 -0.009246213763820533 --0.03407414026039592 -0.006536109577288808 --0.03570880650006806 --0.00256972215619462 -0.02308572980782077 --0.02264905849994069 --0.05410349133434062 --0.03065995798434372 -0.031863893106199 --0.01789927994077019 --0.01578340952287412 -0.01612195199705951 --0.0161754669172831 -0.008967087146923434 -0.01951791168127714 --0.005550980727729709 -0.01593155394105586 --0.006737185408648629 -0.02658820277524416 -0.01964556236776288 -0.02800367959005816 -0.02928421563736574 --0.003406911554392224 --0.004683231814117317 -0.01469177006074971 -0.01043533264507262 --0.001326671671339284 --0.01501005052017033 --0.03712251598512212 --0.00579401557140454 --0.02453559300724241 --0.02285450929285504 --0.0006336748959054387 -0.01518813684715994 --0.01484761422123549 -0.01312734447579811 -0.002708230628160069 --0.02006481914527003 -0.0229095791354312 --0.01674121776424106 --0.007448030816575247 --0.0129143579538611 -0.005898341603338111 -0.03940992889730704 -0.01563042402664403 --0.0112737737131252 -0.01313133291807826 -0.03056694985152921 -0.01277810382730765 -0.006074553003625459 -0.002643228556582045 -0.001852565306432142 -0.009950528115110007 --0.00978732767477377 --0.01330989069333403 --0.001847937864769756 -0.004673887153457299 -0.006673689168090389 --0.007797684895687047 -0.003895373914808362 -0.000877769969073182 -0.003234935159984071 --0.002382328483122484 --0.006838504395495584 --0.007109499654775752 -0.004690532455906133 --0.01578540762506189 --0.0105186108454337 -0.006449627651086042 -0.0008659955369537049 --0.01593994288097261 --0.004364885651002412 --0.00297714642303416 --0.003276880160015473 --0.002913579875677131 --0.005293863394846808 -0.01500060536879348 -0.02718197426782389 -0.001973063604433861 --0.01110063579036806 -0.01856553495664489 -0.001107601409016411 -0.00561501627705944 
--0.005546774648695785 --0.002920830040983198 --0.001072365539073448 --0.02868215724871776 -0.01108259439364273 --0.001038948672440086 --0.002551597250200969 -0.006789073246382256 --0.0106527188330943 --0.01393504545568204 --0.008254621084635002 --0.0007596866344118802 --0.01074174093348666 -0.002094179527578489 --0.006852628810115411 --0.005958139833308673 --0.01720135294300625 --0.001156409360436312 -0.005544355991337468 --0.002706345139482663 --0.004001522067199958 -0.03657233067842677 --0.003180665729650278 --0.00671291974076794 --0.01252689920645897 --0.04176006895435073 -0.00300204829262944 --0.0019631064042179 --0.002887174967372167 --0.009633091089705224 -0.03197655319102453 -0.004677648190332648 --0.00422058801337834 --0.02329711356613009 --0.0001658912860526463 -0.01047949595913793 --0.001330315136965086 -0.03675530086562152 -0.01582561317980396 --0.002708693519195159 --0.04822072866946137 -0.01280724733588027 -0.004492782277935439 --0.02846603272897769 --0.01550594501095104 -4.245254987837467e-05 --0.006435485714250382 -0.01236203449806062 --0.02945947057757245 --0.01467583504710125 --0.009194771191684775 --0.030940241609674 -0.005702229936330789 --0.02845484858111282 -0.01556843813261503 -0.005253243002647325 --0.01533881042846362 -0.004638977649766434 --0.02164714998971094 -0.00361133622408166 -0.0371863405304098 --0.007207325197702643 --0.008072944543663023 -0.03145071816656336 -0.004279937779200758 --0.02439676903785956 -0.01570624486136686 -0.02049964857755283 -0.008571127046193158 --0.01569139109953498 -0.007010297565927171 -0.0137918003747383 -0.004474221166233592 --0.01946445121864369 --0.008979531198974339 -0.006114188637525397 --0.01433565214411659 -0.01401985074914269 -0.02418435815121221 --0.04947458859574772 --0.008662373286325537 --0.007518488780997615 -0.01254010445474588 --0.009068525930721901 --0.015428080901077 -0.001245014469365529 -0.01340376860400185 -0.007408507975101764 -0.01224980159270087 -0.01221519533041613 
--0.002450309316354523 --0.01403446778394327 --0.01501718422290021 -0.01382119355322259 -0.006925837551820885 -0.01422543346146635 -0.003126316130391671 --0.009759507585943447 --0.004515081040667552 -0.0184211583778267 --0.00301334658383286 -0.03526382100861304 --0.005763125629352025 -0.003859628102802385 -0.01640992715249558 --0.0132724185004416 --0.005429495624950713 --0.01610039127796882 -0.0190666544080012 --0.006008169545401953 --0.001640952182593106 -0.008678963120391098 --0.006560033202091975 --0.01178731303978556 -0.01678707458057755 --0.003742623816834697 -0.01025773572604616 --0.007378540838533612 --0.003727362841688868 --0.000837522225570645 -0.005631750173079518 --0.02295009761451031 --0.02060608942868164 --0.01459792328485058 --0.01602339266022188 -0.004820403169663087 -0.01327368680494862 --0.02007515477855184 --0.01433959294531711 -0.02069041124632693 -0.005073165997318196 --0.01869705326717852 --0.00697138840158981 --0.0332817989791832 -0.006186898695763961 -0.007756986993487383 --0.01343075855582001 --0.01331099248621712 -0.03630076040567996 -0.001750181901429801 -0.008236691580402281 --0.007424815882232308 -0.01302245024972943 -0.03309769642738106 --0.01093175213848803 --0.002247245997031039 --0.02008036255533713 --0.00885610589240567 -0.005350029110215819 --0.007763626370236366 -0.001855312284918393 --0.04054569097076473 --0.01931669526263645 -0.01037077748590742 --0.01378737972509295 --0.003298226780031409 --0.01077276358634363 -0.003565177543258448 --0.007346028800042844 --0.004967302033390996 -0.01162919611371515 --0.003618604735717639 --0.01771246381439291 --0.02162694884119417 --0.03058398165228755 -0.01129803453831104 -0.01988557807548826 -0.003090755698382868 --0.01503023188282204 -0.0139015637894172 --0.003170569976254531 -0.01004330018724429 --0.005514814437924848 --0.006909174112045331 -0.009132437683185673 --0.01626133377305174 --0.003395230429404601 -0.01706234056137755 --0.0009556272437728052 -0.01032593776344388 --0.0161055690697245 
--0.003802496858683152 --0.0001045289663565352 -0.009260188578330154 --0.007087580885692222 -0.006637454797923579 --0.002473127019125103 -0.02403494801378056 --0.01140655705142183 -0.03183263598949247 -0.007399362812465869 -0.0175512050714865 --0.01006068316617398 --0.007405734287376196 --0.01282018810092489 --0.007026390622589841 -0.009910072367183956 -0.003474072157141561 -0.01442648729935228 -0.005078431989435122 -0.01707525876406475 --0.004732839122433889 -0.02889917475456872 -0.003105141177924763 --0.005230209002812439 --0.007474395437483116 -0.005351739605678192 --0.00791345102367071 --0.00787197521927511 --0.01117473676391647 --0.03604038065176966 --0.007511897661885605 -0.001634010057073694 -0.01377114410533289 -0.00980477295699818 -0.02397216916237251 -0.006303681208571039 -0.001305973472988622 -0.003068833736754394 -0.009712442755140544 -0.02313812611247533 -0.01847771290869842 --0.01240904162976993 -0.00866447519406865 --0.02296742859721455 -0.01726599170420226 -0.007680741808977268 --0.02359946657768762 --0.002506896760679496 --0.0014444060339337 --0.01188211223242937 -0.008462957566127537 --0.02036825267874936 -0.007048906669455735 -0.007320347017932488 --0.003888724139715328 --0.0002038557167446097 -0.01076458425422057 --0.008849398556298934 -0.01750518473606481 -0.00649076115189502 -0.01632116394140625 -0.001588517511907531 -0.005394235147066633 --0.008584940276567648 -0.01751061734991783 -0.002200734330235819 --0.006876940316865636 --0.007860281268000924 --0.01195695109695417 --0.02303525308255487 --0.02193756919565023 --0.004417098812208356 --0.01299878072111846 --0.01500916082433785 --0.002569339668940366 --0.0119437556285678 -0.02292838197667446 --0.01398311344776766 --0.002480835206852796 -0.0144114349141262 -0.007404394114182884 --0.007804194318305033 --0.003257282807043618 --0.01506450993692678 -0.01866899190302616 --0.0008034727598605041 -0.003208336163053487 -0.008976540539303772 --0.01259311946237895 --0.001366530593811902 
--0.001725063596972607 -0.02144484948185182 -0.01384025574023827 --0.009909140743035153 -0.03315125027395317 -0.006904412816946368 -0.01382324888550336 -0.001598431960756535 --0.003804017504204842 -0.01900910121709114 -0.01261570802457964 --0.001364268621036571 --0.02693978151524247 --0.004580673028005613 -0.01316716943583306 -0.02459615893625969 -0.01361500452629209 -0.008176704201722099 -1.197955201233251e-05 --0.01117539339090682 --0.03311034229307786 -0.005231735134011704 -0.02224010929835316 --0.004402460390882429 -0.01270930673348074 --0.01055248764250602 -0.007862828278494667 --0.004655738824427762 --0.03145101100301825 --0.01631270519561777 --0.01235817303394284 -0.02039030310521683 -0.003747146981692299 --0.01773495066741079 -0.0158880806014404 -0.02355239702142845 --0.02519791103684815 --0.03996701995438993 --0.009147440906650548 --0.01201076937582297 --0.001745933908305584 --0.01015953270929641 -0.05411292102502311 -0.005879375847221719 -0.007251730069490883 -0.01237500663803921 --0.0004249924873270862 --0.01475861942556251 -0.01072882758323287 -0.007858024885138997 --0.01625218026698153 --0.01604772525044638 -0.00581368634557428 -0.00638483819377105 --0.0006513546862175954 -0.009131134521716044 -0.03521455157619021 -0.01066692724542432 --0.007809868984940659 --0.01380670346623858 -0.01696569213572393 -0.019291133231876 -0.02999733046749321 -0.007424388248721526 --0.01468518254145522 --0.01057420252769083 --0.001453259656950157 -0.01933253108711584 --0.03081772101281013 -0.01548993995195879 --0.005072038094306734 --0.01232566549368926 --0.02525342900595093 --0.01495885097694772 -0.002118847028197435 --0.005895042441752004 --0.002981069043224838 --0.02312955791798903 -0.01384561741944177 -0.007893081889197811 -0.003310096499544184 --0.01948649592910526 --0.02592984286373955 -0.007827077285211401 --0.008427002587605257 --0.03852505771688302 -0.03489698776233709 -0.02654639265967004 -0.03590686222294073 --0.001167594158782169 -0.004948762328436335 
-0.01542619570998532 -0.005237409335841976 --0.004680062638277922 -0.03064082517211047 -0.02407422111065966 --0.01835936629463496 --0.01940408814121492 --0.0003015836736504799 --0.003279187286941378 -0.003460404511508226 --0.03006130330703631 --0.002221712135099534 -0.003245402796957847 --0.01010417721497479 --0.02210106881780647 -0.02139571462286946 --0.01172203886870713 -0.01603489720504438 --0.01158060220004198 -0.01795321187941191 -0.005762843557234979 -0.01244513045636123 -0.02100765127278477 -0.002256507965716916 --0.04498735438632368 -0.00406366452792569 --0.004715770210993587 --0.0163223762337607 --0.005126678270572817 -0.01882227980030095 -0.009301602762092702 -0.0365317617045287 -0.02083200977701033 --0.002695492681371857 -0.003197993533616333 --0.001472975051209063 --0.0003186004996362475 --0.00246748496713778 --0.007494180230948377 -0.01472725543282279 -0.03665734325216793 --0.01458367360933156 --0.02006216193689288 -0.01709203912042638 -0.007299759554254884 --0.02550838494588182 --0.009175745773407242 --0.002439928449770857 --0.009460724405402654 --0.02733190418472063 -0.001382641863765436 -0.04172133512368545 --0.01891337623770762 --0.01567365458450751 --0.03432054990161081 -0.01188376276679091 --0.03056358240640782 --0.002841202340091693 --0.0004848467491689695 -0.002707310166625807 --0.001994689711391858 -0.001979987282358538 --0.02218750955138959 -0.003053599109053886 --0.009304206656967136 --0.02381657953166434 -0.03351834306207179 --0.03080237455344828 -0.002712576360957537 --0.01866048178070419 --0.04275923139936443 --0.007897959145482122 -0.01116667018229131 --0.02290031686912631 --0.0365918939021931 --0.007672177373576339 --0.008362136036287748 --0.003078938324631922 -0.007039215751315215 --0.009018467205375623 --0.00470860635890057 -0.003602678919302242 --0.003086681927234068 -0.002502598254855494 -0.01520604009601336 -0.002918008994150385 -0.003850689193926608 --0.006880675210700436 --0.02072870478100336 --0.01345486105727304 
-0.01669076439246846 -0.006398660888555329 -0.005579883521651698 -0.01870131934365079 --0.0208097178659284 --0.0009436042167345623 --0.02532025226035112 --0.001544584588500054 --0.04155546179234096 -0.004861309078836543 -0.01204093956078467 -0.04162541559719533 -0.02050737042312399 --0.001214160382287964 --0.003688134921549865 --0.004908708211978426 -0.01719240141975271 -0.01234398515654684 --0.002730021609324972 -0.01128233844293456 -0.02054598684784238 -0.004588128805327625 --0.009030848883505915 --0.005912720340995263 -0.02797786574909942 -0.0009488273136703182 -0.0007460614501613194 --0.008079922249812769 -0.007042786142229772 --0.006725859535732895 -0.002884405307200951 --0.01861324621635662 -0.005654274981213094 --0.006496547564795704 -0.009422114284617372 -0.003891733699865319 --0.01937898149158729 -0.009824713927864904 --0.003928011245137781 --0.004047471340990934 --0.003145561644031582 --0.005145018171695522 -0.009935977007277299 --0.006413503944716811 --0.003993981527416234 -0.0284827621070262 -0.0006976840598726484 --0.01890689118680361 --0.006379917738488249 -0.001106944144783874 --0.001821220227014897 --0.0004432979499690127 diff --git a/data/combined.txt.model_ b/data/combined.txt.model_ deleted file mode 100644 index 90edbb7a9..000000000 Binary files a/data/combined.txt.model_ and /dev/null differ diff --git a/data/demo.py b/data/demo.py new file mode 100644 index 000000000..7f467e45e --- /dev/null +++ b/data/demo.py @@ -0,0 +1,103 @@ +import sys +import glob +import getopt +import numpy as np +import cv2 as cv +import pymtracking as mt + +print("OpenCV Version: {}".format(cv.__version__)) + + +def draw_regions(img, regions, color): + for reg in regions: + brect = reg.brect + cv.rectangle(img, (brect.x, brect.y, brect.width, brect.height), color, 2) + + +def draw_tracks(img, tracks, fps): + for track in tracks: + brect = track.GetBoundingRect() + if track.isStatic: + cv.rectangle(img, (brect.x, brect.y, brect.width, brect.height), (255, 0, 255), 2) + 
elif track.IsRobust(int(fps / 4), 0.7, (0.1, 10.), 3): + cv.rectangle(img, (brect.x, brect.y, brect.width, brect.height), (0, 255, 0), 2) + trajectory = track.GetTrajectory() + for i in range(0, len(trajectory) - 1): + cv.line(img, trajectory[i], trajectory[i+1], (0, 255, 0), 1) + + +def main(): + args, video_src = getopt.getopt(sys.argv[1:], '', ['cascade=', 'nested-cascade=']) + try: + video_src = video_src[0] + except: + video_src = 0 + args = dict(args) + + cam = cv.VideoCapture(video_src) + + _ret, img = cam.read() + print("cam.read res = ", _ret, ", im size = ", img.shape) + + fps = cam.get(cv.CAP_PROP_FPS) + print(video_src, " fps = ", fps) + + configBGFG = mt.KeyVal() + configBGFG.Add('useRotatedRect', '20') + configBGFG.Add('history', '1000') + configBGFG.Add("nmixtures", "3") + configBGFG.Add("backgroundRatio", "0.7") + configBGFG.Add("noiseSigma", "0") + print("configBGFG = ", configBGFG) + mdetector = mt.BaseDetector(mt.BaseDetector.Detectors.MOG, configBGFG, img) + print("CanGrayProcessing: ", mdetector.CanGrayProcessing()) + mdetector.SetMinObjectSize((1, 1)) + + tracker_settings = mt.TrackerSettings() + + tracker_settings.SetDistance(mt.MTracker.DistRects) + tracker_settings.kalmanType = mt.MTracker.KalmanLinear + tracker_settings.filterGoal = mt.MTracker.FilterCenter + tracker_settings.lostTrackType = mt.MTracker.TrackNone + tracker_settings.matchType = mt.MTracker.MatchHungrian + tracker_settings.useAcceleration = False + tracker_settings.dt = 0.5 + tracker_settings.accelNoiseMag = 0.1 + tracker_settings.distThres = 0.95 + tracker_settings.minAreaRadiusPix = img.shape[0] / 5. 
+ tracker_settings.minAreaRadiusK = 0.8 + tracker_settings.useAbandonedDetection = False + tracker_settings.maximumAllowedSkippedFrames = int(2 * fps) + tracker_settings.maxTraceLength = int(2 * fps) + + mtracker = mt.MTracker(tracker_settings) + + while True: + _ret, img = cam.read() + if _ret: + print("cam.read res = ", _ret, ", im size = ", img.shape, ", fps = ", fps) + else: + break + + mdetector.Detect(img) + regions = mdetector.GetDetects() + print("mdetector.Detect:", len(regions)) + + mtracker.Update(regions, img, fps) + tracks = mtracker.GetTracks() + print("mtracker.Update:", len(tracks)) + + vis = img.copy() + # draw_regions(vis, regions, (255, 0, 255)) + draw_tracks(vis, tracks, fps) + cv.imshow('detect', vis) + + if cv.waitKey(int(1000 / fps)) == 27: + break + + print('Done') + + +if __name__ == '__main__': + main() + cv.destroyAllWindows() diff --git a/data/dota/DOTA_v1.0.names b/data/dota/DOTA_v1.0.names new file mode 100644 index 000000000..adea76198 --- /dev/null +++ b/data/dota/DOTA_v1.0.names @@ -0,0 +1,15 @@ +plane +ship +storage_tank +baseball_diamond +tennis_court +basketball_court +ground_track_field +harbor +bridge +large_vehicle +small_vehicle +helicopter +roundabout +soccer_ball_field +swimming_pool \ No newline at end of file diff --git a/data/dota/DOTA_v1.5.names b/data/dota/DOTA_v1.5.names new file mode 100644 index 000000000..4d18c4f11 --- /dev/null +++ b/data/dota/DOTA_v1.5.names @@ -0,0 +1,16 @@ +baseball_diamond +basketball_court +bridge +container_crane +ground_track_field +harbor +helicopter +large_vehicle +plane +roundabout +ship +small_vehicle +soccer_ball_field +storage_tank +swimming_pool +tennis_court diff --git a/data/haarcascade_frontalface_alt2.xml b/data/haarcascade_frontalface_alt2.xml deleted file mode 100644 index b49cf5df3..000000000 --- a/data/haarcascade_frontalface_alt2.xml +++ /dev/null @@ -1,20719 +0,0 @@ - - - -BOOST - HAAR - 20 - 20 - - 109 - - 0 - 20 - - <_> - 3 - 3.5069230198860168e-01 - - <_> - - 0 1 0 
4.3272329494357109e-03 -1 -2 1 1.3076160103082657e-02 - - 3.8381900638341904e-02 8.9652568101882935e-01 - 2.6293140649795532e-01 - <_> - - 0 1 2 5.2434601821005344e-04 -1 -2 3 4.4573000632226467e-03 - - 1.0216630250215530e-01 1.2384019792079926e-01 - 6.9103831052780151e-01 - <_> - - 1 0 4 -9.2708261217921972e-04 -1 -2 5 3.3989109215326607e-04 - - 1.9536970555782318e-01 2.1014410257339478e-01 - 8.2586747407913208e-01 - <_> - 9 - 3.4721779823303223e+00 - - <_> - - 0 1 6 2.3025739938020706e-03 -1 -2 7 4.4174338690936565e-03 - - 1.0183759778738022e-01 8.2190579175949097e-01 - 1.9565549492835999e-01 - <_> - - 0 1 8 2.2203210741281509e-02 -1 -2 9 -1.7283110355492681e-04 - - 2.2054070234298706e-01 7.3263257741928101e-02 - 5.9314841032028198e-01 - <_> - - 0 1 10 4.3567270040512085e-03 -1 -2 11 - -2.6032889727503061e-03 - - 1.8441149592399597e-01 4.0322139859199524e-01 - 8.0665212869644165e-01 - <_> - - 0 1 12 1.7309630056843162e-03 -1 -2 13 - -7.8146401792764664e-03 - - 2.5483280420303345e-01 6.0570698976516724e-01 - 2.7790638804435730e-01 - <_> - - 0 1 14 -8.7343417108058929e-03 -1 -2 15 - 9.4522320432588458e-04 - - 2.8899800777435303e-01 7.6165872812271118e-01 - 3.4956431388854980e-01 - <_> - - 1 0 16 4.9414858222007751e-02 -1 -2 17 - 4.4891750440001488e-03 - - 8.1516528129577637e-01 2.8087830543518066e-01 - 6.0277748107910156e-01 - <_> - - 1 0 18 6.0313619673252106e-02 -1 -2 19 - -1.0762850288301706e-03 - - 7.6075017452239990e-01 4.4440358877182007e-01 - 1.4373120665550232e-01 - <_> - - 1 0 20 -9.5083238556981087e-03 -1 -2 21 - 7.6601309701800346e-03 - - 5.3181701898574829e-01 5.4110521078109741e-01 - 2.1806870400905609e-01 - <_> - - 1 0 22 7.6467678882181644e-03 -1 -2 23 - -8.4662932204082608e-04 - - 1.1589600145816803e-01 2.3406790196895599e-01 - 5.9903818368911743e-01 - <_> - 14 - 5.9844889640808105e+00 - - <_> - - 1 0 24 -4.8506218008697033e-03 -1 -2 25 - -4.6141650527715683e-03 - - 1.8054960668087006e-01 2.1778939664363861e-01 - 8.0182367563247681e-01 - <_> - - 0 1 
26 -2.4301309604197741e-03 -1 -2 27 - 4.1787960799410939e-04 - - 1.1413549631834030e-01 1.2030939757823944e-01 - 6.1085307598114014e-01 - <_> - - 0 1 28 1.0010929545387626e-03 -1 -2 29 - 1.0577100329101086e-03 - - 2.0799599587917328e-01 3.3020541071891785e-01 - 7.5110942125320435e-01 - <_> - - 1 0 30 1.2376549420878291e-03 -1 -2 31 - 3.5315038985572755e-04 - - 2.7682220935821533e-01 1.6682930290699005e-01 - 5.8294767141342163e-01 - <_> - - 0 1 32 -1.1953660286962986e-02 -1 -2 33 - 1.4182999730110168e-03 - - 1.5087880194187164e-01 4.3912279605865479e-01 - 7.6465952396392822e-01 - <_> - - 1 0 34 3.4642980899661779e-03 -1 -2 35 - -1.4948950149118900e-02 - - 2.6515561342239380e-01 2.2980530560016632e-01 - 5.4421657323837280e-01 - <_> - - 1 0 36 -1.0506849503144622e-03 -1 -2 37 - -4.0782918222248554e-03 - - 3.6228439211845398e-01 2.6012599468231201e-01 - 7.2336578369140625e-01 - <_> - - 0 1 38 5.4242828628048301e-04 -1 -2 39 - -7.3204059153795242e-03 - - 3.8496789336204529e-01 2.9655128717422485e-01 - 5.4803091287612915e-01 - <_> - - 0 1 40 1.1421289527788758e-03 -1 -2 41 - 1.1783400550484657e-03 - - 4.1047701239585876e-01 7.2390240430831909e-01 - 2.7872839570045471e-01 - <_> - - 0 1 42 4.4077109545469284e-02 -1 -2 43 - 3.7900090683251619e-03 - - 5.6405162811279297e-01 5.9475481510162354e-01 - 3.3120200037956238e-01 - <_> - - 0 1 44 -2.4291418958455324e-03 -1 -2 45 - 9.4262324273586273e-03 - - 6.6032320261001587e-01 4.6806651353836060e-01 - 2.0643380284309387e-01 - <_> - - 0 1 46 8.0630257725715637e-03 -1 -2 47 - 5.2240812219679356e-03 - - 5.2988511323928833e-01 5.2816027402877808e-01 - 1.9095499813556671e-01 - <_> - - 0 1 48 -7.0630568079650402e-03 -1 -2 49 - 5.6897541508078575e-03 - - 1.3806459307670593e-01 5.4906368255615234e-01 - 1.2602810561656952e-01 - <_> - - 0 1 50 1.2472929665818810e-03 -1 -2 51 - 4.9543488770723343e-02 - - 2.3726630210876465e-01 5.2401661872863770e-01 - 1.7692160606384277e-01 - <_> - 19 - 8.5117864608764648e+00 - - <_> - - 1 0 52 
-4.9326149746775627e-03 -1 -2 53 - 2.7918140403926373e-05 - - 1.9980649650096893e-01 2.2993800044059753e-01 - 7.3932111263275146e-01 - <_> - - 1 0 54 3.0876200180500746e-03 -1 -2 55 - 7.4669660534709692e-06 - - 1.5338400006294250e-01 2.0368589460849762e-01 - 5.8549159765243530e-01 - <_> - - 0 1 56 1.8739729421213269e-03 -1 -2 57 - 9.3380251200869679e-04 - - 2.0498959720134735e-01 3.2341998815536499e-01 - 7.3230141401290894e-01 - <_> - - 0 1 58 1.9151850137859583e-03 -1 -2 59 - -5.9683797881007195e-03 - - 3.0451491475105286e-01 2.9321339726448059e-01 - 5.6212961673736572e-01 - <_> - - 0 1 60 -7.2115601506084204e-04 -1 -2 61 - -5.9663117863237858e-03 - - 3.6580368876457214e-01 2.7121558785438538e-01 - 7.2263348102569580e-01 - <_> - - 0 1 62 3.0874179676175117e-02 -1 -2 63 - -1.1099710129201412e-02 - - 4.4198378920555115e-01 3.6129769682884216e-01 - 5.2514511346817017e-01 - <_> - - 0 1 64 2.1164179779589176e-03 -1 -2 65 - -9.4317439943552017e-03 - - 3.6286169290542603e-01 1.6010950505733490e-01 - 7.0522767305374146e-01 - <_> - - 0 1 66 -3.5266019403934479e-03 -1 -2 67 - -1.6907559474930167e-03 - - 1.3012880086898804e-01 1.7863239347934723e-01 - 5.5215299129486084e-01 - <_> - - 0 1 68 4.6470930101349950e-04 -1 -2 69 - -1.0215570218861103e-02 - - 3.4873831272125244e-01 2.6739910244941711e-01 - 6.6679191589355469e-01 - <_> - - 1 0 70 1.2634709710255265e-03 -1 -2 71 - -1.1875299736857414e-02 - - 3.4378638863563538e-01 5.9953361749649048e-01 - 3.4977179765701294e-01 - <_> - - 0 1 72 -1.0732339695096016e-02 -1 -2 73 - 7.1836481802165508e-03 - - 2.1504899859428406e-01 6.2714362144470215e-01 - 2.5195419788360596e-01 - <_> - - 0 1 74 -2.8340889140963554e-02 -1 -2 75 - -4.5813230099156499e-04 - - 8.2411892712116241e-02 5.9100568294525146e-01 - 3.7052011489868164e-01 - <_> - - 1 0 76 4.2940340936183929e-03 -1 -2 77 - 1.0751079767942429e-02 - - 1.5947279334068298e-01 5.9804809093475342e-01 - 2.8325080871582031e-01 - <_> - - 1 0 78 2.2465119138360023e-02 -1 -2 79 - 
-5.7988539338111877e-02 - - 7.8770911693572998e-01 1.5557409822940826e-01 - 5.2396571636199951e-01 - <_> - - 1 0 80 7.2110891342163086e-03 -1 -2 81 - -4.8367571085691452e-02 - - 6.6203659772872925e-01 1.4247199892997742e-01 - 4.4298338890075684e-01 - <_> - - 0 1 82 -1.4418059960007668e-02 -1 -2 83 - -2.3156389594078064e-02 - - 1.5885409712791443e-01 2.3757989704608917e-01 - 5.2171349525451660e-01 - <_> - - 1 0 84 7.6985340565443039e-03 -1 -2 85 - -5.6248619221150875e-03 - - 1.9417250156402588e-01 6.2784057855606079e-01 - 3.7460449337959290e-01 - <_> - - 1 0 86 -7.2936748620122671e-04 -1 -2 87 - 6.1783898854628205e-04 - - 3.8409221172332764e-01 3.1064930558204651e-01 - 5.5378472805023193e-01 - <_> - - 1 0 88 -4.5803939428878948e-05 -1 -2 89 - -1.4719359569426160e-05 - - 3.4444490075111389e-01 2.7295520901679993e-01 - 6.4289510250091553e-01 - <_> - 19 - 8.4680156707763672e+00 - - <_> - - 0 1 90 -1.3469370314851403e-03 -1 -2 91 - -2.4774789344519377e-03 - - 1.6570860147476196e-01 2.2738510370254517e-01 - 6.9893497228622437e-01 - <_> - - 0 1 92 5.2632777951657772e-03 -1 -2 93 - 4.9075339920818806e-03 - - 1.5120740234851837e-01 5.5644702911376953e-01 - 1.6054420173168182e-01 - <_> - - 0 1 94 -2.3254349362105131e-03 -1 -2 95 - -1.4665479538962245e-03 - - 1.8802590668201447e-01 3.1224989891052246e-01 - 7.1653962135314941e-01 - <_> - - 1 0 96 -1.2311690300703049e-01 -1 -2 97 - 2.2108340635895729e-03 - - 3.8595831394195557e-01 2.4552939832210541e-01 - 5.6957101821899414e-01 - <_> - - 0 1 98 2.0661531016230583e-03 -1 -2 99 - 3.6130280932411551e-04 - - 2.7165201306343079e-01 2.2933620214462280e-01 - 7.2086298465728760e-01 - <_> - - 1 0 100 7.9957872629165649e-02 -1 -2 101 - 2.6064720004796982e-03 - - 7.8336209058761597e-01 5.5452322959899902e-01 - 2.5506898760795593e-01 - <_> - - 1 0 102 6.5699010156095028e-03 -1 -2 103 - 1.6259610420092940e-03 - - 1.8193900585174561e-01 3.5298758745193481e-01 - 6.5528190135955811e-01 - <_> - - 0 1 104 3.6204981151968241e-03 -1 -2 105 - 
-4.4391951523721218e-03 - - 5.4623097181320190e-01 1.3598430156707764e-01 - 5.4158151149749756e-01 - <_> - - 0 1 106 -9.0540945529937744e-03 -1 -2 107 - -4.6067481162026525e-04 - - 1.1151199787855148e-01 5.8467197418212891e-01 - 2.5983488559722900e-01 - <_> - - 0 1 108 -5.6621041148900986e-03 -1 -2 109 - 5.1165837794542313e-03 - - 1.6105690598487854e-01 5.3766787052154541e-01 - 1.7394550144672394e-01 - <_> - - 0 1 110 -2.1362339612096548e-03 -1 -2 111 - -5.4809921421110630e-03 - - 1.9020730257034302e-01 3.2720080018043518e-01 - 6.3648408651351929e-01 - <_> - - 0 1 112 -8.1061907112598419e-03 -1 -2 113 - 6.0048708692193031e-03 - - 6.9148528575897217e-01 4.3273261189460754e-01 - 6.9638431072235107e-01 - <_> - - 0 1 114 -8.7028548121452332e-02 -1 -2 115 - -4.7809639945626259e-03 - - 8.5941338539123535e-01 9.7394466400146484e-02 - 4.5870301127433777e-01 - <_> - - 0 1 116 -2.2166660055518150e-03 -1 -2 117 - 1.3642730191349983e-03 - - 2.5546258687973022e-01 3.3190909028053284e-01 - 5.9641027450561523e-01 - <_> - - 0 1 118 -9.0077864006161690e-03 -1 -2 119 - -1.5494120307266712e-02 - - 2.6665949821472168e-01 1.8481859564781189e-01 - 6.2459707260131836e-01 - <_> - - 1 0 120 -4.2165028862655163e-03 -1 -2 121 - 4.3249759823083878e-02 - - 5.3799271583557129e-01 5.1830291748046875e-01 - 2.1704199910163879e-01 - <_> - - 1 0 122 2.8786511393263936e-04 -1 -2 123 - 1.2373150093480945e-03 - - 2.6133841276168823e-01 2.7865320444107056e-01 - 5.9089881181716919e-01 - <_> - - 1 0 124 1.9528300035744905e-03 -1 -2 125 - -1.4947060262784362e-03 - - 2.6128691434860229e-01 5.9154129028320312e-01 - 3.4557819366455078e-01 - <_> - - 1 0 126 3.5878680646419525e-03 -1 -2 127 - -2.5938691105693579e-03 - - 1.5870520472526550e-01 1.2704110145568848e-01 - 5.9794288873672485e-01 - <_> - 27 - 1.2578499794006348e+01 - - <_> - - 0 1 128 3.5810680128633976e-03 -1 -2 129 - -2.8552350122481585e-03 - - 1.9951049983501434e-01 7.3730701208114624e-01 - 2.9217371344566345e-01 - <_> - - 0 1 130 
1.9758539274334908e-03 -1 -2 131 - 3.2583118882030249e-03 - - 1.9564199447631836e-01 5.6920468807220459e-01 - 1.8390649557113647e-01 - <_> - - 0 1 132 2.3711679386906326e-04 -1 -2 133 - 2.5942500215023756e-03 - - 2.1716670691967010e-01 2.7199891209602356e-01 - 7.1502441167831421e-01 - <_> - - 0 1 134 -2.5032449513673782e-02 -1 -2 135 - 6.3087949529290199e-03 - - 1.8251839280128479e-01 5.6998378038406372e-01 - 3.5098528861999512e-01 - <_> - - 1 0 136 -3.2494920305907726e-03 -1 -2 137 - -1.4885730110108852e-02 - - 4.0239268541336060e-01 3.6040958762168884e-01 - 7.2919952869415283e-01 - <_> - - 1 0 138 8.0623216927051544e-03 -1 -2 139 - 2.7405679225921631e-02 - - 6.4914900064468384e-01 5.5189931392669678e-01 - 2.6596811413764954e-01 - <_> - - 1 0 140 3.4368600696325302e-02 -1 -2 141 - -2.7292970567941666e-02 - - 6.7125129699707031e-01 1.6913780570030212e-01 - 4.3262779712677002e-01 - <_> - - 0 1 142 7.4452121043577790e-04 -1 -2 143 - 7.0336280623450875e-04 - - 3.4051001071929932e-01 5.5167931318283081e-01 - 3.3113878965377808e-01 - <_> - - 0 1 144 -1.2275460362434387e-01 -1 -2 145 - 3.2559928949922323e-03 - - 1.6753150522708893e-01 3.6157518625259399e-01 - 6.4207828044891357e-01 - <_> - - 0 1 146 -3.2090399414300919e-02 -1 -2 147 - 3.2957999501377344e-03 - - 2.9210790991783142e-01 5.6130319833755493e-01 - 3.3578601479530334e-01 - <_> - - 0 1 148 -3.2273170072585344e-03 -1 -2 149 - 1.1171669466421008e-03 - - 6.9706428050994873e-01 3.5411500930786133e-01 - 6.1440062522888184e-01 - <_> - - 1 0 150 -1.7279950901865959e-02 -1 -2 151 - 1.1741200461983681e-02 - - 5.5371809005737305e-01 5.3419572114944458e-01 - 2.7571049332618713e-01 - <_> - - 1 0 152 4.6405228786170483e-03 -1 -2 153 - -1.6913030296564102e-02 - - 2.4895210564136505e-01 1.7119289934635162e-01 - 5.5239528417587280e-01 - <_> - - 1 0 154 1.0060169734060764e-02 -1 -2 155 - -6.0715491417795420e-04 - - 8.2734507322311401e-01 3.7793910503387451e-01 - 5.4762518405914307e-01 - <_> - - 1 0 156 -1.0865400545299053e-03 -1 
-2 157 - 8.9362077414989471e-03 - - 3.2965409755706787e-01 6.0628837347030640e-01 - 2.4342200160026550e-01 - <_> - - 1 0 158 -2.6372660067863762e-04 -1 -2 159 - 1.3110050000250340e-02 - - 3.8140949606895447e-01 5.5176162719726562e-01 - 3.7268930673599243e-01 - <_> - - 0 1 160 -2.9806280508637428e-03 -1 -2 161 - -4.1619571857154369e-03 - - 1.2296640127897263e-01 7.2522747516632080e-01 - 4.9734550714492798e-01 - <_> - - 0 1 162 3.3842328935861588e-02 -1 -2 163 - -1.2564560165628791e-03 - - 5.3483128547668457e-01 5.8519148826599121e-01 - 4.3841668963432312e-01 - <_> - - 0 1 164 -1.9635230302810669e-02 -1 -2 165 - -9.9625496659427881e-04 - - 2.2978340089321136e-01 6.2959378957748413e-01 - 4.1315990686416626e-01 - <_> - - 0 1 166 -2.3127110674977303e-02 -1 -2 167 - 2.3525709286332130e-02 - - 1.6954590380191803e-01 5.1741302013397217e-01 - 5.9519391506910324e-02 - <_> - - 0 1 168 -1.9356520846486092e-02 -1 -2 169 - -4.1787112131714821e-03 - - 1.3572479784488678e-01 2.9966288805007935e-01 - 5.7916951179504395e-01 - <_> - - 1 0 170 3.1488779932260513e-03 -1 -2 171 - 7.3972279205918312e-03 - - 6.5925890207290649e-01 5.3071719408035278e-01 - 3.7951210141181946e-01 - <_> - - 0 1 172 7.1955118983169086e-06 -1 -2 173 - 4.7114409506320953e-02 - - 3.1283149123191833e-01 5.5378931760787964e-01 - 1.0273090004920959e-01 - <_> - - 0 1 174 7.2878710925579071e-03 -1 -2 175 - -6.1887511983513832e-03 - - 4.6608591079711914e-01 7.1588581800460815e-01 - 4.7244489192962646e-01 - <_> - - 1 0 176 2.9757320880889893e-03 -1 -2 177 - -1.8449809867888689e-03 - - 5.9345688670873642e-02 7.0273017883300781e-01 - 4.7187310457229614e-01 - <_> - - 0 1 178 1.0239540279144421e-04 -1 -2 179 - 2.4277009069919586e-03 - - 5.8947342634201050e-01 4.8623558878898621e-01 - 5.2475881576538086e-01 - <_> - - 0 1 180 -6.4751312136650085e-02 -1 -2 181 - 3.9380151429213583e-04 - - 6.9174712896347046e-01 4.6696171164512634e-01 - 2.3824059963226318e-01 - <_> - 31 - 1.4546750068664551e+01 - - <_> - - 0 1 182 
1.4397440245375037e-03 -1 -2 183 - -5.4068560712039471e-04 - - 2.7734708786010742e-01 7.4271547794342041e-01 - 2.4797350168228149e-01 - <_> - - 1 0 184 -7.1237959673453588e-06 -1 -2 185 - -2.3661039303988218e-03 - - 2.1995030343532562e-01 5.8899897336959839e-01 - 2.5957161188125610e-01 - <_> - - 0 1 186 1.7343269428238273e-03 -1 -2 187 - 1.5874590026214719e-03 - - 1.8601259589195251e-01 4.1518709063529968e-01 - 7.1034741401672363e-01 - <_> - - 1 0 188 3.7285638973116875e-03 -1 -2 189 - -1.2883819639682770e-01 - - 2.5279670953750610e-01 1.3930009305477142e-01 - 5.2545148134231567e-01 - <_> - - 1 0 190 7.9412180930376053e-03 -1 -2 191 - -1.2661729939281940e-02 - - 2.4877290427684784e-01 2.7107000350952148e-01 - 6.6188377141952515e-01 - <_> - - 0 1 192 3.0146789868013002e-05 -1 -2 193 - -1.6330160200595856e-02 - - 3.8128259778022766e-01 2.3264320194721222e-01 - 5.2630108594894409e-01 - <_> - - 0 1 194 1.4622770322603174e-05 -1 -2 195 - -2.0858660340309143e-02 - - 4.2933320999145508e-01 1.6004039347171783e-01 - 6.7823147773742676e-01 - <_> - - 1 0 196 2.8194559272378683e-03 -1 -2 197 - 3.7899368908256292e-03 - - 6.6792941093444824e-01 4.5877051353454590e-01 - 7.1762388944625854e-01 - <_> - - 1 0 198 3.5344641655683517e-02 -1 -2 199 - -1.1571600334718823e-03 - - 1.8640750646591187e-01 5.5382597446441650e-01 - 3.1504508852958679e-01 - <_> - - 0 1 200 -5.8742752298712730e-03 -1 -2 201 - -1.5201780115603469e-05 - - 2.8287911415100098e-01 5.8702242374420166e-01 - 3.7048238515853882e-01 - <_> - - 1 0 202 -2.2681879636365920e-04 -1 -2 203 - 3.7845689803361893e-03 - - 4.2189309000968933e-01 6.6670012474060059e-01 - 2.4611820280551910e-01 - <_> - - 1 0 204 -8.5295992903411388e-05 -1 -2 205 - -4.4394891709089279e-02 - - 3.5575878620147705e-01 1.6655470430850983e-01 - 5.2348488569259644e-01 - <_> - - 0 1 206 1.0126030538231134e-03 -1 -2 207 - -7.6327780261635780e-03 - - 2.8846129775047302e-01 2.9693400859832764e-01 - 6.0801112651824951e-01 - <_> - - 0 1 208 4.0330411866307259e-03 
-1 -2 209 - 1.3676689565181732e-01 - - 4.5363900065422058e-01 5.1772642135620117e-01 - 1.4491820335388184e-01 - <_> - - 0 1 210 -5.0060478970408440e-03 -1 -2 211 - -1.2475839816033840e-02 - - 7.6169097423553467e-01 2.1597060561180115e-01 - 5.4601877927780151e-01 - <_> - - 1 0 212 -9.4012258341535926e-04 -1 -2 213 - -1.2191980145871639e-02 - - 3.9262959361076355e-01 3.4788811206817627e-01 - 5.5426627397537231e-01 - <_> - - 0 1 214 -5.4959481349214911e-04 -1 -2 215 - -2.1802430273965001e-04 - - 6.0642760992050171e-01 5.6974071264266968e-01 - 1.7797139286994934e-01 - <_> - - 0 1 216 6.9115799851715565e-03 -1 -2 217 - -9.7631698008626699e-04 - - 5.3793722391128540e-01 3.3278390765190125e-01 - 5.4615312814712524e-01 - <_> - - 0 1 218 -8.7870173156261444e-03 -1 -2 219 - -1.6761029837653041e-03 - - 2.1161609888076782e-01 6.6358232498168945e-01 - 4.3658590316772461e-01 - <_> - - 1 0 220 -5.5694948881864548e-02 -1 -2 221 - -1.9844379276037216e-02 - - 5.3874248266220093e-01 1.6028049588203430e-01 - 5.3304588794708252e-01 - <_> - - 0 1 222 -7.4751611100509763e-04 -1 -2 223 - 2.3032890632748604e-02 - - 2.9174768924713135e-01 5.6081241369247437e-01 - 1.9979810714721680e-01 - <_> - - 1 0 224 -3.0700280331075191e-03 -1 -2 225 - -1.1636839481070638e-03 - - 3.9383140206336975e-01 5.7574361562728882e-01 - 4.2394569516181946e-01 - <_> - - 1 0 226 2.2464339435100555e-01 -1 -2 227 - 1.4412109740078449e-03 - - 7.6765531301498413e-01 5.3538662195205688e-01 - 2.5147768855094910e-01 - <_> - - 0 1 228 -3.0011249706149101e-02 -1 -2 229 - -5.3078960627317429e-02 - - 2.3649039864540100e-01 2.3858639597892761e-01 - 5.4146647453308105e-01 - <_> - - 1 0 230 2.0800929050892591e-03 -1 -2 231 - -4.0738182142376900e-03 - - 6.5116149187088013e-01 6.0304141044616699e-01 - 3.5877010226249695e-01 - <_> - - 1 0 232 -1.9529370591044426e-02 -1 -2 233 - -5.3309470415115356e-02 - - 5.4235929250717163e-01 2.3609539866447449e-01 - 5.4017579555511475e-01 - <_> - - 0 1 234 -3.4849561750888824e-02 -1 -2 235 - 
-1.2658450007438660e-01 - - 2.8369858860969543e-01 1.8135160207748413e-01 - 5.4210460186004639e-01 - <_> - - 0 1 236 7.3325118137290701e-06 -1 -2 237 - -1.1843870393931866e-02 - - 3.9803659915924072e-01 2.6163849234580994e-01 - 5.2377301454544067e-01 - <_> - - 0 1 238 -4.8470678739249706e-03 -1 -2 239 - 8.1693977117538452e-03 - - 2.4381080269813538e-01 5.3271460533142090e-01 - 8.1903767585754395e-01 - <_> - - 1 0 240 -6.4716790802776814e-03 -1 -2 241 - -1.5188479665084742e-05 - - 4.6796938776969910e-01 5.5639117956161499e-01 - 4.3675860762596130e-01 - <_> - - 1 0 242 3.0696711037307978e-03 -1 -2 243 - -1.6296720423270017e-04 - - 6.6643488407135010e-01 5.5946111679077148e-01 - 3.0427119135856628e-01 - <_> - 39 - 1.8572250366210938e+01 - - <_> - - 1 0 244 -9.8275858908891678e-03 -1 -2 245 - -4.1693858802318573e-03 - - 2.1160189807415009e-01 6.9246852397918701e-01 - 3.0437770485877991e-01 - <_> - - 0 1 246 3.5341319744475186e-04 -1 -2 247 - 4.8054549843072891e-03 - - 3.1832858920097351e-01 5.4565590620040894e-01 - 2.5222688913345337e-01 - <_> - - 0 1 248 2.1071180526632816e-04 -1 -2 249 - -2.8318869881331921e-03 - - 2.9026180505752563e-01 3.1304559111595154e-01 - 6.8849372863769531e-01 - <_> - - 1 0 250 -7.5633679443853907e-06 -1 -2 251 - -8.2888139877468348e-04 - - 2.9624658823013306e-01 3.0996260046958923e-01 - 5.7525151968002319e-01 - <_> - - 0 1 252 1.6209259629249573e-03 -1 -2 253 - 9.1338958591222763e-03 - - 3.9931958913803101e-01 4.8273721337318420e-01 - 7.5378328561782837e-01 - <_> - - 0 1 254 -4.1212290525436401e-03 -1 -2 255 - -2.5447290390729904e-03 - - 2.6169270277023315e-01 3.1087028980255127e-01 - 5.4912358522415161e-01 - <_> - - 0 1 256 -6.2652782071381807e-04 -1 -2 257 - -3.6596331483451650e-05 - - 3.2396918535232544e-01 6.5174108743667603e-01 - 4.1789120435714722e-01 - <_> - - 1 0 258 1.3882719911634922e-02 -1 -2 259 - 1.0493700392544270e-03 - - 6.7712038755416870e-01 4.1595110297203064e-01 - 5.6528919935226440e-01 - <_> - - 1 0 260 
1.8215360119938850e-02 -1 -2 261 - -1.1334580369293690e-02 - - 7.6896011829376221e-01 2.8733238577842712e-01 - 4.9889329075813293e-01 - <_> - - 1 0 262 -4.1097560897469521e-03 -1 -2 263 - 4.2612891411408782e-04 - - 5.4630082845687866e-01 3.6312350630760193e-01 - 5.5125522613525391e-01 - <_> - - 1 0 264 6.0301548801362514e-03 -1 -2 265 - 3.3587709185667336e-04 - - 1.1437670141458511e-01 2.8910788893699646e-01 - 5.4473417997360229e-01 - <_> - - 1 0 266 6.2279507983475924e-04 -1 -2 267 - -2.5837119668722153e-02 - - 3.0234318971633911e-01 2.1670059859752655e-01 - 5.2781528234481812e-01 - <_> - - 1 0 268 2.1774910390377045e-02 -1 -2 269 - 1.7682299949228764e-03 - - 3.2548341155052185e-01 5.2630507946014404e-01 - 7.5263291597366333e-01 - <_> - - 0 1 270 -1.3793810270726681e-02 -1 -2 271 - -5.0852829590439796e-03 - - 7.4103301763534546e-01 6.8366098403930664e-01 - 4.5790711045265198e-01 - <_> - - 1 0 272 6.1795017682015896e-03 -1 -2 273 - 1.0030319914221764e-02 - - 7.4499362707138062e-01 4.8607799410820007e-01 - 2.3614570498466492e-01 - <_> - - 0 1 274 -6.4201927743852139e-03 -1 -2 275 - -5.6961281225085258e-03 - - 1.4673270285129547e-01 2.3478199541568756e-01 - 5.3233772516250610e-01 - <_> - - 0 1 276 -7.1498160250484943e-03 -1 -2 277 - 2.4450740311294794e-03 - - 1.4770570397377014e-01 3.4985339641571045e-01 - 5.8035618066787720e-01 - <_> - - 1 0 278 -3.7503410130739212e-02 -1 -2 279 - 4.7799441381357610e-04 - - 5.2595508098602295e-01 4.3628829717636108e-01 - 6.2089228630065918e-01 - <_> - - 0 1 280 -7.0806080475449562e-03 -1 -2 281 - 3.2818000763654709e-02 - - 2.0394609868526459e-01 5.1983588933944702e-01 - 1.3711960613727570e-01 - <_> - - 1 0 282 6.5188988810405135e-04 -1 -2 283 - 4.6485587954521179e-03 - - 6.3234299421310425e-01 4.7201630473136902e-01 - 6.5670871734619141e-01 - <_> - - 0 1 284 -1.9827929791063070e-03 -1 -2 285 - -1.6011310508474708e-03 - - 6.0530602931976318e-01 5.0905191898345947e-01 - 3.1169331073760986e-01 - <_> - - 0 1 286 -3.0539939180016518e-03 
-1 -2 287 - 4.3212040327489376e-04 - - 3.4298041462898254e-01 3.8384029269218445e-01 - 5.7755982875823975e-01 - <_> - - 0 1 288 -2.7452120557427406e-02 -1 -2 289 - 9.3099439982324839e-04 - - 2.1434690058231354e-01 5.9529662132263184e-01 - 3.7601581215858459e-01 - <_> - - 0 1 290 6.7144189961254597e-03 -1 -2 291 - -3.3701690845191479e-03 - - 5.6926268339157104e-01 5.7843041419982910e-01 - 3.9742821455001831e-01 - <_> - - 0 1 292 -1.8903959542512894e-02 -1 -2 293 - -6.5850871615111828e-03 - - 1.8188929557800293e-01 6.8491101264953613e-01 - 4.3515840172767639e-01 - <_> - - 1 0 294 5.8810501359403133e-03 -1 -2 295 - 8.0092082498595119e-04 - - 2.7266609668731689e-01 4.2364311218261719e-01 - 5.8446758985519409e-01 - <_> - - 1 0 296 1.8510579830035567e-03 -1 -2 297 - 6.3273650594055653e-03 - - 3.3713209629058838e-01 5.2702218294143677e-01 - 8.0536508560180664e-01 - <_> - - 0 1 298 -3.3820930402725935e-03 -1 -2 299 - -1.9292969955131412e-03 - - 2.8660181164741516e-01 5.8889460563659668e-01 - 3.8957870006561279e-01 - <_> - - 1 0 300 1.4995220117270947e-02 -1 -2 301 - -2.6330750435590744e-02 - - 2.1778169274330139e-01 1.7753170430660248e-01 - 5.6714701652526855e-01 - <_> - - 1 0 302 -4.1734222322702408e-03 -1 -2 303 - 2.7268350124359131e-02 - - 4.6529620885848999e-01 4.7683110833168030e-01 - 5.6952387094497681e-01 - <_> - - 1 0 304 9.8880263976752758e-04 -1 -2 305 - -1.0528849670663476e-03 - - 3.3974018692970276e-01 6.2500411272048950e-01 - 4.2884120345115662e-01 - <_> - - 0 1 306 5.2288072183728218e-03 -1 -2 307 - 3.0395459383726120e-02 - - 5.3477621078491211e-01 4.1155189275741577e-01 - 5.6607538461685181e-01 - <_> - - 0 1 308 -7.9113930463790894e-02 -1 -2 309 - 1.8231669440865517e-02 - - 7.8813230991363525e-01 3.6043399572372437e-01 - 5.5695050954818726e-01 - <_> - - 0 1 310 5.2288072183728218e-03 -1 -2 311 - 4.3922828626818955e-04 - - 5.4166442155838013e-01 5.5071568489074707e-01 - 3.8822770118713379e-01 - <_> - - 0 1 312 -8.6501962505280972e-04 -1 -2 313 - 
1.0326979681849480e-03 - - 3.1858509778976440e-01 5.5783641338348389e-01 - 3.2192459702491760e-01 - <_> - - 0 1 314 -7.2997747920453548e-03 -1 -2 315 - -9.3629042385146022e-04 - - 7.0732331275939941e-01 5.5580157041549683e-01 - 4.6138420701026917e-01 - <_> - - 0 1 316 -6.0483231209218502e-03 -1 -2 317 - 6.7529221996665001e-03 - - 6.8692898750305176e-01 4.8703178763389587e-01 - 2.6503708958625793e-01 - <_> - - 0 1 318 5.3078029304742813e-02 -1 -2 319 - -1.0225810110569000e-03 - - 5.2815151214599609e-01 6.0858821868896484e-01 - 4.3048679828643799e-01 - <_> - - 1 0 320 3.1270649284124374e-02 -1 -2 321 - -6.3522169366478920e-03 - - 5.4458320140838623e-01 5.3283357620239258e-01 - 2.3643240332603455e-01 - <_> - 45 - 2.1578119277954102e+01 - - <_> - - 1 0 322 -6.2215630896389484e-03 -1 -2 323 - 2.1097389981150627e-03 - - 2.6255810260772705e-01 1.5649929642677307e-01 - 6.7928832769393921e-01 - <_> - - 0 1 324 1.0845859535038471e-02 -1 -2 325 - 6.4230401767417789e-04 - - 3.4858089685440063e-01 3.6982551217079163e-01 - 5.9216582775115967e-01 - <_> - - 1 0 326 7.3311722371727228e-04 -1 -2 327 - 1.0134200565516949e-03 - - 3.0070841312408447e-01 3.6249229311943054e-01 - 7.0724260807037354e-01 - <_> - - 0 1 328 1.1093559674918652e-02 -1 -2 329 - -7.9127531498670578e-03 - - 4.4167020916938782e-01 3.0287081003189087e-01 - 5.4173761606216431e-01 - <_> - - 0 1 330 1.2905309908092022e-02 -1 -2 331 - -4.2430912144482136e-03 - - 4.3745040893554688e-01 4.4015899300575256e-01 - 7.5651907920837402e-01 - <_> - - 0 1 332 -2.1304309484548867e-04 -1 -2 333 - -2.2308640182018280e-03 - - 2.3107869923114777e-01 3.5681959986686707e-01 - 5.7499992847442627e-01 - <_> - - 0 1 334 2.6400520000606775e-03 -1 -2 335 - 7.5101032853126526e-02 - - 3.5936889052391052e-01 6.3635677099227905e-01 - 2.3270289599895477e-01 - <_> - - 0 1 336 -7.7012968249619007e-03 -1 -2 337 - 1.5588370151817799e-03 - - 7.0746237039566040e-01 5.7002371549606323e-01 - 3.5904508829116821e-01 - <_> - - 0 1 338 
-4.7687938786111772e-04 -1 -2 339 - 8.4234727546572685e-04 - - 2.8054410219192505e-01 4.1254189610481262e-01 - 6.1779958009719849e-01 - <_> - - 1 0 340 -1.2825109995901585e-02 -1 -2 341 - -6.5156567143276334e-04 - - 5.4030781984329224e-01 5.6336438655853271e-01 - 3.3565390110015869e-01 - <_> - - 0 1 342 -1.2006159871816635e-02 -1 -2 343 - 1.3213419588282704e-03 - - 7.1095108985900879e-01 4.9038508534431458e-01 - 2.8245830535888672e-01 - <_> - - 0 1 344 -2.0307440310716629e-02 -1 -2 345 - 4.0180929936468601e-03 - - 1.8913699686527252e-01 5.3779661655426025e-01 - 3.1194949150085449e-01 - <_> - - 1 0 346 4.5315311290323734e-03 -1 -2 347 - -4.4381739571690559e-03 - - 7.2067582607269287e-01 1.8546679615974426e-01 - 4.9817329645156860e-01 - <_> - - 1 0 348 1.5692010056227446e-03 -1 -2 349 - -4.9516442231833935e-03 - - 2.6382741332054138e-01 6.8710672855377197e-01 - 4.7146868705749512e-01 - <_> - - 0 1 350 -2.7429679408669472e-02 -1 -2 351 - 1.4181969454512000e-03 - - 1.5482850372791290e-01 4.3768429756164551e-01 - 6.3273680210113525e-01 - <_> - - 0 1 352 -1.3078940100967884e-02 -1 -2 353 - -3.5092779435217381e-03 - - 3.1668141484260559e-01 6.1997437477111816e-01 - 4.3796870112419128e-01 - <_> - - 1 0 354 1.8920730799436569e-02 -1 -2 355 - 2.1683350205421448e-03 - - 1.4707140624523163e-01 5.8094590902328491e-01 - 3.4319490194320679e-01 - <_> - - 0 1 356 1.6401590546593070e-03 -1 -2 357 - 1.4005920093040913e-04 - - 3.9594578742980957e-01 3.2400250434875488e-01 - 5.6466472148895264e-01 - <_> - - 1 0 358 -3.3137591090053320e-03 -1 -2 359 - -2.9459029901772738e-03 - - 4.2745280265808105e-01 3.3416679501533508e-01 - 6.6279602050781250e-01 - <_> - - 0 1 360 1.3612229668069631e-04 -1 -2 361 - 6.0512032359838486e-04 - - 4.0469279885292053e-01 5.4840582609176636e-01 - 3.5699409246444702e-01 - <_> - - 0 1 362 -1.7513990402221680e-02 -1 -2 363 - -1.8735030665993690e-02 - - 1.8241509795188904e-01 7.9718202352523804e-01 - 5.0685691833496094e-01 - <_> - - 1 0 364 1.2065649963915348e-02 
-1 -2 365 - -2.6544178836047649e-03 - - 2.1670070290565491e-01 6.5841788053512573e-01 - 4.6282431483268738e-01 - <_> - - 1 0 366 1.4501289697363973e-03 -1 -2 367 - 1.0954019613564014e-02 - - 2.0902520418167114e-01 5.1123052835464478e-01 - 7.7845758199691772e-01 - <_> - - 0 1 368 1.5771709382534027e-02 -1 -2 369 - -1.4252689667046070e-02 - - 5.1323592662811279e-01 1.7424149811267853e-01 - 5.2671480178833008e-01 - <_> - - 0 1 370 3.0411860279855318e-05 -1 -2 371 - 2.3486299440264702e-02 - - 3.4184479713439941e-01 5.6312650442123413e-01 - 2.0063939690589905e-01 - <_> - - 1 0 372 5.2205449901521206e-03 -1 -2 373 - -2.5812430307269096e-02 - - 6.2496489286422729e-01 3.2032281160354614e-01 - 5.1993298530578613e-01 - <_> - - 0 1 374 -1.9526650430634618e-03 -1 -2 375 - -8.1470049917697906e-03 - - 6.1407059431076050e-01 6.5928959846496582e-01 - 3.7111249566078186e-01 - <_> - - 1 0 376 3.2962448894977570e-03 -1 -2 377 - -1.3961310032755136e-03 - - 2.9521119594573975e-01 3.3208039402961731e-01 - 5.5284148454666138e-01 - <_> - - 0 1 378 -4.1055441834032536e-03 -1 -2 379 - -1.0888779535889626e-02 - - 1.7105500400066376e-01 3.3594349026679993e-01 - 5.6749051809310913e-01 - <_> - - 1 0 380 -7.6768421567976475e-03 -1 -2 381 - -9.7729787230491638e-03 - - 4.7732418775558472e-01 8.0810451507568359e-01 - 4.8458281159400940e-01 - <_> - - 1 0 382 6.0439710505306721e-03 -1 -2 383 - -4.6134641161188483e-04 - - 6.7840021848678589e-01 5.5146390199661255e-01 - 3.6423599720001221e-01 - <_> - - 1 0 384 5.7992361485958099e-02 -1 -2 385 - 5.9384980704635382e-04 - - 1.2544350326061249e-01 4.4248789548873901e-01 - 5.7284617424011230e-01 - <_> - - 0 1 386 -6.2353480607271194e-03 -1 -2 387 - -1.2784929946064949e-02 - - 2.8050419688224792e-01 1.9509120285511017e-01 - 5.6529247760772705e-01 - <_> - - 1 0 388 4.1973669431172311e-04 -1 -2 389 - 8.0646801507100463e-04 - - 6.1664837598800659e-01 4.5265799760818481e-01 - 5.9444868564605713e-01 - <_> - - 1 0 390 -1.6339010326191783e-03 -1 -2 391 - 
-4.8299999907612801e-03 - - 4.0869420766830444e-01 2.7935269474983215e-01 - 6.4449352025985718e-01 - <_> - - 1 0 392 -6.3992068171501160e-03 -1 -2 393 - 1.0819199681282043e-01 - - 5.6716561317443848e-01 5.3118121623992920e-01 - 2.6143568754196167e-01 - <_> - - 1 0 394 6.5056560561060905e-04 -1 -2 395 - 2.0611250773072243e-02 - - 2.9967740178108215e-01 4.4899430871009827e-01 - 6.8882799148559570e-01 - <_> - - 1 0 396 -2.5129050016403198e-02 -1 -2 397 - 1.7922939732670784e-03 - - 5.1968640089035034e-01 3.4669959545135498e-01 - 5.5335879325866699e-01 - <_> - - 1 0 398 1.5626220265403390e-03 -1 -2 399 - -6.1898730928078294e-04 - - 3.0814400315284729e-01 2.6938709616661072e-01 - 5.5444890260696411e-01 - <_> - - 0 1 400 4.8111421056091785e-03 -1 -2 401 - 2.2484229411929846e-03 - - 5.5878478288650513e-01 4.6721130609512329e-01 - 6.0908252000808716e-01 - <_> - - 0 1 402 -3.0147239565849304e-02 -1 -2 403 - 2.7548679709434509e-01 - - 9.0275919437408447e-01 4.7198349237442017e-01 - 2.1969200670719147e-01 - <_> - - 1 0 404 3.6894630175083876e-03 -1 -2 405 - 7.2957701049745083e-03 - - 6.2730091810226440e-01 4.8392179608345032e-01 - 6.9090622663497925e-01 - <_> - - 0 1 406 -5.6211069226264954e-02 -1 -2 407 - -2.6478560175746679e-03 - - 1.7384879291057587e-01 6.3041448593139648e-01 - 4.4743019342422485e-01 - <_> - - 1 0 408 -1.4534000074490905e-03 -1 -2 409 - 2.8540920466184616e-03 - - 5.3025382757186890e-01 5.3383970260620117e-01 - 3.7968829274177551e-01 - <_> - - 1 0 410 5.8243022067472339e-04 -1 -2 411 - 9.2509482055902481e-04 - - 3.2698369026184082e-01 4.5548120141029358e-01 - 6.3583481311798096e-01 - <_> - 47 - 2.2585290908813477e+01 - - <_> - - 0 1 412 1.9806440919637680e-02 -1 -2 413 - 7.0395611692219973e-04 - - 2.8097251057624817e-01 3.1198260188102722e-01 - 7.0903062820434570e-01 - <_> - - 0 1 414 2.5563780218362808e-03 -1 -2 415 - 1.0824160417541862e-03 - - 2.9819479584693909e-01 3.0205601453781128e-01 - 5.8088111877441406e-01 - <_> - - 1 0 416 -9.2893769033253193e-04 
-1 -2 417 - -1.8009729683399200e-02 - - 3.7381029129028320e-01 2.1631260216236115e-01 - 6.6192537546157837e-01 - <_> - - 1 0 418 2.3500190582126379e-03 -1 -2 419 - 8.1822491483762860e-04 - - 2.9104039072990417e-01 5.5786228179931641e-01 - 3.3666279911994934e-01 - <_> - - 0 1 420 6.2095321482047439e-04 -1 -2 421 - 9.6780969761312008e-04 - - 4.0724259614944458e-01 6.8595957756042480e-01 - 3.1054618954658508e-01 - <_> - - 1 0 422 4.8000211245380342e-04 -1 -2 423 - 9.0538640506565571e-05 - - 3.3373329043388367e-01 3.3709588646888733e-01 - 5.4512107372283936e-01 - <_> - - 0 1 424 -4.3914798647165298e-02 -1 -2 425 - -5.6501338258385658e-03 - - 2.6256701350212097e-01 6.0504627227783203e-01 - 3.2324150204658508e-01 - <_> - - 1 0 426 3.8661491125822067e-03 -1 -2 427 - -6.3069426687434316e-05 - - 3.2626131176948547e-01 5.8173078298568726e-01 - 4.1643899679183960e-01 - <_> - - 1 0 428 5.2533738315105438e-02 -1 -2 429 - 1.3818660518154502e-03 - - 7.0953989028930664e-01 5.2928757667541504e-01 - 2.5413888692855835e-01 - <_> - - 1 0 430 -8.9264067355543375e-04 -1 -2 431 - 8.5579507052898407e-02 - - 4.0853410959243774e-01 5.2632361650466919e-01 - 3.0032029747962952e-01 - <_> - - 1 0 432 -1.8343339615967125e-04 -1 -2 433 - -9.7924815490841866e-03 - - 4.0292051434516907e-01 3.5213199257850647e-01 - 6.6640049219131470e-01 - <_> - - 0 1 434 1.4428620226681232e-02 -1 -2 435 - -4.5687001198530197e-02 - - 4.5935660600662231e-01 1.4747560024261475e-01 - 5.1786321401596069e-01 - <_> - - 0 1 436 -2.5763090234249830e-03 -1 -2 437 - -3.8301859050989151e-02 - - 1.8372780084609985e-01 8.0826580524444580e-01 - 5.1666879653930664e-01 - <_> - - 0 1 438 2.8978290501981974e-03 -1 -2 439 - -2.5165060069411993e-03 - - 4.7980138659477234e-01 3.3462959527969360e-01 - 5.4444491863250732e-01 - <_> - - 0 1 440 5.6281982688233256e-04 -1 -2 441 - 3.6684391088783741e-03 - - 3.5890269279479980e-01 5.9831297397613525e-01 - 2.9839640855789185e-01 - <_> - - 1 0 442 2.1319789811968803e-03 -1 -2 443 - 
7.6037310063838959e-03 - - 6.1632239818572998e-01 5.2171301841735840e-01 - 2.0541590452194214e-01 - <_> - - 1 0 444 -1.1668079969240353e-04 -1 -2 445 - 3.1659509986639023e-03 - - 3.4466689825057983e-01 5.5974847078323364e-01 - 2.6737868785858154e-01 - <_> - - 0 1 446 -2.2569499909877777e-02 -1 -2 447 - 2.7129601221531630e-04 - - 6.9002681970596313e-01 4.4866389036178589e-01 - 5.5087852478027344e-01 - <_> - - 0 1 448 -1.5434459783136845e-02 -1 -2 449 - -8.4861656650900841e-03 - - 2.0483230054378510e-01 1.2549529969692230e-01 - 5.0603562593460083e-01 - <_> - - 0 1 450 -1.1807470023632050e-01 -1 -2 451 - -1.2300079688429832e-03 - - 6.7633062601089478e-02 5.6607007980346680e-01 - 4.2922011017799377e-01 - <_> - - 0 1 452 -7.0290351286530495e-03 -1 -2 453 - 8.9325206354260445e-03 - - 7.1364039182662964e-01 4.3388760089874268e-01 - 7.0608752965927124e-01 - <_> - - 1 0 454 -4.7735981643199921e-02 -1 -2 455 - -4.4155579060316086e-02 - - 5.2686852216720581e-01 2.5805801153182983e-01 - 5.4069608449935913e-01 - <_> - - 0 1 456 -2.5983480736613274e-02 -1 -2 457 - -4.7885831445455551e-03 - - 1.9050540030002594e-01 2.5518929958343506e-01 - 5.3390771150588989e-01 - <_> - - 0 1 458 6.7423451691865921e-03 -1 -2 459 - 1.1654750443994999e-02 - - 4.6933099627494812e-01 5.2619642019271851e-01 - 3.1454348564147949e-01 - <_> - - 0 1 460 -5.6982729583978653e-03 -1 -2 461 - -7.2983349673449993e-03 - - 1.7568530142307281e-01 7.7747297286987305e-01 - 5.1242929697036743e-01 - <_> - - 0 1 462 7.9091778025031090e-03 -1 -2 463 - -1.5874979726504534e-04 - - 5.2845597267150879e-01 3.8878020644187927e-01 - 5.5011737346649170e-01 - <_> - - 0 1 464 -6.2235877849161625e-03 -1 -2 465 - 1.3308860361576080e-03 - - 2.4898290634155273e-01 4.2621460556983948e-01 - 5.9350621700286865e-01 - <_> - - 1 0 466 5.2055278792977333e-03 -1 -2 467 - 1.4065169729292393e-02 - - 2.5452229380607605e-01 4.8519900441169739e-01 - 7.0214188098907471e-01 - <_> - - 0 1 468 -6.7384149879217148e-03 -1 -2 469 - 
3.3406780567020178e-03 - - 7.1432709693908691e-01 5.1757252216339111e-01 - 2.8086438775062561e-01 - <_> - - 1 0 470 -1.1880699545145035e-02 -1 -2 471 - 1.4226379571482539e-03 - - 5.1732218265533447e-01 4.5028659701347351e-01 - 5.7956951856613159e-01 - <_> - - 1 0 472 2.9858129564672709e-03 -1 -2 473 - -2.0481580868363380e-03 - - 1.9151160120964050e-01 6.5024322271347046e-01 - 4.5593151450157166e-01 - <_> - - 0 1 474 1.7122729914262891e-03 -1 -2 475 - -1.6980869695544243e-02 - - 5.3762471675872803e-01 7.0562332868576050e-01 - 4.9146059155464172e-01 - <_> - - 0 1 476 -1.1290470138192177e-03 -1 -2 477 - 2.8620059601962566e-03 - - 2.6787060499191284e-01 4.4108539819717407e-01 - 6.3683199882507324e-01 - <_> - - 0 1 478 -3.8065758999437094e-03 -1 -2 479 - 5.9090270660817623e-03 - - 2.7635639905929565e-01 4.8673018813133240e-01 - 6.7287760972976685e-01 - <_> - - 0 1 480 1.1004370171576738e-03 -1 -2 481 - -2.3396299220621586e-03 - - 4.0705141425132751e-01 2.6049488782882690e-01 - 6.1548602581024170e-01 - <_> - - 0 1 482 -3.6068160552531481e-03 -1 -2 483 - 4.0831189602613449e-02 - - 5.7319998741149902e-01 4.9733769893646240e-01 - 7.3870068788528442e-01 - <_> - - 0 1 484 -7.1082250215113163e-03 -1 -2 485 - -9.3759730225428939e-04 - - 6.9847512245178223e-01 2.6911678910255432e-01 - 4.7417798638343811e-01 - <_> - - 0 1 486 -1.6740820137783885e-03 -1 -2 487 - 8.8287703692913055e-02 - - 3.5510140657424927e-01 5.2446138858795166e-01 - 2.0966500043869019e-01 - <_> - - 0 1 488 8.2009629113599658e-04 -1 -2 489 - -7.6624617213383317e-04 - - 4.1310968995094299e-01 4.6202930808067322e-01 - 6.7754101753234863e-01 - <_> - - 1 0 490 6.5769668435677886e-04 -1 -2 491 - -2.1304790861904621e-03 - - 5.6282752752304077e-01 5.5768597126007080e-01 - 4.5776501297950745e-01 - <_> - - 1 0 492 -3.7317050737328827e-04 -1 -2 493 - -1.1172230355441570e-02 - - 4.9592560529708862e-01 5.6256359815597534e-01 - 2.0471079647541046e-01 - <_> - - 1 0 494 4.3435219675302505e-02 -1 -2 495 - 9.6736161503940821e-04 
- - 2.2421480715274811e-01 4.5333439111709595e-01 - 6.1999320983886719e-01 - <_> - - 0 1 496 -3.1452889088541269e-03 -1 -2 497 - 1.5233129961416125e-03 - - 6.6627562046051025e-01 5.0079882144927979e-01 - 2.3849929869174957e-01 - <_> - - 1 0 498 2.0854279864579439e-03 -1 -2 499 - 3.6098200827836990e-02 - - 3.7535008788108826e-01 5.1771712303161621e-01 - 1.6344930231571198e-01 - <_> - - 1 0 500 1.6179570229724050e-03 -1 -2 501 - -6.2132300809025764e-04 - - 2.5873818993568420e-01 6.2995338439941406e-01 - 4.6587899327278137e-01 - <_> - - 1 0 502 7.1878539165481925e-04 -1 -2 503 - -3.9339520037174225e-02 - - 3.3540761470794678e-01 2.1541289985179901e-01 - 5.2357137203216553e-01 - <_> - - 0 1 504 -1.0988829890266061e-03 -1 -2 505 - 2.1191420964896679e-03 - - 6.4688968658447266e-01 2.8930890560150146e-01 - 5.2548158168792725e-01 - <_> - 53 - 2.5609300613403320e+01 - - <_> - - 0 1 506 5.2359891124069691e-03 -1 -2 507 - -2.2169889416545630e-03 - - 3.2997110486030579e-01 7.0415931940078735e-01 - 3.2354658842086792e-01 - <_> - - 1 0 508 -8.2303592935204506e-03 -1 -2 509 - -8.2303592935204506e-03 - - 4.9611708521842957e-01 7.1280431747436523e-01 - 4.9611708521842957e-01 - <_> - - 0 1 510 4.5343261444941163e-04 -1 -2 511 - -4.1777061414904892e-04 - - 3.2084721326828003e-01 6.6139167547225952e-01 - 3.5513329505920410e-01 - <_> - - 0 1 512 2.7823769487440586e-03 -1 -2 513 - -6.0361868236213923e-05 - - 3.7101349234580994e-01 5.7463937997817993e-01 - 3.8948801159858704e-01 - <_> - - 1 0 514 3.5061789676547050e-03 -1 -2 515 - 1.7013119941111654e-04 - - 3.0541029572486877e-01 2.8855779767036438e-01 - 6.4877450466156006e-01 - <_> - - 1 0 516 -2.3378930054605007e-03 -1 -2 517 - -2.1369170863181353e-03 - - 3.1744310259819031e-01 3.8209199905395508e-01 - 5.2328932285308838e-01 - <_> - - 0 1 518 1.0250400518998504e-03 -1 -2 519 - -4.4726220949087292e-05 - - 3.6227950453758240e-01 6.5389591455459595e-01 - 4.0036809444427490e-01 - <_> - - 1 0 520 5.7102291611954570e-04 -1 -2 521 - 
5.7743012439459562e-04 - - 3.8931730389595032e-01 5.6145328283309937e-01 - 3.6876440048217773e-01 - <_> - - 1 0 522 7.9692091094329953e-04 -1 -2 523 - 3.5945948911830783e-04 - - 6.4430278539657593e-01 3.3808529376983643e-01 - 5.8246481418609619e-01 - <_> - - 1 0 524 4.3973900028504431e-04 -1 -2 525 - -8.9061429025605321e-04 - - 3.9387670159339905e-01 3.4279710054397583e-01 - 5.5156987905502319e-01 - <_> - - 1 0 526 5.4110242053866386e-03 -1 -2 527 - -8.5764907998964190e-04 - - 3.8035380840301514e-01 6.4395052194595337e-01 - 4.1683459281921387e-01 - <_> - - 0 1 528 -2.2000649943947792e-02 -1 -2 529 - -7.8731682151556015e-03 - - 6.6546010971069336e-01 4.1827228665351868e-01 - 5.6047242879867554e-01 - <_> - - 0 1 530 -2.7444459497928619e-02 -1 -2 531 - 1.9792269449681044e-03 - - 6.5868628025054932e-01 3.2449120283126831e-01 - 4.8828700184822083e-01 - <_> - - 0 1 532 -5.6783691979944706e-03 -1 -2 533 - 1.5057219570735469e-05 - - 2.2290790081024170e-01 4.1072851419448853e-01 - 5.7475912570953369e-01 - <_> - - 0 1 534 -5.4136710241436958e-03 -1 -2 535 - 5.3679239936172962e-03 - - 2.0657970011234283e-01 4.9264231324195862e-01 - 7.1394848823547363e-01 - <_> - - 0 1 536 -3.1426660716533661e-03 -1 -2 537 - 1.0907390154898167e-02 - - 6.7800867557525635e-01 5.2149301767349243e-01 - 1.1439959704875946e-01 - <_> - - 1 0 538 5.8436761610209942e-03 -1 -2 539 - 9.0507230197545141e-05 - - 1.9375260174274445e-01 3.8125771284103394e-01 - 5.5141878128051758e-01 - <_> - - 0 1 540 -1.6345789656043053e-02 -1 -2 541 - 1.5987500082701445e-03 - - 2.4740239977836609e-01 4.8177829384803772e-01 - 5.9230798482894897e-01 - <_> - - 0 1 542 -4.0257978253066540e-03 -1 -2 543 - -6.7750471644103527e-03 - - 7.5082087516784668e-01 2.8798109292984009e-01 - 5.1996952295303345e-01 - <_> - - 0 1 544 -3.2470689620822668e-03 -1 -2 545 - 1.5409620245918632e-03 - - 3.0449101328849792e-01 4.0634828805923462e-01 - 5.6765627861022949e-01 - <_> - - 0 1 546 -1.2858119793236256e-02 -1 -2 547 - -1.4824670506641269e-04 
- - 9.6717558801174164e-02 4.5378330349922180e-01 - 6.1153751611709595e-01 - <_> - - 1 0 548 -9.0210810303688049e-03 -1 -2 549 - -2.8795029968023300e-02 - - 4.8077508807182312e-01 3.4037950634956360e-01 - 5.2555292844772339e-01 - <_> - - 1 0 550 9.0210810303688049e-03 -1 -2 551 - 7.4121179059147835e-03 - - 7.5058358907699585e-01 5.4554468393325806e-01 - 3.2260689139366150e-01 - <_> - - 0 1 552 -3.7217529024928808e-03 -1 -2 553 - 1.9865889847278595e-01 - - 2.3118489980697632e-01 5.2710479497909546e-01 - 1.4699299633502960e-01 - <_> - - 0 1 554 1.5208719560177997e-05 -1 -2 555 - -3.9089918136596680e-03 - - 3.6781388521194458e-01 7.1319299936294556e-01 - 4.9938669800758362e-01 - <_> - - 0 1 556 2.5106288958340883e-03 -1 -2 557 - 2.3921660613268614e-04 - - 5.3120541572570801e-01 4.6893781423568726e-01 - 5.7140219211578369e-01 - <_> - - 1 0 558 6.9443131797015667e-03 -1 -2 559 - 1.2065629707649350e-03 - - 6.9487977027893066e-01 4.0045049786567688e-01 - 5.8748817443847656e-01 - <_> - - 0 1 560 2.5106288958340883e-03 -1 -2 561 - 1.7514040227979422e-03 - - 5.3295719623565674e-01 5.5458492040634155e-01 - 3.4495818614959717e-01 - <_> - - 0 1 562 -4.1978210210800171e-03 -1 -2 563 - 1.3092850567772985e-03 - - 1.2171830236911774e-01 5.3750497102737427e-01 - 3.4156250953674316e-01 - <_> - - 0 1 564 6.7396182566881180e-04 -1 -2 565 - -1.0530710220336914e-02 - - 4.1951790452003479e-01 3.4607538580894470e-01 - 5.1558601856231689e-01 - <_> - - 0 1 566 -4.0672299265861511e-01 -1 -2 567 - -2.6314549148082733e-02 - - 5.8065678924322128e-02 1.4734490215778351e-01 - 5.5593782663345337e-01 - <_> - - 1 0 568 2.2557149641215801e-03 -1 -2 569 - 1.2154860422015190e-02 - - 5.4777151346206665e-01 4.2077910900115967e-01 - 5.6218808889389038e-01 - <_> - - 0 1 570 -1.8436539918184280e-02 -1 -2 571 - 5.3676147945225239e-04 - - 6.4471471309661865e-01 2.7651271224021912e-01 - 4.8885959386825562e-01 - <_> - - 1 0 572 -2.6265541091561317e-03 -1 -2 573 - -5.1119807176291943e-04 - - 
5.2646911144256592e-01 5.7853102684020996e-01 - 4.2911028861999512e-01 - <_> - - 1 0 574 4.1454841266386211e-04 -1 -2 575 - -5.5028748465701938e-04 - - 3.4554108977317810e-01 6.0269188880920410e-01 - 4.1438931226730347e-01 - <_> - - 0 1 576 -1.0347720235586166e-03 -1 -2 577 - -3.3966631162911654e-03 - - 6.0952937602996826e-01 6.1082822084426880e-01 - 4.7077208757400513e-01 - <_> - - 1 0 578 3.1795909162610769e-03 -1 -2 579 - -1.6528950072824955e-04 - - 3.2443669438362122e-01 3.8307571411132812e-01 - 5.7343262434005737e-01 - <_> - - 1 0 580 8.3725210279226303e-03 -1 -2 581 - -2.5799809955060482e-03 - - 6.6109192371368408e-01 6.1393070220947266e-01 - 4.6861499547958374e-01 - <_> - - 1 0 582 9.0194388758391142e-04 -1 -2 583 - 3.6952210939489305e-04 - - 3.5200220346450806e-01 2.5787541270256042e-01 - 5.4672420024871826e-01 - <_> - - 0 1 584 9.9746137857437134e-04 -1 -2 585 - -3.6688039544969797e-03 - - 4.8201468586921692e-01 5.7101500034332275e-01 - 4.8319110274314880e-01 - <_> - - 0 1 586 -8.9501030743122101e-04 -1 -2 587 - 5.1904921419918537e-03 - - 6.1336791515350342e-01 4.9285829067230225e-01 - 2.5813090801239014e-01 - <_> - - 0 1 588 4.2274440056644380e-04 -1 -2 589 - 8.5176713764667511e-03 - - 4.4711241126060486e-01 5.1610249280929565e-01 - 3.3165338635444641e-01 - <_> - - 0 1 590 -3.6623608320951462e-02 -1 -2 591 - -4.1103712283074856e-03 - - 9.2606216669082642e-02 8.5221147537231445e-01 - 5.1379078626632690e-01 - <_> - - 1 0 592 -6.6017331555485725e-03 -1 -2 593 - 2.5578640401363373e-02 - - 5.4590600728988647e-01 5.2193528413772583e-01 - 1.9271859526634216e-01 - <_> - - 1 0 594 1.1447439901530743e-02 -1 -2 595 - 7.2427501436322927e-04 - - 1.9160020351409912e-01 5.2315711975097656e-01 - 3.5353401303291321e-01 - <_> - - 1 0 596 9.7127500921487808e-03 -1 -2 597 - -1.1337569914758205e-02 - - 6.4641010761260986e-01 7.3830378055572510e-01 - 4.9647438526153564e-01 - <_> - - 0 1 598 -8.1453882157802582e-03 -1 -2 599 - -8.5570756345987320e-03 - - 3.6117058992385864e-01 
3.4219071269035339e-01 - 5.9435117244720459e-01 - <_> - - 0 1 600 2.2993308957666159e-03 -1 -2 601 - 3.8430930580943823e-03 - - 4.5501041412353516e-01 4.7168621420860291e-01 - 6.6561907529830933e-01 - <_> - - 1 0 602 -9.9116540513932705e-04 -1 -2 603 - 2.5496469810605049e-02 - - 4.5927169919013977e-01 6.5634012222290039e-01 - 1.2588350474834442e-01 - <_> - - 1 0 604 -1.5748359262943268e-02 -1 -2 605 - -1.8046120181679726e-02 - - 5.2395021915435791e-01 8.0158519744873047e-01 - 5.0079578161239624e-01 - <_> - - 1 0 606 1.0323390364646912e-02 -1 -2 607 - 1.6452240524813533e-03 - - 2.2748200595378876e-01 4.3519461154937744e-01 - 5.8676278591156006e-01 - <_> - - 0 1 608 1.5881149098277092e-02 -1 -2 609 - 1.0586519725620747e-02 - - 4.4650518894195557e-01 4.5444580912590027e-01 - 5.7071107625961304e-01 - <_> - - 0 1 610 -2.1531689912080765e-02 -1 -2 611 - 5.2480469457805157e-03 - - 6.5276437997817993e-01 3.4447279572486877e-01 - 5.3246361017227173e-01 - <_> - 67 - 3.2647129058837891e+01 - - <_> - - 0 1 612 1.8219340126961470e-03 -1 -2 613 - 8.1313941627740860e-03 - - 3.1087881326675415e-01 3.1332370638847351e-01 - 6.6458672285079956e-01 - <_> - - 0 1 614 1.7055979697033763e-03 -1 -2 615 - -7.4483548814896494e-05 - - 2.6401311159133911e-01 5.6472051143646240e-01 - 3.4853729605674744e-01 - <_> - - 1 0 616 3.8342390325851738e-04 -1 -2 617 - 3.1868910882622004e-03 - - 3.1406548619270325e-01 6.4891988039016724e-01 - 3.8877290487289429e-01 - <_> - - 1 0 618 1.6044320166110992e-01 -1 -2 619 - -6.7285560071468353e-03 - - 7.2165298461914062e-01 1.6531379520893097e-01 - 5.1398259401321411e-01 - <_> - - 0 1 620 7.2638481469766703e-06 -1 -2 621 - 5.5551197146996856e-04 - - 3.1406199932098389e-01 5.9936988353729248e-01 - 3.3173981308937073e-01 - <_> - - 0 1 622 -1.0822320356965065e-02 -1 -2 623 - -4.5834020711481571e-03 - - 2.6529380679130554e-01 1.8495689332485199e-01 - 5.3139579296112061e-01 - <_> - - 1 0 624 -3.0205070506781340e-03 -1 -2 625 - 7.7864617109298706e-02 - - 
4.0400999784469604e-01 6.1581897735595703e-01 - 1.7864869534969330e-01 - <_> - - 0 1 626 2.6494380086660385e-02 -1 -2 627 - 3.6912109702825546e-02 - - 4.5110899209976196e-01 4.5282199978828430e-01 - 5.9722828865051270e-01 - <_> - - 1 0 628 5.7857790961861610e-03 -1 -2 629 - 9.3849771656095982e-04 - - 2.5338920950889587e-01 3.4104120731353760e-01 - 5.9236437082290649e-01 - <_> - - 0 1 630 -1.1003199964761734e-02 -1 -2 631 - -1.1737640015780926e-03 - - 6.9580441713333130e-01 3.8510841131210327e-01 - 5.4081892967224121e-01 - <_> - - 0 1 632 -3.6596669815480709e-03 -1 -2 633 - -2.4822750128805637e-03 - - 2.0093089342117310e-01 6.2953931093215942e-01 - 4.3950408697128296e-01 - <_> - - 0 1 634 -4.4606071896851063e-03 -1 -2 635 - -3.5969649907201529e-03 - - 2.4052999913692474e-01 5.4501742124557495e-01 - 3.7823578715324402e-01 - <_> - - 0 1 636 -3.6222559865564108e-03 -1 -2 637 - 1.2059339787811041e-03 - - 3.0338969826698303e-01 4.6337789297103882e-01 - 6.3359522819519043e-01 - <_> - - 1 0 638 4.3124938383698463e-03 -1 -2 639 - -4.4961250387132168e-03 - - 6.5988260507583618e-01 6.6216969490051270e-01 - 4.7552469372749329e-01 - <_> - - 0 1 640 -1.3860689941793680e-03 -1 -2 641 - -5.1588460337370634e-04 - - 2.8012010455131531e-01 3.8294890522956848e-01 - 5.6236267089843750e-01 - <_> - - 0 1 642 7.0330002927221358e-05 -1 -2 643 - -2.0976549421902746e-04 - - 4.5363429188728333e-01 5.6081390380859375e-01 - 4.2657798528671265e-01 - <_> - - 1 0 644 1.3642259873449802e-03 -1 -2 645 - 1.5483660390600562e-03 - - 2.6370918750762939e-01 4.1707509756088257e-01 - 5.9329879283905029e-01 - <_> - - 0 1 646 1.9179609417915344e-01 -1 -2 647 - -4.4776909053325653e-03 - - 5.2567642927169800e-01 6.6326218843460083e-01 - 4.8925888538360596e-01 - <_> - - 0 1 648 -1.2649179995059967e-01 -1 -2 649 - 6.5253327193204314e-05 - - 1.4997789263725281e-01 4.2333200573921204e-01 - 5.7560402154922485e-01 - <_> - - 0 1 650 4.1856421157717705e-03 -1 -2 651 - 2.7478230185806751e-04 - - 5.2888268232345581e-01 
4.5240178704261780e-01 - 5.6041252613067627e-01 - <_> - - 0 1 652 -2.2906810045242310e-03 -1 -2 653 - 1.6744500026106834e-03 - - 5.5782741308212280e-01 3.3230578899383545e-01 - 5.5587881803512573e-01 - <_> - - 1 0 654 1.2349759927019477e-03 -1 -2 655 - -8.7158754467964172e-03 - - 3.6539471149444580e-01 1.9245339930057526e-01 - 5.3136497735977173e-01 - <_> - - 1 0 656 4.6613621525466442e-03 -1 -2 657 - -8.5815992206335068e-03 - - 2.0277309417724609e-01 7.6360601186752319e-01 - 5.1408261060714722e-01 - <_> - - 0 1 658 1.4352120459079742e-02 -1 -2 659 - -7.7948719263076782e-03 - - 5.2529758214950562e-01 2.6329371333122253e-01 - 5.3286892175674438e-01 - <_> - - 0 1 660 -3.4155680332332850e-03 -1 -2 661 - -4.2639090679585934e-03 - - 2.4160879850387573e-01 3.9365449547767639e-01 - 5.4787421226501465e-01 - <_> - - 0 1 662 8.7177697569131851e-03 -1 -2 663 - -3.2232629600912333e-03 - - 4.7881990671157837e-01 3.6316120624542236e-01 - 5.2883160114288330e-01 - <_> - - 0 1 664 -4.2188368737697601e-02 -1 -2 665 - 1.9875749945640564e-02 - - 6.9311392307281494e-01 4.5201000571250916e-01 - 6.8550550937652588e-01 - <_> - - 1 0 666 -3.1134510412812233e-02 -1 -2 667 - 5.7032387703657150e-03 - - 5.3004240989685059e-01 5.6068921089172363e-01 - 4.2306229472160339e-01 - <_> - - 1 0 668 5.2733682096004486e-03 -1 -2 669 - -3.1231069006025791e-03 - - 3.2472288608551025e-01 1.9856959581375122e-01 - 5.3498727083206177e-01 - <_> - - 0 1 670 4.6453849063254893e-04 -1 -2 671 - 3.0355889350175858e-02 - - 4.2075088620185852e-01 5.1534587144851685e-01 - 3.1181010603904724e-01 - <_> - - 0 1 672 -4.2992769740521908e-03 -1 -2 673 - 1.9509199773892760e-04 - - 3.2745069265365601e-01 5.9530782699584961e-01 - 4.2255210876464844e-01 - <_> - - 0 1 674 -7.7784480527043343e-03 -1 -2 675 - 1.6917599365115166e-02 - - 7.2111797332763672e-01 4.9365919828414917e-01 - 7.0302772521972656e-01 - <_> - - 0 1 676 -5.1948569715023041e-02 -1 -2 677 - -5.4751220159232616e-03 - - 1.4255349338054657e-01 6.0593318939208984e-01 
- 4.3939951062202454e-01 - <_> - - 0 1 678 1.5210839592327829e-05 -1 -2 679 - 1.0235579684376717e-03 - - 4.4888499379158020e-01 4.2565500736236572e-01 - 5.7954382896423340e-01 - <_> - - 0 1 680 -1.0427719826111570e-04 -1 -2 681 - 8.7853781878948212e-03 - - 4.2460399866104126e-01 4.9580091238021851e-01 - 6.7594307661056519e-01 - <_> - - 0 1 682 3.4012699034065008e-03 -1 -2 683 - 5.8582378551363945e-04 - - 5.4234808683395386e-01 3.6365428566932678e-01 - 5.4643487930297852e-01 - <_> - - 0 1 684 -2.2973360028117895e-03 -1 -2 685 - -1.4330189675092697e-02 - - 2.5488188862800598e-01 6.5876567363739014e-01 - 4.5328021049499512e-01 - <_> - - 0 1 686 9.8565965890884399e-04 -1 -2 687 - -4.6640761196613312e-02 - - 3.8227710127830505e-01 3.0773219466209412e-01 - 5.2441328763961792e-01 - <_> - - 0 1 688 -1.1907300353050232e-01 -1 -2 689 - 1.9333280622959137e-02 - - 1.0338629782199860e-01 5.5547451972961426e-01 - 3.2213169336318970e-01 - <_> - - 0 1 690 3.1427849084138870e-02 -1 -2 691 - 2.0082130504306406e-04 - - 4.6823790669441223e-01 5.3730702400207520e-01 - 3.8006669282913208e-01 - <_> - - 0 1 692 -6.2584900297224522e-03 -1 -2 693 - 8.2861045375466347e-03 - - 1.7992070317268372e-01 5.0950688123703003e-01 - 7.5446051359176636e-01 - <_> - - 0 1 694 2.0529709290713072e-03 -1 -2 695 - 3.2524869311600924e-03 - - 5.6286448240280151e-01 4.8016890883445740e-01 - 5.8021020889282227e-01 - <_> - - 0 1 696 -3.1884901225566864e-02 -1 -2 697 - 1.8379340181127191e-03 - - 1.7427450418472290e-01 3.4665969014167786e-01 - 5.1071548461914062e-01 - <_> - - 1 0 698 -4.8512680223211646e-04 -1 -2 699 - -2.5407879147678614e-03 - - 5.3260862827301025e-01 6.3427752256393433e-01 - 4.9926930665969849e-01 - <_> - - 0 1 700 -5.1559060811996460e-03 -1 -2 701 - -4.4968750327825546e-02 - - 3.4334290027618408e-01 1.8681369721889496e-01 - 5.2154648303985596e-01 - <_> - - 1 0 702 5.8984281495213509e-03 -1 -2 703 - 3.2763120252639055e-03 - - 6.2293052673339844e-01 4.9357721209526062e-01 - 7.2179448604583740e-01 
- <_> - - 1 0 704 -1.0161520185647532e-04 -1 -2 705 - -1.6290300118271261e-04 - - 5.0079762935638428e-01 6.0241490602493286e-01 - 2.3295080661773682e-01 - <_> - - 0 1 706 9.0541364625096321e-03 -1 -2 707 - 3.5398490726947784e-02 - - 4.5104169845581055e-01 5.1419967412948608e-01 - 2.8602918982505798e-01 - <_> - - 0 1 708 5.6469351984560490e-03 -1 -2 709 - -2.4807190056890249e-03 - - 4.7049251198768616e-01 4.1798511147499084e-01 - 6.7266470193862915e-01 - <_> - - 0 1 710 -4.1088787838816643e-03 -1 -2 711 - -2.0714469719678164e-03 - - 5.8098018169403076e-01 6.0747838020324707e-01 - 4.5240598917007446e-01 - <_> - - 0 1 712 -2.8939060866832733e-03 -1 -2 713 - 1.3467279495671391e-03 - - 3.3835199475288391e-01 5.6969100236892700e-01 - 3.9708450436592102e-01 - <_> - - 0 1 714 -9.0779133141040802e-02 -1 -2 715 - -8.3171762526035309e-02 - - 1.5027019381523132e-01 7.5736707448959351e-01 - 4.9364370107650757e-01 - <_> - - 0 1 716 -1.4107000315561891e-03 -1 -2 717 - 5.5668760091066360e-02 - - 3.3909329771995544e-01 5.0250971317291260e-01 - 7.4220830202102661e-01 - <_> - - 0 1 718 5.7701539248228073e-02 -1 -2 719 - -4.2503291368484497e-01 - - 5.1973718404769897e-01 9.7346916794776917e-02 - 5.1857399940490723e-01 - <_> - - 0 1 720 -4.4380719191394746e-04 -1 -2 721 - 1.7924769781529903e-04 - - 3.6493501067161560e-01 5.6192791461944580e-01 - 3.7602970004081726e-01 - <_> - - 1 0 722 5.0382469780743122e-03 -1 -2 723 - 1.5191170386970043e-02 - - 6.3284450769424438e-01 4.9360820651054382e-01 - 7.4265247583389282e-01 - <_> - - 0 1 724 -1.2300389818847179e-02 -1 -2 725 - 1.5168030513450503e-03 - - 1.3893499970436096e-01 5.0919622182846069e-01 - 3.4826481342315674e-01 - <_> - - 1 0 726 9.5754547510296106e-04 -1 -2 727 - -1.8962200731039047e-02 - - 6.0363167524337769e-01 2.3191730678081512e-01 - 5.1166528463363647e-01 - <_> - - 0 1 728 -2.2272260859608650e-02 -1 -2 729 - -2.5145230814814568e-02 - - 6.5550220012664795e-01 1.3260710239410400e-01 - 4.6740341186523438e-01 - <_> - - 0 1 730 
1.9533900544047356e-02 -1 -2 731 - -1.1231349781155586e-03 - - 5.1820272207260132e-01 6.3182431459426880e-01 - 4.8255190253257751e-01 - <_> - - 0 1 732 -1.4861139934509993e-03 -1 -2 733 - 3.5002888762392104e-04 - - 2.9186710715293884e-01 5.6213712692260742e-01 - 4.2492130398750305e-01 - <_> - - 1 0 734 -1.1231349781155586e-03 -1 -2 735 - 1.0409739799797535e-02 - - 4.8137450218200684e-01 5.1840060949325562e-01 - 2.0512230694293976e-01 - <_> - - 0 1 736 -8.7832562625408173e-02 -1 -2 737 - 1.6584879485890269e-03 - - 1.1799219995737076e-01 4.9878111481666565e-01 - 6.9737559556961060e-01 - <_> - - 1 0 738 -2.3008750285953283e-03 -1 -2 739 - 3.3026169985532761e-02 - - 5.3398311138153076e-01 5.0332891941070557e-01 - 6.8519067764282227e-01 - <_> - - 0 1 740 -1.3585069682449102e-03 -1 -2 741 - 7.8067491995170712e-04 - - 3.0028221011161804e-01 4.5930838584899902e-01 - 6.4400452375411987e-01 - <_> - - 1 0 742 -1.8025759607553482e-02 -1 -2 743 - 1.2354910140857100e-03 - - 5.3112912178039551e-01 4.7291061282157898e-01 - 5.7214611768722534e-01 - <_> - - 0 1 744 -9.2583027435466647e-04 -1 -2 745 - 8.0123997759073973e-04 - - 3.6623328924179077e-01 5.3619897365570068e-01 - 3.0086329579353333e-01 - <_> - 63 - 3.0672130584716797e+01 - - <_> - - 0 1 746 2.4914839304983616e-03 -1 -2 747 - -5.0488598644733429e-02 - - 3.4223890304565430e-01 7.7034580707550049e-01 - 4.5163908600807190e-01 - <_> - - 1 0 748 -7.7838351717218757e-04 -1 -2 749 - 2.3572890495415777e-04 - - 3.2563421130180359e-01 3.4065559506416321e-01 - 5.8970272541046143e-01 - <_> - - 0 1 750 4.5575071126222610e-03 -1 -2 751 - 8.1241987645626068e-03 - - 4.3065789341926575e-01 7.1495872735977173e-01 - 4.3456849455833435e-01 - <_> - - 0 1 752 -4.4612158671952784e-04 -1 -2 753 - -2.8972938889637589e-04 - - 3.2959741353988647e-01 5.8456200361251831e-01 - 3.5266879200935364e-01 - <_> - - 0 1 754 7.1604831646254752e-06 -1 -2 755 - -3.8497708737850189e-04 - - 4.0819549560546875e-01 4.2031130194664001e-01 - 6.6341269016265869e-01 - 
<_> - - 0 1 756 1.9489860278554261e-04 -1 -2 757 - -1.7083849757909775e-02 - - 3.9424669742584229e-01 2.2940720617771149e-01 - 5.2389609813690186e-01 - <_> - - 0 1 758 8.3513697609305382e-04 -1 -2 759 - 7.5499608647078276e-04 - - 3.0260318517684937e-01 6.0321962833404541e-01 - 3.4124588966369629e-01 - <_> - - 1 0 760 8.0216713249683380e-03 -1 -2 761 - -3.8930509239435196e-02 - - 7.3062407970428467e-01 3.5993251204490662e-01 - 5.2343809604644775e-01 - <_> - - 1 0 762 -7.0348767621908337e-05 -1 -2 763 - -8.5350573062896729e-03 - - 3.4937581419944763e-01 2.7461090683937073e-01 - 5.6265860795974731e-01 - <_> - - 0 1 764 1.0854450054466724e-02 -1 -2 765 - 4.5329501153901219e-04 - - 5.2822262048721313e-01 4.5220491290092468e-01 - 6.0543018579483032e-01 - <_> - - 0 1 766 1.8117150466423482e-04 -1 -2 767 - 4.6641560038551688e-04 - - 3.3068621158599854e-01 1.4550000429153442e-01 - 5.3849279880523682e-01 - <_> - - 1 0 768 -8.4854792803525925e-03 -1 -2 769 - -1.8934309482574463e-02 - - 4.8141559958457947e-01 3.5637411475181580e-01 - 5.4051452875137329e-01 - <_> - - 1 0 770 4.9814549274742603e-03 -1 -2 771 - 3.4286780282855034e-03 - - 6.9577431678771973e-01 5.0508928298950195e-01 - 2.3169949650764465e-01 - <_> - - 1 0 772 4.4203791185282171e-04 -1 -2 773 - 2.3822550429031253e-04 - - 6.0185819864273071e-01 4.7550821304321289e-01 - 5.5852377414703369e-01 - <_> - - 0 1 774 -6.4261639490723610e-03 -1 -2 775 - 9.9637769162654877e-03 - - 2.2824659943580627e-01 4.0405881404876709e-01 - 5.6501698493957520e-01 - <_> - - 0 1 776 1.3654050417244434e-02 -1 -2 777 - -9.9892877042293549e-03 - - 5.2677392959594727e-01 6.7940497398376465e-01 - 4.7970339655876160e-01 - <_> - - 1 0 778 3.6558631807565689e-02 -1 -2 779 - 4.8999379941960797e-05 - - 8.8425733149051666e-02 4.0207880735397339e-01 - 5.4573321342468262e-01 - <_> - - 0 1 780 1.3654050417244434e-02 -1 -2 781 - 1.8802779959514737e-03 - - 5.2676129341125488e-01 4.8060521483421326e-01 - 6.3943648338317871e-01 - <_> - - 0 1 782 
-1.3654050417244434e-02 -1 -2 783 - 1.2778700329363346e-03 - - 1.7248100042343140e-01 4.4798240065574646e-01 - 6.3100087642669678e-01 - <_> - - 1 0 784 9.8843395244330168e-04 -1 -2 785 - 1.4511500012304168e-05 - - 5.9481692314147949e-01 4.8541748523712158e-01 - 5.3093612194061279e-01 - <_> - - 0 1 786 -2.2775429533794522e-04 -1 -2 787 - -1.4753740280866623e-02 - - 3.1836318969726562e-01 3.0849760770797729e-01 - 5.3520262241363525e-01 - <_> - - 0 1 788 -3.4148250706493855e-03 -1 -2 789 - 7.5806681998074055e-03 - - 6.1153268814086914e-01 4.9516460299491882e-01 - 7.0613312721252441e-01 - <_> - - 1 0 790 -5.7734688743948936e-03 -1 -2 791 - 7.4033669079653919e-05 - - 3.7542209029197693e-01 4.1155171394348145e-01 - 5.8894449472427368e-01 - <_> - - 0 1 792 -8.2278084009885788e-03 -1 -2 793 - 5.3380909375846386e-03 - - 9.5610566437244415e-02 5.3005087375640869e-01 - 3.9618980884552002e-01 - <_> - - 0 1 794 -2.7049109339714050e-03 -1 -2 795 - 7.7341338619589806e-03 - - 6.4818692207336426e-01 5.1104402542114258e-01 - 3.1215190887451172e-01 - <_> - - 0 1 796 1.0886609554290771e-02 -1 -2 797 - 1.1038660071790218e-02 - - 4.8014289140701294e-01 5.4297101497650146e-01 - 4.1623631119728088e-01 - <_> - - 0 1 798 -1.0054199956357479e-02 -1 -2 799 - 7.7072880230844021e-03 - - 7.3293352127075195e-01 5.3568720817565918e-01 - 3.4555470943450928e-01 - <_> - - 0 1 800 -5.8278098003938794e-04 -1 -2 801 - -2.5739220436662436e-03 - - 3.6550220847129822e-01 3.7767601013183594e-01 - 5.3917747735977173e-01 - <_> - - 0 1 802 -7.0167761296033859e-03 -1 -2 803 - -1.7727289814502001e-03 - - 4.0393048524856567e-01 6.9504439830780029e-01 - 4.9811169505119324e-01 - <_> - - 1 0 804 -1.6318289563059807e-02 -1 -2 805 - -1.1663000099360943e-02 - - 5.2967327833175659e-01 5.8426398038864136e-01 - 4.7895029187202454e-01 - <_> - - 1 0 806 2.5881489273160696e-03 -1 -2 807 - -3.7328999023884535e-03 - - 6.0921788215637207e-01 6.7217427492141724e-01 - 4.0668940544128418e-01 - <_> - - 0 1 808 
-1.4355930034071207e-03 -1 -2 809 - 1.8340899841859937e-03 - - 3.5850879549980164e-01 5.3711581230163574e-01 - 4.0335071086883545e-01 - <_> - - 1 0 810 1.2280289828777313e-01 -1 -2 811 - 5.0228700041770935e-02 - - 1.5475720167160034e-01 5.4338437318801880e-01 - 8.4292672574520111e-02 - <_> - - 1 0 812 -2.1437000483274460e-02 -1 -2 813 - -3.1009620055556297e-02 - - 4.8600539565086365e-01 1.8330100178718567e-01 - 5.2075541019439697e-01 - <_> - - 0 1 814 -1.2973720207810402e-02 -1 -2 815 - 1.5818020328879356e-03 - - 7.0482409000396729e-01 4.1705870628356934e-01 - 5.8651638031005859e-01 - <_> - - 1 0 816 -9.7806248813867569e-03 -1 -2 817 - 1.1735740117728710e-03 - - 5.3079181909561157e-01 5.5224531888961792e-01 - 3.5071650147438049e-01 - <_> - - 1 0 818 1.4651629608124495e-03 -1 -2 819 - 2.3532148916274309e-03 - - 3.0426511168479919e-01 5.3393232822418213e-01 - 2.8062361478805542e-01 - <_> - - 0 1 820 -6.1809681355953217e-03 -1 -2 821 - 6.5688649192452431e-04 - - 6.4101332426071167e-01 5.6208711862564087e-01 - 4.3903189897537231e-01 - <_> - - 1 0 822 2.6228010654449463e-02 -1 -2 823 - -1.7958110198378563e-02 - - 6.4455568790435791e-01 2.0027139782905579e-01 - 4.6246650815010071e-01 - <_> - - 1 0 824 -7.6468721963465214e-03 -1 -2 825 - -2.7482809964567423e-03 - - 5.2632009983062744e-01 5.8739811182022095e-01 - 4.8366001248359680e-01 - <_> - - 1 0 826 1.3851850293576717e-02 -1 -2 827 - 2.6369190309196711e-03 - - 1.5661309659481049e-01 4.2701789736747742e-01 - 5.8066600561141968e-01 - <_> - - 0 1 828 -3.1513599678874016e-03 -1 -2 829 - -1.4788460248382762e-05 - - 6.2158662080764771e-01 5.5766427516937256e-01 - 4.1220021247863770e-01 - <_> - - 0 1 830 -7.3676988482475281e-02 -1 -2 831 - -3.0912780202925205e-03 - - 1.5367099642753601e-01 6.3442689180374146e-01 - 4.5074120163917542e-01 - <_> - - 0 1 832 7.9240966588258743e-03 -1 -2 833 - 8.5778040811419487e-03 - - 5.4579752683639526e-01 5.4016572237014771e-01 - 3.8907998800277710e-01 - <_> - - 1 0 834 5.5403169244527817e-03 
-1 -2 835 - -1.1886510037584230e-04 - - 3.5556110739707947e-01 5.8367502689361572e-01 - 4.2743161320686340e-01 - <_> - - 0 1 836 -1.8408369272947311e-02 -1 -2 837 - -2.3490579333156347e-03 - - 5.8604401350021362e-01 4.4989579916000366e-01 - 5.4981988668441772e-01 - <_> - - 1 0 838 -7.6157399453222752e-03 -1 -2 839 - -3.3190969843417406e-03 - - 4.1009929776191711e-01 6.7013788223266602e-01 - 4.3530011177062988e-01 - <_> - - 1 0 840 -9.4642979092895985e-04 -1 -2 841 - 8.7858550250530243e-03 - - 5.3911769390106201e-01 5.5040502548217773e-01 - 3.9909350872039795e-01 - <_> - - 1 0 842 1.6395459533669055e-04 -1 -2 843 - -2.3508940357714891e-03 - - 3.5929331183433533e-01 4.0341728925704956e-01 - 5.8060771226882935e-01 - <_> - - 1 0 844 7.5449963333085179e-05 -1 -2 845 - 2.7018489316105843e-02 - - 5.4123848676681519e-01 4.9449229240417480e-01 - 5.5894362926483154e-01 - <_> - - 1 0 846 8.4561208495870233e-04 -1 -2 847 - -1.1687109945341945e-03 - - 5.8092182874679565e-01 4.7469571232795715e-01 - 2.8458958864212036e-01 - <_> - - 1 0 848 2.2897500544786453e-02 -1 -2 849 - 7.0879262685775757e-01 - - 2.4144110083580017e-01 5.1957648992538452e-01 - 1.0300920158624649e-01 - <_> - - 1 0 850 3.7483830004930496e-02 -1 -2 851 - 1.2827500468119979e-03 - - 1.8146389722824097e-01 4.2460718750953674e-01 - 5.7079732418060303e-01 - <_> - - 0 1 852 -5.1718312315642834e-03 -1 -2 853 - 2.7545939665287733e-03 - - 6.1433231830596924e-01 5.2056711912155151e-01 - 4.2204418778419495e-01 - <_> - - 0 1 854 -3.6072919610887766e-03 -1 -2 855 - -2.5258748792111874e-04 - - 3.1825920939445496e-01 5.7104682922363281e-01 - 4.2260938882827759e-01 - <_> - - 1 0 856 -7.0514748804271221e-03 -1 -2 857 - -5.4323761723935604e-03 - - 5.1628297567367554e-01 2.6662889122962952e-01 - 5.2146798372268677e-01 - <_> - - 1 0 858 -1.4652940080850385e-05 -1 -2 859 - -1.8556920113041997e-03 - - 3.9817610383033752e-01 3.3227631449699402e-01 - 5.7058340311050415e-01 - <_> - - 1 0 860 4.7609540633857250e-03 -1 -2 861 - 
1.5676260227337480e-03 - - 6.6365581750869751e-01 5.5055677890777588e-01 - 4.4206619262695312e-01 - <_> - - 1 0 862 5.4239919409155846e-03 -1 -2 863 - -6.4692399464547634e-03 - - 5.9599381685256958e-01 5.3695940971374512e-01 - 3.7443399429321289e-01 - <_> - - 0 1 864 -7.8038539504632354e-04 -1 -2 865 - 4.5086450874805450e-02 - - 4.1035950183868408e-01 5.1775068044662476e-01 - 1.8781000375747681e-01 - <_> - - 0 1 866 -5.1405387930572033e-03 -1 -2 867 - -2.1236129105091095e-02 - - 2.3528920114040375e-01 1.7087510228157043e-01 - 5.4249739646911621e-01 - <_> - - 0 1 868 -2.3763340432196856e-03 -1 -2 869 - 5.4122589528560638e-02 - - 5.8365309238433838e-01 5.1174330711364746e-01 - 1.8659310042858124e-01 - <_> - - 0 1 870 -5.3492980077862740e-04 -1 -2 871 - -5.8454048121348023e-04 - - 5.1086932420730591e-01 4.7754910588264465e-01 - 2.4398539960384369e-01 - <_> - 71 - 3.4677078247070312e+01 - - <_> - - 0 1 872 3.0031939968466759e-03 -1 -2 873 - 6.9161207647994161e-04 - - 3.3496499061584473e-01 4.5183679461479187e-01 - 7.2893542051315308e-01 - <_> - - 0 1 874 1.1212790384888649e-02 -1 -2 875 - -7.6108198845759034e-04 - - 2.9508009552955627e-01 5.6690549850463867e-01 - 2.8308510780334473e-01 - <_> - - 0 1 876 1.1984579759882763e-04 -1 -2 877 - -1.9725349557120353e-04 - - 4.0905779600143433e-01 6.9514942169189453e-01 - 4.6378681063652039e-01 - <_> - - 1 0 878 -5.5180420167744160e-03 -1 -2 879 - 1.2148249661549926e-03 - - 3.1676751375198364e-01 3.3167061209678650e-01 - 5.3963977098464966e-01 - <_> - - 0 1 880 -4.2497441172599792e-03 -1 -2 881 - -9.4915721565485001e-03 - - 2.6005738973617554e-01 7.4842947721481323e-01 - 5.0731921195983887e-01 - <_> - - 1 0 882 6.5378600265830755e-04 -1 -2 883 - -4.9741100519895554e-04 - - 3.9520108699798584e-01 5.8802747726440430e-01 - 3.5521200299263000e-01 - <_> - - 0 1 884 -4.3079249560832977e-02 -1 -2 885 - -5.1999092102050781e-04 - - 2.4348780512809753e-01 3.1955629587173462e-01 - 5.5854547023773193e-01 - <_> - - 1 0 886 
-4.5451628975570202e-03 -1 -2 887 - -7.9610403627157211e-03 - - 4.8452898859977722e-01 3.8011810183525085e-01 - 5.3585118055343628e-01 - <_> - - 1 0 888 -3.1919340835884213e-04 -1 -2 889 - -1.9223889335989952e-02 - - 4.3563291430473328e-01 2.6130661368370056e-01 - 6.1554962396621704e-01 - <_> - - 0 1 890 -1.3076990144327283e-03 -1 -2 891 - 1.9825039431452751e-02 - - 5.9420621395111084e-01 4.9454280734062195e-01 - 7.3848551511764526e-01 - <_> - - 0 1 892 -2.2013280540704727e-03 -1 -2 893 - -7.8596705570816994e-03 - - 2.2144819796085358e-01 3.6009770631790161e-01 - 5.2985501289367676e-01 - <_> - - 1 0 894 1.4142199652269483e-03 -1 -2 895 - -1.1232759803533554e-02 - - 5.7765662670135498e-01 6.9344568252563477e-01 - 4.8272070288658142e-01 - <_> - - 1 0 896 2.9746301006525755e-03 -1 -2 897 - 5.3283828310668468e-04 - - 3.2166770100593567e-01 3.9625000953674316e-01 - 5.6803637742996216e-01 - <_> - - 1 0 898 1.0105259716510773e-02 -1 -2 899 - -1.1653699912130833e-02 - - 7.5674182176589966e-01 6.5235567092895508e-01 - 5.0270539522171021e-01 - <_> - - 0 1 900 -7.0609981194138527e-03 -1 -2 901 - 2.2343141026794910e-03 - - 2.5387701392173767e-01 4.3872770667076111e-01 - 6.1776322126388550e-01 - <_> - - 1 0 902 -2.9802279546856880e-02 -1 -2 903 - 1.1611840454861522e-03 - - 5.2011400461196899e-01 4.6479099988937378e-01 - 6.1842548847198486e-01 - <_> - - 1 0 904 9.4824447296559811e-04 -1 -2 905 - 4.1284630424343050e-04 - - 3.0409941077232361e-01 4.5188081264495850e-01 - 6.2457829713821411e-01 - <_> - - 0 1 906 -3.1203540042042732e-02 -1 -2 907 - 2.7652881108224392e-03 - - 2.7889358997344971e-01 4.6985000371932983e-01 - 6.5024542808532715e-01 - <_> - - 1 0 908 2.5644779205322266e-02 -1 -2 909 - -7.5331530533730984e-03 - - 1.8051710724830627e-01 3.2080689072608948e-01 - 5.5220228433609009e-01 - <_> - - 1 0 910 3.2047149725258350e-03 -1 -2 911 - -2.4282479716930538e-04 - - 6.4369338750839233e-01 5.6767052412033081e-01 - 4.5091038942337036e-01 - <_> - - 0 1 912 
-6.1979342717677355e-04 -1 -2 913 - -8.0101029016077518e-04 - - 3.1221461296081543e-01 2.9651939868927002e-01 - 5.2304947376251221e-01 - <_> - - 1 0 914 -9.1816839994862676e-04 -1 -2 915 - 1.2239529751241207e-03 - - 5.4647117853164673e-01 4.6185028553009033e-01 - 5.6795489788055420e-01 - <_> - - 0 1 916 -6.8743730662390590e-04 -1 -2 917 - -1.8252469599246979e-03 - - 5.4308801889419556e-01 5.4336231946945190e-01 - 3.3852210640907288e-01 - <_> - - 1 0 918 -7.4570789001882076e-03 -1 -2 919 - 5.3775748237967491e-03 - - 5.2655947208404541e-01 4.8572158813476562e-01 - 6.8151241540908813e-01 - <_> - - 1 0 920 3.7602309603244066e-03 -1 -2 921 - 8.7752222316339612e-04 - - 2.8321608901023865e-01 3.9668309688568115e-01 - 5.5124807357788086e-01 - <_> - - 1 0 922 5.5084479972720146e-03 -1 -2 923 - -7.5949047459289432e-04 - - 6.7846202850341797e-01 3.9065030217170715e-01 - 5.4572027921676636e-01 - <_> - - 1 0 924 1.6352660022675991e-03 -1 -2 925 - -1.2750849418807775e-04 - - 3.6402040719985962e-01 5.8297240734100342e-01 - 4.1949799656867981e-01 - <_> - - 0 1 926 2.2067610174417496e-02 -1 -2 927 - -1.9203789532184601e-02 - - 4.6067029237747192e-01 3.2614830136299133e-01 - 5.2360808849334717e-01 - <_> - - 0 1 928 -1.2998109683394432e-02 -1 -2 929 - -3.1332690268754959e-03 - - 7.0221120119094849e-01 2.8704708814620972e-01 - 5.0764769315719604e-01 - <_> - - 1 0 930 -5.2937557920813560e-03 -1 -2 931 - 2.1857069805264473e-03 - - 4.7095209360122681e-01 4.7082918882369995e-01 - 6.1698418855667114e-01 - <_> - - 0 1 932 -4.5750709250569344e-03 -1 -2 933 - -4.5152138918638229e-02 - - 3.1142529845237732e-01 1.8514350056648254e-01 - 5.5048149824142456e-01 - <_> - - 1 0 934 -2.7783559635281563e-03 -1 -2 935 - -2.5752480141818523e-03 - - 4.9373480677604675e-01 6.1529481410980225e-01 - 4.7354999184608459e-01 - <_> - - 1 0 936 1.1614130344241858e-03 -1 -2 937 - 2.3350189439952374e-03 - - 6.5105718374252319e-01 4.0883418917655945e-01 - 5.6841522455215454e-01 - <_> - - 1 0 938 
3.8499289657920599e-03 -1 -2 939 - 2.4529630318284035e-03 - - 3.0258288979530334e-01 5.2325028181076050e-01 - 2.0176209509372711e-01 - <_> - - 1 0 940 3.6731390282511711e-03 -1 -2 941 - 2.1937100682407618e-03 - - 6.4284259080886841e-01 4.3288651108741760e-01 - 6.4205098152160645e-01 - <_> - - 1 0 942 -6.4666871912777424e-03 -1 -2 943 - -5.7186251506209373e-03 - - 5.2540659904479980e-01 2.4909840524196625e-01 - 5.2876192331314087e-01 - <_> - - 1 0 944 9.9941878579556942e-04 -1 -2 945 - -7.8276498243212700e-04 - - 3.3297958970069885e-01 3.5983449220657349e-01 - 5.4983407258987427e-01 - <_> - - 0 1 946 4.3231188319623470e-03 -1 -2 947 - 4.0838290005922318e-03 - - 4.8187050223350525e-01 5.2663302421569824e-01 - 3.1057891249656677e-01 - <_> - - 1 0 948 3.0515898833982646e-04 -1 -2 949 - 1.2640280183404684e-03 - - 3.9952918887138367e-01 3.2284379005432129e-01 - 5.8192151784896851e-01 - <_> - - 0 1 950 -1.0152660310268402e-02 -1 -2 951 - -2.6863690000027418e-03 - - 8.0260711908340454e-01 3.8756170868873596e-01 - 5.4665708541870117e-01 - <_> - - 1 0 952 -9.0515613555908203e-03 -1 -2 953 - -6.3204211182892323e-03 - - 4.3720579147338867e-01 1.1265510320663452e-01 - 6.3954162597656250e-01 - <_> - - 0 1 954 2.6117300149053335e-03 -1 -2 955 - 1.4339019544422626e-02 - - 5.4239892959594727e-01 4.9792730808258057e-01 - 6.0422360897064209e-01 - <_> - - 1 0 956 2.8452780097723007e-03 -1 -2 957 - 1.4783289771003183e-05 - - 3.4910920262336731e-01 4.1950678825378418e-01 - 5.7759660482406616e-01 - <_> - - 0 1 958 8.1814555451273918e-03 -1 -2 959 - 6.6321990452706814e-03 - - 4.8859870433807373e-01 5.4444682598114014e-01 - 4.4209951162338257e-01 - <_> - - 0 1 960 -2.2483461070805788e-03 -1 -2 961 - 1.2374560348689556e-02 - - 6.6997921466827393e-01 4.4786059856414795e-01 - 6.5648937225341797e-01 - <_> - - 1 0 962 -6.6516688093543053e-03 -1 -2 963 - -8.5750613361597061e-03 - - 5.5118787288665771e-01 4.0174451470375061e-01 - 5.4055362939834595e-01 - <_> - - 1 0 964 6.5078441984951496e-03 -1 
-2 965 - 2.8675209730863571e-02 - - 2.2943930327892303e-01 5.1779001951217651e-01 - 3.5677561163902283e-01 - <_> - - 0 1 966 7.0673860609531403e-03 -1 -2 967 - 1.2367829913273454e-03 - - 5.5646997690200806e-01 3.6276981234550476e-01 - 5.5724138021469116e-01 - <_> - - 1 0 968 7.4818679131567478e-03 -1 -2 969 - 4.7109839506447315e-03 - - 6.7849111557006836e-01 4.1212528944015503e-01 - 6.0722357034683228e-01 - <_> - - 1 0 970 -6.9405790418386459e-03 -1 -2 971 - 3.3302098512649536e-02 - - 5.4597669839859009e-01 5.2767068147659302e-01 - 2.3749159276485443e-01 - <_> - - 1 0 972 3.6104630678892136e-02 -1 -2 973 - 1.9674649462103844e-02 - - 7.2492793202400208e-02 4.6263459324836731e-01 - 8.2089632749557495e-01 - <_> - - 0 1 974 3.4766150638461113e-03 -1 -2 975 - 1.3987369602546096e-03 - - 5.2087318897247314e-01 5.4844141006469727e-01 - 4.2300349473953247e-01 - <_> - - 1 0 976 4.0974249131977558e-03 -1 -2 977 - 2.6973790954798460e-03 - - 2.7805531024932861e-01 5.4038310050964355e-01 - 3.7909889221191406e-01 - <_> - - 1 0 978 -5.6591699831187725e-03 -1 -2 979 - 3.9460969856008887e-04 - - 4.7983360290527344e-01 3.7669500708580017e-01 - 5.4292291402816772e-01 - <_> - - 1 0 980 2.1750570740550756e-03 -1 -2 981 - 1.4614439569413662e-03 - - 6.2071627378463745e-01 3.3579450845718384e-01 - 5.1426321268081665e-01 - <_> - - 1 0 982 -5.3006567759439349e-04 -1 -2 983 - 1.4869309961795807e-01 - - 5.3446400165557861e-01 5.1596081256866455e-01 - 2.5618231296539307e-01 - <_> - - 1 0 984 -5.8816498494707048e-05 -1 -2 985 - -1.6275369562208652e-03 - - 5.1230919361114502e-01 6.0176461935043335e-01 - 3.1093719601631165e-01 - <_> - - 0 1 986 -1.2881809845566750e-02 -1 -2 987 - 9.4982917653396726e-04 - - 2.7122870087623596e-01 5.4424422979354858e-01 - 4.0288880467414856e-01 - <_> - - 1 0 988 -1.2315999716520309e-02 -1 -2 989 - 9.0286601334810257e-03 - - 4.7360658645629883e-01 7.4514347314834595e-01 - 3.4879919886589050e-01 - <_> - - 0 1 990 -8.6876116693019867e-02 -1 -2 991 - 
-1.5107560102478601e-05 - - 2.2903330624103546e-01 5.5178898572921753e-01 - 4.3931490182876587e-01 - <_> - - 0 1 992 -1.7457660287618637e-02 -1 -2 993 - -2.5219470262527466e-03 - - 9.0167902410030365e-02 6.2335401773452759e-01 - 4.7894591093063354e-01 - <_> - - 0 1 994 1.0656520025804639e-03 -1 -2 995 - -4.2540300637483597e-03 - - 5.4896962642669678e-01 5.5798089504241943e-01 - 4.3758779764175415e-01 - <_> - - 0 1 996 -9.0349102392792702e-03 -1 -2 997 - -1.5230999561026692e-03 - - 3.5791561007499695e-01 5.6136602163314819e-01 - 3.9390438795089722e-01 - <_> - - 1 0 998 2.8441150207072496e-03 -1 -2 999 - -3.2824429217725992e-03 - - 3.9015549421310425e-01 4.5286190509796143e-01 - 5.4413431882858276e-01 - <_> - - 1 0 1000 3.2161718991119415e-05 -1 -2 1001 - 3.0118400900391862e-05 - - 5.8031117916107178e-01 3.3368501067161560e-01 - 5.5048561096191406e-01 - <_> - - 0 1 1002 -5.6150099262595177e-03 -1 -2 1003 - -1.7389209941029549e-02 - - 6.1247891187667847e-01 8.7271630764007568e-02 - 5.2045881748199463e-01 - <_> - - 0 1 1004 -4.4361080654198304e-05 -1 -2 1005 - 1.0354899859521538e-04 - - 3.9353290200233459e-01 5.9188538789749146e-01 - 4.1196140646934509e-01 - <_> - - 0 1 1006 1.5939630102366209e-03 -1 -2 1007 - 2.5440789759159088e-03 - - 4.8396238684654236e-01 4.7873649001121521e-01 - 6.3606631755828857e-01 - <_> - - 0 1 1008 1.5083180187502876e-05 -1 -2 1009 - -9.9282202427275479e-05 - - 4.2311170697212219e-01 4.2745891213417053e-01 - 6.0940480232238770e-01 - <_> - - 1 0 1010 5.5371708003804088e-04 -1 -2 1011 - 1.9186759600415826e-03 - - 4.2719879746437073e-01 4.4971078634262085e-01 - 5.5491220951080322e-01 - <_> - - 1 0 1012 -5.0764222396537662e-04 -1 -2 1013 - 1.7236480489373207e-03 - - 5.4771959781646729e-01 2.8829228878021240e-01 - 5.6151270866394043e-01 - <_> - 75 - 3.6726501464843750e+01 - - <_> - - 0 1 1014 1.3092169538140297e-02 -1 -2 1015 - 4.1446479735895991e-04 - - 3.3388701081275940e-01 3.0993521213531494e-01 - 6.6774922609329224e-01 - <_> - - 0 1 1016 
2.1835729479789734e-02 -1 -2 1017 - 4.8323940485715866e-02 - - 4.3690490722656250e-01 4.3017241358757019e-01 - 6.1538851261138916e-01 - <_> - - 0 1 1018 1.6091950237751007e-03 -1 -2 1019 - 1.3469760306179523e-03 - - 3.3873260021209717e-01 6.2487137317657471e-01 - 3.5941308736801147e-01 - <_> - - 0 1 1020 1.7729059618432075e-04 -1 -2 1021 - 3.6743620876222849e-04 - - 3.8684248924255371e-01 4.4093450903892517e-01 - 5.4764741659164429e-01 - <_> - - 0 1 1022 -1.2352119665592909e-03 -1 -2 1023 - 1.1705530341714621e-03 - - 3.2601711153984070e-01 4.1113489866256714e-01 - 6.0881638526916504e-01 - <_> - - 1 0 1024 -2.9695429475395940e-05 -1 -2 1025 - 2.7050738572143018e-04 - - 4.2694228887557983e-01 4.3064668774604797e-01 - 5.8105140924453735e-01 - <_> - - 1 0 1026 -7.9626210208516568e-05 -1 -2 1027 - 3.3152441028505564e-04 - - 3.6691430211067200e-01 4.6106639504432678e-01 - 6.2905901670455933e-01 - <_> - - 1 0 1028 -5.2305828779935837e-02 -1 -2 1029 - 2.6880469173192978e-02 - - 5.3286898136138916e-01 5.2132612466812134e-01 - 3.2312199473381042e-01 - <_> - - 1 0 1030 -2.4203000066336244e-04 -1 -2 1031 - -1.6424639616161585e-03 - - 3.5685700178146362e-01 3.4406611323356628e-01 - 5.6256049871444702e-01 - <_> - - 1 0 1032 -2.6830288697965443e-04 -1 -2 1033 - -2.2649629972875118e-03 - - 4.5611730217933655e-01 5.3213518857955933e-01 - 3.6741548776626587e-01 - <_> - - 1 0 1034 1.5627209097146988e-02 -1 -2 1035 - 1.6211320459842682e-01 - - 2.0293539762496948e-01 5.5630332231521606e-01 - 2.6188498735427856e-01 - <_> - - 0 1 1036 -3.7391691002994776e-03 -1 -2 1037 - -2.0878419745713472e-03 - - 6.0621947050094604e-01 5.9507638216018677e-01 - 4.5451170206069946e-01 - <_> - - 1 0 1038 2.3334210272878408e-03 -1 -2 1039 - 6.5116386394947767e-05 - - 6.4355242252349854e-01 3.5207340121269226e-01 - 5.1797789335250854e-01 - <_> - - 0 1 1040 7.4625718407332897e-03 -1 -2 1041 - -2.2032689303159714e-02 - - 5.3266882896423340e-01 3.4919810295104980e-01 - 5.4292368888854980e-01 - <_> - - 0 1 1042 
-8.3081610500812531e-03 -1 -2 1043 - -4.3259368976578116e-04 - - 2.0840230584144592e-01 3.9652720093727112e-01 - 5.4254537820816040e-01 - <_> - - 1 0 1044 -3.2209228724241257e-02 -1 -2 1045 - -9.0424838708713651e-04 - - 5.3064119815826416e-01 5.4503858089447021e-01 - 4.2566969990730286e-01 - <_> - - 1 0 1046 2.2727500181645155e-03 -1 -2 1047 - 5.9820008464157581e-03 - - 5.9686112403869629e-01 4.7581401467323303e-01 - 3.1509441137313843e-01 - <_> - - 1 0 1048 -5.8856618124991655e-04 -1 -2 1049 - -8.8227191008627415e-04 - - 4.8477488756179810e-01 5.4263162612915039e-01 - 4.3383410573005676e-01 - <_> - - 1 0 1050 -7.4473457061685622e-05 -1 -2 1051 - 3.9148979703895748e-04 - - 4.2875099182128906e-01 6.3451850414276123e-01 - 4.1018518805503845e-01 - <_> - - 1 0 1052 -3.6939629353582859e-03 -1 -2 1053 - -1.1207849718630314e-02 - - 4.8491048812866211e-01 4.1463369131088257e-01 - 5.4712641239166260e-01 - <_> - - 0 1 1054 -1.0337409563362598e-02 -1 -2 1055 - 3.6883640568703413e-03 - - 2.8771838545799255e-01 5.1019018888473511e-01 - 7.2169512510299683e-01 - <_> - - 1 0 1056 -3.8984280545264482e-03 -1 -2 1057 - -5.9986729174852371e-03 - - 5.2761822938919067e-01 6.6184598207473755e-01 - 4.8416310548782349e-01 - <_> - - 1 0 1058 4.5043681748211384e-03 -1 -2 1059 - 1.7799530178308487e-02 - - 1.8741579353809357e-01 4.6169349551200867e-01 - 7.0889657735824585e-01 - <_> - - 0 1 1060 -1.8462570384144783e-02 -1 -2 1061 - 1.4931300029275008e-05 - - 3.0019798874855042e-01 4.5618081092834473e-01 - 5.6107878684997559e-01 - <_> - - 0 1 1062 -8.6021229624748230e-02 -1 -2 1063 - -6.0818758356617764e-05 - - 2.3417009413242340e-01 5.6722861528396606e-01 - 4.1999641060829163e-01 - <_> - - 1 0 1064 1.2670679716393352e-03 -1 -2 1065 - 1.3699879636988044e-03 - - 6.2074822187423706e-01 5.3949588537216187e-01 - 3.8238629698753357e-01 - <_> - - 1 0 1066 3.3162781037390232e-03 -1 -2 1067 - -1.4532039640471339e-03 - - 7.0616811513900757e-01 3.0655130743980408e-01 - 4.8273730278015137e-01 - <_> - - 1 0 
1068 -7.1492061018943787e-02 -1 -2 1069 - 1.9857978913933039e-03 - - 5.1931220293045044e-01 4.6424350142478943e-01 - 5.8076947927474976e-01 - <_> - - 1 0 1070 6.2516499310731888e-03 -1 -2 1071 - 2.7005500160157681e-03 - - 2.9498139023780823e-01 4.5858868956565857e-01 - 6.0223537683486938e-01 - <_> - - 0 1 1072 1.1130389757454395e-02 -1 -2 1073 - 1.5092849731445312e-02 - - 4.3578410148620605e-01 4.5615398883819580e-01 - 6.1190617084503174e-01 - <_> - - 0 1 1074 -2.7943300083279610e-02 -1 -2 1075 - 4.4036991312168539e-05 - - 6.5371441841125488e-01 3.4747231006622314e-01 - 5.3369677066802979e-01 - <_> - - 0 1 1076 -1.2232770211994648e-02 -1 -2 1077 - -6.8591412855312228e-04 - - 3.7316760420799255e-01 5.7172292470932007e-01 - 4.7933790087699890e-01 - <_> - - 0 1 1078 -3.8992990739643574e-03 -1 -2 1079 - 4.9113907152786851e-04 - - 4.0564361214637756e-01 6.1740481853485107e-01 - 4.4717541337013245e-01 - <_> - - 1 0 1080 8.2117747515439987e-03 -1 -2 1081 - -4.5564480125904083e-02 - - 6.1796981096267700e-01 2.2854949533939362e-01 - 5.2495658397674561e-01 - <_> - - 0 1 1082 -5.3631910122931004e-03 -1 -2 1083 - -1.2274970300495625e-02 - - 1.7849500477313995e-01 7.2619527578353882e-01 - 4.5503988862037659e-01 - <_> - - 0 1 1084 5.4185991175472736e-03 -1 -2 1085 - 8.1846961984410882e-04 - - 5.2529907226562500e-01 5.4452222585678101e-01 - 3.2722181081771851e-01 - <_> - - 1 0 1086 4.1358140297234058e-03 -1 -2 1087 - 3.9578010910190642e-04 - - 7.0138317346572876e-01 4.9659439921379089e-01 - 3.2955980300903320e-01 - <_> - - 0 1 1088 4.6887691132724285e-03 -1 -2 1089 - -1.8255440518260002e-02 - - 5.3626418113708496e-01 6.4961087703704834e-01 - 4.7571370005607605e-01 - <_> - - 0 1 1090 -6.2736468389630318e-03 -1 -2 1091 - 2.4320168886333704e-03 - - 2.3437410593032837e-01 4.6201181411743164e-01 - 6.8984192609786987e-01 - <_> - - 0 1 1092 -4.9617629498243332e-02 -1 -2 1093 - 1.1701210169121623e-03 - - 2.1007199585437775e-01 4.6215289831161499e-01 - 5.7971358299255371e-01 - <_> - - 0 1 
1094 -4.5237291604280472e-02 -1 -2 1095 - 4.7563421539962292e-03 - - 2.1182620525360107e-01 4.8846149444580078e-01 - 6.8724989891052246e-01 - <_> - - 1 0 1096 -1.4835969544947147e-02 -1 -2 1097 - 7.7436608262360096e-04 - - 5.2751058340072632e-01 4.1723209619522095e-01 - 5.4911398887634277e-01 - <_> - - 1 0 1098 1.4835969544947147e-02 -1 -2 1099 - -8.0892542609944940e-04 - - 2.1248769760131836e-01 5.4952150583267212e-01 - 4.2077958583831787e-01 - <_> - - 0 1 1100 7.7517668250948191e-04 -1 -2 1101 - -6.7618978209793568e-03 - - 3.3219420909881592e-01 2.2129580378532410e-01 - 5.2326530218124390e-01 - <_> - - 0 1 1102 -4.0135860443115234e-02 -1 -2 1103 - -3.3651469275355339e-03 - - 1.1017960309982300e-01 3.8101008534431458e-01 - 5.6172919273376465e-01 - <_> - - 1 0 1104 7.4713007779791951e-04 -1 -2 1105 - -4.2727389372885227e-03 - - 5.7950568199157715e-01 6.3922691345214844e-01 - 4.7114381194114685e-01 - <_> - - 1 0 1106 3.6202510818839073e-03 -1 -2 1107 - 4.7307618660852313e-04 - - 3.4098839759826660e-01 3.6593028903007507e-01 - 5.3881710767745972e-01 - <_> - - 1 0 1108 3.3094909042119980e-02 -1 -2 1109 - -1.1544119566679001e-02 - - 7.1703857183456421e-01 6.3868182897567749e-01 - 4.6813040971755981e-01 - <_> - - 0 1 1110 -7.4234469793736935e-03 -1 -2 1111 - -4.2252950370311737e-03 - - 3.2637009024620056e-01 5.7678192853927612e-01 - 4.3464180827140808e-01 - <_> - - 0 1 1112 1.8133109435439110e-02 -1 -2 1113 - 7.0903049781918526e-03 - - 4.6978279948234558e-01 4.4373890757560730e-01 - 6.0616689920425415e-01 - <_> - - 0 1 1114 -1.3272940181195736e-02 -1 -2 1115 - 1.4632199599873275e-04 - - 6.5585112571716309e-01 3.3763539791107178e-01 - 5.0916552543640137e-01 - <_> - - 0 1 1116 -3.5790191031992435e-03 -1 -2 1117 - -4.6997101162560284e-04 - - 2.9478839039802551e-01 5.5569821596145630e-01 - 4.6654561161994934e-01 - <_> - - 0 1 1118 -4.8179440200328827e-02 -1 -2 1119 - -9.2581362696364522e-04 - - 7.3383557796478271e-01 3.5438719391822815e-01 - 5.2851498126983643e-01 - <_> - - 
0 1 1120 -1.4780730009078979e-02 -1 -2 1121 - -1.0027450323104858e-01 - - 1.9444419443607330e-01 9.9049292504787445e-02 - 5.1398539543151855e-01 - <_> - - 0 1 1122 -9.3848101096227765e-04 -1 -2 1123 - -2.8861360624432564e-03 - - 5.8271098136901855e-01 3.4414279460906982e-01 - 5.1488387584686279e-01 - <_> - - 1 0 1124 -4.3682761490345001e-02 -1 -2 1125 - 2.6115700602531433e-03 - - 5.2079981565475464e-01 4.8355031013488770e-01 - 6.3222199678421021e-01 - <_> - - 1 0 1126 4.3682761490345001e-02 -1 -2 1127 - 1.7179530113935471e-03 - - 1.3645380735397339e-01 4.5373201370239258e-01 - 6.0667508840560913e-01 - <_> - - 1 0 1128 -3.3964909613132477e-02 -1 -2 1129 - -1.0993590112775564e-03 - - 4.9683749675750732e-01 5.8316808938980103e-01 - 4.6882399916648865e-01 - <_> - - 1 0 1130 5.4301079362630844e-02 -1 -2 1131 - 1.0993590112775564e-03 - - 7.5682890415191650e-01 4.3301481008529663e-01 - 5.7684689760208130e-01 - <_> - - 1 0 1132 -1.4954120160837192e-05 -1 -2 1133 - 3.1415868550539017e-02 - - 4.4432818889617920e-01 5.2744728326797485e-01 - 3.0378559231758118e-01 - <_> - - 1 0 1134 1.0831849649548531e-02 -1 -2 1135 - 8.6545711383223534e-04 - - 3.5817208886146545e-01 5.9375840425491333e-01 - 4.2946299910545349e-01 - <_> - - 1 0 1136 2.2743160370737314e-03 -1 -2 1137 - 3.9340821094810963e-03 - - 5.9545767307281494e-01 4.7922229766845703e-01 - 5.8561331033706665e-01 - <_> - - 1 0 1138 8.1451907753944397e-03 -1 -2 1139 - -5.2763288840651512e-03 - - 3.5734778642654419e-01 4.0260228514671326e-01 - 5.7647430896759033e-01 - <_> - - 1 0 1140 -8.3787851035594940e-03 -1 -2 1141 - 1.5621910570189357e-03 - - 4.9813330173492432e-01 4.7365880012512207e-01 - 5.5836081504821777e-01 - <_> - - 1 0 1142 3.2318739686161280e-03 -1 -2 1143 - 6.6804019734263420e-03 - - 6.1674368381500244e-01 4.1314241290092468e-01 - 6.2806951999664307e-01 - <_> - - 0 1 1144 -3.3396480139344931e-03 -1 -2 1145 - -2.0933480560779572e-01 - - 3.4463581442832947e-01 1.0386580228805542e-01 - 5.2044892311096191e-01 - <_> - 
- 1 0 1146 6.3805822283029556e-03 -1 -2 1147 - -6.0137799009680748e-03 - - 2.1674020588397980e-01 6.7383992671966553e-01 - 4.8966509103775024e-01 - <_> - - 1 0 1148 -8.1756077706813812e-03 -1 -2 1149 - 6.3951779156923294e-04 - - 5.1779150962829590e-01 4.8196458816528320e-01 - 5.4644381999969482e-01 - <_> - - 1 0 1150 1.0127760469913483e-03 -1 -2 1151 - 4.9784599104896188e-04 - - 3.4235960245132446e-01 4.4884610176086426e-01 - 5.9126710891723633e-01 - <_> - - 1 0 1152 1.3596490316558629e-04 -1 -2 1153 - 1.3571660034358501e-02 - - 5.5688631534576416e-01 5.1610678434371948e-01 - 1.7130009829998016e-01 - <_> - - 1 0 1154 3.0259079721872695e-05 -1 -2 1155 - -3.2625840976834297e-03 - - 4.9162039160728455e-01 6.4046627283096313e-01 - 2.8590849041938782e-01 - <_> - - 1 0 1156 -1.9217010412830859e-04 -1 -2 1157 - 2.1993879228830338e-02 - - 5.4592829942703247e-01 4.7157138586044312e-01 - 5.6900751590728760e-01 - <_> - - 1 0 1158 7.8907777788117528e-04 -1 -2 1159 - 5.0893891602754593e-04 - - 3.2798269391059875e-01 4.3020078539848328e-01 - 5.6960451602935791e-01 - <_> - - 1 0 1160 1.1662710312521085e-04 -1 -2 1161 - 8.0604078248143196e-03 - - 5.3872352838516235e-01 5.0214231014251709e-01 - 5.9653222560882568e-01 - <_> - - 1 0 1162 9.5925969071686268e-04 -1 -2 1163 - -1.9526129588484764e-02 - - 3.4734940528869629e-01 6.4755451679229736e-01 - 4.6437820792198181e-01 - <_> - 78 - 3.8236038208007812e+01 - - <_> - - 0 1 1164 4.1242439299821854e-02 -1 -2 1165 - 1.5626709908246994e-02 - - 3.3933150768280029e-01 5.1041001081466675e-01 - 7.7728152275085449e-01 - <_> - - 0 1 1166 2.9947189614176750e-04 -1 -2 1167 - -1.0037609608843923e-03 - - 3.6646738648414612e-01 5.4056507349014282e-01 - 3.9262050390243530e-01 - <_> - - 0 1 1168 6.8128242855891585e-04 -1 -2 1169 - 1.3098999625071883e-04 - - 4.2515191435813904e-01 4.1351449489593506e-01 - 6.9257462024688721e-01 - <_> - - 1 0 1170 3.1696720980107784e-03 -1 -2 1171 - -2.0587369799613953e-03 - - 3.4558731317520142e-01 
2.2341939806938171e-01 - 5.2861189842224121e-01 - <_> - - 1 0 1172 -4.6395038953050971e-04 -1 -2 1173 - 3.5089480224996805e-03 - - 4.2065200209617615e-01 6.5029817819595337e-01 - 4.1175979375839233e-01 - <_> - - 1 0 1174 -2.3975980002433062e-03 -1 -2 1175 - 1.0901279747486115e-03 - - 3.6733010411262512e-01 2.9062381386756897e-01 - 5.4451119899749756e-01 - <_> - - 0 1 1176 -1.6524370585102588e-04 -1 -2 1177 - -4.1602319106459618e-04 - - 4.2335158586502075e-01 3.8863611221313477e-01 - 6.2691658735275269e-01 - <_> - - 0 1 1178 -2.3739910102449358e-04 -1 -2 1179 - 2.4739760905504227e-02 - - 5.5244511365890503e-01 4.9600958824157715e-01 - 5.3734910488128662e-01 - <_> - - 0 1 1180 -1.5342839993536472e-02 -1 -2 1181 - 1.1540469713509083e-02 - - 6.8494051694869995e-01 4.0372350811958313e-01 - 6.7869400978088379e-01 - <_> - - 1 0 1182 6.4230621792376041e-03 -1 -2 1183 - 1.2977809645235538e-02 - - 3.8146761059761047e-01 5.5270588397979736e-01 - 3.7449559569358826e-01 - <_> - - 0 1 1184 1.1063399724662304e-03 -1 -2 1185 - 1.3743690215051174e-03 - - 3.5209289193153381e-01 5.6419032812118530e-01 - 3.0750259757041931e-01 - <_> - - 0 1 1186 1.6233779489994049e-02 -1 -2 1187 - -8.1519351806491613e-04 - - 4.8888280987739563e-01 5.4563212394714355e-01 - 4.7435501217842102e-01 - <_> - - 0 1 1188 -9.0782493352890015e-02 -1 -2 1189 - 1.1665210127830505e-02 - - 2.9252481460571289e-01 4.6884548664093018e-01 - 6.2303477525711060e-01 - <_> - - 0 1 1190 -2.3286409676074982e-02 -1 -2 1191 - 2.1559339947998524e-03 - - 6.8958431482315063e-01 5.3558021783828735e-01 - 3.4234660863876343e-01 - <_> - - 0 1 1192 -4.3167220428586006e-03 -1 -2 1193 - 1.5610599657520652e-03 - - 5.9370762109756470e-01 4.7086599469184875e-01 - 2.7369970083236694e-01 - <_> - - 0 1 1194 1.4076639898121357e-02 -1 -2 1195 - 7.1018589660525322e-03 - - 5.2871561050415039e-01 5.3361928462982178e-01 - 3.2248139381408691e-01 - <_> - - 0 1 1196 -4.8221647739410400e-03 -1 -2 1197 - -5.3852899000048637e-03 - - 
2.9839101433753967e-01 5.6239992380142212e-01 - 4.2959120869636536e-01 - <_> - - 1 0 1198 7.3483278974890709e-03 -1 -2 1199 - -3.5707519855350256e-03 - - 6.8139612674713135e-01 5.8579689264297485e-01 - 4.6034291386604309e-01 - <_> - - 1 0 1200 2.3340100888162851e-03 -1 -2 1201 - 4.7432780265808105e-03 - - 2.7448511123657227e-01 5.0475269556045532e-01 - 2.3627419769763947e-01 - <_> - - 0 1 1202 6.5055489540100098e-03 -1 -2 1203 - 1.2589249759912491e-02 - - 5.2422481775283813e-01 4.8236909508705139e-01 - 6.7525368928909302e-01 - <_> - - 0 1 1204 -6.3358368352055550e-03 -1 -2 1205 - -5.7639651931822300e-03 - - 1.7346349358558655e-01 6.3543808460235596e-01 - 4.5874750614166260e-01 - <_> - - 0 1 1206 1.3599749654531479e-03 -1 -2 1207 - 2.8404260054230690e-02 - - 4.5803809165954590e-01 5.1763808727264404e-01 - 1.2043850123882294e-01 - <_> - - 0 1 1208 -9.2958156019449234e-03 -1 -2 1209 - -1.1800320353358984e-03 - - 2.3379570245742798e-01 3.9028140902519226e-01 - 5.6529301404953003e-01 - <_> - - 0 1 1210 -2.0948140881955624e-03 -1 -2 1211 - 4.1679958812892437e-03 - - 5.5120289325714111e-01 5.4559761285781860e-01 - 4.7989490628242493e-01 - <_> - - 1 0 1212 5.4458891972899437e-03 -1 -2 1213 - -1.2766510481014848e-03 - - 6.1270868778228760e-01 5.3171318769454956e-01 - 3.8509321212768555e-01 - <_> - - 0 1 1214 5.9404270723462105e-04 -1 -2 1215 - 4.2309608310461044e-02 - - 5.4464370012283325e-01 5.2346438169479370e-01 - 2.2130440175533295e-01 - <_> - - 0 1 1216 5.6189671158790588e-03 -1 -2 1217 - 7.2401198558509350e-03 - - 4.9161979556083679e-01 1.4714759588241577e-01 - 4.8528939485549927e-01 - <_> - - 0 1 1218 -4.5610670931637287e-03 -1 -2 1219 - 4.5506159949582070e-05 - - 2.7737739682197571e-01 4.6264618635177612e-01 - 5.7680791616439819e-01 - <_> - - 0 1 1220 -6.1903791502118111e-03 -1 -2 1221 - 8.1186462193727493e-04 - - 1.6442899405956268e-01 4.7785910964012146e-01 - 6.2618649005889893e-01 - <_> - - 0 1 1222 1.3779809698462486e-02 -1 -2 1223 - 1.1290319962427020e-03 - - 
5.2573078870773315e-01 5.4980480670928955e-01 - 3.9831069111824036e-01 - <_> - - 0 1 1224 -1.0610350000206381e-04 -1 -2 1225 - 1.6695790691301227e-04 - - 4.0335190296173096e-01 4.1493400931358337e-01 - 5.7953411340713501e-01 - <_> - - 1 0 1226 1.1290319962427020e-03 -1 -2 1227 - -1.2019349634647369e-01 - - 3.9341148734092712e-01 7.3400482535362244e-02 - 5.2025860548019409e-01 - <_> - - 0 1 1228 -1.5230740420520306e-02 -1 -2 1229 - 3.5759829916059971e-03 - - 3.7495058774948120e-01 5.0781500339508057e-01 - 6.6060662269592285e-01 - <_> - - 0 1 1230 1.3479460030794144e-02 -1 -2 1231 - -2.1162950433790684e-03 - - 4.5477110147476196e-01 3.3110061287879944e-01 - 5.3842592239379883e-01 - <_> - - 0 1 1232 -1.7877709120512009e-02 -1 -2 1233 - 1.0931970318779349e-03 - - 6.5132528543472290e-01 5.2647650241851807e-01 - 3.4569910168647766e-01 - <_> - - 0 1 1234 -3.0553159303963184e-03 -1 -2 1235 - 3.6365049891173840e-03 - - 6.2686139345169067e-01 5.3992128372192383e-01 - 4.3453970551490784e-01 - <_> - - 0 1 1236 9.7896481747739017e-05 -1 -2 1237 - -3.2714448752813041e-04 - - 3.8356059789657593e-01 3.3376678824424744e-01 - 5.5391657352447510e-01 - <_> - - 1 0 1238 4.3425030889920890e-04 -1 -2 1239 - 1.4005579985678196e-02 - - 5.7882702350616455e-01 5.2750778198242188e-01 - 2.7011251449584961e-01 - <_> - - 0 1 1240 -9.2654931358993053e-04 -1 -2 1241 - 3.9504268206655979e-03 - - 5.8522802591323853e-01 4.7283369302749634e-01 - 3.3139181137084961e-01 - <_> - - 1 0 1242 -5.8086868375539780e-04 -1 -2 1243 - -1.2018020264804363e-02 - - 4.2588108777999878e-01 5.6097871065139771e-01 - 4.8951920866966248e-01 - <_> - - 0 1 1244 -1.4521540701389313e-01 -1 -2 1245 - -6.6049019806087017e-03 - - 4.3894480913877487e-02 4.2291709780693054e-01 - 5.6162929534912109e-01 - <_> - - 1 0 1246 -3.4909751266241074e-02 -1 -2 1247 - 3.7478420417755842e-03 - - 4.7881281375885010e-01 4.8002821207046509e-01 - 5.8013892173767090e-01 - <_> - - 1 0 1248 3.3038031309843063e-02 -1 -2 1249 - 3.6872599739581347e-03 - 
- 7.0781761407852173e-01 4.4496241211891174e-01 - 5.9577310085296631e-01 - <_> - - 0 1 1250 -4.5311939902603626e-03 -1 -2 1251 - 4.1058510541915894e-03 - - 4.1770470142364502e-01 5.3729480504989624e-01 - 3.7369269132614136e-01 - <_> - - 0 1 1252 -8.7599847465753555e-03 -1 -2 1253 - -2.3003309965133667e-02 - - 6.6588079929351807e-01 2.6479220390319824e-01 - 5.1018178462982178e-01 - <_> - - 0 1 1254 5.3664818406105042e-03 -1 -2 1255 - 3.8971770554780960e-02 - - 4.5486348867416382e-01 5.1570618152618408e-01 - 3.4364390373229980e-01 - <_> - - 0 1 1256 -2.7767190709710121e-02 -1 -2 1257 - -9.8894089460372925e-03 - - 2.3543910682201385e-01 6.8877410888671875e-01 - 5.1110517978668213e-01 - <_> - - 0 1 1258 -3.2073140610009432e-03 -1 -2 1259 - -6.7484978353604674e-04 - - 5.4388678073883057e-01 5.4511487483978271e-01 - 4.8313531279563904e-01 - <_> - - 0 1 1260 -5.1947520114481449e-03 -1 -2 1261 - -2.6169899501837790e-04 - - 2.1134190261363983e-01 5.2736818790435791e-01 - 3.9925870299339294e-01 - <_> - - 0 1 1262 2.2421479225158691e-03 -1 -2 1263 - -1.2139769969508052e-03 - - 4.6882608532905579e-01 5.5042350292205811e-01 - 4.3848711252212524e-01 - <_> - - 0 1 1264 -2.9469770379364491e-03 -1 -2 1265 - -3.9291830034926534e-04 - - 3.8928470015525818e-01 6.0017228126525879e-01 - 4.5616629719734192e-01 - <_> - - 1 0 1266 6.2550729513168335e-01 -1 -2 1267 - 9.7744520753622055e-03 - - 6.8125613033771515e-02 4.8130258917808533e-01 - 5.6206572055816650e-01 - <_> - - 1 0 1268 9.4378247857093811e-02 -1 -2 1269 - -1.9560910295695066e-03 - - 6.6632293164730072e-02 3.5882329940795898e-01 - 5.2954071760177612e-01 - <_> - - 0 1 1270 9.0652769431471825e-03 -1 -2 1271 - 4.2138071148656309e-04 - - 4.8226881027221680e-01 4.6703329682350159e-01 - 5.6831127405166626e-01 - <_> - - 1 0 1272 -4.4220191193744540e-04 -1 -2 1273 - -4.7313501127064228e-03 - - 5.3607952594757080e-01 6.1372458934783936e-01 - 3.1880891323089600e-01 - <_> - - 0 1 1274 1.5395509544759989e-03 -1 -2 1275 - 
2.4315000046044588e-03 - - 4.4877201318740845e-01 4.8941668868064880e-01 - 6.7166537046432495e-01 - <_> - - 0 1 1276 -1.5581619925796986e-02 -1 -2 1277 - 1.0816920548677444e-03 - - 3.3367419242858887e-01 4.7182199358940125e-01 - 5.9606271982192993e-01 - <_> - - 0 1 1278 -2.2197659127414227e-03 -1 -2 1279 - -9.3048671260476112e-04 - - 3.5885548591613770e-01 6.2187129259109497e-01 - 4.8173001408576965e-01 - <_> - - 0 1 1280 -4.7418707981705666e-03 -1 -2 1281 - -6.2950369901955128e-03 - - 2.5500270724296570e-01 6.7280787229537964e-01 - 5.0510638952255249e-01 - <_> - - 0 1 1282 3.5216049291193485e-03 -1 -2 1283 - -2.4289379362016916e-03 - - 5.4019099473953247e-01 5.4194617271423340e-01 - 4.3471428751945496e-01 - <_> - - 0 1 1284 -2.5261470582336187e-03 -1 -2 1285 - -1.4817339833825827e-03 - - 6.9706249237060547e-01 3.2634168863296509e-01 - 4.9178731441497803e-01 - <_> - - 0 1 1286 -2.2474530339241028e-01 -1 -2 1287 - 2.8342509176582098e-03 - - 7.2937291115522385e-03 4.5792299509048462e-01 - 5.3798812627792358e-01 - <_> - - 0 1 1288 -2.0821610465645790e-02 -1 -2 1289 - 1.4896340144332498e-04 - - 6.0240888595581055e-01 3.3361440896987915e-01 - 4.9628159403800964e-01 - <_> - - 0 1 1290 -3.3524499740451574e-03 -1 -2 1291 - -3.7279881536960602e-02 - - 3.5587510466575623e-01 1.6985629498958588e-01 - 5.2089858055114746e-01 - <_> - - 1 0 1292 1.3896770542487502e-04 -1 -2 1293 - -3.1912620761431754e-04 - - 5.5906862020492554e-01 5.8487337827682495e-01 - 3.7958368659019470e-01 - <_> - - 1 0 1294 5.4003461264073849e-04 -1 -2 1295 - 3.8956850767135620e-03 - - 5.6702882051467896e-01 5.1826947927474976e-01 - 3.3277091383934021e-01 - <_> - - 1 0 1296 1.6084529925137758e-03 -1 -2 1297 - -5.7474587811157107e-04 - - 5.4104858636856079e-01 6.0226422548294067e-01 - 3.6446440219879150e-01 - <_> - - 1 0 1298 1.3435039669275284e-02 -1 -2 1299 - 2.1368139423429966e-03 - - 3.4412819147109985e-01 5.2924340963363647e-01 - 2.7470758557319641e-01 - <_> - - 1 0 1300 1.4157629571855068e-02 -1 -2 
1301 - 5.3884391672909260e-03 - - 8.0278682708740234e-01 5.2223151922225952e-01 - 3.5867279767990112e-01 - <_> - - 0 1 1302 8.8013410568237305e-03 -1 -2 1303 - 3.8858849438838661e-04 - - 4.9003869295120239e-01 4.6810561418533325e-01 - 5.7219529151916504e-01 - <_> - - 0 1 1304 -2.2143588867038488e-03 -1 -2 1305 - -8.4642972797155380e-03 - - 5.3888058662414551e-01 6.6755378246307373e-01 - 3.4484419226646423e-01 - <_> - - 1 0 1306 1.5044390223920345e-02 -1 -2 1307 - 7.6346402056515217e-03 - - 9.2396140098571777e-01 4.8848968744277954e-01 - 6.3060528039932251e-01 - <_> - - 1 0 1308 3.3895121305249631e-04 -1 -2 1309 - 2.1157610171940178e-04 - - 3.9974310994148254e-01 5.6639820337295532e-01 - 3.9729809761047363e-01 - <_> - - 1 0 1310 -2.7514949440956116e-02 -1 -2 1311 - 5.1603060215711594e-02 - - 5.2010637521743774e-01 5.1407301425933838e-01 - 1.2451309710741043e-01 - <_> - - 1 0 1312 3.7510651163756847e-03 -1 -2 1313 - -2.1457639522850513e-03 - - 3.8020950555801392e-01 3.3094480633735657e-01 - 5.4745388031005859e-01 - <_> - - 1 0 1314 -5.8178009930998087e-04 -1 -2 1315 - -9.3638541875407100e-04 - - 4.8926019668579102e-01 5.9373992681503296e-01 - 4.6646690368652344e-01 - <_> - - 1 0 1316 4.1667491197586060e-02 -1 -2 1317 - -6.7763780243694782e-03 - - 7.0213532447814941e-01 3.2227510213851929e-01 - 5.0683951377868652e-01 - <_> - - 1 0 1318 -2.9170580673962831e-03 -1 -2 1319 - 3.2789530814625323e-04 - - 4.7177010774612427e-01 4.5093831419944763e-01 - 5.6511628627777100e-01 - <_> - 91 - 4.4682968139648438e+01 - - <_> - - 0 1 1320 1.1729800142347813e-02 -1 -2 1321 - 1.1712179984897375e-03 - - 3.8052248954772949e-01 3.1400179862976074e-01 - 6.8581461906433105e-01 - <_> - - 1 0 1322 9.3555096536874771e-03 -1 -2 1323 - 1.6570610459893942e-03 - - 6.8346732854843140e-01 2.9924729466438293e-01 - 5.4756778478622437e-01 - <_> - - 1 0 1324 -1.3387809740379453e-03 -1 -2 1325 - 1.7580550047568977e-04 - - 2.9414069652557373e-01 3.8969779014587402e-01 - 5.8729708194732666e-01 - <_> - - 0 
1 1326 -2.9473248869180679e-03 -1 -2 1327 - 8.3220899105072021e-03 - - 3.5765719413757324e-01 5.2324008941650391e-01 - 3.2310879230499268e-01 - <_> - - 1 0 1328 7.4366689659655094e-03 -1 -2 1329 - -2.1322889369912446e-04 - - 6.7156732082366943e-01 5.4705417156219482e-01 - 3.8633960485458374e-01 - <_> - - 0 1 1330 -7.8024631366133690e-03 -1 -2 1331 - 5.6611228501424193e-04 - - 2.7714601159095764e-01 4.6891361474990845e-01 - 5.8519637584686279e-01 - <_> - - 0 1 1332 -9.2346500605344772e-03 -1 -2 1333 - -1.4676499631605111e-05 - - 2.7043971419334412e-01 5.6225502490997314e-01 - 3.5793170332908630e-01 - <_> - - 0 1 1334 9.7007937729358673e-03 -1 -2 1335 - -3.5320650786161423e-03 - - 4.1738718748092651e-01 4.1950130462646484e-01 - 5.5494689941406250e-01 - <_> - - 1 0 1336 2.1616410464048386e-02 -1 -2 1337 - 3.4567608963698149e-03 - - 2.8573909401893616e-01 6.0245329141616821e-01 - 4.3775078654289246e-01 - <_> - - 0 1 1338 2.2914320230484009e-02 -1 -2 1339 - 3.4328910987824202e-03 - - 4.6893501281738281e-01 4.6646049618721008e-01 - 5.7625621557235718e-01 - <_> - - 0 1 1340 -8.6510833352804184e-03 -1 -2 1341 - 1.4510039472952485e-03 - - 6.3817399740219116e-01 3.7114879488945007e-01 - 5.5307507514953613e-01 - <_> - - 0 1 1342 7.8191719949245453e-03 -1 -2 1343 - 2.0798550394829363e-04 - - 5.2643620967864990e-01 3.7305128574371338e-01 - 5.4457312822341919e-01 - <_> - - 0 1 1344 -3.9962218143045902e-03 -1 -2 1345 - -1.5010139577498194e-05 - - 2.4381700158119202e-01 5.3246712684631348e-01 - 3.6829888820648193e-01 - <_> - - 0 1 1346 -4.2428788729012012e-03 -1 -2 1347 - 9.1374982148408890e-03 - - 6.4814740419387817e-01 4.8961588740348816e-01 - 6.5588432550430298e-01 - <_> - - 1 0 1348 8.8254585862159729e-03 -1 -2 1349 - 9.4092212384566665e-04 - - 3.6138701438903809e-01 5.5028957128524780e-01 - 3.6325180530548096e-01 - <_> - - 0 1 1350 -1.2503350153565407e-02 -1 -2 1351 - 8.6759645491838455e-03 - - 2.2611320018768311e-01 4.9878901243209839e-01 - 6.8471962213516235e-01 - <_> - - 0 
1 1352 -1.0416760109364986e-02 -1 -2 1353 - 2.7432460337877274e-03 - - 2.4462990462779999e-01 3.5115250945091248e-01 - 5.3998267650604248e-01 - <_> - - 0 1 1354 -4.2385691776871681e-03 -1 -2 1355 - 1.8325870856642723e-02 - - 6.8236732482910156e-01 4.8915800452232361e-01 - 7.1356189250946045e-01 - <_> - - 0 1 1356 -2.4334540590643883e-02 -1 -2 1357 - 4.6469361404888332e-04 - - 3.5225218534469604e-01 4.0498688817024231e-01 - 5.5158257484436035e-01 - <_> - - 1 0 1358 3.4260009415447712e-03 -1 -2 1359 - -2.5827318895608187e-03 - - 4.1267699003219604e-01 2.8994289040565491e-01 - 5.3864318132400513e-01 - <_> - - 1 0 1360 1.0545699624344707e-03 -1 -2 1361 - -9.1257691383361816e-04 - - 3.7713441252708435e-01 5.8273869752883911e-01 - 4.2675569653511047e-01 - <_> - - 0 1 1362 2.6589010376483202e-03 -1 -2 1363 - 4.8598358407616615e-03 - - 4.6881249547004700e-01 4.8539221286773682e-01 - 6.1636447906494141e-01 - <_> - - 1 0 1364 8.0638676881790161e-03 -1 -2 1365 - -7.5898370705544949e-03 - - 1.7491950094699860e-01 6.8261897563934326e-01 - 4.8940700292587280e-01 - <_> - - 0 1 1366 3.6368070868775249e-04 -1 -2 1367 - 6.2594950199127197e-02 - - 4.6145960688591003e-01 5.1830172538757324e-01 - 2.6866960525512695e-01 - <_> - - 0 1 1368 -4.9753207713365555e-03 -1 -2 1369 - -2.0880119409412146e-03 - - 1.7584669589996338e-01 6.3693821430206299e-01 - 4.9300441145896912e-01 - <_> - - 1 0 1370 9.5644511748105288e-04 -1 -2 1371 - -3.1721461564302444e-02 - - 4.1393989324569702e-01 6.0455572605133057e-01 - 4.8163640499114990e-01 - <_> - - 0 1 1372 1.2898689601570368e-03 -1 -2 1373 - 9.8405163735151291e-03 - - 5.4508107900619507e-01 2.9240009188652039e-01 - 6.6996061801910400e-01 - <_> - - 1 0 1374 1.2237089686095715e-03 -1 -2 1375 - -8.4232585504651070e-03 - - 6.2828367948532104e-01 5.9865701198577881e-01 - 4.8525801301002502e-01 - <_> - - 0 1 1376 -7.2726322105154395e-04 -1 -2 1377 - 4.6842931769788265e-03 - - 3.3400490880012512e-01 5.1689237356185913e-01 - 2.6794800162315369e-01 - <_> - - 0 
1 1378 -1.0379579616710544e-03 -1 -2 1379 - 9.1342730447649956e-03 - - 5.9257918596267700e-01 5.4377281665802002e-01 - 4.3468001484870911e-01 - <_> - - 0 1 1380 1.4971119817346334e-03 -1 -2 1381 - 1.5762320253998041e-03 - - 4.1295009851455688e-01 4.5228740572929382e-01 - 6.5562921762466431e-01 - <_> - - 0 1 1382 8.7496247142553329e-03 -1 -2 1383 - -8.5103599121794105e-04 - - 4.5320340991020203e-01 3.7859839200973511e-01 - 5.4169750213623047e-01 - <_> - - 0 1 1384 -1.7325570806860924e-02 -1 -2 1385 - -8.3266440778970718e-03 - - 6.8842482566833496e-01 3.0913260579109192e-01 - 5.2436548471450806e-01 - <_> - - 0 1 1386 1.5157909729168750e-05 -1 -2 1387 - 1.8041470320895314e-03 - - 4.7657939791679382e-01 4.7253859043121338e-01 - 5.7165551185607910e-01 - <_> - - 1 0 1388 3.0691560823470354e-03 -1 -2 1389 - -5.2225510444259271e-05 - - 2.1433599293231964e-01 5.6532102823257446e-01 - 4.3851110339164734e-01 - <_> - - 1 0 1390 1.0072169970953837e-04 -1 -2 1391 - 1.3573700562119484e-04 - - 5.9247761964797974e-01 4.5734488964080811e-01 - 5.7693827152252197e-01 - <_> - - 1 0 1392 9.2137878527864814e-04 -1 -2 1393 - 3.0316581251099706e-04 - - 5.9926092624664307e-01 3.6100810766220093e-01 - 5.0493258237838745e-01 - <_> - - 1 0 1394 3.9582479745149612e-02 -1 -2 1395 - 4.7519680112600327e-02 - - 1.5384890139102936e-01 5.2161407470703125e-01 - 1.4283910393714905e-01 - <_> - - 1 0 1396 1.8871759995818138e-02 -1 -2 1397 - -3.9876459049992263e-04 - - 2.8255069255828857e-01 4.0350168943405151e-01 - 5.4377931356430054e-01 - <_> - - 0 1 1398 4.6556600136682391e-04 -1 -2 1399 - 6.7090610973536968e-03 - - 4.6689969301223755e-01 5.3313547372817993e-01 - 4.1365718841552734e-01 - <_> - - 0 1 1400 -1.8931160448119044e-03 -1 -2 1401 - -1.3056949712336063e-02 - - 7.1551632881164551e-01 3.1178998947143555e-01 - 5.2084398269653320e-01 - <_> - - 1 0 1402 -1.9484119547996670e-04 -1 -2 1403 - 1.5093220099515747e-05 - - 4.6376588940620422e-01 4.5616531372070312e-01 - 5.4452341794967651e-01 - <_> - - 1 0 
1404 -7.1617960202274844e-06 -1 -2 1405 - 3.0164679628796875e-04 - - 4.1931080818176270e-01 5.9662377834320068e-01 - 4.1005000472068787e-01 - <_> - - 0 1 1406 4.4195181690156460e-03 -1 -2 1407 - -7.3984181508421898e-03 - - 4.8450559377670288e-01 6.2068462371826172e-01 - 4.9312090873718262e-01 - <_> - - 1 0 1408 -7.8031201846897602e-03 -1 -2 1409 - -1.0731429792940617e-02 - - 5.2824628353118896e-01 9.1048341989517212e-01 - 3.4559220075607300e-01 - <_> - - 0 1 1410 1.4246780192479491e-03 -1 -2 1411 - -8.2717568147927523e-05 - - 4.7085541486740112e-01 5.6516230106353760e-01 - 4.7310239076614380e-01 - <_> - - 1 0 1412 4.4803409837186337e-03 -1 -2 1413 - 3.0789140146225691e-03 - - 6.1758869886398315e-01 5.1395332813262939e-01 - 3.4230878949165344e-01 - <_> - - 1 0 1414 -1.1310289846733212e-03 -1 -2 1415 - -1.0410690447315574e-03 - - 4.9182820320129395e-01 5.9420871734619141e-01 - 4.9230429530143738e-01 - <_> - - 1 0 1416 1.1648540385067463e-03 -1 -2 1417 - 9.0057362103834748e-04 - - 6.4052718877792358e-01 4.5043969154357910e-01 - 6.1920768022537231e-01 - <_> - - 0 1 1418 6.8781538866460323e-03 -1 -2 1419 - -3.5283900797367096e-02 - - 5.3748130798339844e-01 2.2471010684967041e-01 - 5.2171707153320312e-01 - <_> - - 0 1 1420 -1.3320200378075242e-03 -1 -2 1421 - -2.3177571129053831e-03 - - 2.5547030568122864e-01 3.7925159931182861e-01 - 5.2432268857955933e-01 - <_> - - 0 1 1422 2.1332940377760679e-04 -1 -2 1423 - 1.3467900454998016e-02 - - 3.8603371381759644e-01 5.3806877136230469e-01 - 4.1783639788627625e-01 - <_> - - 0 1 1424 -1.2829169863834977e-03 -1 -2 1425 - 5.1571638323366642e-04 - - 6.1336231231689453e-01 4.0285378694534302e-01 - 5.5368518829345703e-01 - <_> - - 0 1 1426 3.9254198782145977e-03 -1 -2 1427 - -3.3780589699745178e-02 - - 5.2799212932586670e-01 2.3346750438213348e-01 - 5.1759117841720581e-01 - <_> - - 0 1 1428 -3.7853721529245377e-02 -1 -2 1429 - -4.0752900531515479e-04 - - 1.0748530179262161e-01 5.3459298610687256e-01 - 4.1989380121231079e-01 - <_> - - 
0 1 1430 -3.1193809118121862e-03 -1 -2 1431 - -1.5714969485998154e-02 - - 3.8558250665664673e-01 3.3351901173591614e-01 - 5.2632021903991699e-01 - <_> - - 0 1 1432 -7.8525702701881528e-04 -1 -2 1433 - -2.8750501223839819e-04 - - 5.8603972196578979e-01 5.4377847909927368e-01 - 3.7161049246788025e-01 - <_> - - 1 0 1434 2.8016859665513039e-02 -1 -2 1435 - -1.9018839811906219e-03 - - 3.3307549357414246e-01 5.3665977716445923e-01 - 4.6937939524650574e-01 - <_> - - 1 0 1436 2.0647559314966202e-02 -1 -2 1437 - 4.3002571910619736e-03 - - 1.0069560259580612e-01 4.8160359263420105e-01 - 6.2156772613525391e-01 - <_> - - 0 1 1438 1.3459140434861183e-02 -1 -2 1439 - -1.0320040397346020e-02 - - 5.4619538784027100e-01 4.5784530043601990e-01 - 5.4193097352981567e-01 - <_> - - 1 0 1440 3.1990748643875122e-01 -1 -2 1441 - 9.2198798665776849e-04 - - 2.0080469548702240e-01 5.1932811737060547e-01 - 3.9121940732002258e-01 - <_> - - 0 1 1442 4.1852539288811386e-04 -1 -2 1443 - 3.5891108564101160e-04 - - 4.2997440695762634e-01 4.3445029854774475e-01 - 5.5319738388061523e-01 - <_> - - 0 1 1444 -2.0992439985275269e-01 -1 -2 1445 - -4.9328152090311050e-03 - - 1.0757210105657578e-01 5.7627969980239868e-01 - 4.5746439695358276e-01 - <_> - - 1 0 1446 2.3409130517393351e-03 -1 -2 1447 - 4.7120270319283009e-03 - - 7.4768078327178955e-01 5.2617651224136353e-01 - 4.5055508613586426e-01 - <_> - - 0 1 1448 2.8713190928101540e-02 -1 -2 1449 - -2.6156550738960505e-03 - - 4.4071030616760254e-01 4.2442709207534790e-01 - 6.8929767608642578e-01 - <_> - - 0 1 1450 -1.3558969832956791e-02 -1 -2 1451 - -3.0331799644045532e-04 - - 1.2522679567337036e-01 4.0777918696403503e-01 - 5.4428178071975708e-01 - <_> - - 0 1 1452 -5.5601762142032385e-04 -1 -2 1453 - 2.4025330785661936e-03 - - 5.3780037164688110e-01 3.1665799021720886e-01 - 5.2857381105422974e-01 - <_> - - 1 0 1454 -3.4089901018887758e-03 -1 -2 1455 - 8.0019602319225669e-04 - - 4.9052149057388306e-01 4.5227360725402832e-01 - 5.5806142091751099e-01 - <_> - 
- 1 0 1456 2.1901070140302181e-03 -1 -2 1457 - 3.3745369873940945e-03 - - 6.6126817464828491e-01 5.1077651977539062e-01 - 3.3869299292564392e-01 - <_> - - 1 0 1458 8.0019602319225669e-04 -1 -2 1459 - 1.7346069216728210e-02 - - 5.7075601816177368e-01 5.0160211324691772e-01 - 6.3064599037170410e-01 - <_> - - 0 1 1460 -1.9568449351936579e-03 -1 -2 1461 - -1.1229019612073898e-02 - - 3.0178061127662659e-01 6.2938511371612549e-01 - 4.5204889774322510e-01 - <_> - - 0 1 1462 -2.6608388870954514e-03 -1 -2 1463 - -1.1615100316703320e-02 - - 3.3440071344375610e-01 2.8253790736198425e-01 - 5.1509708166122437e-01 - <_> - - 0 1 1464 -9.5248602330684662e-02 -1 -2 1465 - 7.3701781220734119e-03 - - 1.3982650637626648e-01 5.2939987182617188e-01 - 2.3317280411720276e-01 - <_> - - 1 0 1466 -1.4953900128602982e-02 -1 -2 1467 - 5.7038792874664068e-04 - - 4.9404659867286682e-01 5.4665708541870117e-01 - 4.6267679333686829e-01 - <_> - - 1 0 1468 5.8516198769211769e-03 -1 -2 1469 - 2.1150549582671374e-04 - - 6.2700408697128296e-01 5.5081409215927124e-01 - 4.0618729591369629e-01 - <_> - - 1 0 1470 -6.9679190346505493e-06 -1 -2 1471 - -7.9677387839183211e-04 - - 4.0965679287910461e-01 5.6155568361282349e-01 - 4.6668860316276550e-01 - <_> - - 1 0 1472 1.9459480419754982e-02 -1 -2 1473 - -1.1160830035805702e-02 - - 2.3114809393882751e-01 3.0870118737220764e-01 - 5.5146622657775879e-01 - <_> - - 1 0 1474 1.4056149870157242e-02 -1 -2 1475 - -3.2958350493572652e-04 - - 7.0050561428070068e-01 5.7974857091903687e-01 - 4.6916508674621582e-01 - <_> - - 0 1 1476 -5.4636420682072639e-03 -1 -2 1477 - 5.8881669247057289e-05 - - 5.9285950660705566e-01 3.7413978576660156e-01 - 5.1701688766479492e-01 - <_> - - 0 1 1478 6.6343429498374462e-03 -1 -2 1479 - 4.5263409614562988e-02 - - 5.4149878025054932e-01 5.1803272962570190e-01 - 1.5296840667724609e-01 - <_> - - 0 1 1480 -8.0646127462387085e-03 -1 -2 1481 - 4.7389548853971064e-04 - - 2.5154680013656616e-01 5.1219987869262695e-01 - 3.7259489297866821e-01 - <_> 
- - 1 0 1482 1.4877359717502259e-05 -1 -2 1483 - 2.4321159347891808e-02 - - 5.5324357748031616e-01 4.9607661366462708e-01 - 5.9833151102066040e-01 - <_> - - 0 1 1484 6.9931396865285933e-05 -1 -2 1485 - 2.6287760119885206e-03 - - 4.1639530658721924e-01 5.8801448345184326e-01 - 3.3996629714965820e-01 - <_> - - 1 0 1486 3.8190539926290512e-03 -1 -2 1487 - -2.5989150628447533e-02 - - 7.8466212749481201e-01 3.2881140708923340e-01 - 5.1550877094268799e-01 - <_> - - 0 1 1488 1.2062400346621871e-03 -1 -2 1489 - -1.5557400183752179e-03 - - 4.5960599184036255e-01 3.1269869208335876e-01 - 7.1833992004394531e-01 - <_> - - 1 0 1490 -2.2691930644214153e-03 -1 -2 1491 - 2.3287249496206641e-04 - - 5.2740061283111572e-01 4.8786661028862000e-01 - 5.6151527166366577e-01 - <_> - - 1 0 1492 -5.5999699980020523e-03 -1 -2 1493 - -1.0496189817786217e-02 - - 5.1608121395111084e-01 5.7016140222549438e-01 - 3.2048508524894714e-01 - <_> - - 0 1 1494 -1.4814930182183161e-05 -1 -2 1495 - -6.4287078566849232e-04 - - 5.5388379096984863e-01 5.3494292497634888e-01 - 4.4721511006355286e-01 - <_> - - 0 1 1496 -1.8891949730459601e-04 -1 -2 1497 - -9.0413521975278854e-03 - - 5.0128370523452759e-01 2.5629359483718872e-01 - 4.5033830404281616e-01 - <_> - - 1 0 1498 7.9534705728292465e-03 -1 -2 1499 - -2.7908999472856522e-03 - - 2.6304998993873596e-01 5.7565087080001831e-01 - 4.8548638820648193e-01 - <_> - - 1 0 1500 3.2857100013643503e-03 -1 -2 1501 - 7.7063008211553097e-04 - - 4.0847519040107727e-01 4.0733560919761658e-01 - 5.9202408790588379e-01 - <_> - 97 - 4.7763450622558594e+01 - - <_> - - 0 1 1502 6.3021942973136902e-02 -1 -2 1503 - -2.8374609537422657e-03 - - 3.4193828701972961e-01 6.8295639753341675e-01 - 4.4045230746269226e-01 - <_> - - 0 1 1504 4.6461950987577438e-02 -1 -2 1505 - 2.9152540490031242e-02 - - 4.3917450308799744e-01 4.6010631322860718e-01 - 6.3579368591308594e-01 - <_> - - 1 0 1506 -1.4000290320836939e-05 -1 -2 1507 - -1.2757079675793648e-03 - - 3.7300100922584534e-01 
3.0938240885734558e-01 - 5.9013700485229492e-01 - <_> - - 0 1 1508 1.3596529606729746e-03 -1 -2 1509 - 1.7991929780691862e-04 - - 4.3375650048255920e-01 4.2175039649009705e-01 - 5.8468478918075562e-01 - <_> - - 1 0 1510 -1.4166639630275313e-05 -1 -2 1511 - 6.0252390539972112e-05 - - 4.0846911072731018e-01 5.0872868299484253e-01 - 7.2771841287612915e-01 - <_> - - 1 0 1512 6.4320368692278862e-03 -1 -2 1513 - 4.6682319953106344e-04 - - 2.9679030179977417e-01 4.1104629635810852e-01 - 5.5812197923660278e-01 - <_> - - 0 1 1514 5.7436279021203518e-03 -1 -2 1515 - 3.2019240316003561e-03 - - 4.2873099446296692e-01 4.2661958932876587e-01 - 6.4440459012985229e-01 - <_> - - 1 0 1516 -5.7637941790744662e-04 -1 -2 1517 - -3.7901920732110739e-03 - - 4.0848249197006226e-01 3.1819209456443787e-01 - 5.2306932210922241e-01 - <_> - - 1 0 1518 4.8914109356701374e-03 -1 -2 1519 - 4.6459292061626911e-03 - - 3.5483568906784058e-01 5.6105977296829224e-01 - 2.6938489079475403e-01 - <_> - - 0 1 1520 -6.8799369037151337e-03 -1 -2 1521 - -1.8147470429539680e-02 - - 6.2354081869125366e-01 2.8619819879531860e-01 - 5.2268481254577637e-01 - <_> - - 1 0 1522 1.1409220314817503e-04 -1 -2 1523 - -5.4334272863343358e-04 - - 3.2578331232070923e-01 3.8829690217971802e-01 - 5.3411662578582764e-01 - <_> - - 0 1 1524 -2.7602489572018385e-03 -1 -2 1525 - -1.9730569329112768e-03 - - 6.3539659976959229e-01 5.8807611465454102e-01 - 4.5930901169776917e-01 - <_> - - 1 0 1526 2.4565239436924458e-03 -1 -2 1527 - 1.9392010290175676e-04 - - 3.1340101361274719e-01 5.2771317958831787e-01 - 3.6041069030761719e-01 - <_> - - 0 1 1528 7.8643016517162323e-02 -1 -2 1529 - 6.5276869572699070e-03 - - 5.2903419733047485e-01 4.6544799208641052e-01 - 6.0449051856994629e-01 - <_> - - 0 1 1530 -7.8716799616813660e-02 -1 -2 1531 - 5.7298499159514904e-03 - - 2.5411269068717957e-01 4.3669191002845764e-01 - 5.8228862285614014e-01 - <_> - - 1 0 1532 6.2386557692661881e-04 -1 -2 1533 - -8.5267230868339539e-02 - - 5.4726922512054443e-01 
1.4616079628467560e-01 - 5.1818108558654785e-01 - <_> - - 1 0 1534 4.0981110185384750e-02 -1 -2 1535 - 7.7135749161243439e-03 - - 1.2701350450515747e-01 4.8326849937438965e-01 - 2.2235789895057678e-01 - <_> - - 0 1 1536 -6.8663940764963627e-03 -1 -2 1537 - 1.4559639617800713e-02 - - 5.9189289808273315e-01 4.7615069150924683e-01 - 5.7272237539291382e-01 - <_> - - 0 1 1538 -1.0064310394227505e-02 -1 -2 1539 - 3.6274080630391836e-03 - - 3.6367309093475342e-01 5.2717310190200806e-01 - 2.7405250072479248e-01 - <_> - - 0 1 1540 -2.3421540390700102e-03 -1 -2 1541 - -2.4686409160494804e-02 - - 5.4977840185165405e-01 6.0598951578140259e-01 - 4.9603140354156494e-01 - <_> - - 1 0 1542 1.9456120207905769e-04 -1 -2 1543 - 3.1714211218059063e-04 - - 3.7694650888442993e-01 4.0623620152473450e-01 - 5.6682151556015015e-01 - <_> - - 0 1 1544 2.0793990697711706e-03 -1 -2 1545 - 1.7982709687203169e-03 - - 4.6186569333076477e-01 4.8675051331520081e-01 - 6.5184497833251953e-01 - <_> - - 0 1 1546 -2.2287059982772917e-04 -1 -2 1547 - 3.2623921288177371e-04 - - 5.6775957345962524e-01 3.7107339501380920e-01 - 5.6766051054000854e-01 - <_> - - 0 1 1548 -6.6792681813240051e-02 -1 -2 1549 - -1.4869889710098505e-03 - - 2.5115218758583069e-01 3.8867509365081787e-01 - 5.2622538805007935e-01 - <_> - - 0 1 1550 -5.0454870797693729e-03 -1 -2 1551 - -4.8297587782144547e-03 - - 6.5574729442596436e-01 5.9341061115264893e-01 - 4.2859220504760742e-01 - <_> - - 1 0 1552 -1.0722599690780044e-03 -1 -2 1553 - 8.7901195511221886e-03 - - 5.4260587692260742e-01 5.3513032197952271e-01 - 4.8342779278755188e-01 - <_> - - 0 1 1554 -7.1750381030142307e-03 -1 -2 1555 - 1.1251230025663972e-03 - - 2.0671689510345459e-01 5.1122522354125977e-01 - 3.4687140583992004e-01 - <_> - - 0 1 1556 1.0634710080921650e-02 -1 -2 1557 - -1.1763219721615314e-02 - - 4.4790080189704895e-01 6.2539017200469971e-01 - 4.9689871072769165e-01 - <_> - - 1 0 1558 9.2324063181877136e-02 -1 -2 1559 - 1.8991080578416586e-03 - - 
2.0313039422035217e-01 5.6187218427658081e-01 - 4.0465721487998962e-01 - <_> - - 1 0 1560 -1.0510340332984924e-02 -1 -2 1561 - -7.4531312566250563e-04 - - 4.9432641267776489e-01 5.6134277582168579e-01 - 3.8453319668769836e-01 - <_> - - 1 0 1562 8.0041000619530678e-03 -1 -2 1563 - 5.8110528625547886e-03 - - 7.7598422765731812e-01 4.6247330307960510e-01 - 6.2862771749496460e-01 - <_> - - 0 1 1564 -2.7918580919504166e-02 -1 -2 1565 - 2.1739399526268244e-03 - - 2.4093140661716461e-01 5.3455048799514771e-01 - 3.5079580545425415e-01 - <_> - - 0 1 1566 -4.0639587678015232e-03 -1 -2 1567 - 6.0017139185220003e-04 - - 6.6471010446548462e-01 4.9985098838806152e-01 - 3.0221650004386902e-01 - <_> - - 1 0 1568 1.9214770291000605e-03 -1 -2 1569 - -1.3860830105841160e-02 - - 5.9191507101058960e-01 6.3517677783966064e-01 - 4.9933108687400818e-01 - <_> - - 1 0 1570 2.3006850853562355e-02 -1 -2 1571 - -1.3857929734513164e-03 - - 1.9023360311985016e-01 5.2533692121505737e-01 - 3.9858600497245789e-01 - <_> - - 0 1 1572 1.2637410545721650e-03 -1 -2 1573 - -1.4675210230052471e-02 - - 4.6661040186882019e-01 3.8231649994850159e-01 - 5.3266328573226929e-01 - <_> - - 0 1 1574 -2.9535070061683655e-03 -1 -2 1575 - -1.7189770005643368e-03 - - 7.0636558532714844e-01 3.8134628534317017e-01 - 5.2467352151870728e-01 - <_> - - 1 0 1576 -4.2484089499339461e-04 -1 -2 1577 - -8.5248658433556557e-04 - - 4.7916388511657715e-01 4.4912180304527283e-01 - 5.3709012269973755e-01 - <_> - - 1 0 1578 8.9034568518400192e-03 -1 -2 1579 - 1.4895649655954912e-05 - - 2.0764739811420441e-01 4.4476351141929626e-01 - 5.6671631336212158e-01 - <_> - - 0 1 1580 -4.7091601300053298e-04 -1 -2 1581 - 4.3084810022264719e-04 - - 5.4650712013244629e-01 5.4932618141174316e-01 - 4.5807081460952759e-01 - <_> - - 0 1 1582 -6.3893961487337947e-04 -1 -2 1583 - -7.3733746830839664e-05 - - 5.5015718936920166e-01 5.0857907533645630e-01 - 3.3056980371475220e-01 - <_> - - 0 1 1584 -8.8991485536098480e-03 -1 -2 1585 - 
-1.0253350250422955e-02 - - 4.2764690518379211e-01 1.1232180148363113e-01 - 5.1527231931686401e-01 - <_> - - 0 1 1586 -5.9637490659952164e-02 -1 -2 1587 - 2.1707199513912201e-02 - - 7.3867720365524292e-01 4.9962919950485229e-01 - 1.3394139707088470e-01 - <_> - - 0 1 1588 9.9107045680284500e-03 -1 -2 1589 - -1.0998300276696682e-02 - - 4.6790120005607605e-01 6.9286561012268066e-01 - 5.0120681524276733e-01 - <_> - - 1 0 1590 7.4608891736716032e-04 -1 -2 1591 - 2.9539171373471618e-04 - - 5.8335822820663452e-01 3.8263911008834839e-01 - 5.5663508176803589e-01 - <_> - - 1 0 1592 5.0054129213094711e-02 -1 -2 1593 - -7.2330660186707973e-03 - - 3.0027210712432861e-01 5.9080427885055542e-01 - 5.0008708238601685e-01 - <_> - - 0 1 1594 -2.6863380335271358e-03 -1 -2 1595 - -1.0195849463343620e-03 - - 3.9750349521636963e-01 3.6976858973503113e-01 - 5.7561928033828735e-01 - <_> - - 0 1 1596 -2.0204920321702957e-02 -1 -2 1597 - 2.1340379025787115e-03 - - 6.3752681016921997e-01 5.3632658720016479e-01 - 4.4331708550453186e-01 - <_> - - 0 1 1598 -1.8348889425396919e-03 -1 -2 1599 - -5.9489468112587929e-03 - - 5.8289992809295654e-01 2.6806709170341492e-01 - 4.6428859233856201e-01 - <_> - - 0 1 1600 -2.3030120064504445e-04 -1 -2 1601 - 5.0581009127199650e-03 - - 5.4753202199935913e-01 5.3208339214324951e-01 - 4.6464928984642029e-01 - <_> - - 0 1 1602 -5.1950011402368546e-04 -1 -2 1603 - -6.8620947422459722e-04 - - 5.2327448129653931e-01 4.9350860714912415e-01 - 3.1031179428100586e-01 - <_> - - 0 1 1604 -7.4936267919838428e-03 -1 -2 1605 - -1.5682930126786232e-02 - - 2.8830468654632568e-01 3.6403131484985352e-01 - 5.3687548637390137e-01 - <_> - - 0 1 1606 -3.2649750355631113e-03 -1 -2 1607 - 3.8463930832222104e-04 - - 6.4686310291290283e-01 5.2596598863601685e-01 - 3.8314279913902283e-01 - <_> - - 1 0 1608 4.4492390006780624e-03 -1 -2 1609 - 2.3118320852518082e-02 - - 2.0868189632892609e-01 4.9785330891609192e-01 - 5.9612572193145752e-01 - <_> - - 1 0 1610 2.0835159812122583e-03 -1 -2 
1611 - 1.1513150529935956e-03 - - 5.7464218139648438e-01 3.5868450999259949e-01 - 5.3634738922119141e-01 - <_> - - 1 0 1612 3.6104708909988403e-02 -1 -2 1613 - 3.6256198654882610e-04 - - 2.8331369161605835e-01 5.4777222871780396e-01 - 4.1105321049690247e-01 - <_> - - 0 1 1614 -3.4635469783097506e-03 -1 -2 1615 - -2.8796829283237457e-03 - - 5.9903860092163086e-01 5.7252532243728638e-01 - 4.1495120525360107e-01 - <_> - - 1 0 1616 -8.1119500100612640e-03 -1 -2 1617 - 4.5932079665362835e-03 - - 5.3963518142700195e-01 5.3797042369842529e-01 - 3.8913029432296753e-01 - <_> - - 1 0 1618 7.0014740340411663e-03 -1 -2 1619 - 8.0169539432972670e-04 - - 3.7146711349487305e-01 5.5295670032501221e-01 - 3.7558048963546753e-01 - <_> - - 1 0 1620 -8.6652329191565514e-03 -1 -2 1621 - -2.7315050829201937e-03 - - 5.0257730484008789e-01 5.8503222465515137e-01 - 4.6175739169120789e-01 - <_> - - 1 0 1622 1.3301590224727988e-03 -1 -2 1623 - -4.2648240923881531e-03 - - 5.9377008676528931e-01 5.6453680992126465e-01 - 3.9376249909400940e-01 - <_> - - 0 1 1624 6.3251499086618423e-03 -1 -2 1625 - -3.0753740575164557e-03 - - 5.1821058988571167e-01 3.0074161291122437e-01 - 5.1964038610458374e-01 - <_> - - 0 1 1626 -7.3622138006612659e-04 -1 -2 1627 - 3.0082479497650638e-05 - - 3.6975800991058350e-01 4.3275931477546692e-01 - 5.7158088684082031e-01 - <_> - - 0 1 1628 -3.8722730241715908e-03 -1 -2 1629 - 6.2879058532416821e-04 - - 3.4737130999565125e-01 5.4382592439651489e-01 - 4.4539061188697815e-01 - <_> - - 1 0 1630 1.3411579420790076e-03 -1 -2 1631 - -8.3681922405958176e-03 - - 6.5117138624191284e-01 1.4432950317859650e-01 - 4.8881998658180237e-01 - <_> - - 1 0 1632 9.3305751215666533e-04 -1 -2 1633 - -1.0746510233730078e-03 - - 3.9511090517044067e-01 3.9102658629417419e-01 - 5.3495037555694580e-01 - <_> - - 0 1 1634 -1.8610050901770592e-02 -1 -2 1635 - 1.3651419430971146e-03 - - 1.2757439911365509e-01 5.0382888317108154e-01 - 6.9513040781021118e-01 - <_> - - 0 1 1636 7.3744421824812889e-03 -1 
-2 1637 - 8.4163323044776917e-03 - - 5.2534431219100952e-01 5.0112438201904297e-01 - 7.3113328218460083e-01 - <_> - - 0 1 1638 5.1413988694548607e-03 -1 -2 1639 - 4.5847031287848949e-03 - - 4.9535360932350159e-01 2.5355559587478638e-01 - 6.4624428749084473e-01 - <_> - - 1 0 1640 2.8565239161252975e-02 -1 -2 1641 - 4.3958800961263478e-04 - - 2.3307220637798309e-01 4.7022441029548645e-01 - 5.5445492267608643e-01 - <_> - - 1 0 1642 3.1459458172321320e-02 -1 -2 1643 - 5.6011630222201347e-03 - - 3.3689688891172409e-02 4.7871211171150208e-01 - 6.3383519649505615e-01 - <_> - - 0 1 1644 7.1835669223219156e-04 -1 -2 1645 - -5.5303089320659637e-03 - - 5.4314869642257690e-01 4.1058328747749329e-01 - 5.4039907455444336e-01 - <_> - - 1 0 1646 1.4129279879853129e-03 -1 -2 1647 - 2.5530709535814822e-04 - - 3.1055399775505066e-01 4.2544719576835632e-01 - 5.4471540451049805e-01 - <_> - - 1 0 1648 3.1966410460881889e-04 -1 -2 1649 - 5.0411392003297806e-03 - - 6.1183619499206543e-01 5.2900421619415283e-01 - 4.2247870564460754e-01 - <_> - - 0 1 1650 7.7617880888283253e-03 -1 -2 1651 - 2.9374631121754646e-03 - - 4.3153458833694458e-01 6.6292631626129150e-01 - 3.0289649963378906e-01 - <_> - - 1 0 1652 -1.6497720498591661e-03 -1 -2 1653 - -5.8834417723119259e-03 - - 5.4918527603149414e-01 3.1885540485382080e-01 - 5.1842892169952393e-01 - <_> - - 1 0 1654 8.7459187489002943e-04 -1 -2 1655 - -1.5308779664337635e-02 - - 3.3288308978080750e-01 3.9236080646514893e-01 - 5.2351391315460205e-01 - <_> - - 1 0 1656 3.2292451709508896e-02 -1 -2 1657 - -4.3842519517056644e-04 - - 5.9776467084884644e-01 4.5416879653930664e-01 - 5.3694289922714233e-01 - <_> - - 1 0 1658 1.5429529594257474e-03 -1 -2 1659 - -2.4733028840273619e-03 - - 6.3181412220001221e-01 3.4906330704689026e-01 - 4.7590249776840210e-01 - <_> - - 1 0 1660 2.0994939841330051e-03 -1 -2 1661 - -5.7541108690202236e-03 - - 5.8871978521347046e-01 5.9613317251205444e-01 - 4.8419830203056335e-01 - <_> - - 0 1 1662 -1.0233130306005478e-02 -1 -2 
1663 - 2.2554509341716766e-01 - - 1.7054040729999542e-01 4.7793799638748169e-01 - 9.7879663109779358e-02 - <_> - - 1 0 1664 2.9666559770703316e-02 -1 -2 1665 - -2.8518449980765581e-03 - - 5.8222240209579468e-01 5.4596269130706787e-01 - 4.6100661158561707e-01 - <_> - - 1 0 1666 9.7465328872203827e-04 -1 -2 1667 - 1.4044740055396687e-05 - - 3.6703228950500488e-01 4.3023860454559326e-01 - 5.6917107105255127e-01 - <_> - - 0 1 1668 -1.7579430714249611e-02 -1 -2 1669 - -5.2381679415702820e-02 - - 6.9173210859298706e-01 7.1100401878356934e-01 - 5.0601547956466675e-01 - <_> - - 0 1 1670 -1.1242110282182693e-02 -1 -2 1671 - -3.6728400737047195e-03 - - 8.7691891193389893e-01 6.5191918611526489e-01 - 4.5460689067840576e-01 - <_> - - 0 1 1672 3.5082760732620955e-03 -1 -2 1673 - 6.1679710634052753e-03 - - 5.3298658132553101e-01 5.2204591035842896e-01 - 2.9535189270973206e-01 - <_> - - 1 0 1674 -9.7009900491684675e-04 -1 -2 1675 - -1.0957010090351105e-02 - - 5.0486332178115845e-01 5.8373582363128662e-01 - 3.0200859904289246e-01 - <_> - - 0 1 1676 -8.3272513002157211e-03 -1 -2 1677 - 2.9798380637657829e-05 - - 3.1580638885498047e-01 4.3863898515701294e-01 - 5.4432111978530884e-01 - <_> - - 1 0 1678 2.8244039276614785e-04 -1 -2 1679 - -8.1364117795601487e-04 - - 5.6253957748413086e-01 5.2811980247497559e-01 - 3.4014078974723816e-01 - <_> - - 1 0 1680 1.8008040497079492e-03 -1 -2 1681 - -6.9944779388606548e-03 - - 3.4716591238975525e-01 4.4816970825195312e-01 - 5.3857702016830444e-01 - <_> - - 0 1 1682 4.5625398342963308e-05 -1 -2 1683 - -7.3189922841265798e-04 - - 4.4925129413604736e-01 4.1673120856285095e-01 - 6.0211020708084106e-01 - <_> - - 0 1 1684 -2.9980219551362097e-04 -1 -2 1685 - -2.9060940505587496e-05 - - 4.1484281420707703e-01 5.5920898914337158e-01 - 4.0732109546661377e-01 - <_> - - 0 1 1686 -5.9742690064013004e-04 -1 -2 1687 - 1.4831830048933625e-04 - - 6.0889142751693726e-01 5.2983051538467407e-01 - 3.7619501352310181e-01 - <_> - - 1 0 1688 -2.9441029764711857e-03 
-1 -2 1689 - 1.3741210103034973e-01 - - 4.7160848975181580e-01 5.1013368368148804e-01 - 4.6746801584959030e-02 - <_> - - 0 1 1690 -8.8414177298545837e-02 -1 -2 1691 - 7.0610277354717255e-02 - - 1.1818689852952957e-01 5.1190632581710815e-01 - 7.7784419059753418e-01 - <_> - - 0 1 1692 -7.7188978902995586e-03 -1 -2 1693 - 1.5115399844944477e-02 - - 1.8741349875926971e-01 4.9800279736518860e-01 - 7.0058178901672363e-01 - <_> - - 0 1 1694 1.0671879863366485e-03 -1 -2 1695 - 7.0487911580130458e-04 - - 4.4822388887405396e-01 6.2657529115676880e-01 - 4.4026550650596619e-01 - <_> - 90 - 4.4251281738281250e+01 - - <_> - - 1 0 1696 -9.8690733313560486e-02 -1 -2 1697 - 6.2373418360948563e-02 - - 3.9994749426841736e-01 5.2477848529815674e-01 - 8.1935757398605347e-01 - <_> - - 0 1 1698 1.9496519817039371e-03 -1 -2 1699 - -8.9139147894456983e-04 - - 3.5298168659210205e-01 5.8527278900146484e-01 - 3.2459780573844910e-01 - <_> - - 0 1 1700 -5.5150408297777176e-04 -1 -2 1701 - -1.1721949558705091e-03 - - 3.8928169012069702e-01 4.3350520730018616e-01 - 6.5206241607666016e-01 - <_> - - 1 0 1702 -7.4480642797425389e-04 -1 -2 1703 - -2.6264840271323919e-03 - - 4.0411350131034851e-01 5.6249821186065674e-01 - 3.9675250649452209e-01 - <_> - - 0 1 1704 -3.9712688885629177e-04 -1 -2 1705 - 3.5984949208796024e-03 - - 3.8561120629310608e-01 5.9978890419006348e-01 - 4.2416140437126160e-01 - <_> - - 1 0 1706 5.3080618381500244e-03 -1 -2 1707 - 9.6319877775385976e-04 - - 6.6601687669754028e-01 4.4813790917396545e-01 - 5.5834877490997314e-01 - <_> - - 0 1 1708 5.0776469288393855e-04 -1 -2 1709 - 3.6223160568624735e-03 - - 3.5354590415954590e-01 3.4098070859909058e-01 - 5.4206877946853638e-01 - <_> - - 0 1 1710 -6.2061410397291183e-02 -1 -2 1711 - 6.4387189922854304e-04 - - 1.9340839982032776e-01 4.0836268663406372e-01 - 5.4902219772338867e-01 - <_> - - 1 0 1712 2.6239909231662750e-02 -1 -2 1713 - 8.1940297968685627e-04 - - 2.2857080399990082e-01 4.6486678719520569e-01 - 6.0173559188842773e-01 - 
<_> - - 1 0 1714 2.3833119485061616e-04 -1 -2 1715 - -1.5869759954512119e-03 - - 3.5980388522148132e-01 4.2596510052680969e-01 - 5.4764348268508911e-01 - <_> - - 0 1 1716 -6.7263417877256870e-03 -1 -2 1717 - 1.1006110347807407e-02 - - 6.5072381496429443e-01 5.1494097709655762e-01 - 3.3629849553108215e-01 - <_> - - 1 0 1718 7.1445819921791553e-03 -1 -2 1719 - -4.7233798541128635e-03 - - 2.6729300618171692e-01 5.6521821022033691e-01 - 4.2981448769569397e-01 - <_> - - 1 0 1720 9.8437406122684479e-03 -1 -2 1721 - 1.5124640412977897e-05 - - 1.1518859863281250e-01 4.3735980987548828e-01 - 5.6121289730072021e-01 - <_> - - 0 1 1722 3.9908871054649353e-02 -1 -2 1723 - 5.3903679363429546e-03 - - 5.2046489715576172e-01 4.8134678602218628e-01 - 6.3612091541290283e-01 - <_> - - 0 1 1724 -3.9908871054649353e-02 -1 -2 1725 - 5.3903679363429546e-03 - - 1.5068709850311279e-01 4.5816949009895325e-01 - 6.2002408504486084e-01 - <_> - - 1 0 1726 6.7005190066993237e-03 -1 -2 1727 - -1.2623789720237255e-02 - - 3.4322351217269897e-01 3.0882269144058228e-01 - 5.2267378568649292e-01 - <_> - - 1 0 1728 1.1806610040366650e-02 -1 -2 1729 - -3.4257229417562485e-03 - - 7.1879392862319946e-01 3.1208148598670959e-01 - 5.0658440589904785e-01 - <_> - - 0 1 1730 3.9385299896821380e-04 -1 -2 1731 - 3.4388188272714615e-02 - - 4.7545841336250305e-01 5.2616578340530396e-01 - 3.3501741290092468e-01 - <_> - - 0 1 1732 -7.5009986758232117e-02 -1 -2 1733 - 4.9022492021322250e-04 - - 1.7134809494018555e-01 4.7258019447326660e-01 - 5.9564691781997681e-01 - <_> - - 0 1 1734 -8.5525289177894592e-03 -1 -2 1735 - 1.3135520566720515e-04 - - 6.5582227706909180e-01 4.8354008793830872e-01 - 5.5869138240814209e-01 - <_> - - 1 0 1736 4.7948658466339111e-03 -1 -2 1737 - 2.0124691072851419e-03 - - 2.6457059383392334e-01 3.6579450964927673e-01 - 5.1247721910476685e-01 - <_> - - 0 1 1738 -1.1785479635000229e-01 -1 -2 1739 - 1.5575019642710686e-03 - - 2.3856540024280548e-01 5.4904741048812866e-01 - 4.2747479677200317e-01 - 
<_> - - 0 1 1740 -1.5573759563267231e-02 -1 -2 1741 - -2.1854790393263102e-03 - - 6.9389009475708008e-01 3.6459881067276001e-01 - 5.0925260782241821e-01 - <_> - - 0 1 1742 2.9272339306771755e-03 -1 -2 1743 - 6.4663668163120747e-03 - - 4.6858081221580505e-01 4.9734100699424744e-01 - 7.7260971069335938e-01 - <_> - - 0 1 1744 -7.6140360906720161e-03 -1 -2 1745 - 4.1512572206556797e-03 - - 6.8774658441543579e-01 4.7885251045227051e-01 - 6.9216579198837280e-01 - <_> - - 0 1 1746 2.7711640577763319e-03 -1 -2 1747 - -1.2836109846830368e-02 - - 5.4818397760391235e-01 3.8001629710197449e-01 - 5.2044928073883057e-01 - <_> - - 0 1 1748 -2.4380050599575043e-03 -1 -2 1749 - 2.1713329479098320e-03 - - 2.5824350118637085e-01 4.9611631035804749e-01 - 3.2152029871940613e-01 - <_> - - 1 0 1750 6.2800728483125567e-04 -1 -2 1751 - -9.7982389852404594e-03 - - 5.4604238271713257e-01 6.0465437173843384e-01 - 4.9399220943450928e-01 - <_> - - 1 0 1752 7.3543828912079334e-03 -1 -2 1753 - -1.4665040187537670e-02 - - 5.2910941839218140e-01 5.4461228847503662e-01 - 3.5673621296882629e-01 - <_> - - 0 1 1754 3.0244510620832443e-02 -1 -2 1755 - -5.6660208851099014e-02 - - 5.5183291435241699e-01 6.9309788942337036e-01 - 5.0933879613876343e-01 - <_> - - 0 1 1756 -5.6967479176819324e-03 -1 -2 1757 - 3.0806770548224449e-02 - - 3.2015261054039001e-01 4.9892461299896240e-01 - 2.2770540416240692e-01 - <_> - - 0 1 1758 2.2748769260942936e-03 -1 -2 1759 - 2.0436900667846203e-03 - - 4.8109310865402222e-01 5.2838671207427979e-01 - 3.2559248805046082e-01 - <_> - - 0 1 1760 -8.6277956143021584e-03 -1 -2 1761 - 6.5113382879644632e-04 - - 6.2665361166000366e-01 5.0971370935440063e-01 - 3.1919100880622864e-01 - <_> - - 0 1 1762 8.8188261725008488e-04 -1 -2 1763 - -1.4594909735023975e-02 - - 4.5495858788490295e-01 2.6450389623641968e-01 - 5.1538681983947754e-01 - <_> - - 0 1 1764 -1.2304580304771662e-03 -1 -2 1765 - -2.1867299801670015e-04 - - 6.1975848674774170e-01 5.4691988229751587e-01 - 4.2068558931350708e-01 
- <_> - - 0 1 1766 -1.0909959673881531e-03 -1 -2 1767 - 3.5210378700867295e-04 - - 4.1407600045204163e-01 5.4766088724136353e-01 - 4.1550210118293762e-01 - <_> - - 0 1 1768 -7.2563779540359974e-03 -1 -2 1769 - 1.4701850013807416e-03 - - 7.1604692935943604e-01 5.2408081293106079e-01 - 3.7296628952026367e-01 - <_> - - 0 1 1770 1.1472719779703766e-04 -1 -2 1771 - 3.0506469774991274e-03 - - 4.0337988734245300e-01 5.2639859914779663e-01 - 3.5600930452346802e-01 - <_> - - 0 1 1772 2.6269949739798903e-04 -1 -2 1773 - -3.6365550477057695e-03 - - 4.5697999000549316e-01 3.0425709486007690e-01 - 5.8682537078857422e-01 - <_> - - 1 0 1774 -8.4893293678760529e-03 -1 -2 1775 - 5.8107408694922924e-03 - - 4.9141570925712585e-01 4.9185299873352051e-01 - 6.2669628858566284e-01 - <_> - - 1 0 1776 7.5583951547741890e-04 -1 -2 1777 - -2.2017690353095531e-03 - - 5.6332361698150635e-01 5.5539160966873169e-01 - 3.8276460766792297e-01 - <_> - - 0 1 1778 2.7908938936889172e-03 -1 -2 1779 - -1.8228569533675909e-03 - - 5.4986977577209473e-01 4.3822830915451050e-01 - 5.4240328073501587e-01 - <_> - - 0 1 1780 -7.2495508939027786e-03 -1 -2 1781 - -6.8744522286579013e-04 - - 2.8881219029426575e-01 3.4726551175117493e-01 - 5.0763708353042603e-01 - <_> - - 0 1 1782 2.5174440816044807e-03 -1 -2 1783 - -1.0151379741728306e-02 - - 4.6612051129341125e-01 3.7447750568389893e-01 - 5.2940011024475098e-01 - <_> - - 1 0 1784 -4.1399952024221420e-03 -1 -2 1785 - -4.7078551724553108e-03 - - 4.6604850888252258e-01 4.1750618815422058e-01 - 6.9163060188293457e-01 - <_> - - 1 0 1786 4.1981041431427002e-02 -1 -2 1787 - -1.4272999949753284e-02 - - 2.0182150602340698e-01 7.5111979246139526e-01 - 5.0320839881896973e-01 - <_> - - 1 0 1788 4.0869521908462048e-03 -1 -2 1789 - 1.7606799956411123e-03 - - 2.5045138597488403e-01 3.3014011383056641e-01 - 5.2183371782302856e-01 - <_> - - 0 1 1790 1.2550549581646919e-04 -1 -2 1791 - -2.9503209516406059e-03 - - 4.6144428849220276e-01 4.6199500560760498e-01 - 
5.2470302581787109e-01 - <_> - - 0 1 1792 -1.1312420247122645e-03 -1 -2 1793 - -1.6983180539682508e-03 - - 6.3143682479858398e-01 3.4013068675994873e-01 - 5.0555270910263062e-01 - <_> - - 1 0 1794 -1.1457820422947407e-02 -1 -2 1795 - -8.4962565451860428e-03 - - 4.9399960041046143e-01 2.9654508829116821e-01 - 5.1943677663803101e-01 - <_> - - 1 0 1796 1.1919089592993259e-02 -1 -2 1797 - 6.4416420646011829e-03 - - 7.8869980573654175e-01 5.1069867610931396e-01 - 2.9671460390090942e-01 - <_> - - 0 1 1798 -8.7857811013236642e-04 -1 -2 1799 - -2.0312711130827665e-03 - - 5.7143712043762207e-01 4.4812008738517761e-01 - 5.3849118947982788e-01 - <_> - - 0 1 1800 -1.5262430533766747e-03 -1 -2 1801 - 4.2860880494117737e-03 - - 6.1935687065124512e-01 4.3398851156234741e-01 - 7.6972991228103638e-01 - <_> - - 1 0 1802 3.5010920837521553e-03 -1 -2 1803 - 1.2587670236825943e-02 - - 3.1713891029357910e-01 5.2466988563537598e-01 - 4.2412081360816956e-01 - <_> - - 0 1 1804 2.6207490009255707e-04 -1 -2 1805 - 4.4701730075757951e-05 - - 4.2318999767303467e-01 4.1741389036178589e-01 - 5.9196037054061890e-01 - <_> - - 0 1 1806 7.8084698179736733e-04 -1 -2 1807 - 8.8851212058216333e-04 - - 4.2773890495300293e-01 3.7201610207557678e-01 - 5.2268189191818237e-01 - <_> - - 0 1 1808 2.3369069676846266e-03 -1 -2 1809 - 1.6688359901309013e-03 - - 5.4780668020248413e-01 3.6286789178848267e-01 - 6.1500048637390137e-01 - <_> - - 0 1 1810 3.0844469438306987e-04 -1 -2 1811 - 3.4617560449987650e-03 - - 4.7470751404762268e-01 4.5801380276679993e-01 - 5.5856817960739136e-01 - <_> - - 0 1 1812 1.8961310386657715e-02 -1 -2 1813 - 1.7347310483455658e-01 - - 5.2988010644912720e-01 3.6983850598335266e-01 - 8.4986197948455811e-01 - <_> - - 1 0 1814 2.0020549709443003e-04 -1 -2 1815 - 1.0967060225084424e-03 - - 5.5656617879867554e-01 4.7957131266593933e-01 - 6.2862598896026611e-01 - <_> - - 0 1 1816 1.5107099898159504e-04 -1 -2 1817 - -3.4463501069694757e-03 - - 4.0524059534072876e-01 6.1730152368545532e-01 - 
4.4142639636993408e-01 - <_> - - 1 0 1818 8.5176620632410049e-03 -1 -2 1819 - -3.5812109708786011e-02 - - 3.5705709457397461e-01 3.1513288617134094e-01 - 5.2527028322219849e-01 - <_> - - 0 1 1820 -2.1155400201678276e-02 -1 -2 1821 - 8.9890940580517054e-04 - - 6.1247211694717407e-01 5.1699757575988770e-01 - 3.5962718725204468e-01 - <_> - - 1 0 1822 -1.5613760333508253e-03 -1 -2 1823 - 6.7120860330760479e-04 - - 4.9149879813194275e-01 4.5462110638618469e-01 - 5.3958117961883545e-01 - <_> - - 0 1 1824 -2.1597029641270638e-02 -1 -2 1825 - -2.4947229772806168e-02 - - 1.9031339883804321e-01 6.9740772247314453e-01 - 4.9677160382270813e-01 - <_> - - 0 1 1826 1.8725979607552290e-03 -1 -2 1827 - 6.3912719488143921e-03 - - 4.7489479184150696e-01 5.1801782846450806e-01 - 2.9243218898773193e-01 - <_> - - 0 1 1828 -9.1552399098873138e-03 -1 -2 1829 - 2.1715660113841295e-03 - - 7.6658701896667480e-01 5.2155512571334839e-01 - 3.3657190203666687e-01 - <_> - - 1 0 1830 1.2330369791015983e-03 -1 -2 1831 - -4.0785901364870369e-04 - - 6.2609577178955078e-01 4.5335099101066589e-01 - 5.3864890336990356e-01 - <_> - - 0 1 1832 4.6437609125860035e-04 -1 -2 1833 - -1.1600199650274590e-04 - - 4.1034960746765137e-01 5.8303910493850708e-01 - 4.3041059374809265e-01 - <_> - - 0 1 1834 -1.2718720361590385e-02 -1 -2 1835 - 8.9431880041956902e-05 - - 2.1325829625129700e-01 4.8728910088539124e-01 - 5.4589152336120605e-01 - <_> - - 0 1 1836 -3.3913689549081028e-04 -1 -2 1837 - -1.8026340752840042e-02 - - 3.9743649959564209e-01 7.5685507059097290e-01 - 5.0456118583679199e-01 - <_> - - 1 0 1838 6.9179181009531021e-03 -1 -2 1839 - -1.1839679791592062e-04 - - 3.9662998914718628e-01 4.1980829834938049e-01 - 5.4358041286468506e-01 - <_> - - 0 1 1840 -3.9474181830883026e-03 -1 -2 1841 - 6.0050919273635373e-05 - - 6.3694578409194946e-01 5.2695667743682861e-01 - 3.8122430443763733e-01 - <_> - - 1 0 1842 9.1423643752932549e-03 -1 -2 1843 - 2.1305440168362111e-04 - - 4.1567629575729370e-01 3.5235330462455750e-01 
- 5.3494542837142944e-01 - <_> - - 1 0 1844 -2.0855850016232580e-04 -1 -2 1845 - 1.3130389852449298e-03 - - 4.4033220410346985e-01 6.0581612586975098e-01 - 4.4682189822196960e-01 - <_> - - 1 0 1846 -2.9134768992662430e-03 -1 -2 1847 - 2.9645769391208887e-03 - - 4.8257058858871460e-01 4.8359981179237366e-01 - 6.0392779111862183e-01 - <_> - - 1 0 1848 1.7772549763321877e-03 -1 -2 1849 - -7.7136349864304066e-03 - - 6.8718272447586060e-01 2.8422209620475769e-01 - 5.1454281806945801e-01 - <_> - - 1 0 1850 5.1027478184551001e-04 -1 -2 1851 - 1.7460630042478442e-03 - - 6.0244262218475342e-01 4.7566100955009460e-01 - 5.7211542129516602e-01 - <_> - - 1 0 1852 3.8068278809078038e-04 -1 -2 1853 - 2.8228890150785446e-03 - - 4.9310690164566040e-01 3.3116981387138367e-01 - 6.2275981903076172e-01 - <_> - - 1 0 1854 -5.3000478073954582e-03 -1 -2 1855 - 4.4951299059903249e-05 - - 5.2320927381515503e-01 3.9952319860458374e-01 - 5.3147977590560913e-01 - <_> - - 0 1 1856 3.2752458937466145e-03 -1 -2 1857 - -2.8162579983472824e-03 - - 4.4816198945045471e-01 3.9079719781875610e-01 - 6.6716408729553223e-01 - <_> - - 0 1 1858 1.4112279750406742e-03 -1 -2 1859 - 8.3062034100294113e-03 - - 5.3570109605789185e-01 4.7709658741950989e-01 - 5.5700999498367310e-01 - <_> - - 0 1 1860 2.2164839319884777e-03 -1 -2 1861 - -4.9868631176650524e-03 - - 4.9471241235733032e-01 5.2413070201873779e-01 - 2.5126549601554871e-01 - <_> - - 1 0 1862 -3.6664260551333427e-03 -1 -2 1863 - -1.0581229813396931e-02 - - 4.6195539832115173e-01 6.3017189502716064e-01 - 4.9730318784713745e-01 - <_> - - 1 0 1864 7.3366491124033928e-03 -1 -2 1865 - -3.9318940252996981e-04 - - 2.8709700703620911e-01 4.2528051137924194e-01 - 5.5792468786239624e-01 - <_> - - 0 1 1866 -8.1375334411859512e-03 -1 -2 1867 - 2.4809150490909815e-03 - - 5.7473158836364746e-01 5.2033740282058716e-01 - 3.9035668969154358e-01 - <_> - - 1 0 1868 8.8749779388308525e-04 -1 -2 1869 - -4.2194919660687447e-04 - - 5.5343210697174072e-01 5.3380441665649414e-01 
- 3.9258408546447754e-01 - <_> - - 0 1 1870 -7.9790111631155014e-03 -1 -2 1871 - 1.1439629597589374e-03 - - 4.1443160176277161e-01 4.7013729810714722e-01 - 5.2817362546920776e-01 - <_> - - 1 0 1872 7.5542130507528782e-03 -1 -2 1873 - 1.0288399644196033e-03 - - 2.5272560119628906e-01 5.6051462888717651e-01 - 4.2978560924530029e-01 - <_> - - 1 0 1874 -1.7234670231118798e-03 -1 -2 1875 - 5.7586699724197388e-01 - - 4.8396828770637512e-01 5.1105028390884399e-01 - 8.0489329993724823e-02 - <_> - 109 - 5.3755569458007812e+01 - - <_> - - 0 1 1876 6.6640521399676800e-03 -1 -2 1877 - 8.9905522763729095e-03 - - 3.8289201259613037e-01 4.8584291338920593e-01 - 7.3549592494964600e-01 - <_> - - 1 0 1878 5.7154200039803982e-03 -1 -2 1879 - 1.1257929727435112e-03 - - 6.7232239246368408e-01 4.4295778870582581e-01 - 6.0707777738571167e-01 - <_> - - 1 0 1880 -9.1789010912179947e-04 -1 -2 1881 - -1.0492859873920679e-03 - - 3.0763450264930725e-01 5.5936437845230103e-01 - 3.6510229110717773e-01 - <_> - - 0 1 1882 3.5453929740469903e-05 -1 -2 1883 - 2.9015709878876805e-04 - - 4.2779681086540222e-01 4.5835450291633606e-01 - 5.2846831083297729e-01 - <_> - - 1 0 1884 1.6071660502348095e-04 -1 -2 1885 - -5.2961107576265931e-04 - - 3.7981921434402466e-01 3.8504371047019958e-01 - 5.9396880865097046e-01 - <_> - - 0 1 1886 2.6682569296099246e-04 -1 -2 1887 - -1.3492540165316314e-04 - - 4.1230249404907227e-01 5.7605999708175659e-01 - 4.2376458644866943e-01 - <_> - - 0 1 1888 -1.0841679759323597e-02 -1 -2 1889 - 1.2077829800546169e-02 - - 3.9299210906028748e-01 5.7619231939315796e-01 - 2.7804449200630188e-01 - <_> - - 0 1 1890 2.2128869313746691e-03 -1 -2 1891 - -1.5266190283000469e-02 - - 4.7945070266723633e-01 7.4055880308151245e-02 - 5.1535779237747192e-01 - <_> - - 1 0 1892 6.7929533543065190e-05 -1 -2 1893 - 1.7633590323384851e-04 - - 5.8587378263473511e-01 3.5676109790802002e-01 - 5.5989629030227661e-01 - <_> - - 1 0 1894 8.1311381654813886e-04 -1 -2 1895 - 3.2630451023578644e-03 - - 
5.3468507528305054e-01 4.7825369238853455e-01 - 5.4567539691925049e-01 - <_> - - 0 1 1896 -3.9503918960690498e-03 -1 -2 1897 - -3.9864578866399825e-04 - - 2.8318119049072266e-01 5.4852157831192017e-01 - 4.1596978902816772e-01 - <_> - - 0 1 1898 -1.1432520113885403e-02 -1 -2 1899 - 5.3339172154664993e-03 - - 5.6391012668609619e-01 4.5969840884208679e-01 - 5.9312427043914795e-01 - <_> - - 1 0 1900 8.3193257451057434e-03 -1 -2 1901 - -4.2479918920435011e-04 - - 3.2306200265884399e-01 3.7952938675880432e-01 - 5.4086112976074219e-01 - <_> - - 0 1 1902 -1.1189430207014084e-01 -1 -2 1903 - -7.5553781352937222e-03 - - 1.1322979629039764e-01 6.3393700122833252e-01 - 4.8387709259986877e-01 - <_> - - 0 1 1904 -7.0337029173970222e-03 -1 -2 1905 - -1.4833680354058743e-02 - - 5.6652551889419556e-01 6.7514181137084961e-01 - 4.1409450769424438e-01 - <_> - - 1 0 1906 8.7506724521517754e-03 -1 -2 1907 - 1.6645010327920318e-03 - - 3.5612589120864868e-01 5.3472799062728882e-01 - 3.6497798562049866e-01 - <_> - - 1 0 1908 9.4900820404291153e-03 -1 -2 1909 - 1.1133110383525491e-03 - - 2.7546560764312744e-01 4.2259928584098816e-01 - 5.6291788816452026e-01 - <_> - - 0 1 1910 9.4940755516290665e-03 -1 -2 1911 - -1.5396620146930218e-03 - - 4.9060368537902832e-01 4.0070518851280212e-01 - 5.3807091712951660e-01 - <_> - - 1 0 1912 1.3434959948062897e-01 -1 -2 1913 - -9.4940755516290665e-03 - - 2.2146719694137573e-01 7.3531562089920044e-01 - 5.0050330162048340e-01 - <_> - - 1 0 1914 2.0011790096759796e-02 -1 -2 1915 - -1.8875009845942259e-03 - - 3.3279061317443848e-01 3.9152890443801880e-01 - 5.4018497467041016e-01 - <_> - - 1 0 1916 7.1842782199382782e-03 -1 -2 1917 - 1.6976969782263041e-03 - - 7.1766048669815063e-01 4.5269781351089478e-01 - 6.0769128799438477e-01 - <_> - - 1 0 1918 4.9219978973269463e-03 -1 -2 1919 - 1.1803199537098408e-02 - - 2.5698339939117432e-01 4.9996379017829895e-01 - 5.9582281112670898e-01 - <_> - - 0 1 1920 -9.7703449428081512e-03 -1 -2 1921 - 2.1174899302423000e-03 - 
- 3.4590938687324524e-01 4.5151269435882568e-01 - 5.8297157287597656e-01 - <_> - - 0 1 1922 9.4801411032676697e-03 -1 -2 1923 - -2.6078789960592985e-03 - - 4.8073920607566833e-01 3.4622168540954590e-01 - 5.2015948295593262e-01 - <_> - - 0 1 1924 -5.7252747938036919e-03 -1 -2 1925 - -8.2325618714094162e-03 - - 6.5998530387878418e-01 2.8218281269073486e-01 - 5.1252847909927368e-01 - <_> - - 0 1 1926 8.9571950957179070e-04 -1 -2 1927 - -1.5021569561213255e-04 - - 4.8838189244270325e-01 4.8299181461334229e-01 - 5.4287171363830566e-01 - <_> - - 0 1 1928 4.8489659093320370e-04 -1 -2 1929 - -9.6192650496959686e-02 - - 4.4345989823341370e-01 2.2566360235214233e-01 - 5.9562277793884277e-01 - <_> - - 0 1 1930 -1.1053519556298852e-03 -1 -2 1931 - -1.0215040296316147e-01 - - 4.5272240042686462e-01 2.8443491458892822e-01 - 5.1864528656005859e-01 - <_> - - 1 0 1932 3.0147889629006386e-03 -1 -2 1933 - 7.6131648384034634e-03 - - 3.8089990615844727e-01 5.7186990976333618e-01 - 4.2625638842582703e-01 - <_> - - 1 0 1934 1.5197630273178220e-03 -1 -2 1935 - -1.4197279699146748e-02 - - 5.9427189826965332e-01 7.7311038970947266e-01 - 4.9976539611816406e-01 - <_> - - 0 1 1936 -1.3818879611790180e-02 -1 -2 1937 - -5.0701329018920660e-04 - - 6.6811382770538330e-01 3.3056080341339111e-01 - 4.7499749064445496e-01 - <_> - - 0 1 1938 -9.3537531793117523e-03 -1 -2 1939 - -9.4771059229969978e-03 - - 2.8609329462051392e-01 6.1888831853866577e-01 - 4.8421001434326172e-01 - <_> - - 1 0 1940 1.6923650400713086e-03 -1 -2 1941 - 5.8652542065829039e-04 - - 6.0702490806579590e-01 3.7826898694038391e-01 - 5.3681969642639160e-01 - <_> - - 0 1 1942 -2.5826620403677225e-03 -1 -2 1943 - -2.7307639829814434e-03 - - 3.6902099847793579e-01 3.8571149110794067e-01 - 5.3181087970733643e-01 - <_> - - 1 0 1944 2.1871570497751236e-02 -1 -2 1945 - -1.5010299648565706e-05 - - 2.3270089924335480e-01 5.5607229471206665e-01 - 4.3014100193977356e-01 - <_> - - 1 0 1946 5.3583700209856033e-03 -1 -2 1947 - 
5.0057549960911274e-03 - - 6.7676377296447754e-01 5.1949042081832886e-01 - 3.6128538846969604e-01 - <_> - - 0 1 1948 -1.9030070398002863e-03 -1 -2 1949 - -7.8506693243980408e-03 - - 3.2378450036048889e-01 1.1948519945144653e-01 - 4.9917238950729370e-01 - <_> - - 1 0 1950 -2.7093670796602964e-03 -1 -2 1951 - 1.4138079714030027e-03 - - 4.8549601435661316e-01 4.8723229765892029e-01 - 5.9035778045654297e-01 - <_> - - 1 0 1952 9.0300198644399643e-03 -1 -2 1953 - -9.7925681620836258e-04 - - 6.5473157167434692e-01 5.8492732048034668e-01 - 4.5542308688163757e-01 - <_> - - 1 0 1954 1.3984439428895712e-03 -1 -2 1955 - 8.3372107474133372e-04 - - 4.0646260976791382e-01 5.3995430469512939e-01 - 4.1528099775314331e-01 - <_> - - 1 0 1956 1.0551059618592262e-02 -1 -2 1957 - 8.8344102550763637e-05 - - 1.7966809868812561e-01 4.2518630623817444e-01 - 5.4135227203369141e-01 - <_> - - 1 0 1958 -4.1022308170795441e-02 -1 -2 1959 - 7.5065628625452518e-03 - - 5.2281248569488525e-01 4.8537430167198181e-01 - 6.0934442281723022e-01 - <_> - - 1 0 1960 4.1022308170795441e-02 -1 -2 1961 - -5.3961377125233412e-04 - - 2.2050240635871887e-01 5.6927317380905151e-01 - 4.4687569141387939e-01 - <_> - - 0 1 1962 -6.8696036934852600e-02 -1 -2 1963 - -1.8447940237820148e-03 - - 1.4833140373229980e-01 6.2112838029861450e-01 - 4.9666011333465576e-01 - <_> - - 0 1 1964 -6.0959919355809689e-03 -1 -2 1965 - -4.2068301700055599e-03 - - 2.2946719825267792e-01 6.4070910215377808e-01 - 4.7485628724098206e-01 - <_> - - 1 0 1966 -7.1332789957523346e-04 -1 -2 1967 - 1.1756779998540878e-01 - - 5.3549361228942871e-01 5.1369780302047729e-01 - 1.0595739819109440e-02 - <_> - - 0 1 1968 5.9354289987822995e-05 -1 -2 1969 - -6.3173691742122173e-03 - - 3.7118038535118103e-01 1.7120739817619324e-01 - 5.0617581605911255e-01 - <_> - - 1 0 1970 1.4941499568521976e-02 -1 -2 1971 - -2.0789399277418852e-03 - - 6.7291188240051270e-01 4.4106459617614746e-01 - 5.4440277814865112e-01 - <_> - - 0 1 1972 -7.0736219640821218e-04 -1 -2 
1973 - -3.1247111037373543e-03 - - 5.5689108371734619e-01 5.0238692760467529e-01 - 3.5624051094055176e-01 - <_> - - 1 0 1974 -7.8919378574937582e-04 -1 -2 1975 - 1.0179580189287663e-02 - - 5.4567861557006836e-01 5.5451387166976929e-01 - 4.6223109960556030e-01 - <_> - - 1 0 1976 -2.7506109327077866e-03 -1 -2 1977 - 1.0601329617202282e-02 - - 4.9425360560417175e-01 2.9612338542938232e-01 - 5.9643387794494629e-01 - <_> - - 0 1 1978 5.1466780714690685e-03 -1 -2 1979 - 7.6321147382259369e-02 - - 5.4952287673950195e-01 5.1739591360092163e-01 - 2.9402169585227966e-01 - <_> - - 0 1 1980 -1.5027689514681697e-03 -1 -2 1981 - 1.2266670353710651e-02 - - 3.1062999367713928e-01 4.6511501073837280e-01 - 6.8466138839721680e-01 - <_> - - 1 0 1982 -3.1118579208850861e-02 -1 -2 1983 - 2.8905589133501053e-02 - - 5.2260571718215942e-01 5.1822441816329956e-01 - 2.7054280042648315e-01 - <_> - - 1 0 1984 4.7598380595445633e-02 -1 -2 1985 - 3.0808549374341965e-02 - - 1.1095120012760162e-01 4.9386250972747803e-01 - 1.4041109383106232e-01 - <_> - - 1 0 1986 -2.1277810446918011e-04 -1 -2 1987 - 7.8969962894916534e-02 - - 4.3923568725585938e-01 5.2165520191192627e-01 - 2.2941139340400696e-01 - <_> - - 0 1 1988 -1.0257950052618980e-02 -1 -2 1989 - 1.2604889925569296e-03 - - 6.1766529083251953e-01 5.2362227439880371e-01 - 3.3289659023284912e-01 - <_> - - 1 0 1990 -3.3490460366010666e-02 -1 -2 1991 - -5.9202767442911863e-04 - - 4.8661869764328003e-01 4.1164070367813110e-01 - 5.3956401348114014e-01 - <_> - - 1 0 1992 3.0320750738610514e-05 -1 -2 1993 - -5.4369680583477020e-04 - - 5.6107360124588013e-01 5.6213891506195068e-01 - 3.4612038731575012e-01 - <_> - - 1 0 1994 -3.3490460366010666e-02 -1 -2 1995 - -5.9202767442911863e-04 - - 4.8967620730400085e-01 4.3054041266441345e-01 - 5.3407138586044312e-01 - <_> - - 0 1 1996 2.0550889894366264e-03 -1 -2 1997 - -4.4353571720421314e-03 - - 5.5449998378753662e-01 6.0385400056838989e-01 - 3.7465929985046387e-01 - <_> - - 1 0 1998 -8.4170423448085785e-02 -1 
-2 1999 - 6.7419027909636497e-03 - - 5.0073480606079102e-01 5.2980971336364746e-01 - 4.7161450982093811e-01 - <_> - - 1 0 2000 1.0278150439262390e-02 -1 -2 2001 - 5.8800862170755863e-03 - - 6.2693750858306885e-01 5.1548278331756592e-01 - 3.8130408525466919e-01 - <_> - - 1 0 2002 -6.9679190346505493e-06 -1 -2 2003 - 8.2419527461752295e-04 - - 4.4402399659156799e-01 4.6975341439247131e-01 - 5.4855042695999146e-01 - <_> - - 0 1 2004 -5.5268318392336369e-03 -1 -2 2005 - 9.6128671430051327e-04 - - 5.5136048793792725e-01 3.6186391115188599e-01 - 5.8384567499160767e-01 - <_> - - 1 0 2006 2.4810510221868753e-03 -1 -2 2007 - -1.0480589699000120e-03 - - 2.5232228636741638e-01 4.1172578930854797e-01 - 5.3929960727691650e-01 - <_> - - 0 1 2008 -6.1287907883524895e-03 -1 -2 2009 - 1.1682329932227731e-04 - - 6.7263299226760864e-01 5.0411927700042725e-01 - 3.6077290773391724e-01 - <_> - - 0 1 2010 -3.9909478276968002e-02 -1 -2 2011 - 1.5859459526836872e-03 - - 1.5637390315532684e-01 4.8919808864593506e-01 - 5.7798451185226440e-01 - <_> - - 0 1 2012 -2.2690229117870331e-02 -1 -2 2013 - 2.0916070789098740e-03 - - 2.1868790686130524e-01 4.7715771198272705e-01 - 6.0992312431335449e-01 - <_> - - 0 1 2014 -2.4715419858694077e-02 -1 -2 2015 - -1.3419450260698795e-02 - - 3.4639969468116760e-01 3.6306929588317871e-01 - 5.2521961927413940e-01 - <_> - - 0 1 2016 -6.0629472136497498e-03 -1 -2 2017 - -2.0921030081808567e-03 - - 6.6663217544555664e-01 3.3995470404624939e-01 - 5.0356978178024292e-01 - <_> - - 0 1 2018 2.5961859151721001e-02 -1 -2 2019 - 1.7908669542521238e-04 - - 5.0368028879165649e-01 5.4185307025909424e-01 - 4.3189769983291626e-01 - <_> - - 0 1 2020 -3.1546850223094225e-03 -1 -2 2021 - -1.1397759662941098e-03 - - 7.2210252285003662e-01 3.3209729194641113e-01 - 5.0244337320327759e-01 - <_> - - 0 1 2022 -4.7840211540460587e-02 -1 -2 2023 - 4.1577088995836675e-04 - - 1.9387650489807129e-01 4.8021888732910156e-01 - 5.7307147979736328e-01 - <_> - - 0 1 2024 -4.4247039477340877e-04 
-1 -2 2025 - 1.4479350065812469e-03 - - 4.2625150084495544e-01 5.7191711664199829e-01 - 4.0641531348228455e-01 - <_> - - 0 1 2026 1.5701510012149811e-02 -1 -2 2027 - 2.7805729769170284e-04 - - 4.9957260489463806e-01 5.2892869710922241e-01 - 4.5817288756370544e-01 - <_> - - 0 1 2028 -2.9010509606450796e-03 -1 -2 2029 - 2.0830519497394562e-04 - - 6.0121482610702515e-01 5.0579768419265747e-01 - 3.5994321107864380e-01 - <_> - - 1 0 2030 -5.1530029624700546e-02 -1 -2 2031 - 1.7163449956569821e-04 - - 4.9917969107627869e-01 4.6754699945449829e-01 - 5.3747731447219849e-01 - <_> - - 1 0 2032 2.3614279925823212e-02 -1 -2 2033 - -5.6427798699587584e-04 - - 6.5864789485931396e-01 3.8532960414886475e-01 - 5.1960402727127075e-01 - <_> - - 1 0 2034 6.6903959959745407e-03 -1 -2 2035 - -4.8789530992507935e-03 - - 6.0042357444763184e-01 3.2932278513908386e-01 - 5.2452367544174194e-01 - <_> - - 0 1 2036 -6.8537332117557526e-03 -1 -2 2037 - 9.9893810693174601e-04 - - 2.5659140944480896e-01 4.6154940128326416e-01 - 5.9424322843551636e-01 - <_> - - 0 1 2038 -1.3354700058698654e-04 -1 -2 2039 - 1.0165109997615218e-03 - - 5.4873758554458618e-01 4.5783591270446777e-01 - 5.4269278049468994e-01 - <_> - - 1 0 2040 9.1216771397739649e-04 -1 -2 2041 - 1.0080259526148438e-03 - - 3.9394611120223999e-01 4.0497899055480957e-01 - 5.5207037925720215e-01 - <_> - - 1 0 2042 -1.3102490629535168e-04 -1 -2 2043 - 5.5228749988600612e-04 - - 4.8790889978408813e-01 4.8449438810348511e-01 - 5.5128258466720581e-01 - <_> - - 1 0 2044 -1.2130969844292849e-04 -1 -2 2045 - -1.5112989785848185e-05 - - 4.3679711222648621e-01 6.4259552955627441e-01 - 4.8818269371986389e-01 - <_> - - 1 0 2046 -4.0125829400494695e-04 -1 -2 2047 - -6.5766851184889674e-04 - - 5.3720992803573608e-01 5.8345532417297363e-01 - 4.8690780997276306e-01 - <_> - - 1 0 2048 6.2220421386882663e-04 -1 -2 2049 - 1.4663359615951777e-03 - - 3.8246369361877441e-01 4.8134881258010864e-01 - 6.9667392969131470e-01 - <_> - - 0 1 2050 
-4.9547709524631500e-02 -1 -2 2051 - 1.3017569435760379e-03 - - 5.3927659988403320e-02 5.3374558687210083e-01 - 4.1607481241226196e-01 - <_> - - 0 1 2052 -4.4914530590176582e-03 -1 -2 2053 - 1.6592369647696614e-03 - - 5.9974372386932373e-01 3.7271851301193237e-01 - 5.1156342029571533e-01 - <_> - - 0 1 2054 6.4695458859205246e-03 -1 -2 2055 - 4.9810269847512245e-03 - - 5.2520352602005005e-01 5.2567178010940552e-01 - 3.9344060420989990e-01 - <_> - - 0 1 2056 -3.8536980748176575e-02 -1 -2 2057 - -2.8275650739669800e-01 - - 2.0619249343872070e-01 6.1883211135864258e-02 - 4.9250578880310059e-01 - <_> - - 0 1 2058 -9.0301828458905220e-03 -1 -2 2059 - -4.3866269290447235e-02 - - 3.1575900316238403e-01 2.0336820185184479e-01 - 5.1647698879241943e-01 - <_> - - 0 1 2060 -4.5701069757342339e-03 -1 -2 2061 - -2.3362410720437765e-03 - - 6.6111832857131958e-01 2.8077891469001770e-01 - 4.9628761410713196e-01 - <_> - - 0 1 2062 5.3960331715643406e-03 -1 -2 2063 - -2.6297608856111765e-03 - - 5.1463878154754639e-01 6.2844878435134888e-01 - 4.9555888772010803e-01 - <_> - - 0 1 2064 -3.8577478844672441e-03 -1 -2 2065 - 1.3963800156489015e-03 - - 1.4867480099201202e-01 4.7013381123542786e-01 - 6.3209718465805054e-01 - <_> - - 1 0 2066 -8.8699469342827797e-03 -1 -2 2067 - -7.0626288652420044e-04 - - 5.2868181467056274e-01 4.6483701467514038e-01 - 5.3332102298736572e-01 - <_> - - 0 1 2068 4.2645810171961784e-03 -1 -2 2069 - 6.1572100967168808e-02 - - 5.0848782062530518e-01 3.6296251416206360e-01 - 8.7571567296981812e-01 - <_> - - 1 0 2070 -4.5381980016827583e-03 -1 -2 2071 - -4.0877899155020714e-03 - - 4.8566961288452148e-01 4.5841160416603088e-01 - 5.4202407598495483e-01 - <_> - - 1 0 2072 6.4308601431548595e-03 -1 -2 2073 - 7.0455260574817657e-03 - - 2.7073028683662415e-01 5.0574868917465210e-01 - 7.0265239477157593e-01 - <_> - - 1 0 2074 -2.3246440105140209e-03 -1 -2 2075 - 6.0276601288933307e-05 - - 4.8272788524627686e-01 4.2472490668296814e-01 - 5.5087631940841675e-01 - <_> - - 1 0 
2076 1.8084559589624405e-02 -1 -2 2077 - 8.4693520329892635e-04 - - 8.1048011779785156e-01 5.1546192169189453e-01 - 3.5143798589706421e-01 - <_> - - 1 0 2078 -2.6931039988994598e-02 -1 -2 2079 - -4.2346641421318054e-03 - - 4.8868888616561890e-01 4.6223780512809753e-01 - 5.3824782371520996e-01 - <_> - - 1 0 2080 2.6947110891342163e-02 -1 -2 2081 - 4.6446882188320160e-03 - - 6.3665962219238281e-01 5.3685069084167480e-01 - 3.7654298543930054e-01 - <_> - - 0 1 2082 -6.9577661342918873e-03 -1 -2 2083 - 8.7609712500125170e-04 - - 4.2346870899200439e-01 4.6724060177803040e-01 - 5.3506839275360107e-01 - <_> - - 1 0 2084 1.6103329835459590e-03 -1 -2 2085 - -1.2848590267822146e-03 - - 5.7327628135681152e-01 5.4817992448806763e-01 - 3.7845930457115173e-01 - <_> - - 0 1 2086 1.0243539698421955e-02 -1 -2 2087 - 2.6889349101111293e-04 - - 5.1559072732925415e-01 5.3531897068023682e-01 - 4.3871539831161499e-01 - <_> - - 0 1 2088 3.7903659977018833e-03 -1 -2 2089 - -2.9369680210947990e-02 - - 5.0320029258728027e-01 5.8735388517379761e-01 - 2.2154450416564941e-01 - <_> - - 1 0 2090 6.0743088833987713e-03 -1 -2 2091 - -1.2710720300674438e-02 - - 5.4170298576354980e-01 6.0565119981765747e-01 - 4.9851819872856140e-01 - <_> - - 0 1 2092 -5.9445449151098728e-03 -1 -2 2093 - -2.8927479870617390e-03 - - 3.3520698547363281e-01 6.9292408227920532e-01 - 4.7782200574874878e-01 - - <_> - - <_> - 2 7 16 4 -1. - <_> - 2 9 16 2 2. - <_> - - <_> - 8 4 3 14 -1. - <_> - 8 11 3 7 2. - <_> - - <_> - 13 6 1 6 -1. - <_> - 13 9 1 3 2. - <_> - - <_> - 4 2 12 8 -1. - <_> - 8 2 4 8 3. - <_> - - <_> - 6 3 1 9 -1. - <_> - 6 6 1 3 3. - <_> - - <_> - 3 7 14 9 -1. - <_> - 3 10 14 3 3. - <_> - - <_> - 4 7 4 4 -1. - <_> - 4 9 4 2 2. - <_> - - <_> - 9 4 2 16 -1. - <_> - 9 12 2 8 2. - <_> - - <_> - 1 1 18 5 -1. - <_> - 7 1 6 5 3. - <_> - - <_> - 4 5 13 8 -1. - <_> - 4 9 13 4 2. - <_> - - <_> - 1 7 16 9 -1. - <_> - 1 10 16 3 3. - <_> - - <_> - 2 0 15 4 -1. - <_> - 2 2 15 2 2. - <_> - - <_> - 7 5 6 4 -1. 
- <_> - 9 5 2 4 3. - <_> - - <_> - 6 3 8 9 -1. - <_> - 6 6 8 3 3. - <_> - - <_> - 8 12 3 8 -1. - <_> - 8 16 3 4 2. - <_> - - <_> - 3 16 2 2 -1. - <_> - 3 17 2 1 2. - <_> - - <_> - 14 1 6 12 -1. - <_> - 14 1 3 12 2. - <_> - - <_> - 4 4 12 6 -1. - <_> - 8 4 4 6 3. - <_> - - <_> - 0 2 6 15 -1. - <_> - 3 2 3 15 2. - <_> - - <_> - 5 4 9 6 -1. - <_> - 5 6 9 2 3. - <_> - - <_> - 13 11 6 3 -1. - <_> - 13 12 6 1 3. - <_> - - <_> - 12 12 6 4 -1. - <_> - 12 14 6 2 2. - <_> - - <_> - 1 11 6 3 -1. - <_> - 1 12 6 1 3. - <_> - - <_> - 2 5 5 8 -1. - <_> - 2 9 5 4 2. - <_> - - <_> - 5 4 10 4 -1. - <_> - 5 6 10 2 2. - <_> - - <_> - 2 4 16 12 -1. - <_> - 2 8 16 4 3. - <_> - - <_> - 4 5 12 6 -1. - <_> - 8 5 4 6 3. - <_> - - <_> - 13 7 2 9 -1. - <_> - 13 10 2 3 3. - <_> - - <_> - 5 7 2 9 -1. - <_> - 5 10 2 3 3. - <_> - - <_> - 7 1 6 8 -1. - <_> - 9 1 2 8 3. - <_> - - <_> - 12 0 4 12 -1. - <_> - 14 0 2 6 2. - <_> - 12 6 2 6 2. - <_> - - <_> - 5 8 10 2 -1. - <_> - 5 9 10 1 2. - <_> - - <_> - 5 1 6 4 -1. - <_> - 7 1 2 4 3. - <_> - - <_> - 0 3 9 12 -1. - <_> - 3 3 3 12 3. - <_> - - <_> - 9 8 3 12 -1. - <_> - 9 12 3 4 3. - <_> - - <_> - 0 5 20 15 -1. - <_> - 0 10 20 5 3. - <_> - - <_> - 2 2 6 8 -1. - <_> - 2 2 3 4 2. - <_> - 5 6 3 4 2. - <_> - - <_> - 2 1 6 2 -1. - <_> - 2 2 6 1 2. - <_> - - <_> - 10 15 6 4 -1. - <_> - 13 15 3 2 2. - <_> - 10 17 3 2 2. - <_> - - <_> - 12 14 2 6 -1. - <_> - 12 16 2 2 3. - <_> - - <_> - 5 15 4 4 -1. - <_> - 5 15 2 2 2. - <_> - 7 17 2 2 2. - <_> - - <_> - 7 18 1 2 -1. - <_> - 7 19 1 1 2. - <_> - - <_> - 4 5 12 10 -1. - <_> - 10 5 6 5 2. - <_> - 4 10 6 5 2. - <_> - - <_> - 7 4 8 12 -1. - <_> - 11 4 4 6 2. - <_> - 7 10 4 6 2. - <_> - - <_> - 9 11 2 3 -1. - <_> - 9 12 2 1 3. - <_> - - <_> - 3 3 12 12 -1. - <_> - 3 3 6 6 2. - <_> - 9 9 6 6 2. - <_> - - <_> - 15 11 5 3 -1. - <_> - 15 12 5 1 3. - <_> - - <_> - 10 18 3 2 -1. - <_> - 11 18 1 2 3. - <_> - - <_> - 0 11 5 3 -1. - <_> - 0 12 5 1 3. - <_> - - <_> - 7 18 3 2 -1. - <_> - 8 18 1 2 3. 
- <_> - - <_> - 2 8 16 2 -1. - <_> - 2 9 16 1 2. - <_> - - <_> - 9 6 5 12 -1. - <_> - 9 12 5 6 2. - <_> - - <_> - 6 3 8 6 -1. - <_> - 6 6 8 3 2. - <_> - - <_> - 4 7 12 2 -1. - <_> - 8 7 4 2 3. - <_> - - <_> - 10 9 6 8 -1. - <_> - 10 13 6 4 2. - <_> - - <_> - 12 5 3 10 -1. - <_> - 12 10 3 5 2. - <_> - - <_> - 4 6 3 9 -1. - <_> - 4 9 3 3 3. - <_> - - <_> - 7 4 6 4 -1. - <_> - 9 4 2 4 3. - <_> - - <_> - 12 3 8 3 -1. - <_> - 12 3 4 3 2. - <_> - - <_> - 15 0 3 6 -1. - <_> - 15 3 3 3 2. - <_> - - <_> - 2 12 10 8 -1. - <_> - 2 12 5 4 2. - <_> - 7 16 5 4 2. - <_> - - <_> - 5 5 6 8 -1. - <_> - 5 9 6 4 2. - <_> - - <_> - 12 3 8 3 -1. - <_> - 12 3 4 3 2. - <_> - - <_> - 15 0 3 6 -1. - <_> - 15 3 3 3 2. - <_> - - <_> - 0 3 8 3 -1. - <_> - 4 3 4 3 2. - <_> - - <_> - 2 1 4 4 -1. - <_> - 2 3 4 2 2. - <_> - - <_> - 10 2 3 2 -1. - <_> - 11 2 1 2 3. - <_> - - <_> - 10 3 3 1 -1. - <_> - 11 3 1 1 3. - <_> - - <_> - 7 15 3 4 -1. - <_> - 7 17 3 2 2. - <_> - - <_> - 4 13 3 6 -1. - <_> - 4 15 3 2 3. - <_> - - <_> - 10 5 1 14 -1. - <_> - 10 12 1 7 2. - <_> - - <_> - 5 4 10 6 -1. - <_> - 5 6 10 2 3. - <_> - - <_> - 5 0 6 3 -1. - <_> - 7 0 2 3 3. - <_> - - <_> - 6 0 3 5 -1. - <_> - 7 0 1 5 3. - <_> - - <_> - 7 15 6 5 -1. - <_> - 9 15 2 5 3. - <_> - - <_> - 9 10 2 6 -1. - <_> - 9 12 2 2 3. - <_> - - <_> - 8 17 3 2 -1. - <_> - 9 17 1 2 3. - <_> - - <_> - 1 12 7 6 -1. - <_> - 1 14 7 2 3. - <_> - - <_> - 9 6 3 7 -1. - <_> - 10 6 1 7 3. - <_> - - <_> - 16 3 4 9 -1. - <_> - 16 6 4 3 3. - <_> - - <_> - 8 6 3 7 -1. - <_> - 9 6 1 7 3. - <_> - - <_> - 0 5 18 8 -1. - <_> - 0 5 9 4 2. - <_> - 9 9 9 4 2. - <_> - - <_> - 13 5 2 10 -1. - <_> - 13 10 2 5 2. - <_> - - <_> - 12 10 2 6 -1. - <_> - 12 13 2 3 2. - <_> - - <_> - 7 0 3 5 -1. - <_> - 8 0 1 5 3. - <_> - - <_> - 6 5 8 6 -1. - <_> - 6 7 8 2 3. - <_> - - <_> - 10 3 6 14 -1. - <_> - 13 3 3 7 2. - <_> - 10 10 3 7 2. - <_> - - <_> - 13 5 1 8 -1. - <_> - 13 9 1 4 2. - <_> - - <_> - 4 3 6 14 -1. - <_> - 4 3 3 7 2. - <_> - 7 10 3 7 2. 
- <_> - - <_> - 6 5 1 8 -1. - <_> - 6 9 1 4 2. - <_> - - <_> - 8 1 1 6 -1. - <_> - 8 3 1 2 3. - <_> - - <_> - 2 0 15 2 -1. - <_> - 2 1 15 1 2. - <_> - - <_> - 0 7 20 6 -1. - <_> - 0 9 20 2 3. - <_> - - <_> - 10 10 6 8 -1. - <_> - 10 14 6 4 2. - <_> - - <_> - 7 1 3 2 -1. - <_> - 8 1 1 2 3. - <_> - - <_> - 8 1 2 2 -1. - <_> - 9 1 1 2 2. - <_> - - <_> - 4 3 12 9 -1. - <_> - 4 6 12 3 3. - <_> - - <_> - 6 5 9 5 -1. - <_> - 9 5 3 5 3. - <_> - - <_> - 5 5 9 5 -1. - <_> - 8 5 3 5 3. - <_> - - <_> - 4 6 6 12 -1. - <_> - 4 10 6 4 3. - <_> - - <_> - 13 0 6 18 -1. - <_> - 13 0 3 18 2. - <_> - - <_> - 10 8 1 12 -1. - <_> - 10 12 1 4 3. - <_> - - <_> - 3 2 6 10 -1. - <_> - 3 2 3 5 2. - <_> - 6 7 3 5 2. - <_> - - <_> - 1 2 4 6 -1. - <_> - 3 2 2 6 2. - <_> - - <_> - 9 18 3 2 -1. - <_> - 10 18 1 2 3. - <_> - - <_> - 10 18 3 2 -1. - <_> - 11 18 1 2 3. - <_> - - <_> - 2 8 2 6 -1. - <_> - 2 10 2 2 3. - <_> - - <_> - 7 5 6 6 -1. - <_> - 7 7 6 2 3. - <_> - - <_> - 7 19 6 1 -1. - <_> - 9 19 2 1 3. - <_> - - <_> - 10 18 3 2 -1. - <_> - 11 18 1 2 3. - <_> - - <_> - 8 3 3 1 -1. - <_> - 9 3 1 1 3. - <_> - - <_> - 2 2 16 2 -1. - <_> - 2 2 8 1 2. - <_> - 10 3 8 1 2. - <_> - - <_> - 8 11 5 3 -1. - <_> - 8 12 5 1 3. - <_> - - <_> - 7 13 6 3 -1. - <_> - 7 14 6 1 3. - <_> - - <_> - 0 1 6 15 -1. - <_> - 2 1 2 15 3. - <_> - - <_> - 2 12 2 3 -1. - <_> - 2 13 2 1 3. - <_> - - <_> - 16 13 1 3 -1. - <_> - 16 14 1 1 3. - <_> - - <_> - 13 7 6 4 -1. - <_> - 16 7 3 2 2. - <_> - 13 9 3 2 2. - <_> - - <_> - 7 13 3 6 -1. - <_> - 7 16 3 3 2. - <_> - - <_> - 7 5 1 14 -1. - <_> - 7 12 1 7 2. - <_> - - <_> - 15 12 2 3 -1. - <_> - 15 13 2 1 3. - <_> - - <_> - 10 5 3 14 -1. - <_> - 10 12 3 7 2. - <_> - - <_> - 6 10 2 6 -1. - <_> - 6 13 2 3 2. - <_> - - <_> - 6 5 1 8 -1. - <_> - 6 9 1 4 2. - <_> - - <_> - 13 11 2 1 -1. - <_> - 13 11 1 1 2. - <_> - - <_> - 12 1 6 10 -1. - <_> - 15 1 3 5 2. - <_> - 12 6 3 5 2. - <_> - - <_> - 3 12 2 3 -1. - <_> - 3 13 2 1 3. - <_> - - <_> - 9 18 2 1 -1. - <_> - 10 18 1 1 2. 
- <_> - - <_> - 1 0 17 9 -1. - <_> - 1 3 17 3 3. - <_> - - <_> - 1 2 8 8 -1. - <_> - 1 2 4 4 2. - <_> - 5 6 4 4 2. - <_> - - <_> - 9 5 6 4 -1. - <_> - 9 5 3 4 2. - <_> - - <_> - 10 9 7 10 -1. - <_> - 10 14 7 5 2. - <_> - - <_> - 5 5 6 4 -1. - <_> - 8 5 3 4 2. - <_> - - <_> - 0 7 20 6 -1. - <_> - 0 9 20 2 3. - <_> - - <_> - 6 5 9 10 -1. - <_> - 6 10 9 5 2. - <_> - - <_> - 8 4 4 12 -1. - <_> - 8 10 4 6 2. - <_> - - <_> - 6 6 8 3 -1. - <_> - 6 7 8 1 3. - <_> - - <_> - 3 13 10 6 -1. - <_> - 3 13 5 3 2. - <_> - 8 16 5 3 2. - <_> - - <_> - 15 1 4 11 -1. - <_> - 15 1 2 11 2. - <_> - - <_> - 5 7 10 10 -1. - <_> - 10 7 5 5 2. - <_> - 5 12 5 5 2. - <_> - - <_> - 1 1 4 11 -1. - <_> - 3 1 2 11 2. - <_> - - <_> - 1 5 8 12 -1. - <_> - 1 11 8 6 2. - <_> - - <_> - 13 7 6 4 -1. - <_> - 16 7 3 2 2. - <_> - 13 9 3 2 2. - <_> - - <_> - 11 10 7 4 -1. - <_> - 11 12 7 2 2. - <_> - - <_> - 0 4 20 12 -1. - <_> - 0 4 10 6 2. - <_> - 10 10 10 6 2. - <_> - - <_> - 1 5 6 15 -1. - <_> - 1 10 6 5 3. - <_> - - <_> - 11 10 3 8 -1. - <_> - 11 14 3 4 2. - <_> - - <_> - 11 12 7 6 -1. - <_> - 11 14 7 2 3. - <_> - - <_> - 9 11 2 3 -1. - <_> - 9 12 2 1 3. - <_> - - <_> - 8 13 4 3 -1. - <_> - 8 14 4 1 3. - <_> - - <_> - 3 14 14 4 -1. - <_> - 10 14 7 2 2. - <_> - 3 16 7 2 2. - <_> - - <_> - 18 7 2 4 -1. - <_> - 18 9 2 2 2. - <_> - - <_> - 3 12 6 6 -1. - <_> - 3 14 6 2 3. - <_> - - <_> - 0 4 3 6 -1. - <_> - 0 6 3 2 3. - <_> - - <_> - 9 14 3 3 -1. - <_> - 9 15 3 1 3. - <_> - - <_> - 10 7 10 4 -1. - <_> - 15 7 5 2 2. - <_> - 10 9 5 2 2. - <_> - - <_> - 7 2 6 8 -1. - <_> - 7 6 6 4 2. - <_> - - <_> - 6 3 6 2 -1. - <_> - 8 3 2 2 3. - <_> - - <_> - 10 6 3 5 -1. - <_> - 11 6 1 5 3. - <_> - - <_> - 9 0 6 19 -1. - <_> - 11 0 2 19 3. - <_> - - <_> - 3 12 1 2 -1. - <_> - 3 13 1 1 2. - <_> - - <_> - 7 14 5 3 -1. - <_> - 7 15 5 1 3. - <_> - - <_> - 2 1 18 4 -1. - <_> - 11 1 9 2 2. - <_> - 2 3 9 2 2. - <_> - - <_> - 10 5 3 8 -1. - <_> - 11 5 1 8 3. - <_> - - <_> - 0 1 18 4 -1. - <_> - 0 1 9 2 2. - <_> - 9 3 9 2 2. 
- <_> - - <_> - 7 5 3 8 -1. - <_> - 8 5 1 8 3. - <_> - - <_> - 9 5 2 6 -1. - <_> - 9 7 2 2 3. - <_> - - <_> - 10 8 5 2 -1. - <_> - 10 9 5 1 2. - <_> - - <_> - 2 10 15 1 -1. - <_> - 7 10 5 1 3. - <_> - - <_> - 2 7 2 6 -1. - <_> - 2 9 2 2 3. - <_> - - <_> - 9 14 3 3 -1. - <_> - 9 15 3 1 3. - <_> - - <_> - 9 7 4 10 -1. - <_> - 9 12 4 5 2. - <_> - - <_> - 0 8 8 2 -1. - <_> - 0 8 4 1 2. - <_> - 4 9 4 1 2. - <_> - - <_> - 5 9 10 8 -1. - <_> - 5 9 5 4 2. - <_> - 10 13 5 4 2. - <_> - - <_> - 9 7 2 4 -1. - <_> - 9 7 1 4 2. - <_> - - <_> - 9 6 3 4 -1. - <_> - 10 6 1 4 3. - <_> - - <_> - 8 3 2 1 -1. - <_> - 9 3 1 1 2. - <_> - - <_> - 8 6 3 4 -1. - <_> - 9 6 1 4 3. - <_> - - <_> - 12 0 4 14 -1. - <_> - 14 0 2 7 2. - <_> - 12 7 2 7 2. - <_> - - <_> - 12 5 6 9 -1. - <_> - 12 5 3 9 2. - <_> - - <_> - 0 2 6 16 -1. - <_> - 3 2 3 16 2. - <_> - - <_> - 1 12 4 2 -1. - <_> - 1 13 4 1 2. - <_> - - <_> - 7 7 6 1 -1. - <_> - 9 7 2 1 3. - <_> - - <_> - 8 3 4 9 -1. - <_> - 8 6 4 3 3. - <_> - - <_> - 12 10 4 6 -1. - <_> - 12 13 4 3 2. - <_> - - <_> - 8 1 8 16 -1. - <_> - 12 1 4 8 2. - <_> - 8 9 4 8 2. - <_> - - <_> - 4 6 3 6 -1. - <_> - 4 9 3 3 2. - <_> - - <_> - 1 3 6 2 -1. - <_> - 4 3 3 2 2. - <_> - - <_> - 9 8 3 12 -1. - <_> - 9 12 3 4 3. - <_> - - <_> - 10 9 7 10 -1. - <_> - 10 14 7 5 2. - <_> - - <_> - 3 9 7 10 -1. - <_> - 3 14 7 5 2. - <_> - - <_> - 7 5 1 14 -1. - <_> - 7 12 1 7 2. - <_> - - <_> - 13 14 1 6 -1. - <_> - 13 16 1 2 3. - <_> - - <_> - 14 12 3 6 -1. - <_> - 14 14 3 2 3. - <_> - - <_> - 6 14 1 6 -1. - <_> - 6 16 1 2 3. - <_> - - <_> - 3 12 3 6 -1. - <_> - 3 14 3 2 3. - <_> - - <_> - 8 13 5 3 -1. - <_> - 8 14 5 1 3. - <_> - - <_> - 9 14 2 3 -1. - <_> - 9 15 2 1 3. - <_> - - <_> - 5 1 10 8 -1. - <_> - 5 1 5 4 2. - <_> - 10 5 5 4 2. - <_> - - <_> - 6 4 5 4 -1. - <_> - 6 6 5 2 2. - <_> - - <_> - 1 10 18 1 -1. - <_> - 7 10 6 1 3. - <_> - - <_> - 11 10 4 3 -1. - <_> - 11 10 2 3 2. - <_> - - <_> - 5 11 6 1 -1. - <_> - 7 11 2 1 3. - <_> - - <_> - 3 13 2 3 -1. - <_> - 3 14 2 1 3. 
- <_> - - <_> - 12 12 3 4 -1. - <_> - 12 14 3 2 2. - <_> - - <_> - 11 10 5 6 -1. - <_> - 11 12 5 2 3. - <_> - - <_> - 0 8 16 2 -1. - <_> - 0 9 16 1 2. - <_> - - <_> - 2 1 3 4 -1. - <_> - 2 3 3 2 2. - <_> - - <_> - 9 7 3 3 -1. - <_> - 10 7 1 3 3. - <_> - - <_> - 5 6 12 6 -1. - <_> - 9 6 4 6 3. - <_> - - <_> - 8 7 3 3 -1. - <_> - 9 7 1 3 3. - <_> - - <_> - 3 6 12 6 -1. - <_> - 7 6 4 6 3. - <_> - - <_> - 10 5 6 5 -1. - <_> - 12 5 2 5 3. - <_> - - <_> - 5 7 10 2 -1. - <_> - 5 7 5 2 2. - <_> - - <_> - 4 5 6 5 -1. - <_> - 6 5 2 5 3. - <_> - - <_> - 9 3 2 10 -1. - <_> - 9 8 2 5 2. - <_> - - <_> - 3 1 16 2 -1. - <_> - 11 1 8 1 2. - <_> - 3 2 8 1 2. - <_> - - <_> - 9 9 3 2 -1. - <_> - 9 10 3 1 2. - <_> - - <_> - 1 1 16 2 -1. - <_> - 1 1 8 1 2. - <_> - 9 2 8 1 2. - <_> - - <_> - 8 14 1 3 -1. - <_> - 8 15 1 1 3. - <_> - - <_> - 4 5 12 10 -1. - <_> - 10 5 6 5 2. - <_> - 4 10 6 5 2. - <_> - - <_> - 7 13 6 6 -1. - <_> - 10 13 3 3 2. - <_> - 7 16 3 3 2. - <_> - - <_> - 8 9 3 2 -1. - <_> - 8 10 3 1 2. - <_> - - <_> - 7 2 6 4 -1. - <_> - 9 2 2 4 3. - <_> - - <_> - 6 6 9 3 -1. - <_> - 6 7 9 1 3. - <_> - - <_> - 10 7 6 1 -1. - <_> - 12 7 2 1 3. - <_> - - <_> - 0 0 18 6 -1. - <_> - 6 0 6 6 3. - <_> - - <_> - 6 10 2 6 -1. - <_> - 6 13 2 3 2. - <_> - - <_> - 11 12 3 6 -1. - <_> - 11 15 3 3 2. - <_> - - <_> - 4 4 12 12 -1. - <_> - 10 4 6 6 2. - <_> - 4 10 6 6 2. - <_> - - <_> - 1 2 3 6 -1. - <_> - 2 2 1 6 3. - <_> - - <_> - 1 5 3 7 -1. - <_> - 2 5 1 7 3. - <_> - - <_> - 4 13 12 4 -1. - <_> - 10 13 6 2 2. - <_> - 4 15 6 2 2. - <_> - - <_> - 3 3 17 12 -1. - <_> - 3 9 17 6 2. - <_> - - <_> - 3 3 14 12 -1. - <_> - 3 3 7 6 2. - <_> - 10 9 7 6 2. - <_> - - <_> - 2 11 16 9 -1. - <_> - 2 14 16 3 3. - <_> - - <_> - 9 14 3 6 -1. - <_> - 9 17 3 3 2. - <_> - - <_> - 8 14 4 6 -1. - <_> - 10 14 2 3 2. - <_> - 8 17 2 3 2. - <_> - - <_> - 6 2 6 1 -1. - <_> - 8 2 2 1 3. - <_> - - <_> - 9 5 2 5 -1. - <_> - 10 5 1 5 2. - <_> - - <_> - 9 8 3 5 -1. - <_> - 10 8 1 5 3. - <_> - - <_> - 9 12 6 1 -1. 
- <_> - 9 12 3 1 2. - <_> - - <_> - 8 8 3 5 -1. - <_> - 9 8 1 5 3. - <_> - - <_> - 6 10 4 3 -1. - <_> - 8 10 2 3 2. - <_> - - <_> - 0 4 20 6 -1. - <_> - 0 6 20 2 3. - <_> - - <_> - 1 3 8 6 -1. - <_> - 1 3 4 3 2. - <_> - 5 6 4 3 2. - <_> - - <_> - 7 15 6 4 -1. - <_> - 7 17 6 2 2. - <_> - - <_> - 3 10 14 10 -1. - <_> - 3 15 14 5 2. - <_> - - <_> - 6 4 4 4 -1. - <_> - 8 4 2 4 2. - <_> - - <_> - 0 4 20 10 -1. - <_> - 0 9 20 5 2. - <_> - - <_> - 9 4 2 14 -1. - <_> - 9 11 2 7 2. - <_> - - <_> - 2 0 16 4 -1. - <_> - 2 2 16 2 2. - <_> - - <_> - 4 12 6 8 -1. - <_> - 4 12 3 4 2. - <_> - 7 16 3 4 2. - <_> - - <_> - 0 5 6 7 -1. - <_> - 3 5 3 7 2. - <_> - - <_> - 10 7 10 4 -1. - <_> - 15 7 5 2 2. - <_> - 10 9 5 2 2. - <_> - - <_> - 5 8 12 1 -1. - <_> - 9 8 4 1 3. - <_> - - <_> - 9 9 2 2 -1. - <_> - 9 10 2 1 2. - <_> - - <_> - 9 4 2 4 -1. - <_> - 9 6 2 2 2. - <_> - - <_> - 9 6 3 6 -1. - <_> - 10 6 1 6 3. - <_> - - <_> - 12 7 6 4 -1. - <_> - 15 7 3 2 2. - <_> - 12 9 3 2 2. - <_> - - <_> - 8 6 3 6 -1. - <_> - 9 6 1 6 3. - <_> - - <_> - 1 6 18 6 -1. - <_> - 1 6 9 3 2. - <_> - 10 9 9 3 2. - <_> - - <_> - 9 1 3 3 -1. - <_> - 10 1 1 3 3. - <_> - - <_> - 10 8 5 2 -1. - <_> - 10 9 5 1 2. - <_> - - <_> - 8 1 3 3 -1. - <_> - 9 1 1 3 3. - <_> - - <_> - 5 8 5 2 -1. - <_> - 5 9 5 1 2. - <_> - - <_> - 8 6 8 8 -1. - <_> - 12 6 4 4 2. - <_> - 8 10 4 4 2. - <_> - - <_> - 5 7 10 2 -1. - <_> - 5 7 5 2 2. - <_> - - <_> - 4 5 12 10 -1. - <_> - 4 5 6 5 2. - <_> - 10 10 6 5 2. - <_> - - <_> - 5 5 2 3 -1. - <_> - 5 6 2 1 3. - <_> - - <_> - 7 14 6 3 -1. - <_> - 7 15 6 1 3. - <_> - - <_> - 9 14 3 3 -1. - <_> - 9 15 3 1 3. - <_> - - <_> - 8 14 3 3 -1. - <_> - 8 15 3 1 3. - <_> - - <_> - 1 10 8 9 -1. - <_> - 1 13 8 3 3. - <_> - - <_> - 9 7 2 3 -1. - <_> - 9 8 2 1 3. - <_> - - <_> - 12 3 3 3 -1. - <_> - 13 3 1 3 3. - <_> - - <_> - 5 3 3 3 -1. - <_> - 6 3 1 3 3. - <_> - - <_> - 5 6 2 12 -1. - <_> - 5 10 2 4 3. - <_> - - <_> - 1 11 18 4 -1. - <_> - 10 11 9 2 2. - <_> - 1 13 9 2 2. - <_> - - <_> - 7 12 6 2 -1. 
- <_> - 7 13 6 1 2. - <_> - - <_> - 6 0 3 6 -1. - <_> - 7 0 1 6 3. - <_> - - <_> - 0 11 18 4 -1. - <_> - 0 11 9 2 2. - <_> - 9 13 9 2 2. - <_> - - <_> - 7 12 6 2 -1. - <_> - 7 13 6 1 2. - <_> - - <_> - 9 12 3 3 -1. - <_> - 9 13 3 1 3. - <_> - - <_> - 9 12 2 3 -1. - <_> - 9 13 2 1 3. - <_> - - <_> - 8 11 4 3 -1. - <_> - 8 12 4 1 3. - <_> - - <_> - 13 3 4 2 -1. - <_> - 13 4 4 1 2. - <_> - - <_> - 4 0 12 2 -1. - <_> - 4 1 12 1 2. - <_> - - <_> - 6 9 8 8 -1. - <_> - 6 9 4 4 2. - <_> - 10 13 4 4 2. - <_> - - <_> - 1 11 6 2 -1. - <_> - 1 12 6 1 2. - <_> - - <_> - 2 5 18 8 -1. - <_> - 11 5 9 4 2. - <_> - 2 9 9 4 2. - <_> - - <_> - 7 1 6 10 -1. - <_> - 7 6 6 5 2. - <_> - - <_> - 0 3 3 6 -1. - <_> - 0 5 3 2 3. - <_> - - <_> - 4 5 4 3 -1. - <_> - 4 6 4 1 3. - <_> - - <_> - 19 3 1 6 -1. - <_> - 19 5 1 2 3. - <_> - - <_> - 6 15 8 2 -1. - <_> - 6 16 8 1 2. - <_> - - <_> - 0 3 1 6 -1. - <_> - 0 5 1 2 3. - <_> - - <_> - 5 5 3 3 -1. - <_> - 5 6 3 1 3. - <_> - - <_> - 8 8 4 3 -1. - <_> - 8 9 4 1 3. - <_> - - <_> - 10 6 6 3 -1. - <_> - 12 6 2 3 3. - <_> - - <_> - 8 13 2 6 -1. - <_> - 8 16 2 3 2. - <_> - - <_> - 9 11 2 8 -1. - <_> - 9 15 2 4 2. - <_> - - <_> - 10 6 6 3 -1. - <_> - 12 6 2 3 3. - <_> - - <_> - 5 15 15 5 -1. - <_> - 10 15 5 5 3. - <_> - - <_> - 2 14 2 2 -1. - <_> - 2 15 2 1 2. - <_> - - <_> - 4 7 6 2 -1. - <_> - 6 7 2 2 3. - <_> - - <_> - 8 3 6 1 -1. - <_> - 10 3 2 1 3. - <_> - - <_> - 1 0 18 12 -1. - <_> - 7 0 6 12 3. - <_> - - <_> - 0 14 8 6 -1. - <_> - 4 14 4 6 2. - <_> - - <_> - 0 15 15 5 -1. - <_> - 5 15 5 5 3. - <_> - - <_> - 8 3 6 1 -1. - <_> - 10 3 2 1 3. - <_> - - <_> - 11 11 3 6 -1. - <_> - 11 14 3 3 2. - <_> - - <_> - 6 3 6 1 -1. - <_> - 8 3 2 1 3. - <_> - - <_> - 6 11 3 6 -1. - <_> - 6 14 3 3 2. - <_> - - <_> - 9 6 3 4 -1. - <_> - 10 6 1 4 3. - <_> - - <_> - 12 10 4 7 -1. - <_> - 12 10 2 7 2. - <_> - - <_> - 8 6 3 4 -1. - <_> - 9 6 1 4 3. - <_> - - <_> - 4 6 4 7 -1. - <_> - 6 6 2 7 2. - <_> - - <_> - 10 3 4 12 -1. - <_> - 10 3 2 12 2. 
- <_> - - <_> - 10 8 3 4 -1. - <_> - 11 8 1 4 3. - <_> - - <_> - 1 0 18 14 -1. - <_> - 7 0 6 14 3. - <_> - - <_> - 2 8 6 11 -1. - <_> - 5 8 3 11 2. - <_> - - <_> - 1 4 15 4 -1. - <_> - 1 6 15 2 2. - <_> - - <_> - 5 5 10 8 -1. - <_> - 5 9 10 4 2. - <_> - - <_> - 14 2 6 8 -1. - <_> - 14 2 3 8 2. - <_> - - <_> - 11 6 6 14 -1. - <_> - 14 6 3 7 2. - <_> - 11 13 3 7 2. - <_> - - <_> - 9 5 2 12 -1. - <_> - 9 11 2 6 2. - <_> - - <_> - 3 7 4 6 -1. - <_> - 3 9 4 2 3. - <_> - - <_> - 14 3 6 6 -1. - <_> - 14 3 3 6 2. - <_> - - <_> - 15 2 4 4 -1. - <_> - 15 4 4 2 2. - <_> - - <_> - 0 2 6 7 -1. - <_> - 3 2 3 7 2. - <_> - - <_> - 3 6 6 14 -1. - <_> - 3 6 3 7 2. - <_> - 6 13 3 7 2. - <_> - - <_> - 4 6 16 8 -1. - <_> - 4 10 16 4 2. - <_> - - <_> - 10 12 2 8 -1. - <_> - 10 16 2 4 2. - <_> - - <_> - 7 0 6 20 -1. - <_> - 9 0 2 20 3. - <_> - - <_> - 1 7 16 12 -1. - <_> - 1 7 8 6 2. - <_> - 9 13 8 6 2. - <_> - - <_> - 9 11 3 3 -1. - <_> - 9 12 3 1 3. - <_> - - <_> - 11 9 4 5 -1. - <_> - 11 9 2 5 2. - <_> - - <_> - 3 3 1 2 -1. - <_> - 3 4 1 1 2. - <_> - - <_> - 7 17 5 3 -1. - <_> - 7 18 5 1 3. - <_> - - <_> - 8 12 4 8 -1. - <_> - 10 12 2 4 2. - <_> - 8 16 2 4 2. - <_> - - <_> - 7 4 10 12 -1. - <_> - 12 4 5 6 2. - <_> - 7 10 5 6 2. - <_> - - <_> - 8 14 4 3 -1. - <_> - 8 15 4 1 3. - <_> - - <_> - 5 9 4 5 -1. - <_> - 7 9 2 5 2. - <_> - - <_> - 9 9 8 2 -1. - <_> - 9 9 4 2 2. - <_> - - <_> - 14 15 5 2 -1. - <_> - 14 16 5 1 2. - <_> - - <_> - 9 14 2 3 -1. - <_> - 9 15 2 1 3. - <_> - - <_> - 1 7 8 4 -1. - <_> - 1 7 4 2 2. - <_> - 5 9 4 2 2. - <_> - - <_> - 19 3 1 2 -1. - <_> - 19 4 1 1 2. - <_> - - <_> - 9 12 2 3 -1. - <_> - 9 13 2 1 3. - <_> - - <_> - 3 14 14 4 -1. - <_> - 3 14 7 2 2. - <_> - 10 16 7 2 2. - <_> - - <_> - 5 0 10 2 -1. - <_> - 5 1 10 1 2. - <_> - - <_> - 11 14 4 6 -1. - <_> - 11 16 4 2 3. - <_> - - <_> - 7 14 6 3 -1. - <_> - 7 15 6 1 3. - <_> - - <_> - 7 13 6 6 -1. - <_> - 7 13 3 3 2. - <_> - 10 16 3 3 2. - <_> - - <_> - 0 2 1 6 -1. - <_> - 0 4 1 2 3. - <_> - - <_> - 6 7 8 2 -1. 
- <_> - 6 8 8 1 2. - <_> - - <_> - 9 7 6 1 -1. - <_> - 9 7 3 1 2. - <_> - - <_> - 7 1 6 10 -1. - <_> - 7 6 6 5 2. - <_> - - <_> - 0 2 6 2 -1. - <_> - 0 3 6 1 2. - <_> - - <_> - 11 4 2 4 -1. - <_> - 11 4 1 4 2. - <_> - - <_> - 11 10 3 6 -1. - <_> - 11 13 3 3 2. - <_> - - <_> - 3 9 8 2 -1. - <_> - 7 9 4 2 2. - <_> - - <_> - 0 0 4 6 -1. - <_> - 2 0 2 6 2. - <_> - - <_> - 7 0 6 2 -1. - <_> - 9 0 2 2 3. - <_> - - <_> - 9 15 2 3 -1. - <_> - 9 16 2 1 3. - <_> - - <_> - 3 12 1 2 -1. - <_> - 3 13 1 1 2. - <_> - - <_> - 4 5 11 3 -1. - <_> - 4 6 11 1 3. - <_> - - <_> - 11 4 2 4 -1. - <_> - 11 4 1 4 2. - <_> - - <_> - 8 3 6 3 -1. - <_> - 10 3 2 3 3. - <_> - - <_> - 7 4 2 4 -1. - <_> - 8 4 1 4 2. - <_> - - <_> - 6 3 6 3 -1. - <_> - 8 3 2 3 3. - <_> - - <_> - 11 4 4 3 -1. - <_> - 11 5 4 1 3. - <_> - - <_> - 11 8 2 8 -1. - <_> - 11 12 2 4 2. - <_> - - <_> - 8 7 3 5 -1. - <_> - 9 7 1 5 3. - <_> - - <_> - 9 7 2 5 -1. - <_> - 10 7 1 5 2. - <_> - - <_> - 14 11 1 6 -1. - <_> - 14 13 1 2 3. - <_> - - <_> - 8 8 4 3 -1. - <_> - 8 9 4 1 3. - <_> - - <_> - 0 3 2 2 -1. - <_> - 0 4 2 1 2. - <_> - - <_> - 4 14 5 6 -1. - <_> - 4 16 5 2 3. - <_> - - <_> - 11 4 4 3 -1. - <_> - 11 5 4 1 3. - <_> - - <_> - 12 4 3 3 -1. - <_> - 12 5 3 1 3. - <_> - - <_> - 5 4 4 3 -1. - <_> - 5 5 4 1 3. - <_> - - <_> - 5 15 4 2 -1. - <_> - 7 15 2 2 2. - <_> - - <_> - 15 1 5 9 -1. - <_> - 15 4 5 3 3. - <_> - - <_> - 9 10 3 3 -1. - <_> - 9 11 3 1 3. - <_> - - <_> - 1 6 2 6 -1. - <_> - 1 8 2 2 3. - <_> - - <_> - 2 4 8 15 -1. - <_> - 2 9 8 5 3. - <_> - - <_> - 9 12 3 2 -1. - <_> - 9 13 3 1 2. - <_> - - <_> - 9 12 3 3 -1. - <_> - 9 13 3 1 3. - <_> - - <_> - 7 6 3 5 -1. - <_> - 8 6 1 5 3. - <_> - - <_> - 5 3 6 2 -1. - <_> - 7 3 2 2 3. - <_> - - <_> - 6 1 8 10 -1. - <_> - 10 1 4 5 2. - <_> - 6 6 4 5 2. - <_> - - <_> - 0 0 20 10 -1. - <_> - 10 0 10 5 2. - <_> - 0 5 10 5 2. - <_> - - <_> - 6 3 3 1 -1. - <_> - 7 3 1 1 3. - <_> - - <_> - 0 2 6 8 -1. - <_> - 2 2 2 8 3. - <_> - - <_> - 11 10 3 4 -1. - <_> - 11 12 3 2 2. 
- <_> - - <_> - 12 6 3 8 -1. - <_> - 12 10 3 4 2. - <_> - - <_> - 6 10 3 4 -1. - <_> - 6 12 3 2 2. - <_> - - <_> - 5 6 3 8 -1. - <_> - 5 10 3 4 2. - <_> - - <_> - 2 6 18 6 -1. - <_> - 11 6 9 3 2. - <_> - 2 9 9 3 2. - <_> - - <_> - 7 14 7 3 -1. - <_> - 7 15 7 1 3. - <_> - - <_> - 0 0 2 12 -1. - <_> - 1 0 1 12 2. - <_> - - <_> - 1 2 18 16 -1. - <_> - 1 10 18 8 2. - <_> - - <_> - 9 13 5 3 -1. - <_> - 9 14 5 1 3. - <_> - - <_> - 8 13 4 3 -1. - <_> - 8 14 4 1 3. - <_> - - <_> - 0 6 18 6 -1. - <_> - 0 6 9 3 2. - <_> - 9 9 9 3 2. - <_> - - <_> - 7 13 6 3 -1. - <_> - 7 14 6 1 3. - <_> - - <_> - 17 4 1 3 -1. - <_> - 17 5 1 1 3. - <_> - - <_> - 12 11 1 9 -1. - <_> - 12 14 1 3 3. - <_> - - <_> - 2 4 1 3 -1. - <_> - 2 5 1 1 3. - <_> - - <_> - 5 4 2 3 -1. - <_> - 5 5 2 1 3. - <_> - - <_> - 1 2 18 3 -1. - <_> - 7 2 6 3 3. - <_> - - <_> - 0 1 20 6 -1. - <_> - 0 3 20 2 3. - <_> - - <_> - 7 5 6 3 -1. - <_> - 9 5 2 3 3. - <_> - - <_> - 13 7 6 4 -1. - <_> - 16 7 3 2 2. - <_> - 13 9 3 2 2. - <_> - - <_> - 3 1 4 10 -1. - <_> - 3 1 2 5 2. - <_> - 5 6 2 5 2. - <_> - - <_> - 0 4 19 10 -1. - <_> - 0 9 19 5 2. - <_> - - <_> - 9 8 3 12 -1. - <_> - 9 12 3 4 3. - <_> - - <_> - 11 18 5 2 -1. - <_> - 11 19 5 1 2. - <_> - - <_> - 5 16 6 4 -1. - <_> - 5 16 3 2 2. - <_> - 8 18 3 2 2. - <_> - - <_> - 5 18 3 2 -1. - <_> - 5 19 3 1 2. - <_> - - <_> - 13 11 3 2 -1. - <_> - 13 12 3 1 2. - <_> - - <_> - 8 5 8 4 -1. - <_> - 8 5 4 4 2. - <_> - - <_> - 1 2 18 6 -1. - <_> - 1 2 9 3 2. - <_> - 10 5 9 3 2. - <_> - - <_> - 3 5 14 6 -1. - <_> - 3 7 14 2 3. - <_> - - <_> - 18 1 2 6 -1. - <_> - 18 3 2 2 3. - <_> - - <_> - 9 11 6 1 -1. - <_> - 11 11 2 1 3. - <_> - - <_> - 0 2 6 11 -1. - <_> - 3 2 3 11 2. - <_> - - <_> - 4 12 2 3 -1. - <_> - 4 13 2 1 3. - <_> - - <_> - 6 12 9 2 -1. - <_> - 9 12 3 2 3. - <_> - - <_> - 9 4 6 15 -1. - <_> - 9 4 3 15 2. - <_> - - <_> - 5 11 6 1 -1. - <_> - 7 11 2 1 3. - <_> - - <_> - 5 4 6 15 -1. - <_> - 8 4 3 15 2. - <_> - - <_> - 14 12 6 7 -1. - <_> - 14 12 3 7 2. 
- <_> - - <_> - 18 3 2 9 -1. - <_> - 18 6 2 3 3. - <_> - - <_> - 8 1 3 1 -1. - <_> - 9 1 1 1 3. - <_> - - <_> - 0 12 6 7 -1. - <_> - 3 12 3 7 2. - <_> - - <_> - 13 7 6 4 -1. - <_> - 16 7 3 2 2. - <_> - 13 9 3 2 2. - <_> - - <_> - 8 0 10 2 -1. - <_> - 8 1 10 1 2. - <_> - - <_> - 1 7 6 4 -1. - <_> - 1 7 3 2 2. - <_> - 4 9 3 2 2. - <_> - - <_> - 1 2 3 3 -1. - <_> - 1 3 3 1 3. - <_> - - <_> - 9 13 4 3 -1. - <_> - 9 14 4 1 3. - <_> - - <_> - 12 13 7 2 -1. - <_> - 12 14 7 1 2. - <_> - - <_> - 5 12 9 2 -1. - <_> - 8 12 3 2 3. - <_> - - <_> - 6 10 4 8 -1. - <_> - 6 14 4 4 2. - <_> - - <_> - 1 0 18 4 -1. - <_> - 7 0 6 4 3. - <_> - - <_> - 12 0 5 2 -1. - <_> - 12 1 5 1 2. - <_> - - <_> - 7 7 1 12 -1. - <_> - 7 13 1 6 2. - <_> - - <_> - 6 2 3 4 -1. - <_> - 7 2 1 4 3. - <_> - - <_> - 0 13 20 6 -1. - <_> - 0 15 20 2 3. - <_> - - <_> - 8 5 12 2 -1. - <_> - 14 5 6 1 2. - <_> - 8 6 6 1 2. - <_> - - <_> - 8 14 2 3 -1. - <_> - 8 15 2 1 3. - <_> - - <_> - 8 14 4 3 -1. - <_> - 8 15 4 1 3. - <_> - - <_> - 12 13 7 6 -1. - <_> - 12 15 7 2 3. - <_> - - <_> - 6 0 8 12 -1. - <_> - 10 0 4 6 2. - <_> - 6 6 4 6 2. - <_> - - <_> - 0 15 9 4 -1. - <_> - 0 17 9 2 2. - <_> - - <_> - 9 0 2 5 -1. - <_> - 10 0 1 5 2. - <_> - - <_> - 9 5 2 6 -1. - <_> - 9 5 1 6 2. - <_> - - <_> - 17 2 3 6 -1. - <_> - 17 4 3 2 3. - <_> - - <_> - 3 11 2 3 -1. - <_> - 3 12 2 1 3. - <_> - - <_> - 7 13 3 3 -1. - <_> - 7 14 3 1 3. - <_> - - <_> - 14 12 5 3 -1. - <_> - 14 13 5 1 3. - <_> - - <_> - 4 8 14 3 -1. - <_> - 4 9 14 1 3. - <_> - - <_> - 1 12 5 3 -1. - <_> - 1 13 5 1 3. - <_> - - <_> - 1 15 12 2 -1. - <_> - 1 15 6 1 2. - <_> - 7 16 6 1 2. - <_> - - <_> - 12 11 4 2 -1. - <_> - 12 12 4 1 2. - <_> - - <_> - 9 8 3 5 -1. - <_> - 10 8 1 5 3. - <_> - - <_> - 9 5 2 6 -1. - <_> - 10 5 1 6 2. - <_> - - <_> - 0 2 3 6 -1. - <_> - 0 4 3 2 3. - <_> - - <_> - 12 11 4 2 -1. - <_> - 12 12 4 1 2. - <_> - - <_> - 9 7 3 5 -1. - <_> - 10 7 1 5 3. - <_> - - <_> - 4 11 4 2 -1. - <_> - 4 12 4 1 2. - <_> - - <_> - 8 8 3 5 -1. 
- <_> - 9 8 1 5 3. - <_> - - <_> - 9 3 3 1 -1. - <_> - 10 3 1 1 3. - <_> - - <_> - 16 5 3 8 -1. - <_> - 17 5 1 8 3. - <_> - - <_> - 8 3 3 1 -1. - <_> - 9 3 1 1 3. - <_> - - <_> - 1 5 3 8 -1. - <_> - 2 5 1 8 3. - <_> - - <_> - 10 1 3 3 -1. - <_> - 11 1 1 3 3. - <_> - - <_> - 17 5 2 4 -1. - <_> - 17 5 1 4 2. - <_> - - <_> - 2 8 14 3 -1. - <_> - 2 9 14 1 3. - <_> - - <_> - 9 7 1 3 -1. - <_> - 9 8 1 1 3. - <_> - - <_> - 6 1 8 10 -1. - <_> - 6 6 8 5 2. - <_> - - <_> - 13 0 6 8 -1. - <_> - 16 0 3 4 2. - <_> - 13 4 3 4 2. - <_> - - <_> - 1 5 2 4 -1. - <_> - 2 5 1 4 2. - <_> - - <_> - 4 2 12 2 -1. - <_> - 4 3 12 1 2. - <_> - - <_> - 8 8 4 4 -1. - <_> - 8 10 4 2 2. - <_> - - <_> - 5 6 12 4 -1. - <_> - 9 6 4 4 3. - <_> - - <_> - 1 2 8 1 -1. - <_> - 5 2 4 1 2. - <_> - - <_> - 1 1 6 10 -1. - <_> - 3 1 2 10 3. - <_> - - <_> - 8 6 8 2 -1. - <_> - 8 6 4 2 2. - <_> - - <_> - 10 7 6 6 -1. - <_> - 12 7 2 6 3. - <_> - - <_> - 4 6 8 2 -1. - <_> - 8 6 4 2 2. - <_> - - <_> - 4 7 6 6 -1. - <_> - 6 7 2 6 3. - <_> - - <_> - 3 14 16 4 -1. - <_> - 3 16 16 2 2. - <_> - - <_> - 8 12 4 2 -1. - <_> - 8 13 4 1 2. - <_> - - <_> - 8 12 3 3 -1. - <_> - 8 13 3 1 3. - <_> - - <_> - 5 12 6 1 -1. - <_> - 8 12 3 1 2. - <_> - - <_> - 18 10 2 3 -1. - <_> - 18 11 2 1 3. - <_> - - <_> - 16 8 4 6 -1. - <_> - 16 10 4 2 3. - <_> - - <_> - 8 3 2 1 -1. - <_> - 9 3 1 1 2. - <_> - - <_> - 7 1 3 9 -1. - <_> - 8 1 1 9 3. - <_> - - <_> - 5 11 11 6 -1. - <_> - 5 14 11 3 2. - <_> - - <_> - 12 2 3 14 -1. - <_> - 12 9 3 7 2. - <_> - - <_> - 8 7 3 3 -1. - <_> - 9 7 1 3 3. - <_> - - <_> - 3 5 12 5 -1. - <_> - 7 5 4 5 3. - <_> - - <_> - 1 2 6 3 -1. - <_> - 4 2 3 3 2. - <_> - - <_> - 5 5 6 10 -1. - <_> - 5 5 3 5 2. - <_> - 8 10 3 5 2. - <_> - - <_> - 16 18 2 2 -1. - <_> - 16 18 1 2 2. - <_> - - <_> - 16 18 2 2 -1. - <_> - 16 18 1 2 2. - <_> - - <_> - 8 4 2 5 -1. - <_> - 9 4 1 5 2. - <_> - - <_> - 8 4 1 4 -1. - <_> - 8 6 1 2 2. - <_> - - <_> - 7 15 12 4 -1. - <_> - 13 15 6 2 2. - <_> - 7 17 6 2 2. - <_> - - <_> - 11 18 6 2 -1. 
- <_> - 11 19 6 1 2. - <_> - - <_> - 7 7 4 10 -1. - <_> - 7 12 4 5 2. - <_> - - <_> - 5 6 10 8 -1. - <_> - 5 10 10 4 2. - <_> - - <_> - 11 1 6 12 -1. - <_> - 14 1 3 6 2. - <_> - 11 7 3 6 2. - <_> - - <_> - 5 8 12 1 -1. - <_> - 9 8 4 1 3. - <_> - - <_> - 4 7 3 6 -1. - <_> - 4 9 3 2 3. - <_> - - <_> - 4 11 3 4 -1. - <_> - 4 13 3 2 2. - <_> - - <_> - 14 16 2 2 -1. - <_> - 14 17 2 1 2. - <_> - - <_> - 15 15 2 2 -1. - <_> - 15 16 2 1 2. - <_> - - <_> - 7 12 6 2 -1. - <_> - 7 13 6 1 2. - <_> - - <_> - 8 13 4 2 -1. - <_> - 8 14 4 1 2. - <_> - - <_> - 11 1 6 12 -1. - <_> - 14 1 3 6 2. - <_> - 11 7 3 6 2. - <_> - - <_> - 12 2 4 2 -1. - <_> - 12 3 4 1 2. - <_> - - <_> - 3 10 12 6 -1. - <_> - 3 10 6 3 2. - <_> - 9 13 6 3 2. - <_> - - <_> - 3 1 6 12 -1. - <_> - 3 1 3 6 2. - <_> - 6 7 3 6 2. - <_> - - <_> - 16 6 4 14 -1. - <_> - 18 6 2 7 2. - <_> - 16 13 2 7 2. - <_> - - <_> - 5 1 10 8 -1. - <_> - 10 1 5 4 2. - <_> - 5 5 5 4 2. - <_> - - <_> - 0 6 4 14 -1. - <_> - 0 6 2 7 2. - <_> - 2 13 2 7 2. - <_> - - <_> - 1 15 12 4 -1. - <_> - 1 15 6 2 2. - <_> - 7 17 6 2 2. - <_> - - <_> - 10 17 3 3 -1. - <_> - 11 17 1 3 3. - <_> - - <_> - 11 2 2 6 -1. - <_> - 12 2 1 3 2. - <_> - 11 5 1 3 2. - <_> - - <_> - 7 17 3 3 -1. - <_> - 8 17 1 3 3. - <_> - - <_> - 8 15 4 3 -1. - <_> - 8 16 4 1 3. - <_> - - <_> - 10 15 4 2 -1. - <_> - 12 15 2 1 2. - <_> - 10 16 2 1 2. - <_> - - <_> - 13 13 4 3 -1. - <_> - 13 14 4 1 3. - <_> - - <_> - 3 13 4 3 -1. - <_> - 3 14 4 1 3. - <_> - - <_> - 7 2 2 6 -1. - <_> - 7 2 1 3 2. - <_> - 8 5 1 3 2. - <_> - - <_> - 2 1 16 3 -1. - <_> - 2 2 16 1 3. - <_> - - <_> - 10 15 4 2 -1. - <_> - 12 15 2 1 2. - <_> - 10 16 2 1 2. - <_> - - <_> - 6 15 4 2 -1. - <_> - 6 15 2 1 2. - <_> - 8 16 2 1 2. - <_> - - <_> - 3 0 13 3 -1. - <_> - 3 1 13 1 3. - <_> - - <_> - 0 9 20 3 -1. - <_> - 0 10 20 1 3. - <_> - - <_> - 6 7 9 2 -1. - <_> - 6 8 9 1 2. - <_> - - <_> - 8 14 3 6 -1. - <_> - 9 14 1 6 3. - <_> - - <_> - 9 10 2 2 -1. - <_> - 9 11 2 1 2. - <_> - - <_> - 9 7 2 5 -1. 
- <_> - 9 7 1 5 2. - <_> - - <_> - 5 6 10 3 -1. - <_> - 5 6 5 3 2. - <_> - - <_> - 9 7 2 5 -1. - <_> - 10 7 1 5 2. - <_> - - <_> - 5 6 10 3 -1. - <_> - 10 6 5 3 2. - <_> - - <_> - 13 9 2 2 -1. - <_> - 13 9 1 2 2. - <_> - - <_> - 4 3 12 11 -1. - <_> - 8 3 4 11 3. - <_> - - <_> - 7 1 2 7 -1. - <_> - 8 1 1 7 2. - <_> - - <_> - 7 4 3 8 -1. - <_> - 8 4 1 8 3. - <_> - - <_> - 13 9 2 2 -1. - <_> - 13 9 1 2 2. - <_> - - <_> - 11 6 2 2 -1. - <_> - 12 6 1 1 2. - <_> - 11 7 1 1 2. - <_> - - <_> - 5 4 2 3 -1. - <_> - 5 5 2 1 3. - <_> - - <_> - 6 5 1 3 -1. - <_> - 6 6 1 1 3. - <_> - - <_> - 13 9 2 2 -1. - <_> - 13 9 1 2 2. - <_> - - <_> - 16 14 3 3 -1. - <_> - 16 15 3 1 3. - <_> - - <_> - 5 9 2 2 -1. - <_> - 6 9 1 2 2. - <_> - - <_> - 1 14 3 3 -1. - <_> - 1 15 3 1 3. - <_> - - <_> - 13 1 1 6 -1. - <_> - 13 3 1 2 3. - <_> - - <_> - 13 3 7 2 -1. - <_> - 13 4 7 1 2. - <_> - - <_> - 0 6 20 14 -1. - <_> - 0 13 20 7 2. - <_> - - <_> - 0 4 3 6 -1. - <_> - 0 6 3 2 3. - <_> - - <_> - 10 1 9 6 -1. - <_> - 10 3 9 2 3. - <_> - - <_> - 8 0 12 5 -1. - <_> - 8 0 6 5 2. - <_> - - <_> - 0 0 18 5 -1. - <_> - 6 0 6 5 3. - <_> - - <_> - 1 1 9 6 -1. - <_> - 1 3 9 2 3. - <_> - - <_> - 15 15 2 2 -1. - <_> - 15 16 2 1 2. - <_> - - <_> - 13 16 3 4 -1. - <_> - 13 18 3 2 2. - <_> - - <_> - 3 15 2 2 -1. - <_> - 3 16 2 1 2. - <_> - - <_> - 4 16 3 4 -1. - <_> - 4 18 3 2 2. - <_> - - <_> - 11 14 1 3 -1. - <_> - 11 15 1 1 3. - <_> - - <_> - 9 13 5 3 -1. - <_> - 9 14 5 1 3. - <_> - - <_> - 0 0 3 6 -1. - <_> - 0 2 3 2 3. - <_> - - <_> - 4 1 6 3 -1. - <_> - 6 1 2 3 3. - <_> - - <_> - 9 13 4 3 -1. - <_> - 9 14 4 1 3. - <_> - - <_> - 8 15 5 3 -1. - <_> - 8 16 5 1 3. - <_> - - <_> - 8 3 3 2 -1. - <_> - 9 3 1 2 3. - <_> - - <_> - 1 8 18 2 -1. - <_> - 1 9 18 1 2. - <_> - - <_> - 11 14 1 3 -1. - <_> - 11 15 1 1 3. - <_> - - <_> - 8 13 6 3 -1. - <_> - 8 14 6 1 3. - <_> - - <_> - 8 14 1 3 -1. - <_> - 8 15 1 1 3. - <_> - - <_> - 4 13 12 4 -1. - <_> - 4 13 6 2 2. - <_> - 10 15 6 2 2. - <_> - - <_> - 10 7 2 2 -1. 
- <_> - 10 7 1 2 2. - <_> - - <_> - 13 4 2 8 -1. - <_> - 14 4 1 4 2. - <_> - 13 8 1 4 2. - <_> - - <_> - 0 5 4 6 -1. - <_> - 0 7 4 2 3. - <_> - - <_> - 8 7 2 2 -1. - <_> - 9 7 1 2 2. - <_> - - <_> - 13 0 3 7 -1. - <_> - 14 0 1 7 3. - <_> - - <_> - 11 2 2 14 -1. - <_> - 11 2 1 14 2. - <_> - - <_> - 4 0 3 7 -1. - <_> - 5 0 1 7 3. - <_> - - <_> - 5 5 8 12 -1. - <_> - 5 5 4 6 2. - <_> - 9 11 4 6 2. - <_> - - <_> - 11 4 6 3 -1. - <_> - 11 5 6 1 3. - <_> - - <_> - 12 3 4 3 -1. - <_> - 12 4 4 1 3. - <_> - - <_> - 5 5 10 12 -1. - <_> - 5 5 5 6 2. - <_> - 10 11 5 6 2. - <_> - - <_> - 3 6 12 3 -1. - <_> - 9 6 6 3 2. - <_> - - <_> - 9 6 2 7 -1. - <_> - 9 6 1 7 2. - <_> - - <_> - 9 5 2 4 -1. - <_> - 9 5 1 4 2. - <_> - - <_> - 8 7 3 3 -1. - <_> - 9 7 1 3 3. - <_> - - <_> - 5 1 6 4 -1. - <_> - 7 1 2 4 3. - <_> - - <_> - 13 16 7 3 -1. - <_> - 13 17 7 1 3. - <_> - - <_> - 12 4 3 3 -1. - <_> - 12 5 3 1 3. - <_> - - <_> - 0 16 7 3 -1. - <_> - 0 17 7 1 3. - <_> - - <_> - 5 4 3 3 -1. - <_> - 5 5 3 1 3. - <_> - - <_> - 12 9 8 10 -1. - <_> - 12 9 4 10 2. - <_> - - <_> - 8 10 12 5 -1. - <_> - 12 10 4 5 3. - <_> - - <_> - 0 9 8 10 -1. - <_> - 4 9 4 10 2. - <_> - - <_> - 0 10 12 5 -1. - <_> - 4 10 4 5 3. - <_> - - <_> - 2 3 6 2 -1. - <_> - 5 3 3 2 2. - <_> - - <_> - 0 0 17 9 -1. - <_> - 0 3 17 3 3. - <_> - - <_> - 4 7 12 2 -1. - <_> - 8 7 4 2 3. - <_> - - <_> - 10 4 6 4 -1. - <_> - 12 4 2 4 3. - <_> - - <_> - 0 10 20 4 -1. - <_> - 0 12 20 2 2. - <_> - - <_> - 4 3 6 5 -1. - <_> - 6 3 2 5 3. - <_> - - <_> - 1 1 18 4 -1. - <_> - 7 1 6 4 3. - <_> - - <_> - 13 9 2 3 -1. - <_> - 13 9 1 3 2. - <_> - - <_> - 6 15 7 4 -1. - <_> - 6 17 7 2 2. - <_> - - <_> - 3 17 4 2 -1. - <_> - 3 18 4 1 2. - <_> - - <_> - 9 4 8 10 -1. - <_> - 9 9 8 5 2. - <_> - - <_> - 9 17 3 2 -1. - <_> - 10 17 1 2 3. - <_> - - <_> - 8 2 4 8 -1. - <_> - 8 6 4 4 2. - <_> - - <_> - 3 4 14 12 -1. - <_> - 3 4 7 6 2. - <_> - 10 10 7 6 2. - <_> - - <_> - 7 7 6 4 -1. - <_> - 9 7 2 4 3. - <_> - - <_> - 6 7 9 4 -1. - <_> - 6 9 9 2 2. 
- <_> - - <_> - 2 10 3 3 -1. - <_> - 2 11 3 1 3. - <_> - - <_> - 4 6 2 9 -1. - <_> - 4 9 2 3 3. - <_> - - <_> - 9 11 3 3 -1. - <_> - 9 12 3 1 3. - <_> - - <_> - 3 1 15 2 -1. - <_> - 3 2 15 1 2. - <_> - - <_> - 9 8 2 3 -1. - <_> - 9 9 2 1 3. - <_> - - <_> - 9 6 2 5 -1. - <_> - 10 6 1 5 2. - <_> - - <_> - 9 7 2 3 -1. - <_> - 9 8 2 1 3. - <_> - - <_> - 4 10 12 10 -1. - <_> - 4 15 12 5 2. - <_> - - <_> - 0 10 4 2 -1. - <_> - 0 11 4 1 2. - <_> - - <_> - 5 15 9 2 -1. - <_> - 5 16 9 1 2. - <_> - - <_> - 8 14 6 3 -1. - <_> - 8 15 6 1 3. - <_> - - <_> - 8 16 4 3 -1. - <_> - 8 17 4 1 3. - <_> - - <_> - 8 9 4 2 -1. - <_> - 8 10 4 1 2. - <_> - - <_> - 3 3 14 2 -1. - <_> - 3 4 14 1 2. - <_> - - <_> - 11 12 1 2 -1. - <_> - 11 13 1 1 2. - <_> - - <_> - 4 12 12 1 -1. - <_> - 8 12 4 1 3. - <_> - - <_> - 0 2 1 2 -1. - <_> - 0 3 1 1 2. - <_> - - <_> - 7 4 4 6 -1. - <_> - 9 4 2 6 2. - <_> - - <_> - 0 2 20 14 -1. - <_> - 10 2 10 7 2. - <_> - 0 9 10 7 2. - <_> - - <_> - 14 6 1 3 -1. - <_> - 14 7 1 1 3. - <_> - - <_> - 0 4 20 12 -1. - <_> - 0 4 10 6 2. - <_> - 10 10 10 6 2. - <_> - - <_> - 8 12 1 2 -1. - <_> - 8 13 1 1 2. - <_> - - <_> - 9 18 3 2 -1. - <_> - 10 18 1 2 3. - <_> - - <_> - 9 17 6 2 -1. - <_> - 11 17 2 2 3. - <_> - - <_> - 5 6 2 3 -1. - <_> - 5 7 2 1 3. - <_> - - <_> - 5 4 3 3 -1. - <_> - 5 5 3 1 3. - <_> - - <_> - 14 15 3 2 -1. - <_> - 14 16 3 1 2. - <_> - - <_> - 11 3 3 4 -1. - <_> - 12 3 1 4 3. - <_> - - <_> - 3 15 3 2 -1. - <_> - 3 16 3 1 2. - <_> - - <_> - 9 12 2 3 -1. - <_> - 9 13 2 1 3. - <_> - - <_> - 9 13 3 7 -1. - <_> - 10 13 1 7 3. - <_> - - <_> - 12 12 5 3 -1. - <_> - 12 13 5 1 3. - <_> - - <_> - 8 18 3 2 -1. - <_> - 9 18 1 2 3. - <_> - - <_> - 4 7 12 4 -1. - <_> - 4 7 6 2 2. - <_> - 10 9 6 2 2. - <_> - - <_> - 6 19 14 1 -1. - <_> - 6 19 7 1 2. - <_> - - <_> - 16 14 3 2 -1. - <_> - 16 15 3 1 2. - <_> - - <_> - 1 0 6 10 -1. - <_> - 1 0 3 5 2. - <_> - 4 5 3 5 2. - <_> - - <_> - 1 0 4 10 -1. - <_> - 1 0 2 5 2. - <_> - 3 5 2 5 2. - <_> - - <_> - 15 3 5 6 -1. 
- <_> - 15 5 5 2 3. - <_> - - <_> - 9 5 2 15 -1. - <_> - 9 10 2 5 3. - <_> - - <_> - 0 3 5 6 -1. - <_> - 0 5 5 2 3. - <_> - - <_> - 6 0 3 2 -1. - <_> - 7 0 1 2 3. - <_> - - <_> - 12 8 8 2 -1. - <_> - 16 8 4 1 2. - <_> - 12 9 4 1 2. - <_> - - <_> - 5 8 12 1 -1. - <_> - 9 8 4 1 3. - <_> - - <_> - 3 13 3 3 -1. - <_> - 3 14 3 1 3. - <_> - - <_> - 5 13 3 2 -1. - <_> - 5 14 3 1 2. - <_> - - <_> - 9 15 3 3 -1. - <_> - 9 16 3 1 3. - <_> - - <_> - 7 15 7 3 -1. - <_> - 7 16 7 1 3. - <_> - - <_> - 3 14 11 6 -1. - <_> - 3 16 11 2 3. - <_> - - <_> - 0 19 14 1 -1. - <_> - 7 19 7 1 2. - <_> - - <_> - 9 17 6 2 -1. - <_> - 11 17 2 2 3. - <_> - - <_> - 12 11 6 2 -1. - <_> - 14 11 2 2 3. - <_> - - <_> - 5 17 6 2 -1. - <_> - 7 17 2 2 3. - <_> - - <_> - 0 1 9 10 -1. - <_> - 3 1 3 10 3. - <_> - - <_> - 10 1 3 3 -1. - <_> - 11 1 1 3 3. - <_> - - <_> - 9 5 6 4 -1. - <_> - 9 5 3 4 2. - <_> - - <_> - 7 1 3 3 -1. - <_> - 8 1 1 3 3. - <_> - - <_> - 0 4 4 11 -1. - <_> - 2 4 2 11 2. - <_> - - <_> - 9 5 6 4 -1. - <_> - 9 5 3 4 2. - <_> - - <_> - 6 0 8 10 -1. - <_> - 10 0 4 5 2. - <_> - 6 5 4 5 2. - <_> - - <_> - 6 6 5 14 -1. - <_> - 6 13 5 7 2. - <_> - - <_> - 8 5 4 14 -1. - <_> - 8 12 4 7 2. - <_> - - <_> - 7 7 6 5 -1. - <_> - 9 7 2 5 3. - <_> - - <_> - 9 3 3 9 -1. - <_> - 9 6 3 3 3. - <_> - - <_> - 8 1 3 3 -1. - <_> - 9 1 1 3 3. - <_> - - <_> - 9 6 2 4 -1. - <_> - 10 6 1 4 2. - <_> - - <_> - 10 8 6 9 -1. - <_> - 10 8 3 9 2. - <_> - - <_> - 16 4 3 8 -1. - <_> - 17 4 1 8 3. - <_> - - <_> - 5 9 10 6 -1. - <_> - 5 9 5 3 2. - <_> - 10 12 5 3 2. - <_> - - <_> - 5 5 6 4 -1. - <_> - 8 5 3 4 2. - <_> - - <_> - 9 8 4 2 -1. - <_> - 9 9 4 1 2. - <_> - - <_> - 11 7 2 2 -1. - <_> - 11 7 1 2 2. - <_> - - <_> - 8 12 4 8 -1. - <_> - 8 12 2 4 2. - <_> - 10 16 2 4 2. - <_> - - <_> - 0 1 4 9 -1. - <_> - 0 4 4 3 3. - <_> - - <_> - 9 10 3 3 -1. - <_> - 9 11 3 1 3. - <_> - - <_> - 8 11 4 2 -1. - <_> - 8 12 4 1 2. - <_> - - <_> - 7 8 4 2 -1. - <_> - 7 9 4 1 2. - <_> - - <_> - 7 8 6 1 -1. - <_> - 9 8 2 1 3. 
- <_> - - <_> - 16 0 4 9 -1. - <_> - 16 0 2 9 2. - <_> - - <_> - 16 0 3 6 -1. - <_> - 16 3 3 3 2. - <_> - - <_> - 0 0 4 9 -1. - <_> - 2 0 2 9 2. - <_> - - <_> - 1 0 3 6 -1. - <_> - 1 3 3 3 2. - <_> - - <_> - 9 7 6 9 -1. - <_> - 11 7 2 9 3. - <_> - - <_> - 10 6 3 6 -1. - <_> - 11 6 1 6 3. - <_> - - <_> - 1 2 18 2 -1. - <_> - 1 2 9 1 2. - <_> - 10 3 9 1 2. - <_> - - <_> - 5 8 6 8 -1. - <_> - 7 8 2 8 3. - <_> - - <_> - 9 0 6 16 -1. - <_> - 11 0 2 16 3. - <_> - - <_> - 14 1 6 18 -1. - <_> - 17 1 3 9 2. - <_> - 14 10 3 9 2. - <_> - - <_> - 2 9 2 3 -1. - <_> - 2 10 2 1 3. - <_> - - <_> - 0 1 6 18 -1. - <_> - 0 1 3 9 2. - <_> - 3 10 3 9 2. - <_> - - <_> - 11 8 4 12 -1. - <_> - 11 8 2 12 2. - <_> - - <_> - 2 1 18 18 -1. - <_> - 2 10 18 9 2. - <_> - - <_> - 6 3 3 1 -1. - <_> - 7 3 1 1 3. - <_> - - <_> - 4 12 2 2 -1. - <_> - 4 13 2 1 2. - <_> - - <_> - 8 13 5 3 -1. - <_> - 8 14 5 1 3. - <_> - - <_> - 8 14 4 3 -1. - <_> - 8 15 4 1 3. - <_> - - <_> - 3 12 5 3 -1. - <_> - 3 13 5 1 3. - <_> - - <_> - 6 3 3 4 -1. - <_> - 7 3 1 4 3. - <_> - - <_> - 11 10 2 2 -1. - <_> - 12 10 1 1 2. - <_> - 11 11 1 1 2. - <_> - - <_> - 5 8 12 1 -1. - <_> - 9 8 4 1 3. - <_> - - <_> - 8 4 4 8 -1. - <_> - 10 4 2 8 2. - <_> - - <_> - 6 6 8 5 -1. - <_> - 10 6 4 5 2. - <_> - - <_> - 10 4 6 4 -1. - <_> - 12 4 2 4 3. - <_> - - <_> - 12 7 2 2 -1. - <_> - 13 7 1 1 2. - <_> - 12 8 1 1 2. - <_> - - <_> - 3 5 10 8 -1. - <_> - 3 9 10 4 2. - <_> - - <_> - 7 1 2 12 -1. - <_> - 7 7 2 6 2. - <_> - - <_> - 12 7 2 2 -1. - <_> - 13 7 1 1 2. - <_> - 12 8 1 1 2. - <_> - - <_> - 11 13 1 6 -1. - <_> - 11 16 1 3 2. - <_> - - <_> - 5 1 6 15 -1. - <_> - 7 1 2 15 3. - <_> - - <_> - 6 7 2 2 -1. - <_> - 6 7 1 1 2. - <_> - 7 8 1 1 2. - <_> - - <_> - 17 5 2 2 -1. - <_> - 17 6 2 1 2. - <_> - - <_> - 10 3 4 10 -1. - <_> - 12 3 2 5 2. - <_> - 10 8 2 5 2. - <_> - - <_> - 1 5 2 2 -1. - <_> - 1 6 2 1 2. - <_> - - <_> - 7 10 2 2 -1. - <_> - 7 10 1 1 2. - <_> - 8 11 1 1 2. - <_> - - <_> - 3 12 14 4 -1. - <_> - 10 12 7 2 2. 
- <_> - 3 14 7 2 2. - <_> - - <_> - 9 15 3 2 -1. - <_> - 9 16 3 1 2. - <_> - - <_> - 1 13 3 3 -1. - <_> - 1 14 3 1 3. - <_> - - <_> - 0 3 1 2 -1. - <_> - 0 4 1 1 2. - <_> - - <_> - 7 7 6 1 -1. - <_> - 9 7 2 1 3. - <_> - - <_> - 0 4 16 6 -1. - <_> - 0 6 16 2 3. - <_> - - <_> - 9 3 2 14 -1. - <_> - 9 10 2 7 2. - <_> - - <_> - 12 0 4 3 -1. - <_> - 12 0 2 3 2. - <_> - - <_> - 4 18 12 2 -1. - <_> - 8 18 4 2 3. - <_> - - <_> - 4 10 12 4 -1. - <_> - 8 10 4 4 3. - <_> - - <_> - 9 9 2 2 -1. - <_> - 9 10 2 1 2. - <_> - - <_> - 14 1 2 8 -1. - <_> - 15 1 1 4 2. - <_> - 14 5 1 4 2. - <_> - - <_> - 3 4 9 1 -1. - <_> - 6 4 3 1 3. - <_> - - <_> - 3 3 4 2 -1. - <_> - 3 4 4 1 2. - <_> - - <_> - 11 15 2 4 -1. - <_> - 11 17 2 2 2. - <_> - - <_> - 14 13 2 6 -1. - <_> - 14 15 2 2 3. - <_> - - <_> - 6 6 1 6 -1. - <_> - 6 9 1 3 2. - <_> - - <_> - 6 10 8 8 -1. - <_> - 6 14 8 4 2. - <_> - - <_> - 8 13 4 3 -1. - <_> - 8 14 4 1 3. - <_> - - <_> - 10 11 4 8 -1. - <_> - 10 15 4 4 2. - <_> - - <_> - 5 11 6 1 -1. - <_> - 7 11 2 1 3. - <_> - - <_> - 5 4 6 10 -1. - <_> - 8 4 3 10 2. - <_> - - <_> - 14 2 6 3 -1. - <_> - 14 3 6 1 3. - <_> - - <_> - 9 12 3 2 -1. - <_> - 9 13 3 1 2. - <_> - - <_> - 8 1 4 6 -1. - <_> - 8 3 4 2 3. - <_> - - <_> - 3 5 13 8 -1. - <_> - 3 9 13 4 2. - <_> - - <_> - 12 5 5 3 -1. - <_> - 12 6 5 1 3. - <_> - - <_> - 5 14 15 6 -1. - <_> - 5 16 15 2 3. - <_> - - <_> - 3 5 5 3 -1. - <_> - 3 6 5 1 3. - <_> - - <_> - 9 14 2 6 -1. - <_> - 9 14 1 3 2. - <_> - 10 17 1 3 2. - <_> - - <_> - 9 12 3 2 -1. - <_> - 9 13 3 1 2. - <_> - - <_> - 9 13 3 2 -1. - <_> - 9 14 3 1 2. - <_> - - <_> - 0 2 6 3 -1. - <_> - 0 3 6 1 3. - <_> - - <_> - 0 1 9 11 -1. - <_> - 3 1 3 11 3. - <_> - - <_> - 8 13 4 6 -1. - <_> - 10 13 2 3 2. - <_> - 8 16 2 3 2. - <_> - - <_> - 7 13 6 3 -1. - <_> - 7 14 6 1 3. - <_> - - <_> - 3 12 14 4 -1. - <_> - 3 12 7 2 2. - <_> - 10 14 7 2 2. - <_> - - <_> - 7 14 1 4 -1. - <_> - 7 16 1 2 2. - <_> - - <_> - 8 13 4 6 -1. - <_> - 10 13 2 3 2. - <_> - 8 16 2 3 2. 
- <_> - - <_> - 10 14 1 3 -1. - <_> - 10 15 1 1 3. - <_> - - <_> - 8 13 4 6 -1. - <_> - 8 13 2 3 2. - <_> - 10 16 2 3 2. - <_> - - <_> - 9 14 1 3 -1. - <_> - 9 15 1 1 3. - <_> - - <_> - 10 15 2 3 -1. - <_> - 10 16 2 1 3. - <_> - - <_> - 11 16 1 2 -1. - <_> - 11 17 1 1 2. - <_> - - <_> - 9 0 2 2 -1. - <_> - 9 1 2 1 2. - <_> - - <_> - 0 1 5 8 -1. - <_> - 0 5 5 4 2. - <_> - - <_> - 10 14 2 3 -1. - <_> - 10 15 2 1 3. - <_> - - <_> - 10 13 2 3 -1. - <_> - 10 14 2 1 3. - <_> - - <_> - 0 3 16 6 -1. - <_> - 0 6 16 3 2. - <_> - - <_> - 4 1 2 2 -1. - <_> - 5 1 1 2 2. - <_> - - <_> - 9 7 2 3 -1. - <_> - 9 8 2 1 3. - <_> - - <_> - 10 8 2 12 -1. - <_> - 10 12 2 4 3. - <_> - - <_> - 9 7 2 2 -1. - <_> - 10 7 1 2 2. - <_> - - <_> - 5 0 6 8 -1. - <_> - 7 0 2 8 3. - <_> - - <_> - 9 7 3 6 -1. - <_> - 10 7 1 6 3. - <_> - - <_> - 8 12 10 8 -1. - <_> - 8 16 10 4 2. - <_> - - <_> - 8 7 3 6 -1. - <_> - 9 7 1 6 3. - <_> - - <_> - 4 7 12 2 -1. - <_> - 10 7 6 2 2. - <_> - - <_> - 8 6 8 3 -1. - <_> - 8 6 4 3 2. - <_> - - <_> - 16 15 3 3 -1. - <_> - 16 16 3 1 3. - <_> - - <_> - 4 6 12 3 -1. - <_> - 10 6 6 3 2. - <_> - - <_> - 7 8 3 5 -1. - <_> - 8 8 1 5 3. - <_> - - <_> - 0 10 20 2 -1. - <_> - 10 10 10 1 2. - <_> - 0 11 10 1 2. - <_> - - <_> - 11 16 9 4 -1. - <_> - 14 16 3 4 3. - <_> - - <_> - 0 5 3 4 -1. - <_> - 1 5 1 4 3. - <_> - - <_> - 8 15 4 2 -1. - <_> - 8 15 2 1 2. - <_> - 10 16 2 1 2. - <_> - - <_> - 1 8 19 3 -1. - <_> - 1 9 19 1 3. - <_> - - <_> - 15 16 3 3 -1. - <_> - 15 17 3 1 3. - <_> - - <_> - 0 4 20 10 -1. - <_> - 0 4 10 5 2. - <_> - 10 9 10 5 2. - <_> - - <_> - 2 14 7 6 -1. - <_> - 2 16 7 2 3. - <_> - - <_> - 8 6 6 6 -1. - <_> - 10 6 2 6 3. - <_> - - <_> - 16 4 4 6 -1. - <_> - 16 6 4 2 3. - <_> - - <_> - 7 13 6 3 -1. - <_> - 7 14 6 1 3. - <_> - - <_> - 7 13 4 3 -1. - <_> - 7 14 4 1 3. - <_> - - <_> - 13 13 6 2 -1. - <_> - 13 14 6 1 2. - <_> - - <_> - 14 12 2 3 -1. - <_> - 14 13 2 1 3. - <_> - - <_> - 1 13 6 2 -1. - <_> - 1 14 6 1 2. - <_> - - <_> - 4 12 2 3 -1. 
- <_> - 4 13 2 1 3. - <_> - - <_> - 17 4 3 5 -1. - <_> - 18 4 1 5 3. - <_> - - <_> - 5 5 14 8 -1. - <_> - 12 5 7 4 2. - <_> - 5 9 7 4 2. - <_> - - <_> - 6 8 6 5 -1. - <_> - 8 8 2 5 3. - <_> - - <_> - 0 4 4 6 -1. - <_> - 0 6 4 2 3. - <_> - - <_> - 9 1 3 6 -1. - <_> - 10 1 1 6 3. - <_> - - <_> - 10 4 6 3 -1. - <_> - 10 5 6 1 3. - <_> - - <_> - 8 1 3 6 -1. - <_> - 9 1 1 6 3. - <_> - - <_> - 4 4 6 3 -1. - <_> - 4 5 6 1 3. - <_> - - <_> - 12 4 3 3 -1. - <_> - 12 5 3 1 3. - <_> - - <_> - 12 11 4 2 -1. - <_> - 12 12 4 1 2. - <_> - - <_> - 0 2 20 6 -1. - <_> - 0 2 10 3 2. - <_> - 10 5 10 3 2. - <_> - - <_> - 5 4 3 3 -1. - <_> - 5 5 3 1 3. - <_> - - <_> - 2 10 16 4 -1. - <_> - 10 10 8 2 2. - <_> - 2 12 8 2 2. - <_> - - <_> - 3 10 16 6 -1. - <_> - 11 10 8 3 2. - <_> - 3 13 8 3 2. - <_> - - <_> - 1 10 16 6 -1. - <_> - 1 10 8 3 2. - <_> - 9 13 8 3 2. - <_> - - <_> - 4 7 2 4 -1. - <_> - 5 7 1 4 2. - <_> - - <_> - 11 16 9 4 -1. - <_> - 14 16 3 4 3. - <_> - - <_> - 3 16 14 4 -1. - <_> - 10 16 7 2 2. - <_> - 3 18 7 2 2. - <_> - - <_> - 0 16 9 4 -1. - <_> - 3 16 3 4 3. - <_> - - <_> - 1 14 6 6 -1. - <_> - 1 14 3 3 2. - <_> - 4 17 3 3 2. - <_> - - <_> - 9 0 2 1 -1. - <_> - 9 0 1 1 2. - <_> - - <_> - 6 7 8 10 -1. - <_> - 10 7 4 5 2. - <_> - 6 12 4 5 2. - <_> - - <_> - 2 15 1 2 -1. - <_> - 2 16 1 1 2. - <_> - - <_> - 0 14 7 6 -1. - <_> - 0 16 7 2 3. - <_> - - <_> - 7 8 6 2 -1. - <_> - 7 9 6 1 2. - <_> - - <_> - 9 2 2 15 -1. - <_> - 9 7 2 5 3. - <_> - - <_> - 5 6 2 2 -1. - <_> - 5 7 2 1 2. - <_> - - <_> - 6 6 8 3 -1. - <_> - 6 7 8 1 3. - <_> - - <_> - 12 13 5 6 -1. - <_> - 12 15 5 2 3. - <_> - - <_> - 0 0 20 18 -1. - <_> - 0 9 20 9 2. - <_> - - <_> - 5 1 6 6 -1. - <_> - 7 1 2 6 3. - <_> - - <_> - 5 1 4 9 -1. - <_> - 7 1 2 9 2. - <_> - - <_> - 1 19 18 1 -1. - <_> - 7 19 6 1 3. - <_> - - <_> - 14 16 5 2 -1. - <_> - 14 17 5 1 2. - <_> - - <_> - 0 5 15 10 -1. - <_> - 0 10 15 5 2. - <_> - - <_> - 7 15 4 2 -1. - <_> - 7 15 2 1 2. - <_> - 9 16 2 1 2. - <_> - - <_> - 14 11 2 2 -1. 
- <_> - 14 12 2 1 2. - <_> - - <_> - 9 8 3 3 -1. - <_> - 9 9 3 1 3. - <_> - - <_> - 4 11 2 2 -1. - <_> - 4 12 2 1 2. - <_> - - <_> - 8 8 3 3 -1. - <_> - 8 9 3 1 3. - <_> - - <_> - 9 10 2 3 -1. - <_> - 9 11 2 1 3. - <_> - - <_> - 8 8 4 3 -1. - <_> - 8 9 4 1 3. - <_> - - <_> - 1 9 4 10 -1. - <_> - 1 9 2 5 2. - <_> - 3 14 2 5 2. - <_> - - <_> - 0 12 6 8 -1. - <_> - 2 12 2 8 3. - <_> - - <_> - 9 1 4 2 -1. - <_> - 11 1 2 1 2. - <_> - 9 2 2 1 2. - <_> - - <_> - 12 13 7 6 -1. - <_> - 12 15 7 2 3. - <_> - - <_> - 7 0 2 3 -1. - <_> - 7 1 2 1 3. - <_> - - <_> - 7 14 6 3 -1. - <_> - 9 14 2 3 3. - <_> - - <_> - 9 6 6 4 -1. - <_> - 11 6 2 4 3. - <_> - - <_> - 8 10 8 3 -1. - <_> - 8 10 4 3 2. - <_> - - <_> - 6 10 4 3 -1. - <_> - 8 10 2 3 2. - <_> - - <_> - 6 8 3 5 -1. - <_> - 7 8 1 5 3. - <_> - - <_> - 0 4 8 1 -1. - <_> - 4 4 4 1 2. - <_> - - <_> - 8 2 2 6 -1. - <_> - 8 2 1 3 2. - <_> - 9 5 1 3 2. - <_> - - <_> - 0 7 20 6 -1. - <_> - 0 9 20 2 3. - <_> - - <_> - 12 10 3 6 -1. - <_> - 12 13 3 3 2. - <_> - - <_> - 8 15 1 4 -1. - <_> - 8 17 1 2 2. - <_> - - <_> - 5 16 2 4 -1. - <_> - 5 18 2 2 2. - <_> - - <_> - 6 2 8 12 -1. - <_> - 6 6 8 4 3. - <_> - - <_> - 4 7 12 2 -1. - <_> - 8 7 4 2 3. - <_> - - <_> - 7 0 6 1 -1. - <_> - 9 0 2 1 3. - <_> - - <_> - 8 11 3 3 -1. - <_> - 8 12 3 1 3. - <_> - - <_> - 12 11 3 6 -1. - <_> - 12 14 3 3 2. - <_> - - <_> - 11 2 6 10 -1. - <_> - 14 2 3 5 2. - <_> - 11 7 3 5 2. - <_> - - <_> - 5 7 10 12 -1. - <_> - 5 7 5 6 2. - <_> - 10 13 5 6 2. - <_> - - <_> - 4 4 2 10 -1. - <_> - 4 9 2 5 2. - <_> - - <_> - 9 7 2 3 -1. - <_> - 9 7 1 3 2. - <_> - - <_> - 11 9 6 2 -1. - <_> - 11 9 3 2 2. - <_> - - <_> - 4 7 2 2 -1. - <_> - 5 7 1 2 2. - <_> - - <_> - 0 2 4 6 -1. - <_> - 0 4 4 2 3. - <_> - - <_> - 10 7 3 4 -1. - <_> - 11 7 1 4 3. - <_> - - <_> - 9 7 3 5 -1. - <_> - 10 7 1 5 3. - <_> - - <_> - 9 1 1 3 -1. - <_> - 9 2 1 1 3. - <_> - - <_> - 0 6 16 6 -1. - <_> - 0 6 8 3 2. - <_> - 8 9 8 3 2. - <_> - - <_> - 10 15 3 3 -1. - <_> - 10 16 3 1 3. 
- <_> - - <_> - 9 14 4 3 -1. - <_> - 9 15 4 1 3. - <_> - - <_> - 3 2 6 10 -1. - <_> - 3 2 3 5 2. - <_> - 6 7 3 5 2. - <_> - - <_> - 3 0 14 2 -1. - <_> - 3 1 14 1 2. - <_> - - <_> - 9 14 3 3 -1. - <_> - 9 15 3 1 3. - <_> - - <_> - 10 15 3 3 -1. - <_> - 10 16 3 1 3. - <_> - - <_> - 9 13 2 6 -1. - <_> - 9 16 2 3 2. - <_> - - <_> - 7 13 6 3 -1. - <_> - 7 14 6 1 3. - <_> - - <_> - 12 11 3 6 -1. - <_> - 12 14 3 3 2. - <_> - - <_> - 8 12 5 2 -1. - <_> - 8 13 5 1 2. - <_> - - <_> - 5 11 3 6 -1. - <_> - 5 14 3 3 2. - <_> - - <_> - 8 12 3 2 -1. - <_> - 8 13 3 1 2. - <_> - - <_> - 11 13 7 6 -1. - <_> - 11 15 7 2 3. - <_> - - <_> - 7 14 6 3 -1. - <_> - 7 15 6 1 3. - <_> - - <_> - 3 13 14 4 -1. - <_> - 3 13 7 2 2. - <_> - 10 15 7 2 2. - <_> - - <_> - 8 14 4 6 -1. - <_> - 8 14 2 3 2. - <_> - 10 17 2 3 2. - <_> - - <_> - 8 15 4 3 -1. - <_> - 8 16 4 1 3. - <_> - - <_> - 7 16 6 2 -1. - <_> - 9 16 2 2 3. - <_> - - <_> - 7 7 6 2 -1. - <_> - 7 8 6 1 2. - <_> - - <_> - 3 9 13 3 -1. - <_> - 3 10 13 1 3. - <_> - - <_> - 9 8 3 4 -1. - <_> - 9 10 3 2 2. - <_> - - <_> - 8 10 4 3 -1. - <_> - 8 11 4 1 3. - <_> - - <_> - 7 7 3 4 -1. - <_> - 8 7 1 4 3. - <_> - - <_> - 8 7 3 5 -1. - <_> - 9 7 1 5 3. - <_> - - <_> - 12 3 3 4 -1. - <_> - 13 3 1 4 3. - <_> - - <_> - 9 7 2 3 -1. - <_> - 9 7 1 3 2. - <_> - - <_> - 5 3 3 4 -1. - <_> - 6 3 1 4 3. - <_> - - <_> - 3 7 12 1 -1. - <_> - 7 7 4 1 3. - <_> - - <_> - 12 5 3 3 -1. - <_> - 12 6 3 1 3. - <_> - - <_> - 11 2 6 2 -1. - <_> - 11 3 6 1 2. - <_> - - <_> - 3 2 14 2 -1. - <_> - 3 2 7 1 2. - <_> - 10 3 7 1 2. - <_> - - <_> - 6 1 7 14 -1. - <_> - 6 8 7 7 2. - <_> - - <_> - 8 0 12 5 -1. - <_> - 8 0 6 5 2. - <_> - - <_> - 1 9 18 1 -1. - <_> - 7 9 6 1 3. - <_> - - <_> - 0 0 10 5 -1. - <_> - 5 0 5 5 2. - <_> - - <_> - 2 5 8 15 -1. - <_> - 2 10 8 5 3. - <_> - - <_> - 12 5 3 3 -1. - <_> - 12 6 3 1 3. - <_> - - <_> - 13 4 2 3 -1. - <_> - 13 5 2 1 3. - <_> - - <_> - 2 15 4 3 -1. - <_> - 2 16 4 1 3. - <_> - - <_> - 5 6 10 3 -1. - <_> - 10 6 5 3 2. 
- <_> - - <_> - 11 6 2 2 -1. - <_> - 12 6 1 1 2. - <_> - 11 7 1 1 2. - <_> - - <_> - 12 4 4 3 -1. - <_> - 12 5 4 1 3. - <_> - - <_> - 7 6 2 2 -1. - <_> - 7 6 1 1 2. - <_> - 8 7 1 1 2. - <_> - - <_> - 4 4 4 3 -1. - <_> - 4 5 4 1 3. - <_> - - <_> - 11 4 3 3 -1. - <_> - 12 4 1 3 3. - <_> - - <_> - 9 3 2 1 -1. - <_> - 9 3 1 1 2. - <_> - - <_> - 4 5 5 3 -1. - <_> - 4 6 5 1 3. - <_> - - <_> - 4 6 4 3 -1. - <_> - 4 7 4 1 3. - <_> - - <_> - 11 4 3 3 -1. - <_> - 12 4 1 3 3. - <_> - - <_> - 8 8 4 3 -1. - <_> - 8 9 4 1 3. - <_> - - <_> - 6 4 3 3 -1. - <_> - 7 4 1 3 3. - <_> - - <_> - 4 14 1 3 -1. - <_> - 4 15 1 1 3. - <_> - - <_> - 9 7 2 3 -1. - <_> - 9 7 1 3 2. - <_> - - <_> - 17 0 3 2 -1. - <_> - 17 1 3 1 2. - <_> - - <_> - 8 10 2 9 -1. - <_> - 8 13 2 3 3. - <_> - - <_> - 0 8 18 2 -1. - <_> - 0 9 18 1 2. - <_> - - <_> - 9 15 2 3 -1. - <_> - 9 16 2 1 3. - <_> - - <_> - 8 7 4 3 -1. - <_> - 8 8 4 1 3. - <_> - - <_> - 1 14 6 6 -1. - <_> - 1 14 3 3 2. - <_> - 4 17 3 3 2. - <_> - - <_> - 0 18 6 2 -1. - <_> - 0 19 6 1 2. - <_> - - <_> - 12 9 4 3 -1. - <_> - 12 9 2 3 2. - <_> - - <_> - 9 8 3 8 -1. - <_> - 10 8 1 8 3. - <_> - - <_> - 4 9 4 3 -1. - <_> - 6 9 2 3 2. - <_> - - <_> - 4 18 6 1 -1. - <_> - 6 18 2 1 3. - <_> - - <_> - 9 7 3 2 -1. - <_> - 10 7 1 2 3. - <_> - - <_> - 6 7 8 12 -1. - <_> - 10 7 4 6 2. - <_> - 6 13 4 6 2. - <_> - - <_> - 8 7 3 2 -1. - <_> - 9 7 1 2 3. - <_> - - <_> - 8 7 3 6 -1. - <_> - 9 7 1 6 3. - <_> - - <_> - 3 16 14 4 -1. - <_> - 10 16 7 2 2. - <_> - 3 18 7 2 2. - <_> - - <_> - 1 14 18 4 -1. - <_> - 10 14 9 2 2. - <_> - 1 16 9 2 2. - <_> - - <_> - 8 7 3 3 -1. - <_> - 8 8 3 1 3. - <_> - - <_> - 0 4 20 12 -1. - <_> - 0 4 10 6 2. - <_> - 10 10 10 6 2. - <_> - - <_> - 5 5 10 12 -1. - <_> - 10 5 5 6 2. - <_> - 5 11 5 6 2. - <_> - - <_> - 10 2 4 7 -1. - <_> - 10 2 2 7 2. - <_> - - <_> - 8 11 4 3 -1. - <_> - 8 12 4 1 3. - <_> - - <_> - 8 12 3 3 -1. - <_> - 8 13 3 1 3. - <_> - - <_> - 13 13 5 6 -1. - <_> - 13 15 5 2 3. - <_> - - <_> - 7 0 6 6 -1. 
- <_> - 9 0 2 6 3. - <_> - - <_> - 2 13 5 6 -1. - <_> - 2 15 5 2 3. - <_> - - <_> - 0 4 2 12 -1. - <_> - 0 4 1 6 2. - <_> - 1 10 1 6 2. - <_> - - <_> - 9 19 3 1 -1. - <_> - 10 19 1 1 3. - <_> - - <_> - 18 0 2 6 -1. - <_> - 18 2 2 2 3. - <_> - - <_> - 0 3 1 6 -1. - <_> - 0 5 1 2 3. - <_> - - <_> - 0 0 3 6 -1. - <_> - 0 2 3 2 3. - <_> - - <_> - 17 2 3 7 -1. - <_> - 18 2 1 7 3. - <_> - - <_> - 10 3 4 7 -1. - <_> - 10 3 2 7 2. - <_> - - <_> - 0 2 3 7 -1. - <_> - 1 2 1 7 3. - <_> - - <_> - 6 2 4 8 -1. - <_> - 8 2 2 8 2. - <_> - - <_> - 13 0 1 4 -1. - <_> - 13 2 1 2 2. - <_> - - <_> - 5 1 12 5 -1. - <_> - 9 1 4 5 3. - <_> - - <_> - 6 0 1 4 -1. - <_> - 6 2 1 2 2. - <_> - - <_> - 3 1 12 5 -1. - <_> - 7 1 4 5 3. - <_> - - <_> - 9 12 3 8 -1. - <_> - 10 12 1 8 3. - <_> - - <_> - 7 13 6 1 -1. - <_> - 9 13 2 1 3. - <_> - - <_> - 7 14 6 3 -1. - <_> - 7 15 6 1 3. - <_> - - <_> - 5 16 7 3 -1. - <_> - 5 17 7 1 3. - <_> - - <_> - 0 12 20 6 -1. - <_> - 0 14 20 2 3. - <_> - - <_> - 4 18 14 2 -1. - <_> - 4 19 14 1 2. - <_> - - <_> - 8 12 3 8 -1. - <_> - 9 12 1 8 3. - <_> - - <_> - 7 13 3 3 -1. - <_> - 7 14 3 1 3. - <_> - - <_> - 5 5 12 10 -1. - <_> - 11 5 6 5 2. - <_> - 5 10 6 5 2. - <_> - - <_> - 8 1 5 10 -1. - <_> - 8 6 5 5 2. - <_> - - <_> - 5 4 9 12 -1. - <_> - 5 10 9 6 2. - <_> - - <_> - 7 13 6 6 -1. - <_> - 7 15 6 2 3. - <_> - - <_> - 8 4 5 16 -1. - <_> - 8 12 5 8 2. - <_> - - <_> - 8 12 4 6 -1. - <_> - 8 15 4 3 2. - <_> - - <_> - 7 13 2 2 -1. - <_> - 7 13 1 1 2. - <_> - 8 14 1 1 2. - <_> - - <_> - 7 12 2 2 -1. - <_> - 7 12 1 1 2. - <_> - 8 13 1 1 2. - <_> - - <_> - 18 0 2 14 -1. - <_> - 18 0 1 14 2. - <_> - - <_> - 12 11 7 2 -1. - <_> - 12 12 7 1 2. - <_> - - <_> - 1 18 1 2 -1. - <_> - 1 19 1 1 2. - <_> - - <_> - 2 18 1 2 -1. - <_> - 2 19 1 1 2. - <_> - - <_> - 9 7 2 1 -1. - <_> - 9 7 1 1 2. - <_> - - <_> - 9 6 2 3 -1. - <_> - 9 6 1 3 2. - <_> - - <_> - 3 1 2 2 -1. - <_> - 4 1 1 2 2. - <_> - - <_> - 3 0 3 2 -1. - <_> - 3 1 3 1 2. - <_> - - <_> - 12 10 3 4 -1. 
- <_> - 12 12 3 2 2. - <_> - - <_> - 7 7 8 2 -1. - <_> - 7 8 8 1 2. - <_> - - <_> - 8 8 3 4 -1. - <_> - 8 10 3 2 2. - <_> - - <_> - 7 12 6 3 -1. - <_> - 7 13 6 1 3. - <_> - - <_> - 0 2 10 3 -1. - <_> - 5 2 5 3 2. - <_> - - <_> - 0 1 20 6 -1. - <_> - 0 3 20 2 3. - <_> - - <_> - 7 6 6 3 -1. - <_> - 9 6 2 3 3. - <_> - - <_> - 3 7 14 4 -1. - <_> - 3 9 14 2 2. - <_> - - <_> - 5 7 3 6 -1. - <_> - 5 9 3 2 3. - <_> - - <_> - 8 8 3 12 -1. - <_> - 8 12 3 4 3. - <_> - - <_> - 9 17 6 2 -1. - <_> - 12 17 3 1 2. - <_> - 9 18 3 1 2. - <_> - - <_> - 10 17 4 3 -1. - <_> - 10 18 4 1 3. - <_> - - <_> - 4 2 4 2 -1. - <_> - 4 3 4 1 2. - <_> - - <_> - 7 3 6 14 -1. - <_> - 9 3 2 14 3. - <_> - - <_> - 15 13 1 6 -1. - <_> - 15 16 1 3 2. - <_> - - <_> - 13 14 2 6 -1. - <_> - 13 16 2 2 3. - <_> - - <_> - 4 11 5 6 -1. - <_> - 4 14 5 3 2. - <_> - - <_> - 4 17 4 2 -1. - <_> - 6 17 2 2 2. - <_> - - <_> - 0 6 20 2 -1. - <_> - 0 6 10 2 2. - <_> - - <_> - 6 5 10 12 -1. - <_> - 11 5 5 6 2. - <_> - 6 11 5 6 2. - <_> - - <_> - 4 0 2 12 -1. - <_> - 4 0 1 6 2. - <_> - 5 6 1 6 2. - <_> - - <_> - 4 1 6 2 -1. - <_> - 6 1 2 2 3. - <_> - - <_> - 13 7 2 1 -1. - <_> - 13 7 1 1 2. - <_> - - <_> - 5 5 15 6 -1. - <_> - 5 7 15 2 3. - <_> - - <_> - 1 10 18 2 -1. - <_> - 1 10 9 1 2. - <_> - 10 11 9 1 2. - <_> - - <_> - 1 6 15 7 -1. - <_> - 6 6 5 7 3. - <_> - - <_> - 8 14 4 3 -1. - <_> - 8 15 4 1 3. - <_> - - <_> - 9 14 3 3 -1. - <_> - 9 15 3 1 3. - <_> - - <_> - 8 14 4 3 -1. - <_> - 8 15 4 1 3. - <_> - - <_> - 8 13 3 2 -1. - <_> - 8 14 3 1 2. - <_> - - <_> - 15 14 5 3 -1. - <_> - 15 15 5 1 3. - <_> - - <_> - 0 14 20 1 -1. - <_> - 0 14 10 1 2. - <_> - - <_> - 0 14 6 3 -1. - <_> - 0 15 6 1 3. - <_> - - <_> - 5 3 4 2 -1. - <_> - 5 4 4 1 2. - <_> - - <_> - 0 6 20 1 -1. - <_> - 0 6 10 1 2. - <_> - - <_> - 6 3 10 14 -1. - <_> - 11 3 5 7 2. - <_> - 6 10 5 7 2. - <_> - - <_> - 8 12 4 2 -1. - <_> - 8 13 4 1 2. - <_> - - <_> - 6 3 8 6 -1. - <_> - 6 3 4 3 2. - <_> - 10 6 4 3 2. - <_> - - <_> - 13 7 2 1 -1. - <_> - 13 7 1 1 2. 
- <_> - - <_> - 6 3 10 14 -1. - <_> - 11 3 5 7 2. - <_> - 6 10 5 7 2. - <_> - - <_> - 5 7 2 1 -1. - <_> - 6 7 1 1 2. - <_> - - <_> - 4 3 10 14 -1. - <_> - 4 3 5 7 2. - <_> - 9 10 5 7 2. - <_> - - <_> - 9 7 2 2 -1. - <_> - 9 7 1 2 2. - <_> - - <_> - 0 3 20 1 -1. - <_> - 0 3 10 1 2. - <_> - - <_> - 2 1 10 3 -1. - <_> - 2 2 10 1 3. - <_> - - <_> - 9 7 2 2 -1. - <_> - 10 7 1 2 2. - <_> - - <_> - 9 17 3 2 -1. - <_> - 10 17 1 2 3. - <_> - - <_> - 9 7 3 6 -1. - <_> - 10 7 1 6 3. - <_> - - <_> - 8 17 3 2 -1. - <_> - 9 17 1 2 3. - <_> - - <_> - 8 7 3 6 -1. - <_> - 9 7 1 6 3. - <_> - - <_> - 16 3 4 6 -1. - <_> - 16 5 4 2 3. - <_> - - <_> - 15 6 2 12 -1. - <_> - 16 6 1 6 2. - <_> - 15 12 1 6 2. - <_> - - <_> - 1 4 18 10 -1. - <_> - 1 4 9 5 2. - <_> - 10 9 9 5 2. - <_> - - <_> - 9 4 2 4 -1. - <_> - 9 6 2 2 2. - <_> - - <_> - 12 5 3 2 -1. - <_> - 12 6 3 1 2. - <_> - - <_> - 5 12 10 4 -1. - <_> - 5 14 10 2 2. - <_> - - <_> - 5 5 3 2 -1. - <_> - 5 6 3 1 2. - <_> - - <_> - 4 6 12 6 -1. - <_> - 8 6 4 6 3. - <_> - - <_> - 14 4 6 6 -1. - <_> - 14 6 6 2 3. - <_> - - <_> - 16 0 4 6 -1. - <_> - 18 0 2 3 2. - <_> - 16 3 2 3 2. - <_> - - <_> - 0 4 6 6 -1. - <_> - 0 6 6 2 3. - <_> - - <_> - 0 0 4 6 -1. - <_> - 0 0 2 3 2. - <_> - 2 3 2 3 2. - <_> - - <_> - 12 0 8 5 -1. - <_> - 12 0 4 5 2. - <_> - - <_> - 16 0 4 17 -1. - <_> - 16 0 2 17 2. - <_> - - <_> - 1 0 18 20 -1. - <_> - 7 0 6 20 3. - <_> - - <_> - 6 0 2 5 -1. - <_> - 7 0 1 5 2. - <_> - - <_> - 0 6 20 1 -1. - <_> - 0 6 10 1 2. - <_> - - <_> - 8 7 6 4 -1. - <_> - 10 7 2 4 3. - <_> - - <_> - 1 1 16 4 -1. - <_> - 1 1 8 2 2. - <_> - 9 3 8 2 2. - <_> - - <_> - 7 2 4 2 -1. - <_> - 7 2 2 1 2. - <_> - 9 3 2 1 2. - <_> - - <_> - 7 4 9 3 -1. - <_> - 7 5 9 1 3. - <_> - - <_> - 10 4 5 12 -1. - <_> - 10 10 5 6 2. - <_> - - <_> - 3 12 2 3 -1. - <_> - 3 13 2 1 3. - <_> - - <_> - 8 8 3 5 -1. - <_> - 9 8 1 5 3. - <_> - - <_> - 13 9 2 3 -1. - <_> - 13 9 1 3 2. - <_> - - <_> - 15 11 2 2 -1. - <_> - 15 12 2 1 2. - <_> - - <_> - 5 6 2 3 -1. 
- <_> - 5 7 2 1 3. - <_> - - <_> - 2 11 6 2 -1. - <_> - 2 12 6 1 2. - <_> - - <_> - 15 11 4 3 -1. - <_> - 15 12 4 1 3. - <_> - - <_> - 16 0 4 17 -1. - <_> - 16 0 2 17 2. - <_> - - <_> - 1 11 4 3 -1. - <_> - 1 12 4 1 3. - <_> - - <_> - 9 11 1 3 -1. - <_> - 9 12 1 1 3. - <_> - - <_> - 10 9 6 7 -1. - <_> - 10 9 3 7 2. - <_> - - <_> - 8 15 4 2 -1. - <_> - 8 16 4 1 2. - <_> - - <_> - 4 9 6 7 -1. - <_> - 7 9 3 7 2. - <_> - - <_> - 9 14 2 3 -1. - <_> - 9 15 2 1 3. - <_> - - <_> - 0 2 20 2 -1. - <_> - 10 2 10 1 2. - <_> - 0 3 10 1 2. - <_> - - <_> - 6 7 8 2 -1. - <_> - 6 8 8 1 2. - <_> - - <_> - 0 2 20 2 -1. - <_> - 0 2 10 1 2. - <_> - 10 3 10 1 2. - <_> - - <_> - 3 1 2 10 -1. - <_> - 3 1 1 5 2. - <_> - 4 6 1 5 2. - <_> - - <_> - 13 4 1 10 -1. - <_> - 13 9 1 5 2. - <_> - - <_> - 9 8 4 3 -1. - <_> - 9 9 4 1 3. - <_> - - <_> - 2 11 16 4 -1. - <_> - 2 11 8 2 2. - <_> - 10 13 8 2 2. - <_> - - <_> - 5 1 3 5 -1. - <_> - 6 1 1 5 3. - <_> - - <_> - 9 10 2 3 -1. - <_> - 9 11 2 1 3. - <_> - - <_> - 9 11 2 2 -1. - <_> - 9 12 2 1 2. - <_> - - <_> - 0 10 20 2 -1. - <_> - 0 11 20 1 2. - <_> - - <_> - 1 7 6 4 -1. - <_> - 1 7 3 2 2. - <_> - 4 9 3 2 2. - <_> - - <_> - 12 0 8 8 -1. - <_> - 16 0 4 4 2. - <_> - 12 4 4 4 2. - <_> - - <_> - 14 1 6 4 -1. - <_> - 16 1 2 4 3. - <_> - - <_> - 6 3 2 14 -1. - <_> - 6 10 2 7 2. - <_> - - <_> - 6 1 7 12 -1. - <_> - 6 7 7 6 2. - <_> - - <_> - 5 0 15 5 -1. - <_> - 10 0 5 5 3. - <_> - - <_> - 15 0 4 10 -1. - <_> - 15 0 2 10 2. - <_> - - <_> - 1 0 18 3 -1. - <_> - 7 0 6 3 3. - <_> - - <_> - 0 0 17 2 -1. - <_> - 0 1 17 1 2. - <_> - - <_> - 10 0 3 3 -1. - <_> - 11 0 1 3 3. - <_> - - <_> - 10 0 3 12 -1. - <_> - 11 0 1 12 3. - <_> - - <_> - 1 3 4 16 -1. - <_> - 1 3 2 8 2. - <_> - 3 11 2 8 2. - <_> - - <_> - 7 0 3 3 -1. - <_> - 8 0 1 3 3. - <_> - - <_> - 9 13 2 6 -1. - <_> - 9 16 2 3 2. - <_> - - <_> - 9 0 6 13 -1. - <_> - 11 0 2 13 3. - <_> - - <_> - 7 7 3 2 -1. - <_> - 8 7 1 2 3. - <_> - - <_> - 8 2 1 12 -1. - <_> - 8 6 1 4 3. - <_> - - <_> - 4 10 12 6 -1. 
- <_> - 10 10 6 3 2. - <_> - 4 13 6 3 2. - <_> - - <_> - 13 5 2 3 -1. - <_> - 13 6 2 1 3. - <_> - - <_> - 4 10 12 6 -1. - <_> - 4 10 6 3 2. - <_> - 10 13 6 3 2. - <_> - - <_> - 5 5 2 3 -1. - <_> - 5 6 2 1 3. - <_> - - <_> - 8 6 6 7 -1. - <_> - 10 6 2 7 3. - <_> - - <_> - 9 6 2 4 -1. - <_> - 9 6 1 4 2. - <_> - - <_> - 6 6 6 7 -1. - <_> - 8 6 2 7 3. - <_> - - <_> - 9 6 2 4 -1. - <_> - 10 6 1 4 2. - <_> - - <_> - 12 9 2 3 -1. - <_> - 12 9 1 3 2. - <_> - - <_> - 0 6 20 1 -1. - <_> - 0 6 10 1 2. - <_> - - <_> - 5 7 10 2 -1. - <_> - 10 7 5 2 2. - <_> - - <_> - 1 16 4 3 -1. - <_> - 1 17 4 1 3. - <_> - - <_> - 12 4 3 3 -1. - <_> - 12 5 3 1 3. - <_> - - <_> - 10 3 5 3 -1. - <_> - 10 4 5 1 3. - <_> - - <_> - 3 9 14 8 -1. - <_> - 3 9 7 4 2. - <_> - 10 13 7 4 2. - <_> - - <_> - 6 8 8 10 -1. - <_> - 6 8 4 5 2. - <_> - 10 13 4 5 2. - <_> - - <_> - 12 4 3 3 -1. - <_> - 12 5 3 1 3. - <_> - - <_> - 10 3 5 3 -1. - <_> - 10 4 5 1 3. - <_> - - <_> - 5 4 3 3 -1. - <_> - 5 5 3 1 3. - <_> - - <_> - 5 3 5 3 -1. - <_> - 5 4 5 1 3. - <_> - - <_> - 13 16 2 3 -1. - <_> - 13 17 2 1 3. - <_> - - <_> - 0 5 20 6 -1. - <_> - 0 7 20 2 3. - <_> - - <_> - 3 14 3 3 -1. - <_> - 3 15 3 1 3. - <_> - - <_> - 7 15 5 3 -1. - <_> - 7 16 5 1 3. - <_> - - <_> - 12 9 2 3 -1. - <_> - 12 9 1 3 2. - <_> - - <_> - 15 13 2 6 -1. - <_> - 15 13 1 6 2. - <_> - - <_> - 6 9 2 3 -1. - <_> - 7 9 1 3 2. - <_> - - <_> - 3 13 2 6 -1. - <_> - 4 13 1 6 2. - <_> - - <_> - 11 4 2 4 -1. - <_> - 11 4 1 4 2. - <_> - - <_> - 13 4 2 5 -1. - <_> - 13 4 1 5 2. - <_> - - <_> - 7 4 2 4 -1. - <_> - 8 4 1 4 2. - <_> - - <_> - 5 4 2 5 -1. - <_> - 6 4 1 5 2. - <_> - - <_> - 19 6 1 2 -1. - <_> - 19 7 1 1 2. - <_> - - <_> - 12 7 8 13 -1. - <_> - 12 7 4 13 2. - <_> - - <_> - 0 6 1 2 -1. - <_> - 0 7 1 1 2. - <_> - - <_> - 6 15 4 3 -1. - <_> - 6 16 4 1 3. - <_> - - <_> - 11 8 2 2 -1. - <_> - 11 9 2 1 2. - <_> - - <_> - 11 7 2 4 -1. - <_> - 11 7 1 4 2. - <_> - - <_> - 4 13 2 3 -1. - <_> - 4 14 2 1 3. - <_> - - <_> - 0 17 18 3 -1. 
- <_> - 6 17 6 3 3. - <_> - - <_> - 1 0 18 5 -1. - <_> - 7 0 6 5 3. - <_> - - <_> - 5 7 3 4 -1. - <_> - 5 9 3 2 2. - <_> - - <_> - 10 6 2 2 -1. - <_> - 10 6 1 2 2. - <_> - - <_> - 6 4 14 4 -1. - <_> - 13 4 7 2 2. - <_> - 6 6 7 2 2. - <_> - - <_> - 5 16 6 4 -1. - <_> - 5 16 3 2 2. - <_> - 8 18 3 2 2. - <_> - - <_> - 7 15 2 4 -1. - <_> - 7 17 2 2 2. - <_> - - <_> - 8 5 5 14 -1. - <_> - 8 12 5 7 2. - <_> - - <_> - 9 9 2 2 -1. - <_> - 9 10 2 1 2. - <_> - - <_> - 7 5 3 7 -1. - <_> - 8 5 1 7 3. - <_> - - <_> - 0 0 3 9 -1. - <_> - 0 3 3 3 3. - <_> - - <_> - 8 6 8 8 -1. - <_> - 12 6 4 4 2. - <_> - 8 10 4 4 2. - <_> - - <_> - 4 8 13 2 -1. - <_> - 4 9 13 1 2. - <_> - - <_> - 4 3 6 1 -1. - <_> - 6 3 2 1 3. - <_> - - <_> - 9 1 2 6 -1. - <_> - 9 3 2 2 3. - <_> - - <_> - 10 5 6 4 -1. - <_> - 12 5 2 4 3. - <_> - - <_> - 9 5 2 12 -1. - <_> - 9 9 2 4 3. - <_> - - <_> - 8 14 4 3 -1. - <_> - 8 15 4 1 3. - <_> - - <_> - 8 12 4 3 -1. - <_> - 8 13 4 1 3. - <_> - - <_> - 10 3 6 7 -1. - <_> - 12 3 2 7 3. - <_> - - <_> - 3 10 16 6 -1. - <_> - 3 12 16 2 3. - <_> - - <_> - 5 5 3 10 -1. - <_> - 5 10 3 5 2. - <_> - - <_> - 6 10 3 6 -1. - <_> - 6 13 3 3 2. - <_> - - <_> - 17 2 2 12 -1. - <_> - 17 2 1 12 2. - <_> - - <_> - 16 6 2 14 -1. - <_> - 16 13 2 7 2. - <_> - - <_> - 3 11 12 9 -1. - <_> - 3 14 12 3 3. - <_> - - <_> - 0 2 4 12 -1. - <_> - 2 2 2 12 2. - <_> - - <_> - 18 0 2 18 -1. - <_> - 18 0 1 18 2. - <_> - - <_> - 16 12 3 2 -1. - <_> - 16 13 3 1 2. - <_> - - <_> - 0 2 2 15 -1. - <_> - 1 2 1 15 2. - <_> - - <_> - 1 10 2 4 -1. - <_> - 1 12 2 2 2. - <_> - - <_> - 11 1 2 18 -1. - <_> - 11 1 1 18 2. - <_> - - <_> - 3 2 14 2 -1. - <_> - 10 2 7 1 2. - <_> - 3 3 7 1 2. - <_> - - <_> - 7 1 2 18 -1. - <_> - 8 1 1 18 2. - <_> - - <_> - 6 1 8 12 -1. - <_> - 6 7 8 6 2. - <_> - - <_> - 8 14 4 3 -1. - <_> - 8 15 4 1 3. - <_> - - <_> - 7 14 6 3 -1. - <_> - 7 15 6 1 3. - <_> - - <_> - 0 13 5 2 -1. - <_> - 0 14 5 1 2. - <_> - - <_> - 9 0 2 6 -1. - <_> - 9 0 1 3 2. - <_> - 10 3 1 3 2. 
- <_> - - <_> - 9 0 2 6 -1. - <_> - 10 0 1 3 2. - <_> - 9 3 1 3 2. - <_> - - <_> - 9 7 3 6 -1. - <_> - 10 7 1 6 3. - <_> - - <_> - 9 0 2 6 -1. - <_> - 9 0 1 3 2. - <_> - 10 3 1 3 2. - <_> - - <_> - 8 7 3 6 -1. - <_> - 9 7 1 6 3. - <_> - - <_> - 9 6 2 6 -1. - <_> - 9 6 1 6 2. - <_> - - <_> - 9 4 4 3 -1. - <_> - 9 4 2 3 2. - <_> - - <_> - 0 4 4 3 -1. - <_> - 0 5 4 1 3. - <_> - - <_> - 8 7 4 2 -1. - <_> - 8 8 4 1 2. - <_> - - <_> - 10 6 6 3 -1. - <_> - 12 6 2 3 3. - <_> - - <_> - 9 6 3 12 -1. - <_> - 9 10 3 4 3. - <_> - - <_> - 5 4 2 3 -1. - <_> - 5 5 2 1 3. - <_> - - <_> - 5 6 1 3 -1. - <_> - 5 7 1 1 3. - <_> - - <_> - 9 17 3 2 -1. - <_> - 10 17 1 2 3. - <_> - - <_> - 0 7 20 2 -1. - <_> - 0 8 20 1 2. - <_> - - <_> - 4 3 6 7 -1. - <_> - 6 3 2 7 3. - <_> - - <_> - 5 10 6 10 -1. - <_> - 5 10 3 5 2. - <_> - 8 15 3 5 2. - <_> - - <_> - 9 17 3 2 -1. - <_> - 10 17 1 2 3. - <_> - - <_> - 9 10 2 2 -1. - <_> - 9 11 2 1 2. - <_> - - <_> - 8 17 3 2 -1. - <_> - 9 17 1 2 3. - <_> - - <_> - 5 6 1 3 -1. - <_> - 5 7 1 1 3. - <_> - - <_> - 0 1 20 2 -1. - <_> - 10 1 10 1 2. - <_> - 0 2 10 1 2. - <_> - - <_> - 14 2 6 9 -1. - <_> - 14 5 6 3 3. - <_> - - <_> - 5 3 3 2 -1. - <_> - 5 4 3 1 2. - <_> - - <_> - 5 4 4 2 -1. - <_> - 7 4 2 2 2. - <_> - - <_> - 14 2 6 9 -1. - <_> - 14 5 6 3 3. - <_> - - <_> - 0 12 20 6 -1. - <_> - 0 14 20 2 3. - <_> - - <_> - 2 2 16 4 -1. - <_> - 2 2 8 2 2. - <_> - 10 4 8 2 2. - <_> - - <_> - 7 12 5 3 -1. - <_> - 7 13 5 1 3. - <_> - - <_> - 14 9 6 10 -1. - <_> - 14 9 3 10 2. - <_> - - <_> - 16 6 3 2 -1. - <_> - 16 7 3 1 2. - <_> - - <_> - 0 9 6 10 -1. - <_> - 3 9 3 10 2. - <_> - - <_> - 0 16 5 2 -1. - <_> - 0 17 5 1 2. - <_> - - <_> - 9 12 2 3 -1. - <_> - 9 13 2 1 3. - <_> - - <_> - 9 7 2 12 -1. - <_> - 9 11 2 4 3. - <_> - - <_> - 3 2 6 2 -1. - <_> - 5 2 2 2 3. - <_> - - <_> - 4 1 1 2 -1. - <_> - 4 2 1 1 2. - <_> - - <_> - 11 15 1 2 -1. - <_> - 11 16 1 1 2. - <_> - - <_> - 3 1 16 2 -1. - <_> - 11 1 8 1 2. - <_> - 3 2 8 1 2. - <_> - - <_> - 3 6 2 2 -1. 
- <_> - 3 6 1 1 2. - <_> - 4 7 1 1 2. - <_> - - <_> - 5 11 10 6 -1. - <_> - 5 11 5 3 2. - <_> - 10 14 5 3 2. - <_> - - <_> - 10 11 4 6 -1. - <_> - 10 14 4 3 2. - <_> - - <_> - 14 9 6 11 -1. - <_> - 16 9 2 11 3. - <_> - - <_> - 0 9 6 11 -1. - <_> - 2 9 2 11 3. - <_> - - <_> - 2 11 16 6 -1. - <_> - 2 11 8 3 2. - <_> - 10 14 8 3 2. - <_> - - <_> - 12 0 8 10 -1. - <_> - 16 0 4 5 2. - <_> - 12 5 4 5 2. - <_> - - <_> - 14 2 6 4 -1. - <_> - 16 2 2 4 3. - <_> - - <_> - 0 0 8 10 -1. - <_> - 0 0 4 5 2. - <_> - 4 5 4 5 2. - <_> - - <_> - 0 2 6 4 -1. - <_> - 2 2 2 4 3. - <_> - - <_> - 4 9 15 2 -1. - <_> - 9 9 5 2 3. - <_> - - <_> - 12 3 4 8 -1. - <_> - 14 3 2 4 2. - <_> - 12 7 2 4 2. - <_> - - <_> - 9 2 2 9 -1. - <_> - 10 2 1 9 2. - <_> - - <_> - 0 2 20 1 -1. - <_> - 10 2 10 1 2. - <_> - - <_> - 16 1 4 5 -1. - <_> - 16 1 2 5 2. - <_> - - <_> - 16 0 4 6 -1. - <_> - 16 3 4 3 2. - <_> - - <_> - 4 3 6 4 -1. - <_> - 6 3 2 4 3. - <_> - - <_> - 0 0 18 5 -1. - <_> - 6 0 6 5 3. - <_> - - <_> - 6 2 12 14 -1. - <_> - 12 2 6 7 2. - <_> - 6 9 6 7 2. - <_> - - <_> - 11 8 3 5 -1. - <_> - 12 8 1 5 3. - <_> - - <_> - 5 12 2 2 -1. - <_> - 5 13 2 1 2. - <_> - - <_> - 5 10 4 3 -1. - <_> - 7 10 2 3 2. - <_> - - <_> - 4 9 15 2 -1. - <_> - 9 9 5 2 3. - <_> - - <_> - 10 7 6 2 -1. - <_> - 12 7 2 2 3. - <_> - - <_> - 1 9 15 2 -1. - <_> - 6 9 5 2 3. - <_> - - <_> - 5 0 2 10 -1. - <_> - 5 0 1 5 2. - <_> - 6 5 1 5 2. - <_> - - <_> - 0 0 20 14 -1. - <_> - 0 7 20 7 2. - <_> - - <_> - 12 7 8 4 -1. - <_> - 12 7 4 4 2. - <_> - - <_> - 0 7 8 4 -1. - <_> - 4 7 4 4 2. - <_> - - <_> - 8 1 3 3 -1. - <_> - 9 1 1 3 3. - <_> - - <_> - 9 7 3 4 -1. - <_> - 10 7 1 4 3. - <_> - - <_> - 9 9 3 1 -1. - <_> - 10 9 1 1 3. - <_> - - <_> - 8 9 3 2 -1. - <_> - 8 10 3 1 2. - <_> - - <_> - 8 4 2 8 -1. - <_> - 8 4 1 4 2. - <_> - 9 8 1 4 2. - <_> - - <_> - 5 8 12 3 -1. - <_> - 5 9 12 1 3. - <_> - - <_> - 11 14 1 3 -1. - <_> - 11 15 1 1 3. - <_> - - <_> - 6 10 3 6 -1. - <_> - 6 12 3 2 3. - <_> - - <_> - 4 17 8 3 -1. 
- <_> - 4 18 8 1 3. - <_> - - <_> - 17 6 2 3 -1. - <_> - 17 7 2 1 3. - <_> - - <_> - 9 12 2 2 -1. - <_> - 10 12 1 1 2. - <_> - 9 13 1 1 2. - <_> - - <_> - 9 13 2 4 -1. - <_> - 9 13 1 2 2. - <_> - 10 15 1 2 2. - <_> - - <_> - 9 11 2 3 -1. - <_> - 9 12 2 1 3. - <_> - - <_> - 5 5 12 10 -1. - <_> - 11 5 6 5 2. - <_> - 5 10 6 5 2. - <_> - - <_> - 6 3 12 12 -1. - <_> - 12 3 6 6 2. - <_> - 6 9 6 6 2. - <_> - - <_> - 5 7 2 2 -1. - <_> - 5 7 1 1 2. - <_> - 6 8 1 1 2. - <_> - - <_> - 4 3 3 2 -1. - <_> - 5 3 1 2 3. - <_> - - <_> - 6 2 12 14 -1. - <_> - 12 2 6 7 2. - <_> - 6 9 6 7 2. - <_> - - <_> - 5 2 12 3 -1. - <_> - 9 2 4 3 3. - <_> - - <_> - 1 1 18 17 -1. - <_> - 7 1 6 17 3. - <_> - - <_> - 0 9 10 1 -1. - <_> - 5 9 5 1 2. - <_> - - <_> - 16 8 4 3 -1. - <_> - 16 9 4 1 3. - <_> - - <_> - 7 13 6 6 -1. - <_> - 7 16 6 3 2. - <_> - - <_> - 6 14 1 6 -1. - <_> - 6 16 1 2 3. - <_> - - <_> - 6 17 4 2 -1. - <_> - 6 18 4 1 2. - <_> - - <_> - 10 18 6 2 -1. - <_> - 13 18 3 1 2. - <_> - 10 19 3 1 2. - <_> - - <_> - 16 8 1 3 -1. - <_> - 16 9 1 1 3. - <_> - - <_> - 8 13 4 3 -1. - <_> - 8 14 4 1 3. - <_> - - <_> - 9 15 1 2 -1. - <_> - 9 16 1 1 2. - <_> - - <_> - 13 0 3 12 -1. - <_> - 14 0 1 12 3. - <_> - - <_> - 15 11 1 3 -1. - <_> - 15 12 1 1 3. - <_> - - <_> - 8 15 3 3 -1. - <_> - 8 16 3 1 3. - <_> - - <_> - 4 0 3 12 -1. - <_> - 5 0 1 12 3. - <_> - - <_> - 9 7 3 3 -1. - <_> - 10 7 1 3 3. - <_> - - <_> - 9 9 3 1 -1. - <_> - 10 9 1 1 3. - <_> - - <_> - 2 2 12 14 -1. - <_> - 2 2 6 7 2. - <_> - 8 9 6 7 2. - <_> - - <_> - 4 2 12 3 -1. - <_> - 8 2 4 3 3. - <_> - - <_> - 18 18 2 2 -1. - <_> - 18 18 1 2 2. - <_> - - <_> - 17 2 3 8 -1. - <_> - 18 2 1 8 3. - <_> - - <_> - 0 18 2 2 -1. - <_> - 1 18 1 2 2. - <_> - - <_> - 6 11 2 6 -1. - <_> - 6 14 2 3 2. - <_> - - <_> - 13 10 5 6 -1. - <_> - 13 12 5 2 3. - <_> - - <_> - 5 8 15 3 -1. - <_> - 5 9 15 1 3. - <_> - - <_> - 2 10 5 6 -1. - <_> - 2 12 5 2 3. - <_> - - <_> - 0 8 15 3 -1. - <_> - 0 9 15 1 3. - <_> - - <_> - 16 2 3 1 -1. - <_> - 17 2 1 1 3. 
- <_> - - <_> - 17 4 3 2 -1. - <_> - 18 4 1 2 3. - <_> - - <_> - 0 8 8 12 -1. - <_> - 0 8 4 6 2. - <_> - 4 14 4 6 2. - <_> - - <_> - 1 7 8 6 -1. - <_> - 1 7 4 3 2. - <_> - 5 10 4 3 2. - <_> - - <_> - 14 1 6 2 -1. - <_> - 16 1 2 2 3. - <_> - - <_> - 15 0 4 4 -1. - <_> - 17 0 2 2 2. - <_> - 15 2 2 2 2. - <_> - - <_> - 1 1 4 11 -1. - <_> - 3 1 2 11 2. - <_> - - <_> - 5 5 1 8 -1. - <_> - 5 9 1 4 2. - <_> - - <_> - 7 7 6 1 -1. - <_> - 9 7 2 1 3. - <_> - - <_> - 4 7 12 2 -1. - <_> - 8 7 4 2 3. - <_> - - <_> - 8 4 4 4 -1. - <_> - 8 6 4 2 2. - <_> - - <_> - 2 4 9 1 -1. - <_> - 5 4 3 1 3. - <_> - - <_> - 9 12 2 8 -1. - <_> - 9 16 2 4 2. - <_> - - <_> - 3 8 14 12 -1. - <_> - 3 14 14 6 2. - <_> - - <_> - 6 13 7 3 -1. - <_> - 6 14 7 1 3. - <_> - - <_> - 5 9 6 3 -1. - <_> - 7 9 2 3 3. - <_> - - <_> - 12 1 6 3 -1. - <_> - 12 2 6 1 3. - <_> - - <_> - 8 12 6 2 -1. - <_> - 8 13 6 1 2. - <_> - - <_> - 0 2 18 2 -1. - <_> - 0 2 9 1 2. - <_> - 9 3 9 1 2. - <_> - - <_> - 6 10 3 6 -1. - <_> - 6 13 3 3 2. - <_> - - <_> - 14 0 6 6 -1. - <_> - 14 0 3 6 2. - <_> - - <_> - 15 0 5 8 -1. - <_> - 15 4 5 4 2. - <_> - - <_> - 7 16 6 4 -1. - <_> - 9 16 2 4 3. - <_> - - <_> - 2 11 14 4 -1. - <_> - 2 11 7 2 2. - <_> - 9 13 7 2 2. - <_> - - <_> - 14 10 6 10 -1. - <_> - 14 10 3 10 2. - <_> - - <_> - 9 8 10 12 -1. - <_> - 14 8 5 6 2. - <_> - 9 14 5 6 2. - <_> - - <_> - 0 10 6 10 -1. - <_> - 3 10 3 10 2. - <_> - - <_> - 1 8 10 12 -1. - <_> - 1 8 5 6 2. - <_> - 6 14 5 6 2. - <_> - - <_> - 9 3 6 1 -1. - <_> - 11 3 2 1 3. - <_> - - <_> - 7 4 6 3 -1. - <_> - 9 4 2 3 3. - <_> - - <_> - 5 3 6 1 -1. - <_> - 7 3 2 1 3. - <_> - - <_> - 4 5 6 3 -1. - <_> - 6 5 2 3 3. - <_> - - <_> - 9 16 3 3 -1. - <_> - 9 17 3 1 3. - <_> - - <_> - 8 14 6 3 -1. - <_> - 8 15 6 1 3. - <_> - - <_> - 6 0 8 12 -1. - <_> - 6 0 4 6 2. - <_> - 10 6 4 6 2. - <_> - - <_> - 4 12 2 3 -1. - <_> - 4 13 2 1 3. - <_> - - <_> - 12 16 6 3 -1. - <_> - 12 17 6 1 3. - <_> - - <_> - 7 12 7 2 -1. - <_> - 7 13 7 1 2. - <_> - - <_> - 2 16 6 3 -1. 
- <_> - 2 17 6 1 3. - <_> - - <_> - 0 7 16 6 -1. - <_> - 0 10 16 3 2. - <_> - - <_> - 9 7 3 3 -1. - <_> - 10 7 1 3 3. - <_> - - <_> - 9 7 3 5 -1. - <_> - 10 7 1 5 3. - <_> - - <_> - 0 5 20 10 -1. - <_> - 0 5 10 5 2. - <_> - 10 10 10 5 2. - <_> - - <_> - 3 1 4 2 -1. - <_> - 5 1 2 2 2. - <_> - - <_> - 7 6 8 10 -1. - <_> - 11 6 4 5 2. - <_> - 7 11 4 5 2. - <_> - - <_> - 17 6 3 2 -1. - <_> - 17 7 3 1 2. - <_> - - <_> - 5 6 8 10 -1. - <_> - 5 6 4 5 2. - <_> - 9 11 4 5 2. - <_> - - <_> - 5 12 10 6 -1. - <_> - 5 14 10 2 3. - <_> - - <_> - 9 7 3 3 -1. - <_> - 10 7 1 3 3. - <_> - - <_> - 10 3 2 6 -1. - <_> - 11 3 1 3 2. - <_> - 10 6 1 3 2. - <_> - - <_> - 0 4 3 3 -1. - <_> - 0 5 3 1 3. - <_> - - <_> - 3 16 8 4 -1. - <_> - 3 16 4 2 2. - <_> - 7 18 4 2 2. - <_> - - <_> - 8 13 5 2 -1. - <_> - 8 14 5 1 2. - <_> - - <_> - 8 7 4 12 -1. - <_> - 8 11 4 4 3. - <_> - - <_> - 5 9 2 2 -1. - <_> - 6 9 1 2 2. - <_> - - <_> - 9 15 2 3 -1. - <_> - 9 16 2 1 3. - <_> - - <_> - 13 9 2 3 -1. - <_> - 13 9 1 3 2. - <_> - - <_> - 14 0 6 17 -1. - <_> - 16 0 2 17 3. - <_> - - <_> - 5 10 2 2 -1. - <_> - 6 10 1 2 2. - <_> - - <_> - 2 9 9 1 -1. - <_> - 5 9 3 1 3. - <_> - - <_> - 9 11 2 3 -1. - <_> - 9 12 2 1 3. - <_> - - <_> - 7 11 6 3 -1. - <_> - 7 12 6 1 3. - <_> - - <_> - 0 6 3 2 -1. - <_> - 0 7 3 1 2. - <_> - - <_> - 7 0 6 1 -1. - <_> - 9 0 2 1 3. - <_> - - <_> - 9 16 3 3 -1. - <_> - 9 17 3 1 3. - <_> - - <_> - 2 13 17 6 -1. - <_> - 2 16 17 3 2. - <_> - - <_> - 1 3 3 7 -1. - <_> - 2 3 1 7 3. - <_> - - <_> - 1 1 6 4 -1. - <_> - 3 1 2 4 3. - <_> - - <_> - 14 1 6 5 -1. - <_> - 14 1 3 5 2. - <_> - - <_> - 13 2 3 2 -1. - <_> - 13 3 3 1 2. - <_> - - <_> - 0 1 6 5 -1. - <_> - 3 1 3 5 2. - <_> - - <_> - 2 3 2 6 -1. - <_> - 2 5 2 2 3. - <_> - - <_> - 9 10 3 2 -1. - <_> - 9 11 3 1 2. - <_> - - <_> - 8 13 4 3 -1. - <_> - 8 14 4 1 3. - <_> - - <_> - 6 3 3 1 -1. - <_> - 7 3 1 1 3. - <_> - - <_> - 8 2 3 12 -1. - <_> - 8 6 3 4 3. - <_> - - <_> - 11 12 1 2 -1. - <_> - 11 13 1 1 2. - <_> - - <_> - 11 12 2 2 -1. 
- <_> - 12 12 1 1 2. - <_> - 11 13 1 1 2. - <_> - - <_> - 5 5 2 2 -1. - <_> - 5 6 2 1 2. - <_> - - <_> - 5 4 1 3 -1. - <_> - 5 5 1 1 3. - <_> - - <_> - 3 11 16 4 -1. - <_> - 11 11 8 2 2. - <_> - 3 13 8 2 2. - <_> - - <_> - 0 10 20 3 -1. - <_> - 0 11 20 1 3. - <_> - - <_> - 1 11 16 4 -1. - <_> - 1 11 8 2 2. - <_> - 9 13 8 2 2. - <_> - - <_> - 4 2 4 2 -1. - <_> - 4 3 4 1 2. - <_> - - <_> - 12 6 2 2 -1. - <_> - 13 6 1 1 2. - <_> - 12 7 1 1 2. - <_> - - <_> - 12 11 6 6 -1. - <_> - 12 13 6 2 3. - <_> - - <_> - 6 6 2 2 -1. - <_> - 6 6 1 1 2. - <_> - 7 7 1 1 2. - <_> - - <_> - 6 4 4 16 -1. - <_> - 8 4 2 16 2. - <_> - - <_> - 11 18 3 2 -1. - <_> - 11 19 3 1 2. - <_> - - <_> - 9 17 6 2 -1. - <_> - 12 17 3 1 2. - <_> - 9 18 3 1 2. - <_> - - <_> - 2 13 5 2 -1. - <_> - 2 14 5 1 2. - <_> - - <_> - 3 15 2 2 -1. - <_> - 3 16 2 1 2. - <_> - - <_> - 9 7 3 3 -1. - <_> - 10 7 1 3 3. - <_> - - <_> - 9 6 2 6 -1. - <_> - 9 6 1 6 2. - <_> - - <_> - 1 14 7 6 -1. - <_> - 1 16 7 2 3. - <_> - - <_> - 8 1 2 11 -1. - <_> - 9 1 1 11 2. - <_> - - <_> - 9 7 2 4 -1. - <_> - 9 7 1 4 2. - <_> - - <_> - 11 10 2 1 -1. - <_> - 11 10 1 1 2. - <_> - - <_> - 0 3 3 9 -1. - <_> - 1 3 1 9 3. - <_> - - <_> - 0 3 3 6 -1. - <_> - 0 5 3 2 3. - <_> - - <_> - 11 15 2 2 -1. - <_> - 12 15 1 1 2. - <_> - 11 16 1 1 2. - <_> - - <_> - 11 14 2 2 -1. - <_> - 12 14 1 1 2. - <_> - 11 15 1 1 2. - <_> - - <_> - 7 15 2 2 -1. - <_> - 7 15 1 1 2. - <_> - 8 16 1 1 2. - <_> - - <_> - 7 14 2 2 -1. - <_> - 7 14 1 1 2. - <_> - 8 15 1 1 2. - <_> - - <_> - 8 13 4 6 -1. - <_> - 10 13 2 3 2. - <_> - 8 16 2 3 2. - <_> - - <_> - 2 14 16 4 -1. - <_> - 10 14 8 2 2. - <_> - 2 16 8 2 2. - <_> - - <_> - 9 8 2 2 -1. - <_> - 9 9 2 1 2. - <_> - - <_> - 7 7 5 3 -1. - <_> - 7 8 5 1 3. - <_> - - <_> - 7 5 6 2 -1. - <_> - 9 5 2 2 3. - <_> - - <_> - 9 1 6 18 -1. - <_> - 11 1 2 18 3. - <_> - - <_> - 8 6 3 4 -1. - <_> - 9 6 1 4 3. - <_> - - <_> - 8 5 2 4 -1. - <_> - 8 5 1 2 2. - <_> - 9 7 1 2 2. - <_> - - <_> - 9 13 2 6 -1. - <_> - 10 13 1 3 2. 
- <_> - 9 16 1 3 2. - <_> - - <_> - 11 0 3 18 -1. - <_> - 12 0 1 18 3. - <_> - - <_> - 6 0 3 18 -1. - <_> - 7 0 1 18 3. - <_> - - <_> - 5 15 4 2 -1. - <_> - 7 15 2 2 2. - <_> - - <_> - 1 9 18 1 -1. - <_> - 7 9 6 1 3. - <_> - - <_> - 0 0 20 3 -1. - <_> - 0 1 20 1 3. - <_> - - <_> - 9 6 2 4 -1. - <_> - 10 6 1 4 2. - <_> - - <_> - 6 10 6 2 -1. - <_> - 8 10 2 2 3. - <_> - - <_> - 0 7 20 1 -1. - <_> - 0 7 10 1 2. - <_> - - <_> - 11 3 5 4 -1. - <_> - 11 5 5 2 2. - <_> - - <_> - 5 7 10 1 -1. - <_> - 10 7 5 1 2. - <_> - - <_> - 8 10 3 3 -1. - <_> - 8 11 3 1 3. - <_> - - <_> - 2 0 16 8 -1. - <_> - 10 0 8 4 2. - <_> - 2 4 8 4 2. - <_> - - <_> - 11 0 9 10 -1. - <_> - 11 5 9 5 2. - <_> - - <_> - 0 2 8 18 -1. - <_> - 4 2 4 18 2. - <_> - - <_> - 0 0 2 6 -1. - <_> - 0 2 2 2 3. - <_> - - <_> - 6 0 9 2 -1. - <_> - 6 1 9 1 2. - <_> - - <_> - 4 1 12 2 -1. - <_> - 4 2 12 1 2. - <_> - - <_> - 2 1 16 14 -1. - <_> - 2 8 16 7 2. - <_> - - <_> - 5 1 8 12 -1. - <_> - 5 7 8 6 2. - <_> - - <_> - 9 11 2 2 -1. - <_> - 9 12 2 1 2. - <_> - - <_> - 9 10 5 6 -1. - <_> - 9 12 5 2 3. - <_> - - <_> - 3 0 13 8 -1. - <_> - 3 4 13 4 2. - <_> - - <_> - 6 7 5 8 -1. - <_> - 6 11 5 4 2. - <_> - - <_> - 9 5 2 3 -1. - <_> - 9 6 2 1 3. - <_> - - <_> - 6 8 8 3 -1. - <_> - 6 9 8 1 3. - <_> - - <_> - 2 2 7 6 -1. - <_> - 2 5 7 3 2. - <_> - - <_> - 2 1 14 4 -1. - <_> - 2 1 7 2 2. - <_> - 9 3 7 2 2. - <_> - - <_> - 11 14 1 3 -1. - <_> - 11 15 1 1 3. - <_> - - <_> - 6 15 8 2 -1. - <_> - 6 16 8 1 2. - <_> - - <_> - 8 14 1 3 -1. - <_> - 8 15 1 1 3. - <_> - - <_> - 8 11 2 8 -1. - <_> - 8 15 2 4 2. - <_> - - <_> - 6 15 8 2 -1. - <_> - 6 16 8 1 2. - <_> - - <_> - 7 16 8 3 -1. - <_> - 7 17 8 1 3. - <_> - - <_> - 0 16 2 2 -1. - <_> - 0 17 2 1 2. - <_> - - <_> - 1 16 8 4 -1. - <_> - 1 16 4 2 2. - <_> - 5 18 4 2 2. - <_> - - <_> - 2 9 16 3 -1. - <_> - 2 10 16 1 3. - <_> - - <_> - 13 11 2 4 -1. - <_> - 13 11 1 4 2. - <_> - - <_> - 0 13 16 6 -1. - <_> - 0 15 16 2 3. - <_> - - <_> - 5 11 2 4 -1. - <_> - 6 11 1 4 2. 
- <_> - - <_> - 18 2 2 18 -1. - <_> - 19 2 1 9 2. - <_> - 18 11 1 9 2. - <_> - - <_> - 19 7 1 9 -1. - <_> - 19 10 1 3 3. - <_> - - <_> - 0 2 2 18 -1. - <_> - 0 2 1 9 2. - <_> - 1 11 1 9 2. - <_> - - <_> - 0 7 1 9 -1. - <_> - 0 10 1 3 3. - <_> - - <_> - 14 12 2 2 -1. - <_> - 14 13 2 1 2. - <_> - - <_> - 11 14 2 3 -1. - <_> - 11 15 2 1 3. - <_> - - <_> - 7 8 6 2 -1. - <_> - 7 9 6 1 2. - <_> - - <_> - 7 12 4 6 -1. - <_> - 7 12 2 3 2. - <_> - 9 15 2 3 2. - <_> - - <_> - 8 13 5 3 -1. - <_> - 8 14 5 1 3. - <_> - - <_> - 12 14 2 2 -1. - <_> - 13 14 1 1 2. - <_> - 12 15 1 1 2. - <_> - - <_> - 7 13 6 3 -1. - <_> - 7 14 6 1 3. - <_> - - <_> - 7 13 5 2 -1. - <_> - 7 14 5 1 2. - <_> - - <_> - 2 10 16 4 -1. - <_> - 10 10 8 2 2. - <_> - 2 12 8 2 2. - <_> - - <_> - 7 0 6 6 -1. - <_> - 9 0 2 6 3. - <_> - - <_> - 7 1 6 3 -1. - <_> - 7 2 6 1 3. - <_> - - <_> - 0 12 6 2 -1. - <_> - 0 13 6 1 2. - <_> - - <_> - 6 3 11 2 -1. - <_> - 6 4 11 1 2. - <_> - - <_> - 12 0 8 6 -1. - <_> - 16 0 4 3 2. - <_> - 12 3 4 3 2. - <_> - - <_> - 8 12 1 2 -1. - <_> - 8 13 1 1 2. - <_> - - <_> - 8 8 1 12 -1. - <_> - 8 12 1 4 3. - <_> - - <_> - 11 11 2 2 -1. - <_> - 12 11 1 1 2. - <_> - 11 12 1 1 2. - <_> - - <_> - 12 7 3 13 -1. - <_> - 13 7 1 13 3. - <_> - - <_> - 7 11 2 2 -1. - <_> - 7 11 1 1 2. - <_> - 8 12 1 1 2. - <_> - - <_> - 3 13 1 3 -1. - <_> - 3 14 1 1 3. - <_> - - <_> - 10 18 3 2 -1. - <_> - 11 18 1 2 3. - <_> - - <_> - 11 11 2 1 -1. - <_> - 11 11 1 1 2. - <_> - - <_> - 1 10 5 9 -1. - <_> - 1 13 5 3 3. - <_> - - <_> - 4 8 6 4 -1. - <_> - 6 8 2 4 3. - <_> - - <_> - 13 12 1 4 -1. - <_> - 13 14 1 2 2. - <_> - - <_> - 11 3 4 14 -1. - <_> - 13 3 2 7 2. - <_> - 11 10 2 7 2. - <_> - - <_> - 6 12 1 4 -1. - <_> - 6 14 1 2 2. - <_> - - <_> - 5 3 4 14 -1. - <_> - 5 3 2 7 2. - <_> - 7 10 2 7 2. - <_> - - <_> - 10 18 3 2 -1. - <_> - 11 18 1 2 3. - <_> - - <_> - 9 12 3 3 -1. - <_> - 9 13 3 1 3. - <_> - - <_> - 2 2 12 6 -1. - <_> - 2 2 6 3 2. - <_> - 8 5 6 3 2. - <_> - - <_> - 6 6 6 2 -1. - <_> - 9 6 3 2 2. 
- <_> - - <_> - 1 0 18 12 -1. - <_> - 7 0 6 12 3. - <_> - - <_> - 5 7 6 4 -1. - <_> - 5 7 3 2 2. - <_> - 8 9 3 2 2. - <_> - - <_> - 5 7 10 4 -1. - <_> - 5 9 10 2 2. - <_> - - <_> - 7 7 6 4 -1. - <_> - 9 7 2 4 3. - <_> - - <_> - 9 5 2 2 -1. - <_> - 9 6 2 1 2. - <_> - - <_> - 9 9 2 2 -1. - <_> - 9 10 2 1 2. - <_> - - <_> - 6 17 8 3 -1. - <_> - 6 18 8 1 3. - <_> - - <_> - 9 17 6 2 -1. - <_> - 12 17 3 1 2. - <_> - 9 18 3 1 2. - <_> - - <_> - 4 12 2 2 -1. - <_> - 4 13 2 1 2. - <_> - - <_> - 3 12 9 2 -1. - <_> - 3 13 9 1 2. - <_> - - <_> - 8 3 6 1 -1. - <_> - 10 3 2 1 3. - <_> - - <_> - 9 3 4 6 -1. - <_> - 11 3 2 3 2. - <_> - 9 6 2 3 2. - <_> - - <_> - 0 3 6 5 -1. - <_> - 3 3 3 5 2. - <_> - - <_> - 2 0 2 18 -1. - <_> - 2 6 2 6 3. - <_> - - <_> - 14 2 4 9 -1. - <_> - 14 5 4 3 3. - <_> - - <_> - 10 18 3 2 -1. - <_> - 11 18 1 2 3. - <_> - - <_> - 2 2 4 9 -1. - <_> - 2 5 4 3 3. - <_> - - <_> - 7 18 3 2 -1. - <_> - 8 18 1 2 3. - <_> - - <_> - 10 14 3 3 -1. - <_> - 10 15 3 1 3. - <_> - - <_> - 10 12 2 6 -1. - <_> - 10 15 2 3 2. - <_> - - <_> - 7 5 3 6 -1. - <_> - 7 7 3 2 3. - <_> - - <_> - 3 3 6 2 -1. - <_> - 3 4 6 1 2. - <_> - - <_> - 8 4 7 3 -1. - <_> - 8 5 7 1 3. - <_> - - <_> - 13 6 2 3 -1. - <_> - 13 7 2 1 3. - <_> - - <_> - 8 8 2 12 -1. - <_> - 8 12 2 4 3. - <_> - - <_> - 5 4 8 14 -1. - <_> - 5 4 4 7 2. - <_> - 9 11 4 7 2. - <_> - - <_> - 0 1 20 8 -1. - <_> - 10 1 10 4 2. - <_> - 0 5 10 4 2. - <_> - - <_> - 4 0 12 2 -1. - <_> - 4 1 12 1 2. - <_> - - <_> - 0 1 20 8 -1. - <_> - 0 1 10 4 2. - <_> - 10 5 10 4 2. - <_> - - <_> - 4 0 12 2 -1. - <_> - 4 1 12 1 2. - <_> - - <_> - 9 5 6 3 -1. - <_> - 9 5 3 3 2. - <_> - - <_> - 8 13 10 6 -1. - <_> - 8 15 10 2 3. - <_> - - <_> - 5 5 6 3 -1. - <_> - 8 5 3 3 2. - <_> - - <_> - 6 3 6 1 -1. - <_> - 8 3 2 1 3. - <_> - - <_> - 11 18 9 2 -1. - <_> - 14 18 3 2 3. - <_> - - <_> - 13 11 6 7 -1. - <_> - 13 11 3 7 2. - <_> - - <_> - 4 6 12 10 -1. - <_> - 4 6 6 5 2. - <_> - 10 11 6 5 2. - <_> - - <_> - 8 17 3 3 -1. - <_> - 9 17 1 3 3. 
- <_> - - <_> - 11 18 9 2 -1. - <_> - 14 18 3 2 3. - <_> - - <_> - 13 11 6 8 -1. - <_> - 13 11 3 8 2. - <_> - - <_> - 4 16 2 2 -1. - <_> - 4 17 2 1 2. - <_> - - <_> - 7 15 4 4 -1. - <_> - 7 17 4 2 2. - <_> - - <_> - 12 4 3 3 -1. - <_> - 12 5 3 1 3. - <_> - - <_> - 13 6 2 3 -1. - <_> - 13 7 2 1 3. - <_> - - <_> - 5 11 6 1 -1. - <_> - 7 11 2 1 3. - <_> - - <_> - 7 10 3 1 -1. - <_> - 8 10 1 1 3. - <_> - - <_> - 0 12 20 4 -1. - <_> - 0 14 20 2 2. - <_> - - <_> - 10 2 3 2 -1. - <_> - 10 3 3 1 2. - <_> - - <_> - 5 4 3 3 -1. - <_> - 5 5 3 1 3. - <_> - - <_> - 5 5 4 3 -1. - <_> - 5 6 4 1 3. - <_> - - <_> - 8 8 4 3 -1. - <_> - 8 9 4 1 3. - <_> - - <_> - 10 4 2 12 -1. - <_> - 10 8 2 4 3. - <_> - - <_> - 0 3 4 3 -1. - <_> - 0 4 4 1 3. - <_> - - <_> - 1 3 2 3 -1. - <_> - 1 4 2 1 3. - <_> - - <_> - 16 1 4 11 -1. - <_> - 16 1 2 11 2. - <_> - - <_> - 18 2 2 16 -1. - <_> - 19 2 1 8 2. - <_> - 18 10 1 8 2. - <_> - - <_> - 1 8 6 12 -1. - <_> - 3 8 2 12 3. - <_> - - <_> - 7 2 6 2 -1. - <_> - 7 2 3 1 2. - <_> - 10 3 3 1 2. - <_> - - <_> - 12 4 8 2 -1. - <_> - 16 4 4 1 2. - <_> - 12 5 4 1 2. - <_> - - <_> - 10 6 6 2 -1. - <_> - 12 6 2 2 3. - <_> - - <_> - 0 4 8 2 -1. - <_> - 0 4 4 1 2. - <_> - 4 5 4 1 2. - <_> - - <_> - 1 3 3 5 -1. - <_> - 2 3 1 5 3. - <_> - - <_> - 16 3 4 6 -1. - <_> - 16 5 4 2 3. - <_> - - <_> - 8 6 4 3 -1. - <_> - 8 7 4 1 3. - <_> - - <_> - 8 14 1 3 -1. - <_> - 8 15 1 1 3. - <_> - - <_> - 4 11 1 2 -1. - <_> - 4 12 1 1 2. - <_> - - <_> - 8 14 6 3 -1. - <_> - 8 15 6 1 3. - <_> - - <_> - 7 15 7 3 -1. - <_> - 7 16 7 1 3. - <_> - - <_> - 9 12 2 8 -1. - <_> - 9 16 2 4 2. - <_> - - <_> - 4 6 6 2 -1. - <_> - 6 6 2 2 3. - <_> - - <_> - 12 7 4 2 -1. - <_> - 12 8 4 1 2. - <_> - - <_> - 5 3 13 10 -1. - <_> - 5 8 13 5 2. - <_> - - <_> - 4 7 4 2 -1. - <_> - 4 8 4 1 2. - <_> - - <_> - 0 8 16 2 -1. - <_> - 0 8 8 1 2. - <_> - 8 9 8 1 2. - <_> - - <_> - 11 8 2 5 -1. - <_> - 11 8 1 5 2. - <_> - - <_> - 10 0 6 13 -1. - <_> - 10 0 3 13 2. - <_> - - <_> - 1 6 4 2 -1. - <_> - 1 7 4 1 2. 
- <_> - - <_> - 4 3 2 1 -1. - <_> - 5 3 1 1 2. - <_> - - <_> - 11 8 2 5 -1. - <_> - 11 8 1 5 2. - <_> - - <_> - 12 10 4 8 -1. - <_> - 12 10 2 8 2. - <_> - - <_> - 7 8 2 5 -1. - <_> - 8 8 1 5 2. - <_> - - <_> - 4 10 4 8 -1. - <_> - 6 10 2 8 2. - <_> - - <_> - 6 7 9 12 -1. - <_> - 9 7 3 12 3. - <_> - - <_> - 11 13 2 3 -1. - <_> - 11 13 1 3 2. - <_> - - <_> - 7 10 6 10 -1. - <_> - 10 10 3 10 2. - <_> - - <_> - 8 11 4 8 -1. - <_> - 8 11 2 4 2. - <_> - 10 15 2 4 2. - <_> - - <_> - 16 1 4 11 -1. - <_> - 16 1 2 11 2. - <_> - - <_> - 18 2 2 4 -1. - <_> - 18 2 1 4 2. - <_> - - <_> - 5 6 6 2 -1. - <_> - 5 6 3 1 2. - <_> - 8 7 3 1 2. - <_> - - <_> - 5 4 1 3 -1. - <_> - 5 5 1 1 3. - <_> - - <_> - 11 1 4 14 -1. - <_> - 11 1 2 14 2. - <_> - - <_> - 4 2 12 3 -1. - <_> - 8 2 4 3 3. - <_> - - <_> - 5 1 4 14 -1. - <_> - 7 1 2 14 2. - <_> - - <_> - 7 3 6 2 -1. - <_> - 9 3 2 2 3. - <_> - - <_> - 2 0 18 4 -1. - <_> - 8 0 6 4 3. - <_> - - <_> - 9 5 2 10 -1. - <_> - 9 10 2 5 2. - <_> - - <_> - 8 6 3 4 -1. - <_> - 9 6 1 4 3. - <_> - - <_> - 5 5 9 11 -1. - <_> - 8 5 3 11 3. - <_> - - <_> - 10 6 3 5 -1. - <_> - 11 6 1 5 3. - <_> - - <_> - 8 9 6 5 -1. - <_> - 8 9 3 5 2. - <_> - - <_> - 7 6 3 5 -1. - <_> - 8 6 1 5 3. - <_> - - <_> - 6 10 6 3 -1. - <_> - 9 10 3 3 2. - <_> - - <_> - 10 0 3 7 -1. - <_> - 11 0 1 7 3. - <_> - - <_> - 0 3 20 12 -1. - <_> - 0 9 20 6 2. - <_> - - <_> - 9 7 2 2 -1. - <_> - 10 7 1 2 2. - <_> - - <_> - 5 9 4 1 -1. - <_> - 7 9 2 1 2. - <_> - - <_> - 13 13 3 2 -1. - <_> - 13 14 3 1 2. - <_> - - <_> - 16 9 4 6 -1. - <_> - 16 9 2 6 2. - <_> - - <_> - 7 15 6 3 -1. - <_> - 7 16 6 1 3. - <_> - - <_> - 6 16 7 3 -1. - <_> - 6 17 7 1 3. - <_> - - <_> - 11 14 9 6 -1. - <_> - 11 16 9 2 3. - <_> - - <_> - 19 14 1 3 -1. - <_> - 19 15 1 1 3. - <_> - - <_> - 0 9 6 6 -1. - <_> - 3 9 3 6 2. - <_> - - <_> - 0 19 9 1 -1. - <_> - 3 19 3 1 3. - <_> - - <_> - 11 14 9 6 -1. - <_> - 11 16 9 2 3. - <_> - - <_> - 12 12 6 6 -1. - <_> - 12 14 6 2 3. - <_> - - <_> - 1 14 8 6 -1. - <_> - 1 16 8 2 3. 
- <_> - - <_> - 8 1 3 2 -1. - <_> - 9 1 1 2 3. - <_> - - <_> - 18 2 2 4 -1. - <_> - 18 2 1 4 2. - <_> - - <_> - 14 0 6 3 -1. - <_> - 16 0 2 3 3. - <_> - - <_> - 0 2 2 4 -1. - <_> - 1 2 1 4 2. - <_> - - <_> - 0 0 6 3 -1. - <_> - 2 0 2 3 3. - <_> - - <_> - 9 0 3 2 -1. - <_> - 10 0 1 2 3. - <_> - - <_> - 12 1 2 2 -1. - <_> - 12 1 1 2 2. - <_> - - <_> - 8 0 3 2 -1. - <_> - 9 0 1 2 3. - <_> - - <_> - 6 1 2 2 -1. - <_> - 7 1 1 2 2. - <_> - - <_> - 10 8 2 3 -1. - <_> - 10 9 2 1 3. - <_> - - <_> - 13 15 6 2 -1. - <_> - 13 16 6 1 2. - <_> - - <_> - 8 12 2 2 -1. - <_> - 8 12 1 1 2. - <_> - 9 13 1 1 2. - <_> - - <_> - 8 15 3 5 -1. - <_> - 9 15 1 5 3. - <_> - - <_> - 8 6 4 12 -1. - <_> - 8 12 4 6 2. - <_> - - <_> - 7 6 7 8 -1. - <_> - 7 10 7 4 2. - <_> - - <_> - 0 11 8 2 -1. - <_> - 0 12 8 1 2. - <_> - - <_> - 8 11 2 2 -1. - <_> - 8 11 1 1 2. - <_> - 9 12 1 1 2. - <_> - - <_> - 7 7 12 1 -1. - <_> - 11 7 4 1 3. - <_> - - <_> - 10 8 3 2 -1. - <_> - 11 8 1 2 3. - <_> - - <_> - 1 7 12 1 -1. - <_> - 5 7 4 1 3. - <_> - - <_> - 6 5 8 2 -1. - <_> - 6 5 4 1 2. - <_> - 10 6 4 1 2. - <_> - - <_> - 9 10 3 10 -1. - <_> - 10 10 1 10 3. - <_> - - <_> - 16 0 2 4 -1. - <_> - 16 0 1 4 2. - <_> - - <_> - 8 10 3 10 -1. - <_> - 9 10 1 10 3. - <_> - - <_> - 9 10 2 3 -1. - <_> - 9 11 2 1 3. - <_> - - <_> - 8 9 4 2 -1. - <_> - 10 9 2 1 2. - <_> - 8 10 2 1 2. - <_> - - <_> - 12 14 7 6 -1. - <_> - 12 16 7 2 3. - <_> - - <_> - 6 1 3 1 -1. - <_> - 7 1 1 1 3. - <_> - - <_> - 2 0 2 4 -1. - <_> - 3 0 1 4 2. - <_> - - <_> - 11 11 2 2 -1. - <_> - 12 11 1 1 2. - <_> - 11 12 1 1 2. - <_> - - <_> - 12 12 6 6 -1. - <_> - 12 14 6 2 3. - <_> - - <_> - 1 0 6 10 -1. - <_> - 1 0 3 5 2. - <_> - 4 5 3 5 2. - <_> - - <_> - 3 0 2 9 -1. - <_> - 3 3 2 3 3. - <_> - - <_> - 14 13 3 2 -1. - <_> - 14 14 3 1 2. - <_> - - <_> - 15 2 3 2 -1. - <_> - 15 3 3 1 2. - <_> - - <_> - 2 13 5 2 -1. - <_> - 2 14 5 1 2. - <_> - - <_> - 3 4 12 10 -1. - <_> - 3 4 6 5 2. - <_> - 9 9 6 5 2. - <_> - - <_> - 5 1 14 6 -1. - <_> - 5 3 14 2 3. 
- <_> - - <_> - 15 3 3 2 -1. - <_> - 15 4 3 1 2. - <_> - - <_> - 7 11 2 2 -1. - <_> - 7 11 1 1 2. - <_> - 8 12 1 1 2. - <_> - - <_> - 2 14 6 6 -1. - <_> - 2 16 6 2 3. - <_> - - <_> - 6 13 8 3 -1. - <_> - 6 14 8 1 3. - <_> - - <_> - 1 19 18 1 -1. - <_> - 7 19 6 1 3. - <_> - - <_> - 8 12 1 6 -1. - <_> - 8 15 1 3 2. - <_> - - <_> - 0 0 14 15 -1. - <_> - 0 5 14 5 3. - <_> - - <_> - 3 0 16 8 -1. - <_> - 3 4 16 4 2. - <_> - - <_> - 6 1 8 12 -1. - <_> - 6 7 8 6 2. - <_> - - <_> - 5 3 3 3 -1. - <_> - 6 3 1 3 3. - <_> - - <_> - 5 1 3 4 -1. - <_> - 6 1 1 4 3. - <_> - - <_> - 15 14 4 6 -1. - <_> - 17 14 2 3 2. - <_> - 15 17 2 3 2. - <_> - - <_> - 12 11 6 8 -1. - <_> - 15 11 3 4 2. - <_> - 12 15 3 4 2. - <_> - - <_> - 8 7 2 4 -1. - <_> - 9 7 1 4 2. - <_> - - <_> - 6 11 3 1 -1. - <_> - 7 11 1 1 3. - <_> - - <_> - 12 3 2 14 -1. - <_> - 12 3 1 14 2. - <_> - - <_> - 12 11 6 2 -1. - <_> - 15 11 3 1 2. - <_> - 12 12 3 1 2. - <_> - - <_> - 0 2 5 2 -1. - <_> - 0 3 5 1 2. - <_> - - <_> - 0 0 15 1 -1. - <_> - 5 0 5 1 3. - <_> - - <_> - 12 11 6 2 -1. - <_> - 15 11 3 1 2. - <_> - 12 12 3 1 2. - <_> - - <_> - 10 5 2 2 -1. - <_> - 10 5 1 2 2. - <_> - - <_> - 9 7 2 2 -1. - <_> - 10 7 1 2 2. - <_> - - <_> - 9 0 2 10 -1. - <_> - 9 0 1 5 2. - <_> - 10 5 1 5 2. - <_> - - <_> - 18 14 2 2 -1. - <_> - 18 15 2 1 2. - <_> - - <_> - 13 11 4 9 -1. - <_> - 13 14 4 3 3. - <_> - - <_> - 8 13 2 2 -1. - <_> - 8 13 1 1 2. - <_> - 9 14 1 1 2. - <_> - - <_> - 7 8 4 3 -1. - <_> - 7 9 4 1 3. - <_> - - <_> - 8 9 4 2 -1. - <_> - 8 10 4 1 2. - <_> - - <_> - 13 12 4 2 -1. - <_> - 13 13 4 1 2. - <_> - - <_> - 6 14 2 2 -1. - <_> - 6 14 1 1 2. - <_> - 7 15 1 1 2. - <_> - - <_> - 0 14 2 2 -1. - <_> - 0 15 2 1 2. - <_> - - <_> - 7 13 6 3 -1. - <_> - 7 14 6 1 3. - <_> - - <_> - 7 9 10 6 -1. - <_> - 7 11 10 2 3. - <_> - - <_> - 2 9 12 4 -1. - <_> - 6 9 4 4 3. - <_> - - <_> - 7 9 6 11 -1. - <_> - 10 9 3 11 2. - <_> - - <_> - 9 7 2 3 -1. - <_> - 9 8 2 1 3. - <_> - - <_> - 9 14 4 3 -1. - <_> - 9 15 4 1 3. 
- <_> - - <_> - 2 3 3 17 -1. - <_> - 3 3 1 17 3. - <_> - - <_> - 0 11 6 3 -1. - <_> - 0 12 6 1 3. - <_> - - <_> - 4 3 11 9 -1. - <_> - 4 6 11 3 3. - <_> - - <_> - 0 2 6 11 -1. - <_> - 3 2 3 11 2. - <_> - - <_> - 13 0 4 5 -1. - <_> - 13 0 2 5 2. - <_> - - <_> - 9 7 6 4 -1. - <_> - 12 7 3 2 2. - <_> - 9 9 3 2 2. - <_> - - <_> - 5 7 8 2 -1. - <_> - 9 7 4 2 2. - <_> - - <_> - 1 8 15 1 -1. - <_> - 6 8 5 1 3. - <_> - - <_> - 4 12 12 2 -1. - <_> - 8 12 4 2 3. - <_> - - <_> - 13 0 4 10 -1. - <_> - 15 0 2 5 2. - <_> - 13 5 2 5 2. - <_> - - <_> - 9 9 2 2 -1. - <_> - 9 10 2 1 2. - <_> - - <_> - 3 9 6 2 -1. - <_> - 6 9 3 2 2. - <_> - - <_> - 8 17 4 3 -1. - <_> - 8 18 4 1 3. - <_> - - <_> - 8 3 9 2 -1. - <_> - 11 3 3 2 3. - <_> - - <_> - 3 3 9 2 -1. - <_> - 6 3 3 2 3. - <_> - - <_> - 5 0 9 14 -1. - <_> - 8 0 3 14 3. - <_> - - <_> - 7 3 7 10 -1. - <_> - 7 8 7 5 2. - <_> - - <_> - 4 8 13 3 -1. - <_> - 4 9 13 1 3. - <_> - - <_> - 3 12 14 4 -1. - <_> - 3 12 7 2 2. - <_> - 10 14 7 2 2. - <_> - - <_> - 8 12 4 2 -1. - <_> - 8 13 4 1 2. - <_> - - <_> - 6 10 9 8 -1. - <_> - 6 14 9 4 2. - <_> - - <_> - 9 12 2 8 -1. - <_> - 9 16 2 4 2. - <_> - - <_> - 8 12 3 3 -1. - <_> - 8 13 3 1 3. - <_> - - <_> - 5 5 4 10 -1. - <_> - 7 5 2 10 2. - <_> - - <_> - 14 15 3 3 -1. - <_> - 14 16 3 1 3. - <_> - - <_> - 4 6 13 3 -1. - <_> - 4 7 13 1 3. - <_> - - <_> - 3 15 3 3 -1. - <_> - 3 16 3 1 3. - <_> - - <_> - 3 9 4 2 -1. - <_> - 3 9 2 1 2. - <_> - 5 10 2 1 2. - <_> - - <_> - 0 11 20 4 -1. - <_> - 10 11 10 2 2. - <_> - 0 13 10 2 2. - <_> - - <_> - 8 15 4 3 -1. - <_> - 8 16 4 1 3. - <_> - - <_> - 0 11 20 4 -1. - <_> - 0 11 10 2 2. - <_> - 10 13 10 2 2. - <_> - - <_> - 8 15 4 3 -1. - <_> - 8 16 4 1 3. - <_> - - <_> - 10 13 1 6 -1. - <_> - 10 16 1 3 2. - <_> - - <_> - 2 1 18 2 -1. - <_> - 11 1 9 1 2. - <_> - 2 2 9 1 2. - <_> - - <_> - 8 14 3 3 -1. - <_> - 8 15 3 1 3. - <_> - - <_> - 4 1 6 1 -1. - <_> - 6 1 2 1 3. - <_> - - <_> - 11 13 1 3 -1. - <_> - 11 14 1 1 3. - <_> - - <_> - 13 5 2 12 -1. 
- <_> - 13 11 2 6 2. - <_> - - <_> - 1 14 18 6 -1. - <_> - 1 16 18 2 3. - <_> - - <_> - 8 13 1 3 -1. - <_> - 8 14 1 1 3. - <_> - - <_> - 7 13 6 3 -1. - <_> - 7 14 6 1 3. - <_> - - <_> - 9 10 3 2 -1. - <_> - 9 11 3 1 2. - <_> - - <_> - 5 1 3 3 -1. - <_> - 6 1 1 3 3. - <_> - - <_> - 5 5 6 5 -1. - <_> - 8 5 3 5 2. - <_> - - <_> - 7 5 6 14 -1. - <_> - 7 12 6 7 2. - <_> - - <_> - 7 16 6 2 -1. - <_> - 9 16 2 2 3. - <_> - - <_> - 0 2 2 12 -1. - <_> - 1 2 1 12 2. - <_> - - <_> - 1 0 5 3 -1. - <_> - 1 1 5 1 3. - <_> - - <_> - 12 4 3 3 -1. - <_> - 12 5 3 1 3. - <_> - - <_> - 12 6 3 3 -1. - <_> - 12 7 3 1 3. - <_> - - <_> - 5 4 3 3 -1. - <_> - 5 5 3 1 3. - <_> - - <_> - 5 6 3 3 -1. - <_> - 5 7 3 1 3. - <_> - - <_> - 8 12 4 8 -1. - <_> - 10 12 2 4 2. - <_> - 8 16 2 4 2. - <_> - - <_> - 2 17 18 2 -1. - <_> - 11 17 9 1 2. - <_> - 2 18 9 1 2. - <_> - - <_> - 9 3 2 2 -1. - <_> - 9 4 2 1 2. - <_> - - <_> - 8 5 4 6 -1. - <_> - 8 7 4 2 3. - <_> - - <_> - 9 0 8 6 -1. - <_> - 9 2 8 2 3. - <_> - - <_> - 1 0 18 4 -1. - <_> - 7 0 6 4 3. - <_> - - <_> - 0 0 4 8 -1. - <_> - 2 0 2 8 2. - <_> - - <_> - 0 4 6 9 -1. - <_> - 2 4 2 9 3. - <_> - - <_> - 1 4 18 2 -1. - <_> - 7 4 6 2 3. - <_> - - <_> - 8 16 12 4 -1. - <_> - 14 16 6 2 2. - <_> - 8 18 6 2 2. - <_> - - <_> - 0 0 18 2 -1. - <_> - 0 0 9 1 2. - <_> - 9 1 9 1 2. - <_> - - <_> - 3 0 3 18 -1. - <_> - 4 0 1 18 3. - <_> - - <_> - 14 9 4 7 -1. - <_> - 14 9 2 7 2. - <_> - - <_> - 15 14 2 2 -1. - <_> - 15 15 2 1 2. - <_> - - <_> - 2 9 4 7 -1. - <_> - 4 9 2 7 2. - <_> - - <_> - 3 14 2 2 -1. - <_> - 3 15 2 1 2. - <_> - - <_> - 11 0 6 6 -1. - <_> - 11 2 6 2 3. - <_> - - <_> - 14 0 2 6 -1. - <_> - 15 0 1 3 2. - <_> - 14 3 1 3 2. - <_> - - <_> - 7 11 2 2 -1. - <_> - 7 11 1 1 2. - <_> - 8 12 1 1 2. - <_> - - <_> - 7 10 2 2 -1. - <_> - 8 10 1 2 2. - <_> - - <_> - 9 14 2 6 -1. - <_> - 9 17 2 3 2. - <_> - - <_> - 12 18 4 2 -1. - <_> - 12 19 4 1 2. - <_> - - <_> - 8 17 4 3 -1. - <_> - 8 18 4 1 3. - <_> - - <_> - 2 18 8 2 -1. - <_> - 2 19 8 1 2. 
- <_> - - <_> - 2 9 16 3 -1. - <_> - 2 10 16 1 3. - <_> - - <_> - 9 9 2 2 -1. - <_> - 9 10 2 1 2. - <_> - - <_> - 5 14 2 4 -1. - <_> - 5 14 1 2 2. - <_> - 6 16 1 2 2. - <_> - - <_> - 8 9 4 2 -1. - <_> - 8 9 2 1 2. - <_> - 10 10 2 1 2. - <_> - - <_> - 9 5 2 5 -1. - <_> - 9 5 1 5 2. - <_> - - <_> - 9 9 3 2 -1. - <_> - 10 9 1 2 3. - <_> - - <_> - 8 9 3 2 -1. - <_> - 9 9 1 2 3. - <_> - - <_> - 8 8 3 6 -1. - <_> - 9 8 1 6 3. - <_> - - <_> - 8 12 4 8 -1. - <_> - 10 12 2 4 2. - <_> - 8 16 2 4 2. - <_> - - <_> - 2 17 16 2 -1. - <_> - 10 17 8 1 2. - <_> - 2 18 8 1 2. - <_> - - <_> - 8 12 3 8 -1. - <_> - 9 12 1 8 3. - <_> - - <_> - 3 10 1 3 -1. - <_> - 3 11 1 1 3. - <_> - - <_> - 9 14 10 6 -1. - <_> - 14 14 5 3 2. - <_> - 9 17 5 3 2. - <_> - - <_> - 14 13 3 6 -1. - <_> - 14 15 3 2 3. - <_> - - <_> - 1 19 18 1 -1. - <_> - 7 19 6 1 3. - <_> - - <_> - 2 10 15 2 -1. - <_> - 7 10 5 2 3. - <_> - - <_> - 4 17 16 3 -1. - <_> - 4 18 16 1 3. - <_> - - <_> - 8 6 4 9 -1. - <_> - 8 9 4 3 3. - <_> - - <_> - 9 16 2 4 -1. - <_> - 9 16 1 2 2. - <_> - 10 18 1 2 2. - <_> - - <_> - 5 5 10 8 -1. - <_> - 5 9 10 4 2. - <_> - - <_> - 13 1 4 2 -1. - <_> - 13 1 2 2 2. - <_> - - <_> - 14 0 3 6 -1. - <_> - 14 2 3 2 3. - <_> - - <_> - 6 7 2 2 -1. - <_> - 6 7 1 1 2. - <_> - 7 8 1 1 2. - <_> - - <_> - 7 1 6 1 -1. - <_> - 9 1 2 1 3. - <_> - - <_> - 9 11 3 3 -1. - <_> - 9 12 3 1 3. - <_> - - <_> - 12 9 3 3 -1. - <_> - 13 9 1 3 3. - <_> - - <_> - 8 11 3 3 -1. - <_> - 8 12 3 1 3. - <_> - - <_> - 5 9 3 3 -1. - <_> - 6 9 1 3 3. - <_> - - <_> - 10 11 1 3 -1. - <_> - 10 12 1 1 3. - <_> - - <_> - 7 9 6 4 -1. - <_> - 10 9 3 2 2. - <_> - 7 11 3 2 2. - <_> - - <_> - 4 7 2 2 -1. - <_> - 4 7 1 1 2. - <_> - 5 8 1 1 2. - <_> - - <_> - 5 7 3 1 -1. - <_> - 6 7 1 1 3. - <_> - - <_> - 18 3 2 3 -1. - <_> - 18 4 2 1 3. - <_> - - <_> - 13 1 4 2 -1. - <_> - 13 1 2 2 2. - <_> - - <_> - 3 1 4 2 -1. - <_> - 5 1 2 2 2. - <_> - - <_> - 3 0 5 2 -1. - <_> - 3 1 5 1 2. - <_> - - <_> - 14 7 6 4 -1. - <_> - 17 7 3 2 2. - <_> - 14 9 3 2 2. 
- <_> - - <_> - 4 8 16 2 -1. - <_> - 4 9 16 1 2. - <_> - - <_> - 2 11 5 6 -1. - <_> - 2 13 5 2 3. - <_> - - <_> - 5 16 2 4 -1. - <_> - 5 16 1 2 2. - <_> - 6 18 1 2 2. - <_> - - <_> - 15 6 2 12 -1. - <_> - 16 6 1 6 2. - <_> - 15 12 1 6 2. - <_> - - <_> - 13 3 6 16 -1. - <_> - 15 3 2 16 3. - <_> - - <_> - 4 5 12 12 -1. - <_> - 4 5 6 6 2. - <_> - 10 11 6 6 2. - <_> - - <_> - 5 1 10 13 -1. - <_> - 10 1 5 13 2. - <_> - - <_> - 11 5 2 2 -1. - <_> - 12 5 1 1 2. - <_> - 11 6 1 1 2. - <_> - - <_> - 13 5 1 3 -1. - <_> - 13 6 1 1 3. - <_> - - <_> - 7 4 2 4 -1. - <_> - 7 4 1 2 2. - <_> - 8 6 1 2 2. - <_> - - <_> - 7 5 6 4 -1. - <_> - 10 5 3 4 2. - <_> - - <_> - 12 4 4 6 -1. - <_> - 14 4 2 3 2. - <_> - 12 7 2 3 2. - <_> - - <_> - 12 11 7 6 -1. - <_> - 12 13 7 2 3. - <_> - - <_> - 5 6 6 6 -1. - <_> - 7 6 2 6 3. - <_> - - <_> - 9 8 2 2 -1. - <_> - 9 9 2 1 2. - <_> - - <_> - 15 6 2 2 -1. - <_> - 16 6 1 1 2. - <_> - 15 7 1 1 2. - <_> - - <_> - 14 7 4 4 -1. - <_> - 16 7 2 2 2. - <_> - 14 9 2 2 2. - <_> - - <_> - 5 5 6 2 -1. - <_> - 7 5 2 2 3. - <_> - - <_> - 1 19 18 1 -1. - <_> - 7 19 6 1 3. - <_> - - <_> - 12 3 3 3 -1. - <_> - 12 4 3 1 3. - <_> - - <_> - 16 0 2 3 -1. - <_> - 16 1 2 1 3. - <_> - - <_> - 5 3 3 3 -1. - <_> - 5 4 3 1 3. - <_> - - <_> - 2 0 2 3 -1. - <_> - 2 1 2 1 3. - <_> - - <_> - 15 6 2 2 -1. - <_> - 16 6 1 1 2. - <_> - 15 7 1 1 2. - <_> - - <_> - 10 13 1 6 -1. - <_> - 10 16 1 3 2. - <_> - - <_> - 0 7 10 2 -1. - <_> - 0 7 5 1 2. - <_> - 5 8 5 1 2. - <_> - - <_> - 3 10 6 2 -1. - <_> - 3 11 6 1 2. - <_> - - <_> - 12 18 4 2 -1. - <_> - 12 19 4 1 2. - <_> - - <_> - 12 18 2 2 -1. - <_> - 13 18 1 1 2. - <_> - 12 19 1 1 2. - <_> - - <_> - 6 19 2 1 -1. - <_> - 7 19 1 1 2. - <_> - - <_> - 0 4 2 16 -1. - <_> - 0 4 1 8 2. - <_> - 1 12 1 8 2. - <_> - - <_> - 16 1 4 9 -1. - <_> - 16 4 4 3 3. - <_> - - <_> - 10 2 1 2 -1. - <_> - 10 3 1 1 2. - <_> - - <_> - 4 14 4 6 -1. - <_> - 4 14 2 3 2. - <_> - 6 17 2 3 2. - <_> - - <_> - 4 15 1 4 -1. - <_> - 4 17 1 2 2. 
- <_> - - <_> - 0 2 20 4 -1. - <_> - 10 2 10 2 2. - <_> - 0 4 10 2 2. - <_> - - <_> - 14 5 2 8 -1. - <_> - 14 9 2 4 2. - <_> - - <_> - 5 12 4 5 -1. - <_> - 7 12 2 5 2. - <_> - - <_> - 0 13 9 6 -1. - <_> - 0 15 9 2 3. - <_> - - <_> - 9 14 11 3 -1. - <_> - 9 15 11 1 3. - <_> - - <_> - 7 14 7 3 -1. - <_> - 7 15 7 1 3. - <_> - - <_> - 3 6 2 2 -1. - <_> - 3 6 1 1 2. - <_> - 4 7 1 1 2. - <_> - - <_> - 6 7 2 7 -1. - <_> - 7 7 1 7 2. - <_> - - <_> - 14 5 1 3 -1. - <_> - 14 6 1 1 3. - <_> - - <_> - 13 4 4 3 -1. - <_> - 13 5 4 1 3. - <_> - - <_> - 2 7 4 4 -1. - <_> - 2 7 2 2 2. - <_> - 4 9 2 2 2. - <_> - - <_> - 2 9 13 6 -1. - <_> - 2 12 13 3 2. - <_> - - <_> - 10 1 3 4 -1. - <_> - 11 1 1 4 3. - <_> - - <_> - 9 8 5 2 -1. - <_> - 9 9 5 1 2. - <_> - - <_> - 0 14 11 3 -1. - <_> - 0 15 11 1 3. - <_> - - <_> - 8 11 2 8 -1. - <_> - 8 15 2 4 2. - <_> - - <_> - 5 11 10 6 -1. - <_> - 5 14 10 3 2. - <_> - - <_> - 5 13 15 5 -1. - <_> - 10 13 5 5 3. - <_> - - <_> - 8 10 1 10 -1. - <_> - 8 15 1 5 2. - <_> - - <_> - 4 14 6 2 -1. - <_> - 6 14 2 2 3. - <_> - - <_> - 7 14 7 3 -1. - <_> - 7 15 7 1 3. - <_> - - <_> - 7 16 9 3 -1. - <_> - 7 17 9 1 3. - <_> - - <_> - 8 7 3 3 -1. - <_> - 8 8 3 1 3. - <_> - - <_> - 3 5 1 6 -1. - <_> - 3 8 1 3 2. - <_> - - <_> - 6 5 11 2 -1. - <_> - 6 6 11 1 2. - <_> - - <_> - 9 0 3 2 -1. - <_> - 10 0 1 2 3. - <_> - - <_> - 5 5 1 3 -1. - <_> - 5 6 1 1 3. - <_> - - <_> - 8 7 3 2 -1. - <_> - 9 7 1 2 3. - <_> - - <_> - 5 2 10 6 -1. - <_> - 10 2 5 3 2. - <_> - 5 5 5 3 2. - <_> - - <_> - 8 4 6 4 -1. - <_> - 8 4 3 4 2. - <_> - - <_> - 8 16 3 4 -1. - <_> - 9 16 1 4 3. - <_> - - <_> - 9 13 2 6 -1. - <_> - 9 13 1 3 2. - <_> - 10 16 1 3 2. - <_> - - <_> - 9 8 3 1 -1. - <_> - 10 8 1 1 3. - <_> - - <_> - 2 5 18 15 -1. - <_> - 2 10 18 5 3. - <_> - - <_> - 1 3 6 2 -1. - <_> - 4 3 3 2 2. - <_> - - <_> - 7 6 6 2 -1. - <_> - 9 6 2 2 3. - <_> - - <_> - 8 17 4 3 -1. - <_> - 8 18 4 1 3. - <_> - - <_> - 10 13 2 3 -1. - <_> - 10 14 2 1 3. - <_> - - <_> - 0 10 20 4 -1. 
- <_> - 0 12 20 2 2. - <_> - - <_> - 5 7 6 4 -1. - <_> - 5 7 3 2 2. - <_> - 8 9 3 2 2. - <_> - - <_> - 11 12 1 2 -1. - <_> - 11 13 1 1 2. - <_> - - <_> - 10 10 2 3 -1. - <_> - 10 11 2 1 3. - <_> - - <_> - 9 5 2 2 -1. - <_> - 9 6 2 1 2. - <_> - - <_> - 4 4 1 10 -1. - <_> - 4 9 1 5 2. - <_> - - <_> - 11 18 4 2 -1. - <_> - 11 18 2 2 2. - <_> - - <_> - 12 18 3 2 -1. - <_> - 12 19 3 1 2. - <_> - - <_> - 0 6 16 6 -1. - <_> - 0 6 8 3 2. - <_> - 8 9 8 3 2. - <_> - - <_> - 7 6 4 12 -1. - <_> - 7 12 4 6 2. - <_> - - <_> - 11 18 4 2 -1. - <_> - 11 18 2 2 2. - <_> - - <_> - 12 18 3 2 -1. - <_> - 12 19 3 1 2. - <_> - - <_> - 8 12 1 2 -1. - <_> - 8 13 1 1 2. - <_> - - <_> - 8 13 1 3 -1. - <_> - 8 14 1 1 3. - <_> - - <_> - 11 18 4 2 -1. - <_> - 11 18 2 2 2. - <_> - - <_> - 14 12 4 6 -1. - <_> - 14 12 2 6 2. - <_> - - <_> - 6 0 3 4 -1. - <_> - 7 0 1 4 3. - <_> - - <_> - 4 0 2 8 -1. - <_> - 4 0 1 4 2. - <_> - 5 4 1 4 2. - <_> - - <_> - 11 17 9 3 -1. - <_> - 14 17 3 3 3. - <_> - - <_> - 16 2 4 5 -1. - <_> - 16 2 2 5 2. - <_> - - <_> - 0 2 5 9 -1. - <_> - 0 5 5 3 3. - <_> - - <_> - 7 2 3 2 -1. - <_> - 8 2 1 2 3. - <_> - - <_> - 11 17 9 3 -1. - <_> - 14 17 3 3 3. - <_> - - <_> - 16 2 4 5 -1. - <_> - 16 2 2 5 2. - <_> - - <_> - 0 17 9 3 -1. - <_> - 3 17 3 3 3. - <_> - - <_> - 0 2 4 5 -1. - <_> - 2 2 2 5 2. - <_> - - <_> - 5 11 10 9 -1. - <_> - 5 14 10 3 3. - <_> - - <_> - 9 6 3 3 -1. - <_> - 9 7 3 1 3. - <_> - - <_> - 3 17 5 3 -1. - <_> - 3 18 5 1 3. - <_> - - <_> - 7 5 4 7 -1. - <_> - 9 5 2 7 2. - <_> - - <_> - 9 8 2 5 -1. - <_> - 9 8 1 5 2. - <_> - - <_> - 2 2 18 2 -1. - <_> - 2 3 18 1 2. - <_> - - <_> - 2 8 15 6 -1. - <_> - 7 8 5 6 3. - <_> - - <_> - 9 8 2 5 -1. - <_> - 10 8 1 5 2. - <_> - - <_> - 12 10 4 6 -1. - <_> - 12 12 4 2 3. - <_> - - <_> - 14 3 6 2 -1. - <_> - 14 4 6 1 2. - <_> - - <_> - 5 5 2 3 -1. - <_> - 5 6 2 1 3. - <_> - - <_> - 4 6 3 3 -1. - <_> - 4 7 3 1 3. - <_> - - <_> - 14 12 3 3 -1. - <_> - 14 13 3 1 3. - <_> - - <_> - 6 12 11 3 -1. - <_> - 6 13 11 1 3. 
- <_> - - <_> - 1 2 3 6 -1. - <_> - 1 4 3 2 3. - <_> - - <_> - 1 0 4 7 -1. - <_> - 3 0 2 7 2. - <_> - - <_> - 9 8 3 4 -1. - <_> - 10 8 1 4 3. - <_> - - <_> - 10 9 2 2 -1. - <_> - 10 10 2 1 2. - <_> - - <_> - 8 8 3 4 -1. - <_> - 9 8 1 4 3. - <_> - - <_> - 4 4 10 10 -1. - <_> - 4 9 10 5 2. - <_> - - <_> - 9 10 3 2 -1. - <_> - 10 10 1 2 3. - <_> - - <_> - 9 10 3 2 -1. - <_> - 9 11 3 1 2. - <_> - - <_> - 8 10 3 2 -1. - <_> - 9 10 1 2 3. - <_> - - <_> - 2 4 14 12 -1. - <_> - 2 4 7 6 2. - <_> - 9 10 7 6 2. - <_> - - <_> - 10 12 1 6 -1. - <_> - 10 15 1 3 2. - <_> - - <_> - 7 3 8 16 -1. - <_> - 11 3 4 8 2. - <_> - 7 11 4 8 2. - <_> - - <_> - 5 6 8 10 -1. - <_> - 5 6 4 5 2. - <_> - 9 11 4 5 2. - <_> - - <_> - 6 2 8 8 -1. - <_> - 6 2 4 4 2. - <_> - 10 6 4 4 2. - <_> - - <_> - 10 5 4 2 -1. - <_> - 12 5 2 1 2. - <_> - 10 6 2 1 2. - <_> - - <_> - 12 4 3 3 -1. - <_> - 12 5 3 1 3. - <_> - - <_> - 4 19 12 1 -1. - <_> - 8 19 4 1 3. - <_> - - <_> - 8 2 3 1 -1. - <_> - 9 2 1 1 3. - <_> - - <_> - 13 17 4 3 -1. - <_> - 13 18 4 1 3. - <_> - - <_> - 7 14 6 3 -1. - <_> - 7 15 6 1 3. - <_> - - <_> - 9 14 2 3 -1. - <_> - 9 15 2 1 3. - <_> - - <_> - 7 15 6 3 -1. - <_> - 7 16 6 1 3. - <_> - - <_> - 10 18 3 2 -1. - <_> - 11 18 1 2 3. - <_> - - <_> - 14 12 2 3 -1. - <_> - 14 13 2 1 3. - <_> - - <_> - 4 10 4 6 -1. - <_> - 4 12 4 2 3. - <_> - - <_> - 4 13 3 2 -1. - <_> - 4 14 3 1 2. - <_> - - <_> - 9 16 2 3 -1. - <_> - 9 17 2 1 3. - <_> - - <_> - 10 18 3 2 -1. - <_> - 11 18 1 2 3. - <_> - - <_> - 7 18 3 2 -1. - <_> - 8 18 1 2 3. - <_> - - <_> - 1 10 4 2 -1. - <_> - 1 11 4 1 2. - <_> - - <_> - 12 4 6 3 -1. - <_> - 12 5 6 1 3. - <_> - - <_> - 14 4 1 3 -1. - <_> - 14 5 1 1 3. - <_> - - <_> - 2 4 6 3 -1. - <_> - 2 5 6 1 3. - <_> - - <_> - 5 4 1 3 -1. - <_> - 5 5 1 1 3. - <_> - - <_> - 14 12 3 3 -1. - <_> - 14 13 3 1 3. - <_> - - <_> - 15 12 2 3 -1. - <_> - 15 13 2 1 3. - <_> - - <_> - 3 16 4 3 -1. - <_> - 3 17 4 1 3. - <_> - - <_> - 8 0 4 2 -1. - <_> - 8 1 4 1 2. - <_> - - <_> - 0 0 20 1 -1. 
- <_> - 0 0 10 1 2. - <_> - - <_> - 9 7 3 4 -1. - <_> - 10 7 1 4 3. - <_> - - <_> - 0 0 20 1 -1. - <_> - 10 0 10 1 2. - <_> - - <_> - 8 7 3 4 -1. - <_> - 9 7 1 4 3. - <_> - - <_> - 1 6 19 3 -1. - <_> - 1 7 19 1 3. - <_> - - <_> - 12 7 4 2 -1. - <_> - 12 8 4 1 2. - <_> - - <_> - 7 8 3 3 -1. - <_> - 7 9 3 1 3. - <_> - - <_> - 7 7 3 3 -1. - <_> - 8 7 1 3 3. - <_> - - <_> - 2 9 16 3 -1. - <_> - 2 10 16 1 3. - <_> - - <_> - 9 4 2 12 -1. - <_> - 9 8 2 4 3. - <_> - - <_> - 7 3 2 5 -1. - <_> - 8 3 1 5 2. - <_> - - <_> - 9 7 2 3 -1. - <_> - 9 8 2 1 3. - <_> - - <_> - 9 14 4 3 -1. - <_> - 9 15 4 1 3. - <_> - - <_> - 7 8 6 4 -1. - <_> - 10 8 3 2 2. - <_> - 7 10 3 2 2. - <_> - - <_> - 9 7 2 2 -1. - <_> - 10 7 1 2 2. - <_> - - <_> - 5 5 6 6 -1. - <_> - 7 5 2 6 3. - <_> - - <_> - 9 1 3 6 -1. - <_> - 10 1 1 6 3. - <_> - - <_> - 4 5 12 2 -1. - <_> - 8 5 4 2 3. - <_> - - <_> - 4 2 6 4 -1. - <_> - 6 2 2 4 3. - <_> - - <_> - 4 7 8 2 -1. - <_> - 4 8 8 1 2. - <_> - - <_> - 3 6 14 6 -1. - <_> - 10 6 7 3 2. - <_> - 3 9 7 3 2. - <_> - - <_> - 3 6 14 3 -1. - <_> - 3 6 7 3 2. - <_> - - <_> - 0 5 2 2 -1. - <_> - 0 6 2 1 2. - <_> - - <_> - 8 13 4 3 -1. - <_> - 8 14 4 1 3. - <_> - - <_> - 13 0 3 20 -1. - <_> - 14 0 1 20 3. - <_> - - <_> - 10 8 10 3 -1. - <_> - 10 9 10 1 3. - <_> - - <_> - 4 0 3 20 -1. - <_> - 5 0 1 20 3. - <_> - - <_> - 0 8 10 3 -1. - <_> - 0 9 10 1 3. - <_> - - <_> - 12 5 3 4 -1. - <_> - 13 5 1 4 3. - <_> - - <_> - 6 7 12 4 -1. - <_> - 10 7 4 4 3. - <_> - - <_> - 1 14 6 6 -1. - <_> - 1 14 3 3 2. - <_> - 4 17 3 3 2. - <_> - - <_> - 1 17 6 2 -1. - <_> - 1 18 6 1 2. - <_> - - <_> - 14 8 6 12 -1. - <_> - 17 8 3 6 2. - <_> - 14 14 3 6 2. - <_> - - <_> - 18 5 2 2 -1. - <_> - 18 6 2 1 2. - <_> - - <_> - 3 16 4 2 -1. - <_> - 3 16 2 1 2. - <_> - 5 17 2 1 2. - <_> - - <_> - 2 16 6 2 -1. - <_> - 4 16 2 2 3. - <_> - - <_> - 14 8 6 12 -1. - <_> - 17 8 3 6 2. - <_> - 14 14 3 6 2. - <_> - - <_> - 18 5 2 2 -1. - <_> - 18 6 2 1 2. - <_> - - <_> - 5 16 9 2 -1. - <_> - 8 16 3 2 3. 
- <_> - - <_> - 3 14 6 6 -1. - <_> - 3 14 3 3 2. - <_> - 6 17 3 3 2. - <_> - - <_> - 14 8 6 12 -1. - <_> - 17 8 3 6 2. - <_> - 14 14 3 6 2. - <_> - - <_> - 11 7 2 12 -1. - <_> - 11 11 2 4 3. - <_> - - <_> - 0 8 6 12 -1. - <_> - 0 8 3 6 2. - <_> - 3 14 3 6 2. - <_> - - <_> - 7 7 2 12 -1. - <_> - 7 11 2 4 3. - <_> - - <_> - 14 12 1 2 -1. - <_> - 14 13 1 1 2. - <_> - - <_> - 12 13 8 1 -1. - <_> - 12 13 4 1 2. - <_> - - <_> - 0 3 16 6 -1. - <_> - 0 6 16 3 2. - <_> - - <_> - 1 4 8 2 -1. - <_> - 1 4 4 1 2. - <_> - 5 5 4 1 2. - <_> - - <_> - 14 12 1 2 -1. - <_> - 14 13 1 1 2. - <_> - - <_> - 15 12 2 3 -1. - <_> - 15 13 2 1 3. - <_> - - <_> - 8 16 3 3 -1. - <_> - 8 17 3 1 3. - <_> - - <_> - 5 12 1 2 -1. - <_> - 5 13 1 1 2. - <_> - - <_> - 13 4 3 15 -1. - <_> - 14 4 1 15 3. - <_> - - <_> - 17 3 2 6 -1. - <_> - 18 3 1 3 2. - <_> - 17 6 1 3 2. - <_> - - <_> - 4 4 3 15 -1. - <_> - 5 4 1 15 3. - <_> - - <_> - 1 3 2 6 -1. - <_> - 1 3 1 3 2. - <_> - 2 6 1 3 2. - <_> - - <_> - 7 15 12 4 -1. - <_> - 7 17 12 2 2. - <_> - - <_> - 1 0 19 3 -1. - <_> - 1 1 19 1 3. - <_> - - <_> - 3 17 10 2 -1. - <_> - 3 17 5 1 2. - <_> - 8 18 5 1 2. - <_> - - <_> - 2 5 10 15 -1. - <_> - 2 10 10 5 3. - <_> - - <_> - 13 8 3 4 -1. - <_> - 13 10 3 2 2. - <_> - - <_> - 19 13 1 2 -1. - <_> - 19 14 1 1 2. - <_> - - <_> - 4 8 3 4 -1. - <_> - 4 10 3 2 2. - <_> - - <_> - 0 13 1 2 -1. - <_> - 0 14 1 1 2. - <_> - - <_> - 12 7 2 12 -1. - <_> - 12 13 2 6 2. - <_> - - <_> - 14 7 2 2 -1. - <_> - 15 7 1 1 2. - <_> - 14 8 1 1 2. - <_> - - <_> - 5 3 8 2 -1. - <_> - 5 4 8 1 2. - <_> - - <_> - 0 2 2 6 -1. - <_> - 0 4 2 2 3. - <_> - - <_> - 18 2 2 12 -1. - <_> - 19 2 1 6 2. - <_> - 18 8 1 6 2. - <_> - - <_> - 18 1 1 2 -1. - <_> - 18 2 1 1 2. - <_> - - <_> - 0 2 2 12 -1. - <_> - 0 2 1 6 2. - <_> - 1 8 1 6 2. - <_> - - <_> - 1 1 1 2 -1. - <_> - 1 2 1 1 2. - <_> - - <_> - 16 4 4 14 -1. - <_> - 18 4 2 7 2. - <_> - 16 11 2 7 2. - <_> - - <_> - 10 14 1 6 -1. - <_> - 10 17 1 3 2. - <_> - - <_> - 0 4 4 14 -1. - <_> - 0 4 2 7 2. 
- <_> - 2 11 2 7 2. - <_> - - <_> - 9 14 1 6 -1. - <_> - 9 17 1 3 2. - <_> - - <_> - 9 14 4 3 -1. - <_> - 9 15 4 1 3. - <_> - - <_> - 4 7 12 2 -1. - <_> - 8 7 4 2 3. - <_> - - <_> - 0 8 4 3 -1. - <_> - 0 9 4 1 3. - <_> - - <_> - 4 7 2 2 -1. - <_> - 4 7 1 1 2. - <_> - 5 8 1 1 2. - <_> - - <_> - 13 7 2 1 -1. - <_> - 13 7 1 1 2. - <_> - - <_> - 11 4 4 5 -1. - <_> - 11 4 2 5 2. - <_> - - <_> - 4 8 3 3 -1. - <_> - 5 8 1 3 3. - <_> - - <_> - 0 3 8 1 -1. - <_> - 4 3 4 1 2. - <_> - - <_> - 13 7 2 1 -1. - <_> - 13 7 1 1 2. - <_> - - <_> - 14 7 3 2 -1. - <_> - 15 7 1 2 3. - <_> - - <_> - 5 7 2 1 -1. - <_> - 6 7 1 1 2. - <_> - - <_> - 3 7 3 2 -1. - <_> - 4 7 1 2 3. - <_> - - <_> - 18 5 2 2 -1. - <_> - 18 6 2 1 2. - <_> - - <_> - 12 14 2 2 -1. - <_> - 13 14 1 1 2. - <_> - 12 15 1 1 2. - <_> - - <_> - 0 5 2 2 -1. - <_> - 0 6 2 1 2. - <_> - - <_> - 6 14 2 2 -1. - <_> - 6 14 1 1 2. - <_> - 7 15 1 1 2. - <_> - - <_> - 7 12 6 5 -1. - <_> - 9 12 2 5 3. - <_> - - <_> - 12 17 5 2 -1. - <_> - 12 18 5 1 2. - <_> - - <_> - 1 11 6 3 -1. - <_> - 4 11 3 3 2. - <_> - - <_> - 1 9 6 3 -1. - <_> - 4 9 3 3 2. - <_> - - <_> - 12 7 2 12 -1. - <_> - 12 13 2 6 2. - <_> - - <_> - 8 7 5 3 -1. - <_> - 8 8 5 1 3. - <_> - - <_> - 6 7 2 12 -1. - <_> - 6 13 2 6 2. - <_> - - <_> - 1 2 9 18 -1. - <_> - 4 2 3 18 3. - <_> - - <_> - 12 17 5 2 -1. - <_> - 12 18 5 1 2. - <_> - - <_> - 4 7 12 2 -1. - <_> - 4 7 6 2 2. - <_> - - <_> - 6 7 6 1 -1. - <_> - 8 7 2 1 3. - <_> - - <_> - 7 3 3 2 -1. - <_> - 8 3 1 2 3. - <_> - - <_> - 9 4 3 1 -1. - <_> - 10 4 1 1 3. - <_> - - <_> - 11 11 3 1 -1. - <_> - 12 11 1 1 3. - <_> - - <_> - 8 4 3 1 -1. - <_> - 9 4 1 1 3. - <_> - - <_> - 6 11 3 1 -1. - <_> - 7 11 1 1 3. - <_> - - <_> - 12 13 6 6 -1. - <_> - 12 15 6 2 3. - <_> - - <_> - 14 13 1 6 -1. - <_> - 14 15 1 2 3. - <_> - - <_> - 2 13 6 6 -1. - <_> - 2 15 6 2 3. - <_> - - <_> - 1 5 18 1 -1. - <_> - 7 5 6 1 3. - <_> - - <_> - 4 7 12 2 -1. - <_> - 10 7 6 1 2. - <_> - 4 8 6 1 2. - <_> - - <_> - 6 1 8 10 -1. - <_> - 10 1 4 5 2. 
- <_> - 6 6 4 5 2. - <_> - - <_> - 3 13 4 3 -1. - <_> - 3 14 4 1 3. - <_> - - <_> - 6 13 4 3 -1. - <_> - 6 14 4 1 3. - <_> - - <_> - 9 14 4 3 -1. - <_> - 9 15 4 1 3. - <_> - - <_> - 12 9 2 3 -1. - <_> - 12 10 2 1 3. - <_> - - <_> - 7 14 4 3 -1. - <_> - 7 15 4 1 3. - <_> - - <_> - 9 0 2 1 -1. - <_> - 10 0 1 1 2. - <_> - - <_> - 5 0 10 5 -1. - <_> - 5 0 5 5 2. - <_> - - <_> - 6 6 8 7 -1. - <_> - 6 6 4 7 2. - <_> - - <_> - 5 0 10 5 -1. - <_> - 10 0 5 5 2. - <_> - - <_> - 6 6 8 7 -1. - <_> - 10 6 4 7 2. - <_> - - <_> - 5 9 10 8 -1. - <_> - 10 9 5 4 2. - <_> - 5 13 5 4 2. - <_> - - <_> - 10 0 4 10 -1. - <_> - 12 0 2 5 2. - <_> - 10 5 2 5 2. - <_> - - <_> - 1 4 8 3 -1. - <_> - 1 5 8 1 3. - <_> - - <_> - 4 4 8 3 -1. - <_> - 4 5 8 1 3. - <_> - - <_> - 9 7 4 3 -1. - <_> - 9 8 4 1 3. - <_> - - <_> - 12 8 3 12 -1. - <_> - 12 14 3 6 2. - <_> - - <_> - 7 7 4 3 -1. - <_> - 7 8 4 1 3. - <_> - - <_> - 5 8 3 12 -1. - <_> - 5 14 3 6 2. - <_> - - <_> - 10 0 7 6 -1. - <_> - 10 2 7 2 3. - <_> - - <_> - 2 1 18 1 -1. - <_> - 8 1 6 1 3. - <_> - - <_> - 5 0 3 8 -1. - <_> - 6 0 1 8 3. - <_> - - <_> - 4 7 4 2 -1. - <_> - 4 8 4 1 2. 
- diff --git a/data/map/aire_0.dat b/data/map/aire_0.dat new file mode 100644 index 000000000..08d5457c3 --- /dev/null +++ b/data/map/aire_0.dat @@ -0,0 +1,6 @@ +2 +5.97270011901855,45.5285783467813 +5.976382791996,45.5285783467813 +5.976382791996,45.5258330159471 +5.97270011901855,45.5258330159471 +(SASPlanet) diff --git a/data/map/aire_0.jpg b/data/map/aire_0.jpg new file mode 100644 index 000000000..6224da23a Binary files /dev/null and b/data/map/aire_0.jpg differ diff --git a/data/map/geo_bind.ini b/data/map/geo_bind.ini new file mode 100644 index 000000000..d3b448e1b --- /dev/null +++ b/data/map/geo_bind.ini @@ -0,0 +1,57 @@ +[points] + +# Points pair select manual: from maps.google [lat, lon] <-> [x, y] in pixels from imge editor (gimp, paint etc) +# Pixel coordinates are in [0, 1] - independed from frame resolution +lat0 = 45.526646 +lon0 = 5.974535 +px_x0 = 0.328125 +px_y0 = 0.483333333333333 + +lat1 = 45.527566 +lon1 = 5.973849 +px_x1 = 0.39765625 +px_y1 = 0.393055555555556 + +lat2 = 45.527904 +lon2 = 5.974135 +px_x2 = 0.57109375 +px_y2 = 0.390277777777778 + +lat3 = 45.526867 +lon3 = 5.974826 +px_x3 = 0.65625 +px_y3 = 0.476388888888889 + + +[lines] + +# Line coordinates are in [0, 1] - independed from frame resolution +line0_x0 = 0.1 +line0_y0 = 0.7 +line0_x1 = 0.47 +line0_y1 = 0.7 + +line1_x0 = 0.52 +line1_y0 = 0.6 +line1_x1 = 0.8 +line1_y1 = 0.6 + + +[map] + +# optional: map exported from SASPlanet with *.dat file in Merkator projection + +file = map\aire_0.jpg + +left_top_lat = 45.5285783467813 +left_top_lon = 5.97270011901855 + +right_top_lat = 45.5285783467813 +right_top_lon = 5.976382791996 + +right_bottom_lat = 45.5258330159471 +right_bottom_lon = 5.976382791996 + +left_bottom_lat = 45.5258330159471 +left_bottom_lon = 5.97270011901855 + diff --git a/data/map/manual/france_bind.jpg b/data/map/manual/france_bind.jpg new file mode 100644 index 000000000..bbeca503d Binary files /dev/null and b/data/map/manual/france_bind.jpg differ diff --git 
a/data/map/manual/paint1.png b/data/map/manual/paint1.png new file mode 100644 index 000000000..d7d95172c Binary files /dev/null and b/data/map/manual/paint1.png differ diff --git a/data/map/manual/photo_2023-08-10_09-44-05.jpg b/data/map/manual/photo_2023-08-10_09-44-05.jpg new file mode 100644 index 000000000..1801574bb Binary files /dev/null and b/data/map/manual/photo_2023-08-10_09-44-05.jpg differ diff --git a/data/map/manual/text_editor1.png b/data/map/manual/text_editor1.png new file mode 100644 index 000000000..86186616e Binary files /dev/null and b/data/map/manual/text_editor1.png differ diff --git a/data/map/run_cars.bat b/data/map/run_cars.bat new file mode 100644 index 000000000..cb696bd85 --- /dev/null +++ b/data/map/run_cars.bat @@ -0,0 +1,3 @@ + +MultitargetTracker.exe map/Relaxing_highway_traffic.mp4 -e=7 --geo_bind=map/geo_bind.ini --settings=settings.ini + diff --git a/data/map/run_cars.sh b/data/map/run_cars.sh new file mode 100644 index 000000000..f48fbe52a --- /dev/null +++ b/data/map/run_cars.sh @@ -0,0 +1,2 @@ +./MultitargetTracker map/Relaxing_highway_traffic.mp4 -e=7 --geo_bind=map/geo_bind.ini --settings=settings.ini + diff --git a/data/reid/osnet_x0_25_msmt17.onnx b/data/reid/osnet_x0_25_msmt17.onnx new file mode 100644 index 000000000..43a689e71 Binary files /dev/null and b/data/reid/osnet_x0_25_msmt17.onnx differ diff --git a/data/settings.ini b/data/settings.ini index 3deeff20c..d91395d5d 100644 --- a/data/settings.ini +++ b/data/settings.ini @@ -1,3 +1,55 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 6 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# 
DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = ../../data/yolov4.weights +nn_config = ../../data/yolov4.cfg +class_names = ../../data/coco.names + +#----------------------------- +confidence_threshold = 0.5 + +max_crop_ratio = -1 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = YOLOV4 + +#----------------------------- +# INT8 +# FP16 +# FP32 +inference_precison = FP32 + + [tracking] #----------------------------- @@ -69,12 +121,12 @@ min_area_radius_pix = -1 min_area_radius_k = 0.8 #----------------------------- -# If the object do not assignment more than this frames then it will be removed -max_skip_frames = 50 +# If the object do not assignment more than this seconds then it will be removed +max_lost_time = 2 #----------------------------- # The maximum trajectory length -max_trace_len = 50 +max_trace_len = 2 #----------------------------- # Detection abandoned objects @@ -84,4 +136,4 @@ min_static_time = 5 # After this time (in seconds) the abandoned object will be removed max_static_time = 25 # Speed in pixels. 
If speed of object is more that this value than object is non static -max_speed_for_static = 10 \ No newline at end of file +max_speed_for_static = 10 diff --git a/data/settings_coco.ini b/data/settings_coco.ini new file mode 100644 index 000000000..b016a5554 --- /dev/null +++ b/data/settings_coco.ini @@ -0,0 +1,154 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +# nn_weights = data/coco/yolov7.onnx +# nn_config = data/coco/yolov7.onnx + +# nn_weights = data/coco/yolov6s.onnx +# nn_config = data/coco/yolov6s.onnx + +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/yolov4.weights +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/yolov4.cfg + +class_names = C:/work/home/mtracker/Multitarget-tracker/data/coco.names + +#----------------------------- +confidence_threshold = 0.2 + +max_crop_ratio = -1 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = YOLOV4 + +#----------------------------- +# INT8 +# FP16 +# FP32 +inference_precision = FP32 + +#----------------------------- +# Detect only set of types, ";" +white_list = + +#----------------------------- +# For TensorRT optimization, bytes +video_memory = 0; + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# 
DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 than will be used circle with this radius +# If this value <= 0 than will be used ellipse with size (3*vx, 3*vy), vx and vy - horizontal and vertical speed in pixelsa +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius in ration for object size. 
Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object do not assignment more than this seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If speed of object is more that this value than object is non static +max_speed_for_static = 10 diff --git a/data/settings_deimv2.ini b/data/settings_deimv2.ini new file mode 100644 index 000000000..e691d9ac1 --- /dev/null +++ b/data/settings_deimv2.ini @@ -0,0 +1,141 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/deimv2_dinov3_m_coco.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/deimv2_dinov3_m_coco.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/coco/coco.names + +#----------------------------- +confidence_threshold = 0.1 + +max_crop_ratio = 0 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = DFINE + +#----------------------------- +# INT8 +# FP16 +# FP32 
+inference_precision = FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 than will be used circle with this radius +# If this value <= 0 than will be used ellipse with size (3*vx, 3*vy), vx and vy - horizontal and vertical speed in pixelsa +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius in ration for object size. 
Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object do not assignment more than this seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If speed of object is more that this value than object is non static +max_speed_for_static = 10 diff --git a/data/settings_dfine.ini b/data/settings_dfine.ini new file mode 100644 index 000000000..0ad660cf1 --- /dev/null +++ b/data/settings_dfine.ini @@ -0,0 +1,141 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/dfine_m_obj2coco.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/dfine_m_obj2coco.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/coco/coco.names + +#----------------------------- +confidence_threshold = 0.5 + +max_crop_ratio = 0 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = DFINE + +#----------------------------- +# INT8 +# FP16 +# FP32 
+inference_precision = FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 than will be used circle with this radius +# If this value <= 0 than will be used ellipse with size (3*vx, 3*vy), vx and vy - horizontal and vertical speed in pixelsa +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius in ration for object size. 
Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object do not assignment more than this seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If speed of object is more that this value than object is non static +max_speed_for_static = 10 diff --git a/data/settings_dfine_seg.ini b/data/settings_dfine_seg.ini new file mode 100644 index 000000000..e82c504ae --- /dev/null +++ b/data/settings_dfine_seg.ini @@ -0,0 +1,141 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/dfine_seg_s_coco.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/dfine_seg_s_coco.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/coco/coco.names + +#----------------------------- +confidence_threshold = 0.5 + +max_crop_ratio = 0 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = DFINE_IS + +#----------------------------- +# INT8 +# FP16 +# FP32 
+inference_precision = FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 than will be used circle with this radius +# If this value <= 0 than will be used ellipse with size (3*vx, 3*vy), vx and vy - horizontal and vertical speed in pixelsa +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius in ration for object size. 
Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object do not assignment more than this seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If speed of object is more that this value than object is non static +max_speed_for_static = 10 diff --git a/data/settings_rfdetr.ini b/data/settings_rfdetr.ini new file mode 100644 index 000000000..10037acdd --- /dev/null +++ b/data/settings_rfdetr.ini @@ -0,0 +1,141 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/rfdetr_sim_coco.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/rfdetr_sim_coco.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/coco/coco_91.names + +#----------------------------- +confidence_threshold = 0.5 + +max_crop_ratio = 0 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = RFDETR + +#----------------------------- +# INT8 +# FP16 +# FP32 
+inference_precision = FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 than will be used circle with this radius +# If this value <= 0 than will be used ellipse with size (3*vx, 3*vy), vx and vy - horizontal and vertical speed in pixelsa +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius in ration for object size. 
Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object do not assignment more than this seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If speed of object is more that this value than object is non static +max_speed_for_static = 10 diff --git a/data/settings_rfdetr_seg.ini b/data/settings_rfdetr_seg.ini new file mode 100644 index 000000000..f1cc5da66 --- /dev/null +++ b/data/settings_rfdetr_seg.ini @@ -0,0 +1,141 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/rfdetr_seg_coco.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/rfdetr_seg_coco.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/coco/coco_91.names + +#----------------------------- +confidence_threshold = 0.5 + +max_crop_ratio = 0 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = RFDETR_IS + +#----------------------------- +# INT8 +# FP16 +# 
FP32 +inference_precision = FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 than will be used circle with this radius +# If this value <= 0 than will be used ellipse with size (3*vx, 3*vy), vx and vy - horizontal and vertical speed in pixelsa +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius in ration for object size. 
Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object do not assignment more than this seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If speed of object is more that this value than object is non static +max_speed_for_static = 10 diff --git a/data/settings_rtdetrv4.ini b/data/settings_rtdetrv4.ini new file mode 100644 index 000000000..4734cf9fb --- /dev/null +++ b/data/settings_rtdetrv4.ini @@ -0,0 +1,141 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/RTv4-M-hgnet.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/RTv4-M-hgnet.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/coco/coco.names + +#----------------------------- +confidence_threshold = 0.5 + +max_crop_ratio = 0 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = DFINE + +#----------------------------- +# INT8 +# FP16 +# FP32 
+inference_precision = FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 than will be used circle with this radius +# If this value <= 0 than will be used ellipse with size (3*vx, 3*vy), vx and vy - horizontal and vertical speed in pixelsa +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius in ration for object size. 
Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object do not assignment more than this seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If speed of object is more that this value than object is non static +max_speed_for_static = 10 diff --git a/data/settings_yolov10.ini b/data/settings_yolov10.ini new file mode 100644 index 000000000..a7db951b1 --- /dev/null +++ b/data/settings_yolov10.ini @@ -0,0 +1,141 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolov10s.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolov10s.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/coco/coco.names + +#----------------------------- +confidence_threshold = 0.3 + +max_crop_ratio = 0 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = YOLOV10 + +#----------------------------- +# INT8 +# FP16 +# FP32 +inference_precision = 
FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 than will be used circle with this radius +# If this value <= 0 than will be used ellipse with size (3*vx, 3*vy), vx and vy - horizontal and vertical speed in pixelsa +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius in ration for object size. 
Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object do not assignment more than this seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If speed of object is more that this value than object is non static +max_speed_for_static = 10 diff --git a/data/settings_yolov11_obb.ini b/data/settings_yolov11_obb.ini new file mode 100644 index 000000000..36641f010 --- /dev/null +++ b/data/settings_yolov11_obb.ini @@ -0,0 +1,141 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/dota/yolo11s-obb.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/dota/yolo11s-obb.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/dota/DOTA_v1.0.names + +#----------------------------- +confidence_threshold = 0.3 + +max_crop_ratio = 0 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = YOLOV11_OBB + +#----------------------------- +# INT8 +# FP16 +# 
FP32 +inference_precision = FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 than will be used circle with this radius +# If this value <= 0 than will be used ellipse with size (3*vx, 3*vy), vx and vy - horizontal and vertical speed in pixelsa +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius in ration for object size. 
Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object do not assignment more than this seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If speed of object is more that this value than object is non static +max_speed_for_static = 10 diff --git a/data/settings_yolov11_seg.ini b/data/settings_yolov11_seg.ini new file mode 100644 index 000000000..a296bb651 --- /dev/null +++ b/data/settings_yolov11_seg.ini @@ -0,0 +1,141 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolo11s-seg.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolo11s-seg.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/coco/coco.names + +#----------------------------- +confidence_threshold = 0.3 + +max_crop_ratio = 0 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = YOLOV11Mask + +#----------------------------- +# INT8 +# FP16 +# FP32 
+inference_precision = FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 than will be used circle with this radius +# If this value <= 0 than will be used ellipse with size (3*vx, 3*vy), vx and vy - horizontal and vertical speed in pixelsa +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius in ration for object size. 
Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object do not assignment more than this seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If speed of object is more that this value than object is non static +max_speed_for_static = 10 diff --git a/data/settings_yolov11m.ini b/data/settings_yolov11m.ini new file mode 100644 index 000000000..e2d29ee39 --- /dev/null +++ b/data/settings_yolov11m.ini @@ -0,0 +1,141 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolo11m.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolo11m.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/coco/coco.names + +#----------------------------- +confidence_threshold = 0.5 + +max_crop_ratio = 0 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = YOLOV11 + +#----------------------------- +# INT8 +# FP16 +# FP32 +inference_precision = 
FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 than will be used circle with this radius +# If this value <= 0 than will be used ellipse with size (3*vx, 3*vy), vx and vy - horizontal and vertical speed in pixelsa +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius in ration for object size. 
Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object do not assignment more than this seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If speed of object is more that this value than object is non static +max_speed_for_static = 10 diff --git a/data/settings_yolov11x.ini b/data/settings_yolov11x.ini new file mode 100644 index 000000000..d8043afa7 --- /dev/null +++ b/data/settings_yolov11x.ini @@ -0,0 +1,141 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolo11x.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolo11x.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/coco/coco.names + +#----------------------------- +confidence_threshold = 0.5 + +max_crop_ratio = 0 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = YOLOV11 + +#----------------------------- +# INT8 +# FP16 +# FP32 +inference_precision = 
FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 than will be used circle with this radius +# If this value <= 0 than will be used ellipse with size (3*vx, 3*vy), vx and vy - horizontal and vertical speed in pixelsa +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius in ration for object size. 
Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object do not assignment more than this seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If speed of object is more that this value than object is non static +max_speed_for_static = 10 diff --git a/data/settings_yolov11x_obb.ini b/data/settings_yolov11x_obb.ini new file mode 100644 index 000000000..bb65f3b5b --- /dev/null +++ b/data/settings_yolov11x_obb.ini @@ -0,0 +1,141 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/dota/yolo11x-obb.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/dota/yolo11x-obb.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/dota/DOTA_v1.0.names + +#----------------------------- +confidence_threshold = 0.5 + +max_crop_ratio = 1 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = YOLOV11_OBB + +#----------------------------- +# INT8 +# FP16 +# 
FP32 +inference_precision = FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 than will be used circle with this radius +# If this value <= 0 than will be used ellipse with size (3*vx, 3*vy), vx and vy - horizontal and vertical speed in pixelsa +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius in ration for object size. 
Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object do not assignment more than this seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If speed of object is more that this value than object is non static +max_speed_for_static = 10 diff --git a/data/settings_yolov12m.ini b/data/settings_yolov12m.ini new file mode 100644 index 000000000..ca6d381b7 --- /dev/null +++ b/data/settings_yolov12m.ini @@ -0,0 +1,141 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_INFERENCE_ENGINE + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolov12m.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolov12m.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/coco/coco.names + +#----------------------------- +confidence_threshold = 0.5 + +max_crop_ratio = 0 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = YOLOV12 + +#----------------------------- +# INT8 +# FP16 +# FP32 
+inference_precision = FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 than will be used circle with this radius +# If this value <= 0 than will be used ellipse with size (3*vx, 3*vy), vx and vy - horizontal and vertical speed in pixelsa +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius in ration for object size. 
Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object do not assignment more than this seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If speed of object is more that this value than object is non static +max_speed_for_static = 10 diff --git a/data/settings_yolov12x.ini b/data/settings_yolov12x.ini new file mode 100644 index 000000000..6f1e7be6a --- /dev/null +++ b/data/settings_yolov12x.ini @@ -0,0 +1,141 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolov12x.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolov12x.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/coco/coco.names + +#----------------------------- +confidence_threshold = 0.5 + +max_crop_ratio = 0 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = YOLOV12 + +#----------------------------- +# INT8 +# FP16 +# FP32 +inference_precision 
= FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 than will be used circle with this radius +# If this value <= 0 than will be used ellipse with size (3*vx, 3*vy), vx and vy - horizontal and vertical speed in pixelsa +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius in ration for object size. 
Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object do not assignment more than this seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If speed of object is more that this value than object is non static +max_speed_for_static = 10 diff --git a/data/settings_yolov13s.ini b/data/settings_yolov13s.ini new file mode 100644 index 000000000..cfd75f5ba --- /dev/null +++ b/data/settings_yolov13s.ini @@ -0,0 +1,148 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolov13s.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolov13s.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/coco/coco.names + +#----------------------------- +confidence_threshold = 0.5 + +max_crop_ratio = 0 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = YOLOV13 + +#----------------------------- +# INT8 +# FP16 +# FP32 +inference_precision 
= FP16 + + +[tracking] + +#----------------------------- +# UniversalTracker = 0 +# ByteTrack = 1 +tracker_type = 1 + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackCSRT = 2 +# TrackDaSiamRPN = 3 +# TrackNano = 4 +# TrackVit = 5 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchLAPJV = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 than will be used circle with this radius +# If this value <= 0 than will be used ellipse with size (3*vx, 3*vy), vx and vy - horizontal and vertical speed in pixelsa +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius in ration for object size. 
Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object do not assignment more than this seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If speed of object is more that this value than object is non static +max_speed_for_static = 10 + +#----------------------------- +# Settings only for m_tracker = tracking::ByteTrack +bytetrack_track_buffer = 30 +bytetrack_track_thresh = 0.5 +bytetrack_high_thresh = 0.5 +bytetrack_match_thresh = 0.8 diff --git a/data/settings_yolov26m.ini b/data/settings_yolov26m.ini new file mode 100644 index 000000000..23dd24f48 --- /dev/null +++ b/data/settings_yolov26m.ini @@ -0,0 +1,142 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_INFERENCE_ENGINE + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolo26m.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolo26m.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/coco/coco.names + +#----------------------------- +confidence_threshold = 0.5 
+ +max_crop_ratio = 0 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = YOLOV26 + +#----------------------------- +# INT8 +# FP16 +# FP32 +# FP8 +inference_precision = FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 than will be used circle with this radius +# If this value <= 0 than will be used ellipse with size (3*vx, 3*vy), vx and vy - horizontal and vertical speed in pixelsa +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius in ration for object size. 
Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object do not assignment more than this seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If speed of object is more that this value than object is non static +max_speed_for_static = 10 diff --git a/data/settings_yolov26m_obb.ini b/data/settings_yolov26m_obb.ini new file mode 100644 index 000000000..d31e8425b --- /dev/null +++ b/data/settings_yolov26m_obb.ini @@ -0,0 +1,141 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/dota/yolo26m-obb.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/dota/yolo26m-obb.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/dota/DOTA_v1.0.names + +#----------------------------- +confidence_threshold = 0.5 + +max_crop_ratio = 1 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = YOLOV26_OBB + +#----------------------------- +# INT8 +# FP16 +# 
FP32 +inference_precision = FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 than will be used circle with this radius +# If this value <= 0 than will be used ellipse with size (3*vx, 3*vy), vx and vy - horizontal and vertical speed in pixelsa +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius in ration for object size. 
Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object do not assignment more than this seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If speed of object is more that this value than object is non static +max_speed_for_static = 10 diff --git a/data/settings_yolov26m_seg.ini b/data/settings_yolov26m_seg.ini new file mode 100644 index 000000000..3a4ed1d05 --- /dev/null +++ b/data/settings_yolov26m_seg.ini @@ -0,0 +1,141 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolo26m-seg.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolo26m-seg.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/coco/coco.names + +#----------------------------- +confidence_threshold = 0.3 + +max_crop_ratio = 0 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = YOLOV26Mask + +#----------------------------- +# INT8 +# FP16 +# FP32 
+inference_precision = FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 than will be used circle with this radius +# If this value <= 0 than will be used ellipse with size (3*vx, 3*vy), vx and vy - horizontal and vertical speed in pixelsa +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius in ration for object size. 
Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object do not assignment more than this seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If speed of object is more that this value than object is non static +max_speed_for_static = 10 diff --git a/data/settings_yolov5.ini b/data/settings_yolov5.ini new file mode 100644 index 000000000..85cc89cc7 --- /dev/null +++ b/data/settings_yolov5.ini @@ -0,0 +1,141 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolov5m.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolov5m.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/coco/coco.names + +#----------------------------- +confidence_threshold = 0.5 + +max_crop_ratio = 1.5 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = YOLOV5 + +#----------------------------- +# INT8 +# FP16 +# FP32 +inference_precision = FP16 
+ + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 than will be used circle with this radius +# If this value <= 0 than will be used ellipse with size (3*vx, 3*vy), vx and vy - horizontal and vertical speed in pixelsa +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius in ration for object size. 
Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object do not assignment more than this seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If speed of object is more that this value than object is non static +max_speed_for_static = 10 diff --git a/data/settings_yolov6.ini b/data/settings_yolov6.ini new file mode 100644 index 000000000..140e07d8e --- /dev/null +++ b/data/settings_yolov6.ini @@ -0,0 +1,141 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolov6m.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolov6m.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/coco/coco.names + +#----------------------------- +confidence_threshold = 0.5 + +max_crop_ratio = 1.5 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = YOLOV6 + +#----------------------------- +# INT8 +# FP16 +# FP32 +inference_precision = FP16 
+ + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 than will be used circle with this radius +# If this value <= 0 than will be used ellipse with size (3*vx, 3*vy), vx and vy - horizontal and vertical speed in pixelsa +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius in ration for object size. 
Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object do not assignment more than this seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If speed of object is more that this value than object is non static +max_speed_for_static = 10 diff --git a/data/settings_yolov7.ini b/data/settings_yolov7.ini new file mode 100644 index 000000000..201ab70b7 --- /dev/null +++ b/data/settings_yolov7.ini @@ -0,0 +1,141 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolov7_b1.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolov7_b1.onnx +#nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolov7_b2.onnx +#nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolov7_b2.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/coco/coco.names + +#----------------------------- +confidence_threshold = 0.5 + +max_crop_ratio = 1 +max_batch = 1 +gpu_id = 0 + 
+#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = YOLOV7 + +#----------------------------- +# INT8 +# FP16 +# FP32 +inference_precision = FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 than will be used circle with this radius +# If this value <= 0 than will be used ellipse with size (3*vx, 3*vy), vx and vy - horizontal and vertical speed in pixelsa +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius in ration for object size. 
Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object do not assignment more than this seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If speed of object is more that this value than object is non static +max_speed_for_static = 10 diff --git a/data/settings_yolov7mask.ini b/data/settings_yolov7mask.ini new file mode 100644 index 000000000..bc6ca37c1 --- /dev/null +++ b/data/settings_yolov7mask.ini @@ -0,0 +1,146 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolov7-seg_orig.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolov7-seg_orig.onnx +#nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolov7_b1.onnx +#nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolov7_b1.onnx +#nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolov7_b2.onnx +#nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolov7_b2.onnx 
+class_names = C:/work/home/mtracker/Multitarget-tracker/data/coco/coco.names + +#----------------------------- +confidence_threshold = 0.5 + +max_crop_ratio = 0 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = YOLOV7Mask +#net_type = YOLOV7 + +#----------------------------- +# INT8 +# FP16 +# FP32 +inference_precision = FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 than will be used circle with this radius +# If this value <= 0 than will be used ellipse with size (3*vx, 3*vy), vx and vy - horizontal and vertical speed in 
pixels +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius in ratio for object size. Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object is not assigned for more than this number of seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If the speed of the object is more than this value then the object is non-static +max_speed_for_static = 10 diff --git a/data/settings_yolov8.ini b/data/settings_yolov8.ini new file mode 100644 index 000000000..6be1c1db1 --- /dev/null +++ b/data/settings_yolov8.ini @@ -0,0 +1,141 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolov8m.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolov8m.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/coco/coco.names + +#----------------------------- +confidence_threshold = 0.5 + +max_crop_ratio = 1.5 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# 
YOLOV4 +# YOLOV5 +net_type = YOLOV8 + +#----------------------------- +# INT8 +# FP16 +# FP32 +inference_precision = FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 than will be used circle with this radius +# If this value <= 0 than will be used ellipse with size (3*vx, 3*vy), vx and vy - horizontal and vertical speed in pixelsa +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius in ration for object size. 
Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object do not assignment more than this seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If speed of object is more that this value than object is non static +max_speed_for_static = 10 diff --git a/data/settings_yolov8mask.ini b/data/settings_yolov8mask.ini new file mode 100644 index 000000000..edec20eba --- /dev/null +++ b/data/settings_yolov8mask.ini @@ -0,0 +1,141 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolov8s-seg.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolov8s-seg.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/coco/coco.names + +#----------------------------- +confidence_threshold = 0.5 + +max_crop_ratio = 0 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = YOLOV8Mask + +#----------------------------- +# INT8 +# FP16 +# FP32 
+inference_precision = FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 than will be used circle with this radius +# If this value <= 0 than will be used ellipse with size (3*vx, 3*vy), vx and vy - horizontal and vertical speed in pixelsa +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius in ration for object size. 
Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object do not assignment more than this seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If speed of object is more that this value than object is non static +max_speed_for_static = 10 diff --git a/data/settings_yolov8x_obb.ini b/data/settings_yolov8x_obb.ini new file mode 100644 index 000000000..05ebf94f6 --- /dev/null +++ b/data/settings_yolov8x_obb.ini @@ -0,0 +1,141 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CPU + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_OPENCV + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/dota/yolov8x-obb.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/dota/yolov8x-obb.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/dota/DOTA_v1.0.names + +#----------------------------- +confidence_threshold = 0.5 + +max_crop_ratio = 1 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = YOLOV8_OBB + +#----------------------------- +# INT8 +# FP16 +# 
FP32 +inference_precision = FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 than will be used circle with this radius +# If this value <= 0 than will be used ellipse with size (3*vx, 3*vy), vx and vy - horizontal and vertical speed in pixelsa +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius in ration for object size. 
Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object do not assignment more than this seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If speed of object is more that this value than object is non static +max_speed_for_static = 10 diff --git a/data/settings_yolov9.ini b/data/settings_yolov9.ini new file mode 100644 index 000000000..ec1382e5f --- /dev/null +++ b/data/settings_yolov9.ini @@ -0,0 +1,141 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CUDA_FP16 + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_CUDA + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolov9-e.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/coco/yolov9-e.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/coco/coco.names + +#----------------------------- +confidence_threshold = 0.3 + +max_crop_ratio = 0 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = YOLOV9 + +#----------------------------- +# INT8 +# FP16 +# FP32 +inference_precision = 
FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 than will be used circle with this radius +# If this value <= 0 than will be used ellipse with size (3*vx, 3*vy), vx and vy - horizontal and vertical speed in pixelsa +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius in ration for object size. 
Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object do not assignment more than this seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If speed of object is more that this value than object is non static +max_speed_for_static = 10 diff --git a/data/settings_yolov9_dota.ini b/data/settings_yolov9_dota.ini new file mode 100644 index 000000000..6597ccf31 --- /dev/null +++ b/data/settings_yolov9_dota.ini @@ -0,0 +1,141 @@ +[detection] + +#----------------------------- +# opencv_dnn = 6 +# tensorrt = 5 +detector_backend = 5 + +#----------------------------- +# Target and backend for opencv_dnn detector +# DNN_TARGET_CPU +# DNN_TARGET_OPENCL +# DNN_TARGET_OPENCL_FP16 +# DNN_TARGET_MYRIAD +# DNN_TARGET_CUDA +# DNN_TARGET_CUDA_FP16 +ocv_dnn_target = DNN_TARGET_CUDA_FP16 + +# DNN_BACKEND_DEFAULT +# DNN_BACKEND_HALIDE +# DNN_BACKEND_INFERENCE_ENGINE +# DNN_BACKEND_OPENCV +# DNN_BACKEND_VKCOM +# DNN_BACKEND_CUDA +# DNN_BACKEND_INFERENCE_ENGINE_NGRAPH +# DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 +ocv_dnn_backend = DNN_BACKEND_CUDA + +#----------------------------- +nn_weights = C:/work/home/mtracker/Multitarget-tracker/data/dota/yolov9_DOTA.onnx +nn_config = C:/work/home/mtracker/Multitarget-tracker/data/dota/yolov9_DOTA.onnx +class_names = C:/work/home/mtracker/Multitarget-tracker/data/dota/DOTA_v1.5.names + +#----------------------------- +confidence_threshold = 0.3 + +max_crop_ratio = 0 +max_batch = 1 +gpu_id = 0 + +#----------------------------- +# YOLOV3 +# YOLOV4 +# YOLOV5 +net_type = YOLOV9 + +#----------------------------- +# INT8 +# FP16 +# 
FP32 +inference_precision = FP16 + + +[tracking] + +#----------------------------- +# DistCenters = 0 // Euclidean distance between centers, pixels +# DistRects = 1 // Euclidean distance between bounding rectangles, pixels +# DistJaccard = 2 // Intersection over Union, IoU, [0, 1] +# DistHist = 3 // Bhatacharia distance between histograms, [0, 1] + +distance_type = 0 + +#----------------------------- +# KalmanLinear = 0 +# KalmanUnscented = 1 + +kalman_type = 0 + +#----------------------------- +# FilterCenter = 0 +# FilterRect = 1 +# FilterRRect = 2 + +filter_goal = 0 + +#----------------------------- +# TrackNone = 0 +# TrackKCF = 1 +# TrackMIL = 2 +# TrackMedianFlow = 3 +# TrackGOTURN = 4 +# TrackMOSSE = 5 +# TrackCSRT = 6 +# TrackDAT = 7 +# TrackSTAPLE = 8 +# TrackLDES = 9 +# TrackDaSiamRPN = 10 +# Used if filter_goal == FilterRect + +lost_track_type = 0 + +#----------------------------- +# MatchHungrian = 0 +# MatchBipart = 1 + +match_type = 0 + +#----------------------------- +# Use constant acceleration motion model: +# 0 - unused (stable) +# 1 - use acceleration in Kalman filter (experimental) +use_aceleration = 0 + +#----------------------------- +# Delta time for Kalman filter +delta_time = 0.4 + +#----------------------------- +# Accel noise magnitude for Kalman filter +accel_noise = 0.2 + +#----------------------------- +# Distance threshold between region and object on two frames +dist_thresh = 0.8 + +#----------------------------- +# If this value > 0 than will be used circle with this radius +# If this value <= 0 than will be used ellipse with size (3*vx, 3*vy), vx and vy - horizontal and vertical speed in pixelsa +min_area_radius_pix = -1 + +#----------------------------- +# Minimal area radius in ration for object size. 
Used if min_area_radius_pix < 0 +min_area_radius_k = 0.8 + +#----------------------------- +# If the object do not assignment more than this seconds then it will be removed +max_lost_time = 2 + +#----------------------------- +# The maximum trajectory length +max_trace_len = 2 + +#----------------------------- +# Detection abandoned objects +detect_abandoned = 0 +# After this time (in seconds) the object is considered abandoned +min_static_time = 5 +# After this time (in seconds) the abandoned object will be removed +max_static_time = 25 +# Speed in pixels. If speed of object is more that this value than object is non static +max_speed_for_static = 10 diff --git a/data/tiny-yolo.cfg b/data/tiny-yolo.cfg deleted file mode 100644 index 9a4a184f1..000000000 --- a/data/tiny-yolo.cfg +++ /dev/null @@ -1,139 +0,0 @@ -[net] -# Training -# batch=64 -# subdivisions=2 -# Testing -batch=1 -subdivisions=1 -width=416 -height=416 -channels=3 -momentum=0.9 -decay=0.0005 -angle=0 -saturation = 1.5 -exposure = 1.5 -hue=.1 - -learning_rate=0.001 -burn_in=1000 -max_batches = 500200 -policy=steps -steps=400000,450000 -scales=.1,.1 - -[convolutional] -batch_normalize=1 -filters=16 -size=3 -stride=1 -pad=1 -activation=leaky - -[maxpool] -size=2 -stride=2 - -[convolutional] -batch_normalize=1 -filters=32 -size=3 -stride=1 -pad=1 -activation=leaky - -[maxpool] -size=2 -stride=2 - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=1 -pad=1 -activation=leaky - -[maxpool] -size=2 -stride=2 - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=leaky - -[maxpool] -size=2 -stride=2 - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=leaky - -[maxpool] -size=2 -stride=2 - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=leaky - -[maxpool] -size=2 -stride=1 - -[convolutional] -batch_normalize=1 -filters=1024 -size=3 -stride=1 -pad=1 -activation=leaky - -########### - 
-[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=leaky - -[convolutional] -size=1 -stride=1 -pad=1 -filters=425 -activation=linear - -[region] -anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828 -bias_match=1 -classes=80 -coords=4 -num=5 -softmax=1 -jitter=.2 -rescore=0 - -object_scale=5 -noobject_scale=1 -class_scale=1 -coord_scale=1 - -absolute=1 -thresh = .6 -random=1 diff --git a/data/tiny-yolo.weights b/data/tiny-yolo.weights deleted file mode 100644 index 0bb96d616..000000000 Binary files a/data/tiny-yolo.weights and /dev/null differ diff --git a/data/voc.names b/data/voc.names deleted file mode 100644 index 8420ab35e..000000000 --- a/data/voc.names +++ /dev/null @@ -1,20 +0,0 @@ -aeroplane -bicycle -bird -boat -bottle -bus -car -cat -chair -cow -diningtable -dog -horse -motorbike -person -pottedplant -sheep -sofa -train -tvmonitor diff --git a/data/yolov3-tiny.cfg b/data/yolov3-tiny.cfg deleted file mode 100644 index 42c0fcf91..000000000 --- a/data/yolov3-tiny.cfg +++ /dev/null @@ -1,182 +0,0 @@ -[net] -# Testing -batch=1 -subdivisions=1 -# Training -# batch=64 -# subdivisions=2 -width=416 -height=416 -channels=3 -momentum=0.9 -decay=0.0005 -angle=0 -saturation = 1.5 -exposure = 1.5 -hue=.1 - -learning_rate=0.001 -burn_in=1000 -max_batches = 500200 -policy=steps -steps=400000,450000 -scales=.1,.1 - -[convolutional] -batch_normalize=1 -filters=16 -size=3 -stride=1 -pad=1 -activation=leaky - -[maxpool] -size=2 -stride=2 - -[convolutional] -batch_normalize=1 -filters=32 -size=3 -stride=1 -pad=1 -activation=leaky - -[maxpool] -size=2 -stride=2 - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=1 -pad=1 -activation=leaky - -[maxpool] -size=2 -stride=2 - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=leaky - -[maxpool] -size=2 -stride=2 - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 
-activation=leaky - -[maxpool] -size=2 -stride=2 - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=leaky - -[maxpool] -size=2 -stride=1 - -[convolutional] -batch_normalize=1 -filters=1024 -size=3 -stride=1 -pad=1 -activation=leaky - -########### - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=leaky - -[convolutional] -size=1 -stride=1 -pad=1 -filters=255 -activation=linear - - - -[yolo] -mask = 3,4,5 -anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 -classes=80 -num=6 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 - -[route] -layers = -4 - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=leaky - -[upsample] -stride=2 - -[route] -layers = -1, 8 - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=leaky - -[convolutional] -size=1 -stride=1 -pad=1 -filters=255 -activation=linear - -[yolo] -mask = 1,2,3 -anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 -classes=80 -num=6 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 diff --git a/data/yolov3-tiny.weights b/data/yolov3-tiny.weights deleted file mode 100644 index aad7e6c80..000000000 Binary files a/data/yolov3-tiny.weights and /dev/null differ diff --git a/data/yolov3.cfg b/data/yolov3.cfg deleted file mode 100644 index 938ffff23..000000000 --- a/data/yolov3.cfg +++ /dev/null @@ -1,789 +0,0 @@ -[net] -# Testing -# batch=1 -# subdivisions=1 -# Training -batch=64 -subdivisions=16 -width=608 -height=608 -channels=3 -momentum=0.9 -decay=0.0005 -angle=0 -saturation = 1.5 -exposure = 1.5 -hue=.1 - -learning_rate=0.001 -burn_in=1000 -max_batches = 500200 -policy=steps -steps=400000,450000 -scales=.1,.1 - -[convolutional] -batch_normalize=1 -filters=32 -size=3 -stride=1 -pad=1 -activation=leaky - -# Downsample - -[convolutional] -batch_normalize=1 
-filters=64 -size=3 -stride=2 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=32 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - -# Downsample - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=2 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - -# Downsample - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=2 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] 
-batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - -# Downsample - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=2 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=512 -size=3 
-stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - -# Downsample - -[convolutional] -batch_normalize=1 -filters=1024 -size=3 -stride=2 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=1024 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=1024 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=1024 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=1024 -size=3 -stride=1 -pad=1 -activation=leaky - -[shortcut] -from=-3 -activation=linear - -###################### - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 
-filters=1024 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=1024 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=1024 -activation=leaky - -[convolutional] -size=1 -stride=1 -pad=1 -filters=255 -activation=linear - - -[yolo] -mask = 6,7,8 -anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 -classes=80 -num=9 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 - - -[route] -layers = -4 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=leaky - -[upsample] -stride=2 - -[route] -layers = -1, 61 - - - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=leaky - -[convolutional] -size=1 -stride=1 -pad=1 -filters=255 -activation=linear - - -[yolo] -mask = 3,4,5 -anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 -classes=80 -num=9 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 - - - -[route] -layers = -4 - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=leaky - -[upsample] -stride=2 - -[route] -layers = -1, 36 - - - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -size=3 -stride=1 
-pad=1 -filters=256 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=leaky - -[convolutional] -size=1 -stride=1 -pad=1 -filters=255 -activation=linear - - -[yolo] -mask = 0,1,2 -anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 -classes=80 -num=9 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 - diff --git a/data/yolov4.cfg b/data/yolov4.cfg deleted file mode 100644 index 47b9db61a..000000000 --- a/data/yolov4.cfg +++ /dev/null @@ -1,1155 +0,0 @@ -[net] -batch=64 -subdivisions=8 -# Training -#width=512 -#height=512 -width=608 -height=608 -channels=3 -momentum=0.949 -decay=0.0005 -angle=0 -saturation = 1.5 -exposure = 1.5 -hue=.1 - -learning_rate=0.00261 -burn_in=1000 -max_batches = 500500 -policy=steps -steps=400000,450000 -scales=.1,.1 - -#cutmix=1 -mosaic=1 - -#:104x104 54:52x52 85:26x26 104:13x13 for 416 - -[convolutional] -batch_normalize=1 -filters=32 -size=3 -stride=1 -pad=1 -activation=mish - -# Downsample - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=2 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=32 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -1,-7 - -[convolutional] -batch_normalize=1 -filters=64 
-size=1 -stride=1 -pad=1 -activation=mish - -# Downsample - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=2 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=64 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=64 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -1,-10 - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=mish - -# Downsample - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=2 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] 
-from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=128 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -1,-28 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - -# Downsample - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=2 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -2 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - - -[convolutional] -batch_normalize=1 -filters=256 -size=1 
-stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=256 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -1,-28 - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -# Downsample - -[convolutional] -batch_normalize=1 -filters=1024 -size=3 -stride=2 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -2 - 
-[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -[convolutional] -batch_normalize=1 -filters=512 -size=3 -stride=1 -pad=1 -activation=mish - -[shortcut] -from=-3 -activation=linear - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=mish - -[route] -layers = -1,-16 - -[convolutional] -batch_normalize=1 -filters=1024 -size=1 -stride=1 -pad=1 -activation=mish - -########################## - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=1024 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=leaky - -### SPP ### -[maxpool] -stride=1 -size=5 - -[route] -layers=-2 - -[maxpool] -stride=1 -size=9 - -[route] -layers=-4 - -[maxpool] -stride=1 -size=13 - -[route] -layers=-1,-3,-5,-6 -### End SPP ### - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=1024 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 
-activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=leaky - -[upsample] -stride=2 - -[route] -layers = 85 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=leaky - -[route] -layers = -1, -3 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=leaky - -[upsample] -stride=2 - -[route] -layers = 54 - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=leaky - -[route] -layers = -1, -3 - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=128 -size=1 -stride=1 -pad=1 -activation=leaky - -########################## - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=256 -activation=leaky - -[convolutional] -size=1 -stride=1 -pad=1 -filters=255 -activation=linear - - -[yolo] -mask = 0,1,2 -anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 -classes=80 -num=9 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -scale_x_y = 1.2 -iou_thresh=0.213 -cls_normalizer=1.0 -iou_normalizer=0.07 -iou_loss=ciou -nms_kind=greedynms -beta_nms=0.6 - - 
-[route] -layers = -4 - -[convolutional] -batch_normalize=1 -size=3 -stride=2 -pad=1 -filters=256 -activation=leaky - -[route] -layers = -1, -16 - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=256 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=512 -activation=leaky - -[convolutional] -size=1 -stride=1 -pad=1 -filters=255 -activation=linear - - -[yolo] -mask = 3,4,5 -anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 -classes=80 -num=9 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -scale_x_y = 1.1 -iou_thresh=0.213 -cls_normalizer=1.0 -iou_normalizer=0.07 -iou_loss=ciou -nms_kind=greedynms -beta_nms=0.6 - - -[route] -layers = -4 - -[convolutional] -batch_normalize=1 -size=3 -stride=2 -pad=1 -filters=512 -activation=leaky - -[route] -layers = -1, -37 - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=1024 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=1024 -activation=leaky - -[convolutional] -batch_normalize=1 -filters=512 -size=1 -stride=1 -pad=1 -activation=leaky - -[convolutional] -batch_normalize=1 -size=3 -stride=1 -pad=1 -filters=1024 -activation=leaky - -[convolutional] -size=1 -stride=1 -pad=1 -filters=255 -activation=linear - - -[yolo] -mask = 6,7,8 -anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 
-classes=80 -num=9 -jitter=.3 -ignore_thresh = .7 -truth_thresh = 1 -random=1 -scale_x_y = 1.05 -iou_thresh=0.213 -cls_normalizer=1.0 -iou_normalizer=0.07 -iou_loss=ciou -nms_kind=greedynms -beta_nms=0.6 - diff --git a/demo.py b/demo.py new file mode 100644 index 000000000..7f467e45e --- /dev/null +++ b/demo.py @@ -0,0 +1,103 @@ +import sys +import glob +import getopt +import numpy as np +import cv2 as cv +import pymtracking as mt + +print("OpenCV Version: {}".format(cv.__version__)) + + +def draw_regions(img, regions, color): + for reg in regions: + brect = reg.brect + cv.rectangle(img, (brect.x, brect.y, brect.width, brect.height), color, 2) + + +def draw_tracks(img, tracks, fps): + for track in tracks: + brect = track.GetBoundingRect() + if track.isStatic: + cv.rectangle(img, (brect.x, brect.y, brect.width, brect.height), (255, 0, 255), 2) + elif track.IsRobust(int(fps / 4), 0.7, (0.1, 10.), 3): + cv.rectangle(img, (brect.x, brect.y, brect.width, brect.height), (0, 255, 0), 2) + trajectory = track.GetTrajectory() + for i in range(0, len(trajectory) - 1): + cv.line(img, trajectory[i], trajectory[i+1], (0, 255, 0), 1) + + +def main(): + args, video_src = getopt.getopt(sys.argv[1:], '', ['cascade=', 'nested-cascade=']) + try: + video_src = video_src[0] + except: + video_src = 0 + args = dict(args) + + cam = cv.VideoCapture(video_src) + + _ret, img = cam.read() + print("cam.read res = ", _ret, ", im size = ", img.shape) + + fps = cam.get(cv.CAP_PROP_FPS) + print(video_src, " fps = ", fps) + + configBGFG = mt.KeyVal() + configBGFG.Add('useRotatedRect', '20') + configBGFG.Add('history', '1000') + configBGFG.Add("nmixtures", "3") + configBGFG.Add("backgroundRatio", "0.7") + configBGFG.Add("noiseSigma", "0") + print("configBGFG = ", configBGFG) + mdetector = mt.BaseDetector(mt.BaseDetector.Detectors.MOG, configBGFG, img) + print("CanGrayProcessing: ", mdetector.CanGrayProcessing()) + mdetector.SetMinObjectSize((1, 1)) + + tracker_settings = mt.TrackerSettings() + + 
tracker_settings.SetDistance(mt.MTracker.DistRects) + tracker_settings.kalmanType = mt.MTracker.KalmanLinear + tracker_settings.filterGoal = mt.MTracker.FilterCenter + tracker_settings.lostTrackType = mt.MTracker.TrackNone + tracker_settings.matchType = mt.MTracker.MatchHungrian + tracker_settings.useAcceleration = False + tracker_settings.dt = 0.5 + tracker_settings.accelNoiseMag = 0.1 + tracker_settings.distThres = 0.95 + tracker_settings.minAreaRadiusPix = img.shape[0] / 5. + tracker_settings.minAreaRadiusK = 0.8 + tracker_settings.useAbandonedDetection = False + tracker_settings.maximumAllowedSkippedFrames = int(2 * fps) + tracker_settings.maxTraceLength = int(2 * fps) + + mtracker = mt.MTracker(tracker_settings) + + while True: + _ret, img = cam.read() + if _ret: + print("cam.read res = ", _ret, ", im size = ", img.shape, ", fps = ", fps) + else: + break + + mdetector.Detect(img) + regions = mdetector.GetDetects() + print("mdetector.Detect:", len(regions)) + + mtracker.Update(regions, img, fps) + tracks = mtracker.GetTracks() + print("mtracker.Update:", len(tracks)) + + vis = img.copy() + # draw_regions(vis, regions, (255, 0, 255)) + draw_tracks(vis, tracks, fps) + cv.imshow('detect', vis) + + if cv.waitKey(int(1000 / fps)) == 27: + break + + print('Done') + + +if __name__ == '__main__': + main() + cv.destroyAllWindows() diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index 74c6c0708..163680cb1 100644 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -2,56 +2,53 @@ cmake_minimum_required (VERSION 3.5) project(MultitargetTracker) -set(SOURCES - main.cpp - VideoExample.cpp -) +set(SOURCES main.cpp + VideoExample.cpp) -set(HEADERS - MouseExample.h - VideoExample.h - examples.h -) +set(HEADERS MouseExample.h + VideoExample.h + examples.h + MotionDetectorExample.h + FileLogger.h) + +if (BUILD_CARS_COUNTING) + set(SOURCES ${SOURCES} CarsCounting.cpp) + set(HEADERS ${HEADERS} CarsCounting.h) +endif(BUILD_CARS_COUNTING) # 
---------------------------------------------------------------------------- # добавляем include директории # ---------------------------------------------------------------------------- -INCLUDE_DIRECTORIES( - ${PROJECT_SOURCE_DIR}/../src - ${PROJECT_SOURCE_DIR}/../src/common +INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/../src + ${PROJECT_SOURCE_DIR}/../src/mtracking ${PROJECT_SOURCE_DIR}/../src/Detector ${PROJECT_SOURCE_DIR}/../src/Detector/vibe_src ${PROJECT_SOURCE_DIR}/../src/Detector/Subsense ${PROJECT_SOURCE_DIR}/../src/Tracker ${PROJECT_SOURCE_DIR}/../src/Tracker/HungarianAlg ${PROJECT_SOURCE_DIR}/../thirdparty -) - -set(LIBS - ${OpenCV_LIBS} - mtracking - mdetection - inih -) + ${PROJECT_SOURCE_DIR}/../thirdparty/spdlog/include) -if (BUILD_YOLO_LIB) - if (MSVC) - if("${CMAKE_SIZEOF_VOID_P}" STREQUAL "4") - set(BIT_SYSTEM x32) - else() - set(BIT_SYSTEM x64) - endif() +set(LIBS ${OpenCV_LIBS} + mtracking + mdetection + inih) - link_directories(${PROJECT_SOURCE_DIR}/../src/Detector/darknet/3rdparty/lib/${BIT_SYSTEM}) - endif(MSVC) +if (BUILD_ONNX_TENSORRT) + add_definitions(-DBUILD_ONNX_TENSORRT) +endif(BUILD_ONNX_TENSORRT) - add_definitions(-DBUILD_YOLO_LIB) -endif(BUILD_YOLO_LIB) +if (BUILD_CARS_COUNTING) + add_definitions(-DBUILD_CARS_COUNTING) +endif(BUILD_CARS_COUNTING) -if (BUILD_YOLO_TENSORRT) - add_definitions(-DBUILD_YOLO_TENSORRT) -endif(BUILD_YOLO_TENSORRT) +if (USE_CLIP) + add_definitions(-DUSE_CLIP) + set(LIBS ${LIBS} ruclip) +endif(USE_CLIP) ADD_EXECUTABLE(${PROJECT_NAME} ${SOURCES} ${HEADERS}) -TARGET_LINK_LIBRARIES(${PROJECT_NAME} ${LIBS}) +TARGET_LINK_LIBRARIES(${PROJECT_NAME} PRIVATE ${LIBS}) + +set_target_properties(${PROJECT_NAME} PROPERTIES FOLDER "apps") \ No newline at end of file diff --git a/example/CarsCounting.cpp b/example/CarsCounting.cpp new file mode 100644 index 000000000..5ff8aca56 --- /dev/null +++ b/example/CarsCounting.cpp @@ -0,0 +1,472 @@ +#include "CarsCounting.h" +#include + +/// +/// \brief CarsCounting::CarsCounting +/// 
\param parser +/// +CarsCounting::CarsCounting(const cv::CommandLineParser& parser) + : VideoExample(parser) +{ +#ifdef _WIN32 + std::string pathToModel = "../../data/"; +#else + std::string pathToModel = "../data/"; +#endif + + m_drawHeatMap = parser.get("heat_map") != 0; + + std::string settingsFile = parser.get("settings"); + m_trackerSettingsLoaded = ParseTrackerSettings(settingsFile, m_trackerSettings); + + m_logger->info("Inference loaded ({0}) from {1}: used {2} backend, weights: {3}, config: {4}, names: {5}", + m_trackerSettingsLoaded, settingsFile, m_trackerSettings.m_detectorBackend, m_trackerSettings.m_nnWeights, m_trackerSettings.m_nnConfig, m_trackerSettings.m_classNames); + + m_geoBindFile = parser.get("geo_bind"); +} + +/// +/// \brief CarsCounting::DrawTrack +/// \param frame +/// \param track +/// \param drawTrajectory +/// \param framesCounters +/// +void CarsCounting::DrawTrack(cv::Mat frame, const TrackingObject& track, bool drawTrajectory, int framesCounter, const std::string& /*userLabel*/) +{ + cv::Rect brect = track.m_rrect.boundingRect(); + + m_resultsLog.AddTrack(framesCounter, track.m_ID, brect, track.m_type, track.m_confidence); + m_resultsLog.AddRobustTrack(track.m_ID); + + if (track.m_isStatic) + { +#if (CV_VERSION_MAJOR >= 4) + cv::rectangle(frame, brect, cv::Scalar(255, 0, 255), 2, cv::LINE_AA); +#else + cv::rectangle(frame, brect, cv::Scalar(255, 0, 255), 2, CV_AA); +#endif + } + else + { +#if (CV_VERSION_MAJOR >= 4) + cv::rectangle(frame, brect, cv::Scalar(0, 255, 0), 1, cv::LINE_AA); +#else + cv::rectangle(frame, brect, cv::Scalar(0, 255, 0), 1, CV_AA); +#endif + + if (!m_geoParams.Empty()) + { + int traceSize = static_cast(track.m_trace.size()); + int period = std::min(2 * cvRound(m_fps), traceSize); + const auto& from = m_geoParams.Pix2Geo(track.m_trace[traceSize - period]); + const auto& to = m_geoParams.Pix2Geo(track.m_trace[traceSize - 1]); + auto dist = DistanceInMeters(from, to); + + std::stringstream label; + if (period >= 
cvRound(m_fps) / 4) + { + auto velocity = (3.6f * dist * m_fps) / period; + //std::cout << TypeConverter::Type2Str(track.m_type) << ": distance " << std::fixed << std::setw(2) << std::setprecision(2) << dist << " on time " << (period / m_fps) << " with velocity " << velocity << " km/h: " << track.m_confidence << std::endl; + if (velocity < 1.f || std::isnan(velocity)) + velocity = 0; + //label << TypeConverter::Type2Str(track.m_type) << " " << std::fixed << std::setw(2) << std::setprecision(2) << velocity << " km/h"; + label << TypeConverter::Type2Str(track.m_type) << " " << cvRound(velocity) << " km/h"; + + int baseLine = 0; + double fontScale = (frame.cols < 2000) ? 0.5 : 1.; + cv::Size labelSize = cv::getTextSize(label.str(), cv::FONT_HERSHEY_TRIPLEX, fontScale, 1, &baseLine); + + if (brect.x < 0) + { + brect.width = std::min(brect.width, frame.cols - 1); + brect.x = 0; + } + else if (brect.x + brect.width >= frame.cols) + { + brect.x = std::max(0, frame.cols - brect.width - 1); + brect.width = std::min(brect.width, frame.cols - 1); + } + if (brect.y - labelSize.height < 0) + { + brect.height = std::min(brect.height, frame.rows - 1); + brect.y = labelSize.height; + } + else if (brect.y + brect.height >= frame.rows) + { + brect.y = std::max(0, frame.rows - brect.height - 1); + brect.height = std::min(brect.height, frame.rows - 1); + } + cv::rectangle(frame, cv::Rect(cv::Point(brect.x, brect.y - labelSize.height), cv::Size(labelSize.width, labelSize.height + baseLine)), cv::Scalar(200, 200, 200), cv::FILLED); + cv::putText(frame, label.str(), brect.tl(), cv::FONT_HERSHEY_TRIPLEX, fontScale, cv::Scalar(0, 0, 0)); + + if (velocity > 3) + AddToHeatMap(brect); + } + } + } + + if (drawTrajectory) + { + cv::Scalar cl = m_colors[track.m_ID.ID2Module(m_colors.size())]; + + for (size_t j = 0; j < track.m_trace.size() - 1; ++j) + { + const TrajectoryPoint& pt1 = track.m_trace.at(j); + const TrajectoryPoint& pt2 = track.m_trace.at(j + 1); +#if (CV_VERSION_MAJOR >= 4) + 
cv::line(frame, pt1.m_prediction, pt2.m_prediction, cl, 1, cv::LINE_AA); +#else + cv::line(frame, pt1.m_prediction, pt2.m_prediction, cl, 1, CV_AA); +#endif + if (!pt2.m_hasRaw) + { +#if (CV_VERSION_MAJOR >= 4) + cv::circle(frame, pt2.m_prediction, 4, cl, 1, cv::LINE_AA); +#else + cv::circle(frame, pt2.m_prediction, 4, cl, 1, CV_AA); +#endif + } + } + } +} + +/// +/// \brief CarsCounting::InitDetector +/// \param frame +/// +bool CarsCounting::InitDetector(cv::UMat frame) +{ + if (!m_trackerSettingsLoaded) + return false; + + config_t config; + + config.emplace("modelConfiguration", m_trackerSettings.m_nnConfig); + config.emplace("modelBinary", m_trackerSettings.m_nnWeights); + config.emplace("confidenceThreshold", std::to_string(m_trackerSettings.m_confidenceThreshold)); + config.emplace("classNames", m_trackerSettings.m_classNames); + config.emplace("maxCropRatio", std::to_string(m_trackerSettings.m_maxCropRatio)); + config.emplace("maxBatch", std::to_string(m_trackerSettings.m_maxBatch)); + config.emplace("gpuId", std::to_string(m_trackerSettings.m_gpuId)); + config.emplace("net_type", m_trackerSettings.m_netType); + config.emplace("inference_precision", m_trackerSettings.m_inferencePrecision); + config.emplace("video_memory", std::to_string(m_trackerSettings.m_maxVideoMemory)); + config.emplace("dnnTarget", m_trackerSettings.m_dnnTarget); + config.emplace("dnnBackend", m_trackerSettings.m_dnnBackend); + config.emplace("inWidth", std::to_string(m_trackerSettings.m_inputSize.width)); + config.emplace("inHeight", std::to_string(m_trackerSettings.m_inputSize.height)); + + for (auto wname : m_trackerSettings.m_whiteList) + { + config.emplace("white_list", wname); + } + + m_detector = BaseDetector::CreateDetector((tracking::Detectors)m_trackerSettings.m_detectorBackend, config, frame); + + return m_detector.operator bool(); +} + +/// +/// \brief CarsCounting::InitTracker +/// \param grayFrame +/// +bool CarsCounting::InitTracker(cv::UMat frame) +{ + if 
(!m_trackerSettingsLoaded) + return false; + + if (m_drawHeatMap) + { + if (frame.channels() == 3) + m_keyFrame = frame.getMat(cv::ACCESS_READ).clone(); + else + cv::cvtColor(frame, m_keyFrame, cv::COLOR_GRAY2BGR); + m_heatMap = cv::Mat(m_keyFrame.size(), CV_32FC1, cv::Scalar::all(0)); + } + + const int minStaticTime = 5; + + TrackerSettings settings; + settings.SetDistance(tracking::DistJaccard); + settings.m_kalmanType = tracking::KalmanLinear; + settings.m_filterGoal = tracking::FilterCenter; + settings.m_lostTrackType = tracking::TrackCSRT; // Use KCF tracker for collisions resolving. Used if m_filterGoal == tracking::FilterRect + settings.m_matchType = tracking::MatchHungrian; + settings.m_dt = 0.3f; // Delta time for Kalman filter + settings.m_accelNoiseMag = 0.2f; // Accel noise magnitude for Kalman filter + settings.m_distThres = 0.7f; // Distance threshold between region and object on two frames + settings.m_minAreaRadiusPix = frame.rows / 20.f; + settings.m_maximumAllowedLostTime = 2.; // Maximum allowed lost time + + settings.AddNearTypes(TypeConverter::Str2Type("car"), TypeConverter::Str2Type("bus"), false); + settings.AddNearTypes(TypeConverter::Str2Type("car"), TypeConverter::Str2Type("truck"), false); + settings.AddNearTypes(TypeConverter::Str2Type("person"), TypeConverter::Str2Type("bicycle"), true); + settings.AddNearTypes(TypeConverter::Str2Type("person"), TypeConverter::Str2Type("motorbike"), true); + + settings.m_useAbandonedDetection = false; + if (settings.m_useAbandonedDetection) + { + settings.m_minStaticTime = minStaticTime; + settings.m_maxStaticTime = 60; + settings.m_maximumAllowedLostTime = settings.m_minStaticTime; // Maximum allowed lost time + settings.m_maxTraceLength = 2 * settings.m_maximumAllowedLostTime; // Maximum trace length + } + else + { + settings.m_maximumAllowedLostTime = 10.; // Maximum allowed lost time + settings.m_maxTraceLength = 4.; // Maximum trace length + } + + m_tracker = BaseTracker::CreateTracker(settings, 
m_fps); + + ReadGeobindings(frame.size()); + return true; +} + +/// +/// \brief CarsCounting::DrawData +/// \param frame +/// +void CarsCounting::DrawData(cv::Mat frame, const std::vector& tracks, int framesCounter, int currTime) +{ + m_logger->info("Frame {0} ({1}): tracks = {2}, time = {3}", framesCounter, m_framesCount, tracks.size(), currTime); + +#if 1 // Debug output + if (!m_geoParams.Empty()) + { + std::vector points = m_geoParams.GetFramePoints(); + for (size_t i = 0; i < points.size(); ++i) + { + cv::line(frame, points[i % points.size()], points[(i + 1) % points.size()], cv::Scalar(255, 255, 255), 1, cv::LINE_AA); + } + } +#endif + + for (const auto& track : tracks) + { + if (track.m_isStatic) + { + DrawTrack(frame, track, true, framesCounter); + } + else + { + if (track.IsRobust(cvRound(m_fps / 4), // Minimal trajectory size + 0.8f, // Minimal ratio raw_trajectory_points / trajectory_lenght + cv::Size2f(0.1f, 8.0f)) // Min and max ratio: width / height + ) + { + DrawTrack(frame, track, true, framesCounter); + + CheckLinesIntersection(track, static_cast(frame.cols), static_cast(frame.rows)); + } + } + } + //m_detector->CalcMotionMap(frame); + + if (!m_geoParams.Empty()) + { + cv::Mat geoMap = m_geoParams.DrawTracksOnMap(tracks); + if (!geoMap.empty()) + { +#ifndef SILENT_WORK + cv::namedWindow("Geo map", cv::WINDOW_NORMAL); + cv::imshow("Geo map", geoMap); +#endif + if (true) + { + double k = 0.25; + cv::Size mapPreview(cvRound(frame.cols * k), cvRound(((frame.cols * k) / geoMap.cols) * geoMap.rows)); + cv::resize(geoMap, frame(cv::Rect(frame.cols - mapPreview.width - 1, frame.rows - mapPreview.height - 1, mapPreview.width, mapPreview.height)), mapPreview, 0, 0, cv::INTER_CUBIC); + } + } + } + + for (const auto& rl : m_lines) + { + rl.Draw(frame); + } + + cv::Mat heatMap = DrawHeatMap(); +#ifndef SILENT_WORK + if (!heatMap.empty()) + cv::imshow("Heat map", heatMap); +#endif +} + +/// +/// \brief CarsCounting::AddLine +/// \param newLine +/// +void 
CarsCounting::AddLine(const RoadLine& newLine) +{ + m_lines.push_back(newLine); +} + +/// +/// \brief CarsCounting::GetLine +/// \param lineUid +/// \return +/// +bool CarsCounting::GetLine(unsigned int lineUid, RoadLine& line) +{ + for (const auto& rl : m_lines) + { + if (rl.m_uid == lineUid) + { + line = rl; + return true; + } + } + return false; +} + +/// +/// \brief CarsCounting::RemoveLine +/// \param lineUid +/// \return +/// +bool CarsCounting::RemoveLine(unsigned int lineUid) +{ + for (auto it = std::begin(m_lines); it != std::end(m_lines);) + { + if (it->m_uid == lineUid) + it = m_lines.erase(it); + else + ++it; + } + return false; +} + +/// +/// \brief CarsCounting::CheckLinesIntersection +/// \param track +/// +void CarsCounting::CheckLinesIntersection(const TrackingObject& track, float xMax, float yMax) +{ + auto Pti2f = [&](cv::Point pt) + { + return cv::Point2f(pt.x / xMax, pt.y / yMax); + }; + + constexpr size_t minTrack = 5; + if (track.m_trace.size() >= minTrack) + { + for (auto& rl : m_lines) + { + rl.IsIntersect(track.m_ID, Pti2f(track.m_trace[track.m_trace.size() - minTrack]), Pti2f(track.m_trace[track.m_trace.size() - 1])); + } + } +} + +/// +/// \brief CarsCounting::DrawHeatMap +/// +cv::Mat CarsCounting::DrawHeatMap() +{ + cv::Mat res; + if (!m_heatMap.empty()) + { + cv::normalize(m_heatMap, m_normHeatMap, 255, 0, cv::NORM_MINMAX, CV_8UC1); + cv::applyColorMap(m_normHeatMap, m_colorMap, cv::COLORMAP_HOT); + cv::bitwise_or(m_keyFrame, m_colorMap, res); + } + return res; +} + +/// +/// \brief CarsCounting::AddToHeatMap +/// +void CarsCounting::AddToHeatMap(const cv::Rect& rect) +{ + if (m_heatMap.empty()) + return; + + constexpr float w = 0.001f; + for (int y = 0; y < rect.height; ++y) + { + float* heatPtr = m_heatMap.ptr(rect.y + y) + rect.x; + for (int x = 0; x < rect.width; ++x) + { + heatPtr[x] += w; + } + } +} + +/// +/// \brief CarsCounting::ReadGeobindings +/// +bool CarsCounting::ReadGeobindings(cv::Size frameSize) +{ + bool res = true; 
+ INIReader reader(m_geoBindFile); + + int parseError = reader.ParseError(); + if (parseError < 0) + { + m_logger->critical("GeoBindFile file {} does not exist!", m_geoBindFile); + res = false; + } + else if (parseError > 0) + { + m_logger->critical("GeoBindFile file {0} parse error in line: {1}", m_geoBindFile, parseError); + res = false; + } + if (!res) + return res; + + // Read frame-map bindings + std::vector geoPoints; + std::vector framePoints; + for (size_t i = 0;; ++i) + { + cv::Point2d geoPoint; + std::string lat = "lat" + std::to_string(i); + std::string lon = "lon" + std::to_string(i); + std::string px_x = "px_x" + std::to_string(i); + std::string px_y = "px_y" + std::to_string(i); + if (reader.HasValue("points", lat) && reader.HasValue("points", lon) && reader.HasValue("points", px_x) && reader.HasValue("points", px_y)) + { + geoPoints.emplace_back(reader.GetReal("points", lat, 0), reader.GetReal("points", lon, 0)); + framePoints.emplace_back(cvRound(reader.GetReal("points", px_x, 0) * frameSize.width), cvRound(reader.GetReal("points", px_y, 0) * frameSize.height)); + } + else + { + break; + } + } + res = m_geoParams.SetKeyPoints(framePoints, geoPoints); + + // Read map image + std::string mapFile = reader.GetString("map", "file", ""); + std::vector mapGeoCorners; + mapGeoCorners.emplace_back(reader.GetReal("map", "left_top_lat", 0), reader.GetReal("map", "left_top_lon", 0)); + mapGeoCorners.emplace_back(reader.GetReal("map", "right_top_lat", 0), reader.GetReal("map", "right_top_lon", 0)); + mapGeoCorners.emplace_back(reader.GetReal("map", "right_bottom_lat", 0), reader.GetReal("map", "right_bottom_lon", 0)); + mapGeoCorners.emplace_back(reader.GetReal("map", "left_bottom_lat", 0), reader.GetReal("map", "left_bottom_lon", 0)); + m_geoParams.SetMapParams(mapFile, mapGeoCorners); + + // Read lines + m_logger->info("Read lines:"); + for (size_t i = 0;; ++i) + { + std::string line = "line" + std::to_string(i); + std::string x0 = line + "_x0"; + std::string 
y0 = line + "_y0"; + std::string x1 = line + "_x1"; + std::string y1 = line + "_y1"; + if (reader.HasValue("lines", x0) && reader.HasValue("lines", y0) && reader.HasValue("lines", x1) && reader.HasValue("lines", y1)) + { + cv::Point2f p0(static_cast(reader.GetReal("lines", x0, 0)), static_cast(reader.GetReal("lines", y0, 0))); + cv::Point2f p1(static_cast(reader.GetReal("lines", x1, 0)), static_cast(reader.GetReal("lines", y1, 0))); + m_logger->info("Line {0}: ({1}, {2}) - ({3}, 4)", i, p0.x, p0.y, p1.x, p1.y); + AddLine(RoadLine(p0, p1, static_cast(i))); + } + else + { + break; + } + } + + return res; +} diff --git a/cars_counting/CarsCounting.h b/example/CarsCounting.h similarity index 63% rename from cars_counting/CarsCounting.h rename to example/CarsCounting.h index 898a582ab..929672869 100644 --- a/cars_counting/CarsCounting.h +++ b/example/CarsCounting.h @@ -1,11 +1,7 @@ #pragma once -#include "BaseDetector.h" - -#include "Ctracker.h" -#include -#include -#include +#include +#include "VideoExample.h" /// constexpr double DEG_TO_RAD = 0.017453292519943295769236907684886; @@ -34,6 +30,34 @@ T DistanceInMeters(const cv::Point_& from, const cv::Point_& to) return EarthRadius * Haversine(from, to); } +// Fix cv::getPerspectiveTransform for double +template +cv::Mat GetPerspectiveTransform(const std::vector>& src, const std::vector>& dst, int solveMethod) +{ + cv::Mat M(3, 3, CV_64F), X(8, 1, CV_64F, M.ptr()); + double a[8][8], b[8]; + cv::Mat A(8, 8, CV_64F, a), B(8, 1, CV_64F, b); + + for (int i = 0; i < 4; ++i) + { + a[i][0] = a[i + 4][3] = src[i].x; + a[i][1] = a[i + 4][4] = src[i].y; + a[i][2] = a[i + 4][5] = 1; + a[i][3] = a[i][4] = a[i][5] = a[i + 4][0] = a[i + 4][1] = a[i + 4][2] = 0; + a[i][6] = -src[i].x * dst[i].x; + a[i][7] = -src[i].y * dst[i].x; + a[i + 4][6] = -src[i].x * dst[i].y; + a[i + 4][7] = -src[i].y * dst[i].y; + b[i] = dst[i].x; + b[i + 4] = dst[i].y; + } + + cv::solve(A, B, X, solveMethod); + M.ptr()[8] = 1.; + + return M; +} + /// /// 
\brief The GeoParams class /// @@ -63,7 +87,7 @@ class GeoParams std::vector> tmpPix; tmpPix.reserve(m_framePoints.size()); - for (auto pix : m_framePoints) + for (const auto& pix : m_framePoints) { tmpPix.emplace_back(static_cast(pix.x), static_cast(pix.y)); } @@ -75,8 +99,8 @@ class GeoParams } std::cout << std::endl; #endif - cv::Mat toGeo = cv::getPerspectiveTransform(tmpPix, m_geoPoints); - cv::Mat toPix = cv::getPerspectiveTransform(m_geoPoints, tmpPix); + cv::Mat toGeo = GetPerspectiveTransform(tmpPix, m_geoPoints, (int)cv::DECOMP_LU); + cv::Mat toPix = GetPerspectiveTransform(m_geoPoints, tmpPix, (int)cv::DECOMP_LU); m_toGeo = toGeo; m_toPix = toPix; //std::cout << "To Geo: " << m_toGeo << std::endl; @@ -85,6 +109,18 @@ class GeoParams return res; } + /// + bool SetMapParams(const std::string& mapfile, const std::vector>& mapGeoCorners) + { + m_map = cv::imread(mapfile); + m_mapGeoCorners.assign(std::begin(mapGeoCorners), std::end(mapGeoCorners)); + + if (!m_map.empty()) + m_p2mParams.Init(m_mapGeoCorners, m_map.cols, m_map.rows); + + return !m_map.empty(); + } + /// cv::Point Geo2Pix(const cv::Point_& geo) const { @@ -113,12 +149,85 @@ class GeoParams return m_framePoints.size() != m_geoPoints.size() || m_framePoints.size() < 4; } + /// + cv::Mat DrawTracksOnMap(const std::vector& tracks) + { + if (m_map.empty()) + return cv::Mat(); + + cv::Mat map = m_map.clone(); + +#if 1 // Debug output + // Draw bindings points + for (size_t i = 0; i < m_geoPoints.size(); ++i) + { + cv::Point_ gp1(m_geoPoints[i % m_geoPoints.size()]); + cv::Point p1(m_p2mParams.Geo2Pix(gp1)); + + cv::Point_ gp2(m_geoPoints[(i + 1) % m_geoPoints.size()]); + cv::Point p2(m_p2mParams.Geo2Pix(gp2)); + + cv::line(map, p1, p2, cv::Scalar(255, 255, 255), 2, cv::LINE_AA); + + //std::cout << p1 << " - " << p2 << std::endl; + + std::stringstream label; + label << std::fixed << std::setw(5) << std::setprecision(5) << "[" << m_geoPoints[i].x << ", " << m_geoPoints[i].y << "]"; + int baseLine = 0; 
+ double fontScale = (map.cols < 1000 && map.rows < 1000) ? 0.5 : 1.; + cv::Size labelSize = cv::getTextSize(label.str(), cv::FONT_HERSHEY_TRIPLEX, fontScale, 1, &baseLine); + cv::putText(map, label.str(), p1, cv::FONT_HERSHEY_TRIPLEX, fontScale, cv::Scalar(255, 255, 255)); + } +#endif + + // Draw tracks + for (const auto& track : tracks) + { + if (track.m_lastRobust) + { + auto geoCenter = Pix2Geo(track.m_rrect.center); + auto center = m_p2mParams.Geo2Pix(geoCenter); + //std::cout << "Convert: " << track.m_rrect.center << " -> " << geoCenter << " -> " << center << std::endl; + if (center.x > 0 && center.x < map.cols && center.y > 0 && center.y < map.rows) + cv::ellipse(map, center, cv::Size(5, 5), 0, 0, 360, cv::Scalar(255, 0, 255), cv::FILLED, 8); + } + } + + return map; + } + private: std::vector m_framePoints; std::vector> m_geoPoints; cv::Matx m_toGeo; cv::Matx m_toPix; + + cv::Mat m_map; + std::vector> m_mapGeoCorners; + + /// + struct Pix2MapParams + { + cv::Matx m_toPix; + + /// + cv::Point Geo2Pix(const cv::Point_& geo) const + { + cv::Vec g(geo.x, geo.y, 1); + auto p = m_toPix * g; + return cv::Point(cvRound(p[0] / p[2]), cvRound(p[1] / p[2])); + } + + /// + void Init(const std::vector>& geoCorners, int mapWidth, int mapHeight) + { + std::vector> frameCorners { cv::Point_(0, 0), cv::Point_(mapWidth, 0), cv::Point_(mapWidth, mapHeight), cv::Point_(0, mapHeight) }; + cv::Mat toPix = GetPerspectiveTransform(geoCorners, frameCorners, (int)cv::DECOMP_LU); + m_toPix = toPix; + } + }; + Pix2MapParams m_p2mParams; }; /// @@ -130,9 +239,7 @@ class RoadLine /// /// \brief RoadLine /// - RoadLine() - { - } + RoadLine() = default; RoadLine(const cv::Point2f& pt1, const cv::Point2f& pt2, unsigned int uid) : m_pt1(pt1), m_pt2(pt2), m_uid(uid) @@ -163,7 +270,7 @@ class RoadLine /// void Draw(cv::Mat frame) const { - auto Ptf2i = [&](cv::Point2f pt) -> cv::Point + auto Ptf2i = [&](cv::Point2f pt) { return cv::Point(cvRound(frame.cols * pt.x), cvRound(frame.rows * pt.y)); 
}; @@ -187,13 +294,19 @@ class RoadLine /// \param pt2 /// \return /// - int IsIntersect(cv::Point2f pt1, cv::Point2f pt2) + int IsIntersect(track_id_t objID, cv::Point2f pt1, cv::Point2f pt2) { - bool isIntersect = CheckIntersection(pt1, pt2); int direction = 0; + if (m_lastIntersections.find(objID) != m_lastIntersections.end()) + return direction; + + bool isIntersect = CheckIntersection(pt1, pt2); + if (isIntersect) { + m_lastIntersections.emplace(objID); + cv::Point2f pt; if ((m_pt1.x <= m_pt2.x) && (m_pt1.y > m_pt2.y)) { @@ -241,6 +354,8 @@ class RoadLine private: + std::unordered_set m_lastIntersections; + /// /// \brief CheckIntersection /// \param pt1 @@ -330,62 +445,34 @@ class RoadLine /// /// \brief The CarsCounting class /// -class CarsCounting +class CarsCounting final : public VideoExample { public: CarsCounting(const cv::CommandLineParser& parser); - virtual ~CarsCounting(); - - void Process(); // Lines API void AddLine(const RoadLine& newLine); bool GetLine(unsigned int lineUid, RoadLine& line); bool RemoveLine(unsigned int lineUid); -protected: - - std::string m_weightsFile; - std::string m_configFile; - std::string m_namesFile; - tracking::Detectors m_detectorType = tracking::Detectors::Yolo_Darknet; - - std::unique_ptr m_detector; - std::unique_ptr m_tracker; +private: bool m_drawHeatMap = false; - bool m_showLogs = false; - float m_fps = 0; - - virtual bool InitTracker(cv::UMat frame); - - virtual void DrawData(cv::Mat frame, int framesCounter, int currTime); - - void DrawTrack(cv::Mat frame, - int resizeCoeff, - const TrackingObject& track, - bool drawTrajectory = true); - -private: - - bool m_isTrackerInitialized = false; - std::string m_inFile; - std::string m_outFile; - int m_startFrame = 0; - int m_endFrame = 0; - int m_finishDelay = 0; - std::vector m_colors; + bool InitDetector(cv::UMat frame) override; + bool InitTracker(cv::UMat frame) override; - int m_minObjWidth = 10; + void DrawData(cv::Mat frame, const std::vector& tracks, int 
framesCounter, int currTime) override; + void DrawTrack(cv::Mat frame, const TrackingObject& track, bool drawTrajectory, int framesCounter, const std::string& userLabel = "") override; // Road lines std::deque m_lines; void CheckLinesIntersection(const TrackingObject& track, float xMax, float yMax); - std::set m_lastIntersections; // Binding frame coordinates to geographical coordinates - GeoParams m_geoParams; + GeoParams m_geoParams; + std::string m_geoBindFile; + bool ReadGeobindings(cv::Size frameSize); // Heat map for visualization long term detections cv::Mat m_keyFrame; diff --git a/example/FileLogger.h b/example/FileLogger.h new file mode 100644 index 000000000..3b8109c2c --- /dev/null +++ b/example/FileLogger.h @@ -0,0 +1,166 @@ +#pragma once +#include +#include +#include + +#include + +#include "object_types.h" + +/// +/// \brief The ResultsLog class +/// +class ResultsLog +{ +public: + /// + ResultsLog(const std::string& fileName, int writeEachNFrame) + : m_fileName(fileName), m_writeEachNFrame(writeEachNFrame) + { + } + + /// + ~ResultsLog() + { + WriteAll(true); + } + + /// + bool Open() + { + m_resCSV.close(); + if (m_fileName.size() > 5) + { + m_resCSV.open(m_fileName); + return m_resCSV.is_open(); + } + return false; + } + + /// + bool AddTrack(int framesCounter, track_id_t trackID, const cv::Rect& brect, objtype_t type, float confidence) + { + if (m_resCSV.is_open()) + { + auto frame = m_frames.find(framesCounter); + if (frame == std::end(m_frames)) + { + DetectsOnFrame tmpFrame; + tmpFrame.m_detects.emplace_back(trackID, brect, type, confidence); + m_frames.emplace(framesCounter, tmpFrame); + } + else + { + frame->second.m_detects.emplace_back(trackID, brect, type, confidence); + } + return true; + } + return false; + } + + /// + void AddRobustTrack(track_id_t trackID) + { + m_robustIDs.insert(trackID); + } + + /// + void Flush() + { + WriteAll(true); + m_frames.clear(); + } + +private: + std::string m_fileName; + std::ofstream m_resCSV; + + /// + 
struct Detection + { + cv::Rect m_rect; + objtype_t m_type; + float m_conf = 0.f; + track_id_t m_trackID = 0; + + Detection(track_id_t trackID, const cv::Rect& brect, objtype_t type, float confidence) + { + m_type = type; + m_rect = brect; + m_conf = confidence; + m_trackID = trackID; + } + }; + + /// + struct DetectsOnFrame + { + std::vector m_detects; + }; + std::map m_frames; + std::unordered_set m_robustIDs; + int m_writeEachNFrame = 1; + + /// + void WriteAll(bool byFrames) + { + if (byFrames) + { +#if 1 + char delim = ','; + for (const auto& frame : m_frames) + { + if (frame.first % m_writeEachNFrame == 0) + { + for (const auto& detect : frame.second.m_detects) + { + if (m_robustIDs.find(detect.m_trackID) != std::end(m_robustIDs)) + { + m_resCSV << frame.first << delim << TypeConverter::Type2Str(detect.m_type) << delim << detect.m_rect.x << delim << detect.m_rect.y << delim << + detect.m_rect.width << delim << detect.m_rect.height << delim << + detect.m_conf << delim << std::endl; + } + } + } + } +#else + char delim = ' '; + for (const auto& frame : m_frames) + { + for (const auto& detect : frame.second.m_detects) + { + if (m_robustIDs.find(detect.m_trackID) != std::end(m_robustIDs)) + { + m_resCSV << frame.first << delim << TypeConverter::Type2Str(detect.m_type) << delim << detect.m_rect.x << delim << detect.m_rect.y << delim << + (detect.m_rect.x + detect.m_rect.width) << delim << (detect.m_rect.y + detect.m_rect.height) << delim << + detect.m_conf << delim << detect.m_trackID << std::endl; + } + } + } +#endif + } + else + { + char delim = ','; + for (auto id : m_robustIDs) + { + for (const auto& frame : m_frames) + { + if (frame.first % m_writeEachNFrame == 0) + { + for (const auto& detect : frame.second.m_detects) + { + if (detect.m_trackID == id) + { + m_resCSV << frame.first << delim << id.ID2Str() << delim << detect.m_rect.x << delim << detect.m_rect.y << delim << + detect.m_rect.width << delim << detect.m_rect.height << delim << + detect.m_conf << 
",-1,-1,-1," << std::endl; + break; + } + } + } + } + } + } + } +}; diff --git a/example/MotionDetectorExample.h b/example/MotionDetectorExample.h new file mode 100644 index 000000000..7319c4079 --- /dev/null +++ b/example/MotionDetectorExample.h @@ -0,0 +1,227 @@ +#pragma once + +#include +#include +#include + +#include "VideoExample.h" + +#ifdef USE_CLIP +#include "ruclip/ClipAPI.h" +#endif // USE_CLIP + +/// +/// \brief The MotionDetectorExample class +/// +class MotionDetectorExample final : public VideoExample +{ +public: + MotionDetectorExample(const cv::CommandLineParser& parser) + : VideoExample(parser) + { +#ifdef USE_CLIP + std::string clipModel = "C:/work/clip/ruclip_/CLIP/data/ruclip-vit-large-patch14-336"; + std::string bpeModel = "C:/work/clip/ruclip_/CLIP/data/ruclip-vit-large-patch14-336/bpe.model"; + m_clip.Init(clipModel, bpeModel, 336, 0, { "pedestrian", "person", "suv", "pickup", "car", "truck", "bus" }); +#endif // USE_CLIP + + m_logger->info("MotionDetectorExample"); + } + +protected: + /// + /// \brief InitDetector + /// \param frame + /// \return + /// + bool InitDetector(cv::UMat frame) override + { + m_logger->info("MotionDetectorExample::InitDetector"); + + m_minObjWidth = 2; + + config_t config; + config.emplace("useRotatedRect", "0"); + + tracking::Detectors detectorType = tracking::Detectors::Motion_MOG; + + switch (detectorType) + { + case tracking::Detectors::Motion_VIBE: + config.emplace("samples", "20"); + config.emplace("pixelNeighbor", "1"); + config.emplace("distanceThreshold", "20"); + config.emplace("matchingThreshold", "3"); + config.emplace("updateFactor", "16"); + break; + case tracking::Detectors::Motion_MOG: + config.emplace("history", std::to_string(cvRound(5000 * m_fps))); + config.emplace("nmixtures", "3"); + config.emplace("backgroundRatio", "0.7"); + config.emplace("noiseSigma", "0"); + break; + case tracking::Detectors::Motion_GMG: + config.emplace("initializationFrames", "50"); + config.emplace("decisionThreshold", 
"0.7"); + break; + case tracking::Detectors::Motion_CNT: + config.emplace("minPixelStability", "15"); + config.emplace("maxPixelStability", std::to_string(cvRound(20 * m_minStaticTime * m_fps))); + config.emplace("useHistory", "1"); + config.emplace("isParallel", "1"); + break; + case tracking::Detectors::Motion_MOG2: + config.emplace("history", std::to_string(cvRound(20 * m_minStaticTime * m_fps))); + config.emplace("varThreshold", "10"); + config.emplace("detectShadows", "1"); + break; + } + m_detector = BaseDetector::CreateDetector(detectorType, config, frame); + + if (m_detector.get()) + { + m_detector->SetMinObjectSize(cv::Size(m_minObjWidth, m_minObjWidth)); + return true; + } + return false; + } + /// + /// \brief InitTracker + /// \param frame + /// \return + /// + bool InitTracker(cv::UMat frame) override + { + m_logger->info("MotionDetectorExample::InitTracker"); + + if (!m_trackerSettingsLoaded) + { + m_trackerSettings.SetDistance(tracking::DistCenters); + m_trackerSettings.m_kalmanType = tracking::KalmanLinear; + m_trackerSettings.m_filterGoal = tracking::FilterCenter; + m_trackerSettings.m_lostTrackType = tracking::TrackNone; // Use visual objects tracker for collisions resolving. Used if m_filterGoal == tracking::FilterRect + m_trackerSettings.m_matchType = tracking::MatchHungrian; + m_trackerSettings.m_useAcceleration = false; // Use constant acceleration motion model + m_trackerSettings.m_dt = m_trackerSettings.m_useAcceleration ? 
0.05f : 0.3f; // Delta time for Kalman filter + m_trackerSettings.m_accelNoiseMag = 0.1f; // Accel noise magnitude for Kalman filter + m_trackerSettings.m_distThres = 0.95f; // Distance threshold between region and object on two frames +#if 1 + m_trackerSettings.m_minAreaRadiusPix = frame.rows / 5.f; +#else + m_trackerSettings.m_minAreaRadiusPix = -1.f; +#endif + m_trackerSettings.m_minAreaRadiusK = 0.8f; + + m_trackerSettings.m_useAbandonedDetection = false; + if (m_trackerSettings.m_useAbandonedDetection) + { + m_trackerSettings.m_minStaticTime = m_minStaticTime; + m_trackerSettings.m_maxStaticTime = 10; + m_trackerSettings.m_maximumAllowedLostTime = m_trackerSettings.m_minStaticTime; // Maximum allowed lost time + m_trackerSettings.m_maxTraceLength = 2 * m_trackerSettings.m_maximumAllowedLostTime; // Maximum trace length + } + else + { + m_trackerSettings.m_maximumAllowedLostTime = 2.; // Maximum allowed lost time + m_trackerSettings.m_maxTraceLength = 2.; // Maximum trace length + } + } + + m_tracker = BaseTracker::CreateTracker(m_trackerSettings, m_fps); + return true; + } + + /// + /// \brief DrawData + /// \param frame + /// \param tracks + /// \param framesCounter + /// \param currTime + /// + void DrawData(cv::Mat frame, const std::vector& tracks, int framesCounter, int currTime) override + { + m_logger->info("Frame {0} ({1}): tracks = {2}, time = {3}", framesCounter, m_framesCount, tracks.size(), currTime); + +#ifdef USE_CLIP + std::vector clipResult; + std::vector clipRects; + clipRects.reserve(tracks.size()); + for (const auto& track : tracks) + { + clipRects.emplace_back(track.GetBoundingRect()); + } + m_clip.ProcessFrame(frame, clipRects, clipResult); +#endif // USE_CLIP + + for (size_t i = 0; i < tracks.size(); ++i) + { + const auto& track = tracks[i]; + if (track.m_isStatic) + { +#ifdef USE_CLIP + DrawTrack(frame, track, false, framesCounter, clipResult[i].m_label + ", " + std::to_string(clipResult[i].m_conf)); +#else + DrawTrack(frame, track, 
false, framesCounter); +#endif //USE_CLIP + std::string label = "abandoned " + track.m_ID.ID2Str(); + int baseLine = 0; + cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_TRIPLEX, 0.5, 1, &baseLine); + + cv::Rect brect = track.m_rrect.boundingRect(); + if (brect.x < 0) + { + brect.width = std::min(brect.width, frame.cols - 1); + brect.x = 0; + } + else if (brect.x + brect.width >= frame.cols) + { + brect.x = std::max(0, frame.cols - brect.width - 1); + brect.width = std::min(brect.width, frame.cols - 1); + } + if (brect.y - labelSize.height < 0) + { + brect.height = std::min(brect.height, frame.rows - 1); + brect.y = labelSize.height; + } + else if (brect.y + brect.height >= frame.rows) + { + brect.y = std::max(0, frame.rows - brect.height - 1); + brect.height = std::min(brect.height, frame.rows - 1); + } + DrawFilledRect(frame, cv::Rect(cv::Point(brect.x, brect.y - labelSize.height), cv::Size(labelSize.width, labelSize.height + baseLine)), cv::Scalar(255, 0, 255), 150); + cv::putText(frame, label, brect.tl(), cv::FONT_HERSHEY_TRIPLEX, 0.5, cv::Scalar(0, 0, 0)); + } + else + { + auto velocity = sqrt(sqr(track.m_velocity[0]) + sqr(track.m_velocity[1])); + if (track.IsRobust(4, // Minimal trajectory size + 0.3f, // Minimal ratio raw_trajectory_points / trajectory_lenght + cv::Size2f(0.2f, 5.0f), // Min and max ratio: width / height + 2)) + //velocity > 30 // Velocity more than 30 pixels per second + { + //track_t mean = 0; + //track_t stddev = 0; + //TrackingObject::LSParams lsParams; + //if (track.LeastSquares2(20, mean, stddev, lsParams) && mean > stddev) + { +#ifdef USE_CLIP + DrawTrack(frame, track, true, framesCounter, clipResult[i].m_label + ", " + std::to_string(clipResult[i].m_conf)); +#else + DrawTrack(frame, track, true, framesCounter); +#endif //USE_CLIP + } + } + } + } + m_detector->CalcMotionMap(frame); + } + +private: + int m_minObjWidth = 8; + int m_minStaticTime = 5; + +#ifdef USE_CLIP + ClassificationCLIP m_clip; +#endif // USE_CLIP +}; 
diff --git a/example/MouseExample.h b/example/MouseExample.h index 1f4b341f4..e58fd5a1e 100644 --- a/example/MouseExample.h +++ b/example/MouseExample.h @@ -1,6 +1,6 @@ #pragma once -#include "Ctracker.h" +#include "BaseTracker.h" #include #include @@ -37,10 +37,8 @@ void MouseTracking(cv::CommandLineParser parser) cv::Mat frame = cv::Mat(800, 800, CV_8UC3); - if (!writer.isOpened()) - { - writer.open(outFile, cv::VideoWriter::fourcc('P', 'I', 'M', '1'), 20, frame.size(), true); - } + if (!outFile.empty() && !writer.isOpened()) + writer.open(outFile, cv::VideoWriter::fourcc('h', '2', '6', '4'), 20, frame.size(), true); // Set mouse callback cv::Point2f pointXY; @@ -56,10 +54,10 @@ void MouseTracking(cv::CommandLineParser parser) settings.m_accelNoiseMag = 0.5f; settings.m_distThres = 0.8f; settings.m_minAreaRadiusPix = frame.rows / 20.f; - settings.m_maximumAllowedSkippedFrames = 25; - settings.m_maxTraceLength = 25; + settings.m_maximumAllowedLostTime = 1.; + settings.m_maxTraceLength = 1.; - CTracker tracker(settings); + std::unique_ptr tracker = BaseTracker::CreateTracker(settings, 30.f); track_t alpha = 0; cv::RNG rng; @@ -85,7 +83,6 @@ void MouseTracking(cv::CommandLineParser parser) regions.push_back(CRegion(cv::Rect(cvRound(p.x), cvRound(p.y), 1, 1))); } - for (size_t i = 0; i < pts.size(); i++) { #if (CV_VERSION_MAJOR >= 4) @@ -95,10 +92,10 @@ void MouseTracking(cv::CommandLineParser parser) #endif } - tracker.Update(regions, cv::UMat(), 100); + tracker->Update(regions, cv::UMat(), std::chrono::system_clock::now()); std::vector tracks; - tracker.GetTracks(tracks); + tracker->GetTracks(tracks); std::cout << tracks.size() << std::endl; for (size_t i = 0; i < tracks.size(); i++) @@ -119,9 +116,7 @@ void MouseTracking(cv::CommandLineParser parser) } if (writer.isOpened()) - { writer << frame; - } #ifndef SILENT_WORK cv::imshow("Video", frame); diff --git a/example/VideoExample.cpp b/example/VideoExample.cpp index 03f91aadf..4c41e0032 100644 --- 
a/example/VideoExample.cpp +++ b/example/VideoExample.cpp @@ -1,8 +1,5 @@ #include #include -#include - -#include #include "VideoExample.h" @@ -11,15 +8,17 @@ /// \param parser /// VideoExample::VideoExample(const cv::CommandLineParser& parser) - : m_resultsLog(parser.get("res")) + : m_resultsLog(parser.get("log_res"), parser.get("write_n_frame")), + m_cvatAnnotationsGenerator(parser.get("cvat_res")) { m_inFile = parser.get(0); m_outFile = parser.get("out"); - m_showLogs = parser.get("show_logs") != 0; + m_showLogsLevel = parser.get("show_logs"); m_startFrame = parser.get("start_frame"); m_endFrame = parser.get("end_frame"); m_finishDelay = parser.get("end_delay"); m_batchSize = std::max(1, parser.get("batch_size")); + m_useContrastAdjustment = parser.get("contrast_adjustment") != 0; m_colors.emplace_back(255, 0, 0); m_colors.emplace_back(0, 255, 0); @@ -33,63 +32,51 @@ VideoExample::VideoExample(const cv::CommandLineParser& parser) m_resultsLog.Open(); + // Create loggers + m_consoleSink = std::make_shared(); + m_consoleSink->set_level(spdlog::level::from_str(m_showLogsLevel)); + m_consoleSink->set_pattern("[%^%l%$] %v"); + + auto currentTime = std::chrono::system_clock::now(); + auto transformed = currentTime.time_since_epoch().count() / 1000000; + std::time_t tt = std::chrono::system_clock::to_time_t(currentTime); + char buffer[80]; +#ifdef WIN32 + tm timeInfo; + localtime_s(&timeInfo, &tt); + strftime(buffer, 80, "%G%m%d_%H%M%S", &timeInfo); +#else + auto timeInfo = localtime(&tt); + strftime(buffer, 80, "%G%m%d_%H%M%S", timeInfo); +#endif + + size_t max_size = 1024 * 1024 * 5; + size_t max_files = 3; + m_fileSink = std::make_shared("logs/" + std::string(buffer) + std::to_string(transformed % 1000) + ".txt", max_size, max_files); + m_fileSink->set_level(spdlog::level::from_str(m_showLogsLevel)); + + m_logger = std::shared_ptr(new spdlog::logger("traffic", { m_consoleSink, m_fileSink })); + m_logger->set_level(spdlog::level::from_str(m_showLogsLevel)); + 
m_logger->info("Start service"); + std::string settingsFile = parser.get("settings"); - m_trackerSettingsLoaded = ParseTrackerSettings(settingsFile); + m_trackerSettingsLoaded = ParseTrackerSettings(settingsFile, m_trackerSettings); if (m_batchSize > 1) { m_frameInfo[0].SetBatchSize(m_batchSize); m_frameInfo[1].SetBatchSize(m_batchSize); } -} - -/// -/// \brief VideoExample::ParseTrackerSettings -/// -bool VideoExample::ParseTrackerSettings(const std::string& settingsFile) -{ - INIReader reader(settingsFile); + for (auto& fr : m_frameInfo[0].m_frames) + { + fr.SetUseAdjust(m_useContrastAdjustment); + } + for (auto& fr : m_frameInfo[1].m_frames) + { + fr.SetUseAdjust(m_useContrastAdjustment); + } - if (reader.ParseError() >= 0) - { - m_trackerSettings = TrackerSettings(); - - auto distType = reader.GetInteger("tracking", "distance_type", -1); - if (distType >=0 && distType < (int)tracking::DistsCount) - m_trackerSettings.SetDistance((tracking::DistType)distType); - - auto kalmanType = reader.GetInteger("tracking", "kalman_type", -1); - if (kalmanType >=0 && kalmanType < (int)tracking::KalmanCount) - m_trackerSettings.m_kalmanType = (tracking::KalmanType)kalmanType; - - auto filterGoal = reader.GetInteger("tracking", "filter_goal", -1); - if (filterGoal >=0 && filterGoal < (int)tracking::FiltersCount) - m_trackerSettings.m_filterGoal = (tracking::FilterGoal)filterGoal; - - auto lostTrackType = reader.GetInteger("tracking", "lost_track_type", -1); - if (lostTrackType >=0 && lostTrackType < (int)tracking::SingleTracksCount) - m_trackerSettings.m_lostTrackType = (tracking::LostTrackType)lostTrackType; - - auto matchType = reader.GetInteger("tracking", "match_type", -1); - if (matchType >=0 && matchType < (int)tracking::MatchCount) - m_trackerSettings.m_matchType = (tracking::MatchType)matchType; - - m_trackerSettings.m_useAcceleration = reader.GetInteger("tracking", "use_aceleration", 0) != 0; // Use constant acceleration motion model - m_trackerSettings.m_dt = 
static_cast(reader.GetReal("tracking", "delta_time", 0.4)); // Delta time for Kalman filter - m_trackerSettings.m_accelNoiseMag = static_cast(reader.GetReal("tracking", "accel_noise", 0.2)); // Accel noise magnitude for Kalman filter - m_trackerSettings.m_distThres = static_cast(reader.GetReal("tracking", "dist_thresh", 0.8)); // Distance threshold between region and object on two frames - m_trackerSettings.m_minAreaRadiusPix = static_cast(reader.GetReal("tracking", "min_area_radius_pix", -1.)); - m_trackerSettings.m_minAreaRadiusK = static_cast(reader.GetReal("tracking", "min_area_radius_k", 0.8)); - m_trackerSettings.m_maximumAllowedSkippedFrames = reader.GetInteger("tracking", "max_skip_frames", 50); // Maximum allowed skipped frames - m_trackerSettings.m_maxTraceLength = reader.GetInteger("tracking", "max_trace_len", 50); // Maximum trace length - m_trackerSettings.m_useAbandonedDetection = reader.GetInteger("tracking", "detect_abandoned", 0) != 0; - m_trackerSettings.m_minStaticTime = reader.GetInteger("tracking", "min_static_time", 5); - m_trackerSettings.m_maxStaticTime = reader.GetInteger("tracking", "max_static_time", 25); - m_trackerSettings.m_maxSpeedForStatic = reader.GetInteger("tracking", "max_speed_for_static", 10); - - return true; - } - return false; + m_startTimeStamp = currentTime; } /// @@ -112,18 +99,68 @@ void VideoExample::SyncProcess() cv::VideoCapture capture; if (!OpenCapture(capture)) { - std::cerr << "Can't open " << m_inFile << std::endl; + m_logger->critical("Can't open {}", m_inFile); return; } +#if 0 + // Write preview + cv::Mat prFrame; + capture >> prFrame; + cv::Mat textFrame(prFrame.size(), CV_8UC3); + textFrame = cv::Scalar(0, 0, 0); + std::string label{ "Original video" }; + int baseLine = 0; + double fontScale = (textFrame.cols < 1920) ? 
2.0 : 3.0; + int thickness = 2; + int lineType = cv::LINE_AA; + int fontFace = cv::FONT_HERSHEY_TRIPLEX; + cv::Size labelSize = cv::getTextSize(label, fontFace, fontScale, thickness, &baseLine); + cv::putText(textFrame, label, cv::Point(textFrame.cols / 2 - labelSize.width / 2, textFrame.rows / 2 - labelSize.height / 2), fontFace, fontScale, cv::Scalar(255, 255, 255), thickness, lineType); + for (size_t fi = 0; fi < cvRound(2 * m_fps); ++fi) + { + WriteFrame(writer, textFrame); + } + WriteFrame(writer, prFrame); + for (;;) + { + capture >> prFrame; + if (prFrame.empty()) + break; + WriteFrame(writer, prFrame); + } + textFrame = cv::Scalar(0, 0, 0); + label = "Detection result"; + labelSize = cv::getTextSize(label, fontFace, fontScale, thickness, &baseLine); + cv::putText(textFrame, label, cv::Point(textFrame.cols / 2 - labelSize.width / 2, textFrame.rows / 2 - labelSize.height / 2), fontFace, fontScale, cv::Scalar(255, 255, 255), thickness, lineType); + for (size_t fi = 0; fi < cvRound(2 * m_fps); ++fi) + { + WriteFrame(writer, textFrame); + } + capture.release(); + OpenCapture(capture); +#endif + FrameInfo frameInfo(m_batchSize); frameInfo.m_frames.resize(frameInfo.m_batchSize); frameInfo.m_frameInds.resize(frameInfo.m_batchSize); + frameInfo.m_frameTimeStamps.resize(frameInfo.m_batchSize); + + for (auto& fr : frameInfo.m_frames) + { + fr.SetUseAdjust(m_useContrastAdjustment); + } int64 startLoopTime = cv::getTickCount(); + //double fps = capture.get(cv::CAP_PROP_FPS); + //double readPeriodSeconds = 2.; + //int readPeriodFrames = cvRound(readPeriodSeconds * fps); + for (;;) { + //int currFramesPos = cvRound(capture.get(cv::CAP_PROP_POS_FRAMES)); + size_t i = 0; for (; i < m_batchSize; ++i) { @@ -131,17 +168,23 @@ void VideoExample::SyncProcess() if (frameInfo.m_frames[i].empty()) break; frameInfo.m_frameInds[i] = framesCounter; + frameInfo.m_frameTimeStamps[i] = GetNextTimeStamp(framesCounter); + frameInfo.m_frames[i].AdjustMatBGR(); ++framesCounter; if 
(m_endFrame && framesCounter > m_endFrame) { - std::cout << "Process: riched last " << m_endFrame << " frame" << std::endl; + m_logger->info("Process: riched last {} frame", m_endFrame); break; } + + m_logger->debug("VideoExample::SyncProcess: Capture {0} frame", framesCounter); } if (i < m_batchSize) break; + //capture.set(cv::CAP_PROP_POS_FRAMES, currFramesPos + readPeriodFrames); + if (!m_isDetectorInitialized || !m_isTrackerInitialized) { cv::UMat ufirst = frameInfo.m_frames[0].GetUMatBGR(); @@ -150,7 +193,7 @@ void VideoExample::SyncProcess() m_isDetectorInitialized = InitDetector(ufirst); if (!m_isDetectorInitialized) { - std::cerr << "CaptureAndDetect: Detector initialize error!!!" << std::endl; + m_logger->critical("CaptureAndDetect: Detector initialize error!!!"); break; } } @@ -159,7 +202,7 @@ void VideoExample::SyncProcess() m_isTrackerInitialized = InitTracker(ufirst); if (!m_isTrackerInitialized) { - std::cerr << "CaptureAndDetect: Tracker initialize error!!!" << std::endl; + m_logger->critical("CaptureAndDetect: Tracker initialize error!!!"); break; } } @@ -167,7 +210,6 @@ void VideoExample::SyncProcess() int64 t1 = cv::getTickCount(); - regions_t regions; Detection(frameInfo); Tracking(frameInfo); int64 t2 = cv::getTickCount(); @@ -189,16 +231,20 @@ void VideoExample::SyncProcess() else if (k == 'm' || k == 'M') manualMode = !manualMode; #else - std::this_thread::sleep_for(std::chrono::milliseconds(1)); + //std::this_thread::sleep_for(std::chrono::milliseconds(1)); #endif WriteFrame(writer, frameInfo.m_frames[i].GetMatBGR()); } + if (framesCounter % 100 == 0) + m_resultsLog.Flush(); } + m_cvatAnnotationsGenerator.Save(m_inFile, m_framesCount, m_frameSize); + int64 stopLoopTime = cv::getTickCount(); - std::cout << "algorithms time = " << (allTime / freq) << ", work time = " << ((stopLoopTime - startLoopTime) / freq) << std::endl; + m_logger->info("algorithms time = {0}, work time = {1}", allTime / freq, (stopLoopTime - startLoopTime) / freq); #ifndef 
SILENT_WORK cv::waitKey(m_finishDelay); #endif @@ -224,27 +270,26 @@ void VideoExample::AsyncProcess() int64 allTime = 0; int64 startLoopTime = cv::getTickCount(); - size_t processCounter = 0; + size_t processCounter = 0; for (; !stopCapture.load(); ) { FrameInfo& frameInfo = m_frameInfo[processCounter % 2]; - //std::cout << "tracking from " << (processCounter % 2) << " ind = " << processCounter << std::endl; + m_logger->debug("--- waiting tracking from {0} ind = {1}", processCounter % 2, processCounter); { std::unique_lock lock(frameInfo.m_mutex); - if (!frameInfo.m_cond.wait_for(lock, std::chrono::milliseconds(m_captureTimeOut), [&frameInfo]{ return frameInfo.m_captured; })) + if (!frameInfo.m_cond.wait_for(lock, std::chrono::milliseconds(m_captureTimeOut), [&frameInfo] { return frameInfo.m_captured.load(); })) { - std::cout << "Wait frame timeout!" << std::endl; + m_logger->info("--- Wait frame timeout!"); break; } } - //std::cout << "tracking from " << (processCounter % 2) << " in process..." << std::endl; - + m_logger->debug("--- tracking from {} in progress...", processCounter % 2); if (!m_isTrackerInitialized) { m_isTrackerInitialized = InitTracker(frameInfo.m_frames[0].GetUMatBGR()); if (!m_isTrackerInitialized) { - std::cerr << "CaptureAndDetect: Tracker initialize error!!!" 
<< std::endl; + m_logger->critical("--- AsyncProcess: Tracker initialize error!!!"); frameInfo.m_cond.notify_one(); break; } @@ -259,7 +304,7 @@ void VideoExample::AsyncProcess() allTime += t2 - t1 + frameInfo.m_dt; int currTime = cvRound(1000 * (t2 - t1 + frameInfo.m_dt) / freq); - //std::cout << "Frame " << framesCounter << ": td = " << (1000 * frameInfo.m_dt / freq) << ", tt = " << (1000 * (t2 - t1) / freq) << std::endl; + m_logger->debug("--- Frame {0}: td = {1}, tt = {2}", frameInfo.m_frameInds[0], 1000 * frameInfo.m_dt / freq, 1000 * (t2 - t1) / freq); int key = 0; for (size_t i = 0; i < m_batchSize; ++i) @@ -269,7 +314,7 @@ void VideoExample::AsyncProcess() WriteFrame(writer, frameInfo.m_frames[i].GetMatBGR()); #ifndef SILENT_WORK - cv::imshow("Video", frameInfo.m_frames[0].GetMatBGR()); + cv::imshow("Video", frameInfo.m_frames[i].GetMatBGR()); int waitTime = manualMode ? 0 : 1;// std::max(1, cvRound(1000 / m_fps - currTime)); key = cv::waitKey(waitTime); @@ -278,31 +323,36 @@ void VideoExample::AsyncProcess() else break; #else - std::this_thread::sleep_for(std::chrono::milliseconds(1)); + //std::this_thread::sleep_for(std::chrono::milliseconds(1)); #endif } - { - std::unique_lock lock(frameInfo.m_mutex); - //std::cout << "tracking m_captured " << (processCounter % 2) << " - " << frameInfo.m_captured << std::endl; - assert(frameInfo.m_captured); - frameInfo.m_captured = false; - } + { + std::unique_lock lock(frameInfo.m_mutex); + m_logger->debug("--- tracking m_captured {0} - captured still {1}", processCounter % 2, frameInfo.m_captured.load()); + assert(frameInfo.m_captured.load()); + frameInfo.m_captured = false; + } frameInfo.m_cond.notify_one(); if (key == 27) break; - ++processCounter; + ++processCounter; + + if (processCounter % 100 == 0) + m_resultsLog.Flush(); } stopCapture = true; if (thCapDet.joinable()) thCapDet.join(); + m_cvatAnnotationsGenerator.Save(m_inFile, m_framesCount, m_frameSize); + int64 stopLoopTime = cv::getTickCount(); - std::cout 
<< "algorithms time = " << (allTime / freq) << ", work time = " << ((stopLoopTime - startLoopTime) / freq) << std::endl; + m_logger->info("--- algorithms time = {0}, work time = {1}", allTime / freq, (stopLoopTime - startLoopTime) / freq); #ifndef SILENT_WORK cv::waitKey(m_finishDelay); @@ -319,64 +369,71 @@ void VideoExample::CaptureAndDetect(VideoExample* thisPtr, std::atomic& st cv::VideoCapture capture; if (!thisPtr->OpenCapture(capture)) { - std::cerr << "Can't open " << thisPtr->m_inFile << std::endl; + thisPtr->m_logger->critical("+++ Can't open {}", thisPtr->m_inFile); stopCapture = true; return; } int framesCounter = 0; - int trackingTimeOut = thisPtr->m_trackingTimeOut; - size_t processCounter = 0; + const auto localEndFrame = thisPtr->m_endFrame; + auto localIsDetectorInitialized = thisPtr->m_isDetectorInitialized; + auto localTrackingTimeOut = thisPtr->m_trackingTimeOut; + size_t processCounter = 0; for (; !stopCapture.load();) { FrameInfo& frameInfo = thisPtr->m_frameInfo[processCounter % 2]; - //std::cout << "captured to " << (processCounter % 2) << " ind = " << processCounter << std::endl; + thisPtr->m_logger->debug("+++ waiting capture to {0}, ind = {1}", processCounter % 2, processCounter); { std::unique_lock lock(frameInfo.m_mutex); - if (!frameInfo.m_cond.wait_for(lock, std::chrono::milliseconds(trackingTimeOut), [&frameInfo]{ return !frameInfo.m_captured; })) + if (!frameInfo.m_cond.wait_for(lock, std::chrono::milliseconds(localTrackingTimeOut), [&frameInfo] { return !frameInfo.m_captured.load(); })) { - std::cout << "Wait tracking timeout!" << std::endl; + thisPtr->m_logger->info("+++ Wait tracking timeout!"); frameInfo.m_cond.notify_one(); break; } } - //std::cout << "capture from " << (processCounter % 2) << " in process..." 
<< std::endl; - + thisPtr->m_logger->debug("+++ capture to {0} in progress...", processCounter % 2); if (frameInfo.m_frames.size() < frameInfo.m_batchSize) { frameInfo.m_frames.resize(frameInfo.m_batchSize); frameInfo.m_frameInds.resize(frameInfo.m_batchSize); + frameInfo.m_frameTimeStamps.resize(frameInfo.m_batchSize); } + cv::Mat frame; size_t i = 0; for (; i < frameInfo.m_batchSize; ++i) { - capture >> frameInfo.m_frames[i].GetMatBGRWrite(); - if (frameInfo.m_frames[i].empty()) + capture >> frame; + if (frame.empty()) { - std::cerr << "CaptureAndDetect: frame is empty!" << std::endl; + thisPtr->m_logger->error("+++ CaptureAndDetect: frame is empty!"); frameInfo.m_cond.notify_one(); break; } - frameInfo.m_frameInds[i] = framesCounter; + frameInfo.m_frames[i].GetMatBGRWrite() = frame; + frameInfo.m_frames[i].AdjustMatBGR(); + frameInfo.m_frameInds[i] = framesCounter; + frameInfo.m_frameTimeStamps[i] = thisPtr->GetNextTimeStamp(framesCounter); ++framesCounter; - if (thisPtr->m_endFrame && framesCounter > thisPtr->m_endFrame) - { - std::cout << "Process: riched last " << thisPtr->m_endFrame << " frame" << std::endl; - break; - } - } - if (i < frameInfo.m_batchSize) - break; + if (localEndFrame && framesCounter > localEndFrame) + { + thisPtr->m_logger->info("+++ Process: riched last {} frame", localEndFrame); + break; + } + } + if (i < frameInfo.m_batchSize) + break; - if (!thisPtr->m_isDetectorInitialized) + if (!localIsDetectorInitialized) { thisPtr->m_isDetectorInitialized = thisPtr->InitDetector(frameInfo.m_frames[0].GetUMatBGR()); + localIsDetectorInitialized = thisPtr->m_isDetectorInitialized; if (!thisPtr->m_isDetectorInitialized) { - std::cerr << "CaptureAndDetect: Detector initialize error!!!" 
<< std::endl; + thisPtr->m_logger->critical("+++ CaptureAndDetect: Detector initialize error!!!"); frameInfo.m_cond.notify_one(); break; } @@ -387,19 +444,32 @@ void VideoExample::CaptureAndDetect(VideoExample* thisPtr, std::atomic& st int64 t2 = cv::getTickCount(); frameInfo.m_dt = t2 - t1; - { - std::unique_lock lock(frameInfo.m_mutex); - //std::cout << "capture m_captured " << (processCounter % 2) << " - " << frameInfo.m_captured << std::endl; - assert(!frameInfo.m_captured); - frameInfo.m_captured = true; - } - frameInfo.m_cond.notify_one(); + { + std::unique_lock lock(frameInfo.m_mutex); + thisPtr->m_logger->debug("+++ capture m_captured {0} - captured still {1}", processCounter % 2, frameInfo.m_captured.load()); + assert(!frameInfo.m_captured.load()); + frameInfo.m_captured = true; + } + frameInfo.m_cond.notify_one(); ++processCounter; } stopCapture = true; } +/// +/// \brief VideoExample::GetNextTimeStamp +/// \param framesCounter +/// \return +/// +time_point_t VideoExample::GetNextTimeStamp(int framesCounter) const +{ + if (m_useArchieveTime) + return m_startTimeStamp + std::chrono::milliseconds(cvRound(framesCounter * (1000.f / m_fps))); + else + return std::chrono::system_clock::now(); +} + /// /// \brief VideoExample::Detection /// \param frame @@ -440,11 +510,10 @@ void VideoExample::Tracking(FrameInfo& frame) frame.CleanTracks(); for (size_t i = 0; i < frame.m_frames.size(); ++i) { - if (m_tracker->CanColorFrameToTrack()) - m_tracker->Update(frame.m_regions[i], frame.m_frames[i].GetUMatBGR(), m_fps); - else - m_tracker->Update(frame.m_regions[i], frame.m_frames[i].GetUMatGray(), m_fps); + m_tracker->Update(frame.m_regions[i], frame.m_frames[i].GetUMatBGR(), frame.m_frameTimeStamps[i]); m_tracker->GetTracks(frame.m_tracks[i]); + + m_cvatAnnotationsGenerator.NewDetects(frame.m_frameInds[i], frame.m_tracks[i], 0); } if (m_trackerSettings.m_useAbandonedDetection) m_tracker->GetTracks(m_tracks); @@ -453,28 +522,24 @@ void VideoExample::Tracking(FrameInfo& 
frame) /// /// \brief VideoExample::DrawTrack /// \param frame -/// \param resizeCoeff /// \param track /// \param drawTrajectory /// void VideoExample::DrawTrack(cv::Mat frame, - int resizeCoeff, const TrackingObject& track, bool drawTrajectory, - int framesCounter) + int framesCounter, + const std::string& userLabel) { - auto ResizePoint = [resizeCoeff](const cv::Point& pt) -> cv::Point - { - return cv::Point(resizeCoeff * pt.x, resizeCoeff * pt.y); - }; - cv::Scalar color = track.m_isStatic ? cv::Scalar(255, 0, 255) : cv::Scalar(0, 255, 0); cv::Point2f rectPoints[4]; track.m_rrect.points(rectPoints); + //std::cout << "track: rrect [" << track.m_rrect.size << " from " << track.m_rrect.center << ", " << track.m_rrect.angle << "]" << std::endl; for (int i = 0; i < 4; ++i) { - cv::line(frame, ResizePoint(rectPoints[i]), ResizePoint(rectPoints[(i+1) % 4]), color); + cv::line(frame, rectPoints[i], rectPoints[(i+1) % 4], color); } + #if 0 #if 0 track_t minAreaRadiusPix = frame.rows / 20.f; @@ -520,29 +585,88 @@ void VideoExample::DrawTrack(cv::Mat frame, #endif if (drawTrajectory) { - cv::Scalar cl = m_colors[track.m_ID % m_colors.size()]; + cv::Scalar cl = m_colors[track.m_ID.ID2Module(m_colors.size())]; for (size_t j = 0; j < track.m_trace.size() - 1; ++j) { const TrajectoryPoint& pt1 = track.m_trace.at(j); const TrajectoryPoint& pt2 = track.m_trace.at(j + 1); #if (CV_VERSION_MAJOR >= 4) - cv::line(frame, ResizePoint(pt1.m_prediction), ResizePoint(pt2.m_prediction), cl, 1, cv::LINE_AA); + cv::line(frame, pt1.m_prediction, pt2.m_prediction, cl, 1, cv::LINE_AA); #else - cv::line(frame, ResizePoint(pt1.m_prediction), ResizePoint(pt2.m_prediction), cl, 1, CV_AA); + cv::line(frame, pt1.m_prediction, pt2.m_prediction, cl, 1, CV_AA); #endif if (!pt2.m_hasRaw) { #if (CV_VERSION_MAJOR >= 4) - cv::circle(frame, ResizePoint(pt2.m_prediction), 4, cl, 1, cv::LINE_AA); + cv::circle(frame, pt2.m_prediction, 4, cl, 1, cv::LINE_AA); #else - cv::circle(frame, 
ResizePoint(pt2.m_prediction), 4, cl, 1, CV_AA); + cv::circle(frame, pt2.m_prediction, 4, cl, 1, CV_AA); #endif } } } - cv::Rect brect = track.m_rrect.boundingRect(); + cv::Rect brect = track.m_rrect.boundingRect(); + std::stringstream label; + label << track.m_ID.ID2Str(); + if (track.m_type != bad_type) + label << ": " << TypeConverter::Type2Str(track.m_type); + else if (!userLabel.empty()) + label << ": " << userLabel; + if (track.m_confidence > 0) + label << ", " << std::fixed << std::setw(2) << std::setprecision(2) << track.m_confidence; +#if 0 + track_t mean = 0; + track_t stddev = 0; + TrackingObject::LSParams lsParams; + if (track.LeastSquares2(10, mean, stddev, lsParams)) + { + std::cout << "LSParams: " << lsParams << std::endl; + cv::Scalar cl(255, 0, 255); + label += ", [" + std::to_string(cvRound(mean)) + ", " + std::to_string(cvRound(stddev)) + "]"; + for (size_t j = 0; j < track.m_trace.size() - 1; ++j) + { + track_t t1 = j; + track_t t2 = j + 1; + cv::Point pt1(lsParams.m_ax * sqr(t1) + lsParams.m_v0x * t1 + lsParams.m_x0, lsParams.m_ay * sqr(t1) + lsParams.m_v0y * t1 + lsParams.m_y0); + cv::Point pt2(lsParams.m_ax * sqr(t2) + lsParams.m_v0x * t2 + lsParams.m_x0, lsParams.m_ay * sqr(t2) + lsParams.m_v0y * t2 + lsParams.m_y0); + //std::cout << pt1 << " - " << pt2 << std::endl; +#if (CV_VERSION_MAJOR >= 4) + cv::line(frame, pt1, pt2, cl, 1, cv::LINE_AA); +#else + cv::line(frame, pt1, pt2, cl, 1, CV_AA); +#endif + } + } + label += ", " + std::to_string(cvRound(sqrt(sqr(track.m_velocity[0]) + sqr(track.m_velocity[1])))); +#endif + int baseLine = 0; + double fontScale = (frame.cols < 1920) ? 
0.5 : 0.7; + cv::Size labelSize = cv::getTextSize(label.str(), cv::FONT_HERSHEY_TRIPLEX, fontScale, 1, &baseLine); + if (brect.x < 0) + { + brect.width = std::min(brect.width, frame.cols - 1); + brect.x = 0; + } + else if (brect.x + brect.width >= frame.cols) + { + brect.x = std::max(0, frame.cols - brect.width - 1); + brect.width = std::min(brect.width, frame.cols - 1); + } + if (brect.y - labelSize.height < 0) + { + brect.height = std::min(brect.height, frame.rows - 1); + brect.y = labelSize.height; + } + else if (brect.y + brect.height >= frame.rows) + { + brect.y = std::max(0, frame.rows - brect.height - 1); + brect.height = std::min(brect.height, frame.rows - 1); + } + DrawFilledRect(frame, cv::Rect(cv::Point(brect.x, brect.y - labelSize.height), cv::Size(labelSize.width, labelSize.height + baseLine)), cv::Scalar(200, 200, 200), 150); + cv::putText(frame, label.str(), brect.tl(), cv::FONT_HERSHEY_TRIPLEX, fontScale, cv::Scalar(0, 0, 0)); + m_resultsLog.AddTrack(framesCounter, track.m_ID, brect, track.m_type, track.m_confidence); m_resultsLog.AddRobustTrack(track.m_ID); } @@ -571,9 +695,13 @@ bool VideoExample::OpenCapture(cv::VideoCapture& capture) { capture.set(cv::CAP_PROP_POS_FRAMES, m_startFrame); - m_fps = std::max(25.f, (float)capture.get(cv::CAP_PROP_FPS)); + m_fps = std::max(1.f, (float)capture.get(cv::CAP_PROP_FPS)); + + m_frameSize.width = cvRound(capture.get(cv::CAP_PROP_FRAME_WIDTH)); + m_frameSize.height = cvRound(capture.get(cv::CAP_PROP_FRAME_HEIGHT)); + m_framesCount = cvRound(capture.get(cv::CAP_PROP_FRAME_COUNT)); - std::cout << "Video " << m_inFile << " was started from " << m_startFrame << " frame with " << m_fps << " fps" << std::endl; + std::cout << "Video " << m_inFile << " was started from " << m_startFrame << " frame with " << m_fps << " fps, frame size " << m_frameSize << " and length " << m_framesCount << std::endl; return true; } diff --git a/example/VideoExample.h b/example/VideoExample.h index b96782914..b5e0fc693 100644 --- 
a/example/VideoExample.h +++ b/example/VideoExample.h @@ -1,9 +1,7 @@ #pragma once #include -#include #include -#include #include #include #include @@ -11,151 +9,15 @@ #include #include "BaseDetector.h" -#include "Ctracker.h" +#include "BaseTracker.h" +#include "FileLogger.h" +#include "cvatAnnotationsGenerator.h" -/// -/// \brief The ResultsLog class -/// -class ResultsLog -{ -public: - /// - ResultsLog(const std::string& fileName) - : m_fileName(fileName) - { - } - - /// - ~ResultsLog() - { - WriteAll(true); - } - - /// - bool Open() - { - m_resCSV.close(); - if (m_fileName.size() > 5) - { - m_resCSV.open(m_fileName); - return m_resCSV.is_open(); - } - return false; - } - - /// - bool AddTrack(int framesCounter, size_t trackID, const cv::Rect& brect, objtype_t type, float confidence) - { - if (m_resCSV.is_open()) - { - auto frame = m_frames.find(framesCounter); - if (frame == std::end(m_frames)) - { - DetectsOnFrame tmpFrame; - tmpFrame.m_detects.emplace_back(trackID, brect, type, confidence); - m_frames.emplace(framesCounter, tmpFrame); - } - else - { - frame->second.m_detects.emplace_back(trackID, brect, type, confidence); - } - return true; - } - return false; - } - - /// - void AddRobustTrack(size_t trackID) - { - m_robustIDs.insert(trackID); - } - -private: - std::string m_fileName; - std::ofstream m_resCSV; - - /// - struct Detection - { - cv::Rect m_rect; - objtype_t m_type; - float m_conf = 0.f; - size_t m_trackID = 0; - - Detection(size_t trackID, const cv::Rect& brect, objtype_t type, float confidence) - { - m_type = type; - m_rect = brect; - m_conf = confidence; - m_trackID = trackID; - } - }; - - /// - struct DetectsOnFrame - { - std::vector m_detects; - }; - std::map m_frames; - std::set m_robustIDs; - - /// - void WriteAll(bool byFrames) - { - if (byFrames) - { -#if 1 - char delim = ','; - for (const auto& frame : m_frames) - { - for (const auto& detect : frame.second.m_detects) - { - if (m_robustIDs.find(detect.m_trackID) != std::end(m_robustIDs)) 
- { - m_resCSV << frame.first << delim << TypeConverter::Type2Str(detect.m_type) << delim << detect.m_rect.x << delim << detect.m_rect.y << delim << - detect.m_rect.width << delim << detect.m_rect.height << delim << - detect.m_conf << delim << std::endl; - } - } - } -#else - char delim = ' '; - for (const auto& frame : m_frames) - { - for (const auto& detect : frame.second.m_detects) - { - if (m_robustIDs.find(detect.m_trackID) != std::end(m_robustIDs)) - { - m_resCSV << frame.first << delim << TypeConverter::Type2Str(detect.m_type) << delim << detect.m_rect.x << delim << detect.m_rect.y << delim << - (detect.m_rect.x + detect.m_rect.width) << delim << (detect.m_rect.y + detect.m_rect.height) << delim << - detect.m_conf << delim << detect.m_trackID << std::endl; - } - } - } -#endif - } - else - { - char delim = ','; - for (size_t id : m_robustIDs) - { - for (const auto& frame : m_frames) - { - for (const auto& detect : frame.second.m_detects) - { - if (detect.m_trackID == id) - { - m_resCSV << frame.first << delim << id << delim << detect.m_rect.x << delim << detect.m_rect.y << delim << - detect.m_rect.width << delim << detect.m_rect.height << delim << - detect.m_conf << ",-1,-1,-1," << std::endl; - break; - } - } - } - } - } - } -}; +#include "spdlog/spdlog.h" +#include "spdlog/async.h" +#include "spdlog/sinks/stdout_color_sinks.h" +#include "spdlog/sinks/basic_file_sink.h" +#include "spdlog/sinks/rotating_file_sink.h" /// /// \brief The Frame struct @@ -163,91 +25,130 @@ class ResultsLog class Frame { public: - Frame() = default; - Frame(cv::Mat imgBGR) - { - m_mBGR = imgBGR; - } - - /// - bool empty() const - { - return m_mBGR.empty(); - } - - /// - const cv::Mat& GetMatBGR() - { - return m_mBGR; - } - /// - cv::Mat& GetMatBGRWrite() - { - m_umBGRGenerated = false; - m_mGrayGenerated = false; - m_umGrayGenerated = false; - return m_mBGR; - } - /// - const cv::Mat& GetMatGray() - { - if (m_mGray.empty() || !m_mGrayGenerated) - { - if (m_umGray.empty() || 
!m_umGrayGenerated) - cv::cvtColor(m_mBGR, m_mGray, cv::COLOR_BGR2GRAY); - else - m_mGray = m_umGray.getMat(cv::ACCESS_READ); - m_mGrayGenerated = true; - } - return m_mGray; - } - /// - const cv::UMat& GetUMatBGR() - { - std::thread::id lastThreadID = std::this_thread::get_id(); - - if (m_umBGR.empty() || !m_umBGRGenerated || lastThreadID != m_umBGRThreadID) - { - m_umBGR = m_mBGR.getUMat(cv::ACCESS_READ); - m_umBGRGenerated = true; - m_umBGRThreadID = lastThreadID; - } - return m_umBGR; - } - /// - const cv::UMat& GetUMatGray() - { - std::thread::id lastThreadID = std::this_thread::get_id(); - - if (m_umGray.empty() || !m_umGrayGenerated || lastThreadID != m_umGrayThreadID) - { - if (m_mGray.empty() || !m_mGrayGenerated) - { - if (m_umBGR.empty() || !m_umBGRGenerated || lastThreadID != m_umGrayThreadID) - cv::cvtColor(m_mBGR, m_umGray, cv::COLOR_BGR2GRAY); - else - cv::cvtColor(m_umBGR, m_umGray, cv::COLOR_BGR2GRAY); - } - else - { - m_umGray = m_mGray.getUMat(cv::ACCESS_READ); - } - m_umGrayGenerated = true; - m_umGrayThreadID = lastThreadID; - } - return m_umGray; - } + Frame() = default; + Frame(cv::Mat imgBGR, bool useCLAHE) + { + m_mBGR = imgBGR; + if (useCLAHE) + { + m_clahe = cv::createCLAHE(1.2, cv::Size(4, 4)); + AdjustMatBGR(); + } + } + + /// + void SetUseAdjust(bool useCLAHE) + { + if (useCLAHE) + { + m_clahe = cv::createCLAHE(1.2, cv::Size(4, 4)); + AdjustMatBGR(); + } + else + { + m_clahe.reset(); + } + } + + /// + bool empty() const noexcept + { + return m_mBGR.empty(); + } + + /// + const cv::Mat& GetMatBGR() const noexcept + { + return m_mBGR; + } + /// + cv::Mat& GetMatBGRWrite() + { + m_umBGRGenerated = false; + m_mGrayGenerated = false; + m_umGrayGenerated = false; + return m_mBGR; + } + /// + bool AdjustMatBGR() + { + if (m_mBGR.empty() || m_clahe.empty()) + return false; + + cv::cvtColor(m_mBGR, m_mHSV, cv::COLOR_BGR2HSV); + cv::split(m_mHSV, m_chansHSV); + m_clahe->apply(m_chansHSV[2], m_chansHSV[2]); + cv::merge(m_chansHSV, m_mHSV); + 
cv::cvtColor(m_mHSV, m_mBGR, cv::COLOR_HSV2BGR); + + //std::cout << "AdjustMatBGR()" << std::endl; + + return true; + } + /// + const cv::Mat& GetMatGray() + { + if (m_mGray.empty() || !m_mGrayGenerated) + { + if (m_umGray.empty() || !m_umGrayGenerated) + cv::cvtColor(m_mBGR, m_mGray, cv::COLOR_BGR2GRAY); + else + m_mGray = m_umGray.getMat(cv::ACCESS_READ); + m_mGrayGenerated = true; + } + return m_mGray; + } + /// + const cv::UMat& GetUMatBGR() + { + std::thread::id lastThreadID = std::this_thread::get_id(); + + if (m_umBGR.empty() || !m_umBGRGenerated || lastThreadID != m_umBGRThreadID) + { + m_umBGR = m_mBGR.getUMat(cv::ACCESS_READ); + m_umBGRGenerated = true; + m_umBGRThreadID = lastThreadID; + } + return m_umBGR; + } + /// + const cv::UMat& GetUMatGray() + { + std::thread::id lastThreadID = std::this_thread::get_id(); + + if (m_umGray.empty() || !m_umGrayGenerated || lastThreadID != m_umGrayThreadID) + { + if (m_mGray.empty() || !m_mGrayGenerated) + { + if (m_umBGR.empty() || !m_umBGRGenerated || lastThreadID != m_umGrayThreadID) + cv::cvtColor(m_mBGR, m_umGray, cv::COLOR_BGR2GRAY); + else + cv::cvtColor(m_umBGR, m_umGray, cv::COLOR_BGR2GRAY); + } + else + { + m_umGray = m_mGray.getUMat(cv::ACCESS_READ); + } + m_umGrayGenerated = true; + m_umGrayThreadID = lastThreadID; + } + return m_umGray; + } private: - cv::Mat m_mBGR; - cv::Mat m_mGray; - cv::UMat m_umBGR; - cv::UMat m_umGray; - bool m_umBGRGenerated = false; - bool m_mGrayGenerated = false; - bool m_umGrayGenerated = false; - std::thread::id m_umBGRThreadID; - std::thread::id m_umGrayThreadID; + cv::Mat m_mBGR; + cv::Mat m_mGray; + cv::UMat m_umBGR; + cv::UMat m_umGray; + bool m_umBGRGenerated = false; + bool m_mGrayGenerated = false; + bool m_umGrayGenerated = false; + std::thread::id m_umBGRThreadID; + std::thread::id m_umGrayThreadID; + + cv::Ptr m_clahe; + cv::Mat m_mHSV; + std::vector m_chansHSV; }; /// @@ -255,68 +156,68 @@ class Frame /// struct FrameInfo { - /// - FrameInfo() - { - 
m_frames.reserve(m_batchSize); - m_regions.reserve(m_batchSize); - m_frameInds.reserve(m_batchSize); - } - /// - FrameInfo(size_t batchSize) - : m_batchSize(batchSize) - { - m_frames.reserve(m_batchSize); - m_regions.reserve(m_batchSize); - m_frameInds.reserve(m_batchSize); - } - - /// - void SetBatchSize(size_t batchSize) - { - m_batchSize = batchSize; - m_frames.reserve(m_batchSize); - m_regions.reserve(m_batchSize); - m_frameInds.reserve(m_batchSize); - } - - /// - void CleanRegions() - { - if (m_regions.size() != m_batchSize) - m_regions.resize(m_batchSize); - for (auto& regions : m_regions) - { - regions.clear(); - } - } - - /// - void CleanTracks() - { - if (m_tracks.size() != m_batchSize) - m_tracks.resize(m_batchSize); - for (auto& tracks : m_tracks) - { - tracks.clear(); - } - } - - std::vector m_frames; - std::vector m_regions; - std::vector> m_tracks; - std::vector m_frameInds; - - size_t m_batchSize = 1; - - int64 m_dt = 0; - - std::condition_variable m_cond; - std::mutex m_mutex; - bool m_captured = false; + /// + FrameInfo() + { + m_frames.reserve(m_batchSize); + m_regions.reserve(m_batchSize); + m_frameInds.reserve(m_batchSize); + } + /// + FrameInfo(size_t batchSize) + : m_batchSize(batchSize) + { + m_frames.reserve(m_batchSize); + m_regions.reserve(m_batchSize); + m_frameInds.reserve(m_batchSize); + } + + /// + void SetBatchSize(size_t batchSize) + { + m_batchSize = batchSize; + m_frames.reserve(m_batchSize); + m_regions.reserve(m_batchSize); + m_frameInds.reserve(m_batchSize); + } + + /// + void CleanRegions() + { + if (m_regions.size() != m_batchSize) + m_regions.resize(m_batchSize); + for (auto& regions : m_regions) + { + regions.clear(); + } + } + + /// + void CleanTracks() + { + if (m_tracks.size() != m_batchSize) + m_tracks.resize(m_batchSize); + for (auto& tracks : m_tracks) + { + tracks.clear(); + } + } + + std::vector m_frames; + std::vector m_regions; + std::vector> m_tracks; + std::vector m_frameInds; + std::vector m_frameTimeStamps; + + 
size_t m_batchSize = 1; + + int64 m_dt = 0; + + std::condition_variable m_cond; + std::mutex m_mutex; + std::atomic m_captured { false }; }; - /// /// \brief The VideoExample class /// @@ -336,10 +237,14 @@ class VideoExample protected: std::unique_ptr m_detector; - std::unique_ptr m_tracker; + std::unique_ptr m_tracker; - bool m_showLogs = true; + std::string m_showLogsLevel = "debug"; float m_fps = 25; + cv::Size m_frameSize; + int m_framesCount = 0; + + bool m_useContrastAdjustment = false; size_t m_batchSize = 1; @@ -347,6 +252,7 @@ class VideoExample int m_trackingTimeOut = 60000; ResultsLog m_resultsLog; + CVATAnnotationsGenerator m_cvatAnnotationsGenerator; static void CaptureAndDetect(VideoExample* thisPtr, std::atomic& stopCapture); @@ -357,12 +263,16 @@ class VideoExample void Tracking(FrameInfo& frame); virtual void DrawData(cv::Mat frame, const std::vector& tracks, int framesCounter, int currTime) = 0; + virtual void DrawTrack(cv::Mat frame, const TrackingObject& track, bool drawTrajectory, int framesCounter, const std::string& userLabel = ""); - void DrawTrack(cv::Mat frame, int resizeCoeff, const TrackingObject& track, bool drawTrajectory, int framesCounter); + TrackerSettings m_trackerSettings; + bool m_trackerSettingsLoaded = false; - TrackerSettings m_trackerSettings; - bool m_trackerSettingsLoaded = false; - bool ParseTrackerSettings(const std::string& settingsFile); + std::vector m_colors; + + std::shared_ptr m_consoleSink; + std::shared_ptr m_fileSink; + std::shared_ptr m_logger; private: std::vector m_tracks; @@ -371,13 +281,21 @@ class VideoExample bool m_isDetectorInitialized = false; std::string m_inFile; std::string m_outFile; +#if 0 + int m_fourcc = cv::VideoWriter::fourcc('h', '2', '6', '4'); +#else int m_fourcc = cv::VideoWriter::fourcc('M', 'J', 'P', 'G'); +#endif int m_startFrame = 0; int m_endFrame = 0; int m_finishDelay = 0; - std::vector m_colors; + + time_point_t m_startTimeStamp; + bool m_useArchieveTime = true; FrameInfo 
m_frameInfo[2]; + + time_point_t GetNextTimeStamp(int framesCounter) const; bool OpenCapture(cv::VideoCapture& capture); bool WriteFrame(cv::VideoWriter& writer, const cv::Mat& frame); diff --git a/example/cvatAnnotationsGenerator.h b/example/cvatAnnotationsGenerator.h new file mode 100644 index 000000000..49f140a62 --- /dev/null +++ b/example/cvatAnnotationsGenerator.h @@ -0,0 +1,252 @@ +#pragma once + +#include +#include +#include "defines.h" + +/// +class CVATAnnotationsGenerator +{ +public: + /// + CVATAnnotationsGenerator(const std::string& annFileName) + : m_annFileName(annFileName) + { + } + + /// + bool NewDetects(int frameInd, const std::vector& tracks, size_t detectorInd) + { + if (m_annFileName.empty()) + return false; + + auto it = m_detects.find(frameInd); + if (it == m_detects.end()) + { + if (detectorInd == 0) + { + m_detects.emplace(frameInd, tracks); + } + else + { + std::vector tmpTracks = tracks; + for (auto& track : tmpTracks) + { + track.m_ID.m_val += detectorInd * DetectorIDRange; + } + m_detects.emplace(frameInd, tmpTracks); + } + + //it = m_detects.find(frameInd); + //std::cout << "New detects 1: Frame " << frameInd << ", detector ind " << detectorInd << std::endl; + //for (const auto& track : it->second) + //{ + // std::cout << "track " << track.m_ID.ID2Str() << ", type = " << track.m_type << ", rect = " << track.m_rrect.boundingRect() << std::endl; + //} + } + else + { + if (detectorInd == 0) + { + it->second.insert(it->second.end(), tracks.begin(), tracks.end()); + } + else + { + std::vector tmpTracks = tracks; + for (auto& track : tmpTracks) + { + track.m_ID.m_val += detectorInd * DetectorIDRange; + } + it->second.insert(it->second.end(), tmpTracks.begin(), tmpTracks.end()); + } + + //std::cout << "New detects 2: Frame " << frameInd << ", detector ind " << detectorInd << std::endl; + //for (const auto& track : it->second) + //{ + // std::cout << "track " << track.m_ID.ID2Str() << ", type = " << track.m_type << ", rect = " << 
track.m_rrect.boundingRect() << std::endl; + //} + } + + return true; + } + + /// + bool Save(const std::string& videoFileName, int framesCount, cv::Size frameSize) + { + //PrintDetects(); + + bool res = !m_annFileName.empty(); + if (!res) + return res; + + std::ofstream annFile(m_annFileName); + res = annFile.is_open(); + if (!res) + return res; + + WriteMeta(annFile, videoFileName, framesCount, frameSize); + + auto WritePoly = [&](int frameInd, const cv::RotatedRect& rrect) + { + cv::Point2f pts[4]; + rrect.points(pts); + annFile << " \n"; + annFile << " \n"; + }; + + std::unordered_set writedTracks; + + for (auto itStartFrame = std::begin(m_detects); itStartFrame != std::end(m_detects); ++itStartFrame) + { + for (const auto& track : itStartFrame->second) + { + if (writedTracks.find(track.m_ID.m_val) != std::end(writedTracks)) + continue; + writedTracks.emplace(track.m_ID.m_val); + + annFile << " \n"; + WritePoly(itStartFrame->first, track.m_rrect); + + //std::cout << "track " << track.m_ID.ID2Str() << ", type = " << track.m_type << ", rect = " << track.m_rrect.boundingRect() << std::endl; + + auto itNextFrame = itStartFrame; + for (++itNextFrame; itNextFrame != std::end(m_detects); ++itNextFrame) + { + for (const auto& subTrack : itNextFrame->second) + { + if (track.m_ID.m_val != subTrack.m_ID.m_val) + continue; + + WritePoly(itNextFrame->first, subTrack.m_rrect); + + //std::cout << "subTrack " << subTrack.m_ID.ID2Str() << ", type = " << subTrack.m_type << ", rect = " << subTrack.m_rrect.boundingRect() << "\n"; + break; + } + } + + annFile << " \n"; + } + } + + FinalMeta(annFile); + + return res; + } + +private: + std::string m_annFileName; + + std::map> m_detects; + + static constexpr track_id_t::value_type DetectorIDRange = 1000000000; + + /// + void PrintDetects() + { + std::cout << "Print detects:\n"; + for (auto it = m_detects.begin(); it != m_detects.end(); ++it) + { + std::cout << "Frame " << it->first << ": \n"; + for (const auto track : it->second) + { 
+ std::cout << "track " << track.m_ID.ID2Str() << ", type = " << track.m_type << ", rect = " << track.m_rrect.boundingRect() << "\n"; + } + } + std::cout.flush(); + } + + /// + template + std::string Time2Str(TimePoint now) + { + // get number of milliseconds for the current second + // (remainder after division into seconds) + auto ms = std::chrono::duration_cast(now.time_since_epoch()) % 1000; + + // convert to std::time_t in order to convert to std::tm (broken time) + auto timer = std::chrono::system_clock::to_time_t(now); + + // convert to broken time +#ifdef _WIN32 + std::tm bt; + localtime_s(&bt, &timer); +#else + std::tm bt = *std::localtime(&timer); +#endif + + std::ostringstream oss; + oss << std::put_time(&bt, "%Y-%m-%d %H:%M:%S"); + oss << '.' << std::setfill('0') << std::setw(3) << ms.count() << "+00:00"; + + return oss.str(); + } + + /// + void WriteMeta(std::ofstream& annFile, const std::string& videoFileName, int framesCount, cv::Size frameSize) + { + std::string currTime = Time2Str(std::chrono::system_clock::now()); + + annFile << "\n"; + annFile << "\n"; + annFile << " 1.1\n"; + annFile << " \n"; + annFile << " \n"; + annFile << " 777\n"; + annFile << " " << videoFileName << "\n"; + annFile << " " << framesCount << "\n"; + annFile << " interpolation\n"; + annFile << " 5\n"; + annFile << " \n"; + annFile << " " << currTime << "\n"; + annFile << " " << currTime << "\n"; + annFile << " default\n"; + annFile << " " << 0 << "\n"; + annFile << " " << (framesCount - 1) << "\n"; + annFile << " \n"; + annFile << " \n"; + annFile << " \n"; + annFile << " 777\n"; + annFile << " " << 0 << "\n"; + annFile << " " << (framesCount - 1) << "\n"; + annFile << " http://127.0.0.1:8080/?id=777\n"; + annFile << " \n"; + annFile << " \n"; + annFile << " \n"; + annFile << " user\n"; + annFile << " user@de-id.ca\n"; + annFile << " \n"; + annFile << " \n"; + annFile << " \n"; + annFile << " \n"; + annFile << " \n"; + annFile << " \n"; + annFile << " \n"; + annFile << " " << 
frameSize.width << "\n"; + annFile << " " << frameSize.height << "\n"; + annFile << " \n"; + annFile << " \n"; + annFile << " " << currTime << "\n"; + annFile << " " << videoFileName << "\n"; + annFile << " \n"; + } + + /// + void FinalMeta(std::ofstream& annFile) + { + annFile << "\n"; + } +}; diff --git a/example/examples.h b/example/examples.h index c528b012d..bc6db8a1f 100644 --- a/example/examples.h +++ b/example/examples.h @@ -6,402 +6,17 @@ #include "VideoExample.h" -/// -/// \brief DrawFilledRect -/// -void DrawFilledRect(cv::Mat& frame, const cv::Rect& rect, cv::Scalar cl, int alpha) -{ - if (alpha) - { - const int alpha_1 = 255 - alpha; - const int nchans = frame.channels(); - int color[3] = { cv::saturate_cast(cl[0]), cv::saturate_cast(cl[1]), cv::saturate_cast(cl[2]) }; - for (int y = rect.y; y < rect.y + rect.height; ++y) - { - uchar* ptr = frame.ptr(y) + nchans * rect.x; - for (int x = rect.x; x < rect.x + rect.width; ++x) - { - for (int i = 0; i < nchans; ++i) - { - ptr[i] = cv::saturate_cast((alpha_1 * ptr[i] + alpha * color[i]) / 255); - } - ptr += nchans; - } - } - } - else - { - cv::rectangle(frame, rect, cl, cv::FILLED); - } -} - -/// -/// \brief The MotionDetectorExample class -/// -class MotionDetectorExample : public VideoExample -{ -public: - MotionDetectorExample(const cv::CommandLineParser& parser) - : - VideoExample(parser), - m_minObjWidth(10) - { - } - -protected: - /// - /// \brief InitDetector - /// \param frame - /// \return - /// - bool InitDetector(cv::UMat frame) - { - m_minObjWidth = frame.cols / 20; - - config_t config; - config.emplace("useRotatedRect", "0"); - - tracking::Detectors detectorType = tracking::Detectors::Motion_VIBE; - - switch (detectorType) - { - case tracking::Detectors::Motion_VIBE: - config.emplace("samples", "20"); - config.emplace("pixelNeighbor", "1"); - config.emplace("distanceThreshold", "20"); - config.emplace("matchingThreshold", "3"); - config.emplace("updateFactor", "16"); - break; - case 
tracking::Detectors::Motion_MOG: - config.emplace("history", std::to_string(cvRound(50 * m_minStaticTime * m_fps))); - config.emplace("nmixtures", "3"); - config.emplace("backgroundRatio", "0.7"); - config.emplace("noiseSigma", "0"); - break; - case tracking::Detectors::Motion_GMG: - config.emplace("initializationFrames", "50"); - config.emplace("decisionThreshold", "0.7"); - break; - case tracking::Detectors::Motion_CNT: - config.emplace("minPixelStability", "15"); - config.emplace("maxPixelStability", std::to_string(cvRound(20 * m_minStaticTime * m_fps))); - config.emplace("useHistory", "1"); - config.emplace("isParallel", "1"); - break; - case tracking::Detectors::Motion_SuBSENSE: - break; - case tracking::Detectors::Motion_LOBSTER: - break; - case tracking::Detectors::Motion_MOG2: - config.emplace("history", std::to_string(cvRound(20 * m_minStaticTime * m_fps))); - config.emplace("varThreshold", "10"); - config.emplace("detectShadows", "1"); - break; - } - m_detector = std::unique_ptr(CreateDetector(detectorType, config, frame)); - - if (m_detector.get()) - { - m_detector->SetMinObjectSize(cv::Size(m_minObjWidth, m_minObjWidth)); - return true; - } - return false; - } - /// - /// \brief InitTracker - /// \param frame - /// \return - /// - bool InitTracker(cv::UMat frame) - { - if (!m_trackerSettingsLoaded) - { - m_trackerSettings.SetDistance(tracking::DistRects); - m_trackerSettings.m_kalmanType = tracking::KalmanLinear; - m_trackerSettings.m_filterGoal = tracking::FilterCenter; - m_trackerSettings.m_lostTrackType = tracking::TrackCSRT; // Use visual objects tracker for collisions resolving. Used if m_filterGoal == tracking::FilterRect - m_trackerSettings.m_matchType = tracking::MatchHungrian; - m_trackerSettings.m_useAcceleration = false; // Use constant acceleration motion model - m_trackerSettings.m_dt = m_trackerSettings.m_useAcceleration ? 
0.05f : 0.2f; // Delta time for Kalman filter - m_trackerSettings.m_accelNoiseMag = 0.2f; // Accel noise magnitude for Kalman filter - m_trackerSettings.m_distThres = 0.95f; // Distance threshold between region and object on two frames -#if 0 - m_trackerSettings.m_minAreaRadiusPix = frame.rows / 20.f; -#else - m_trackerSettings.m_minAreaRadiusPix = -1.f; -#endif - m_trackerSettings.m_minAreaRadiusK = 0.8f; - - m_trackerSettings.m_useAbandonedDetection = true; - if (m_trackerSettings.m_useAbandonedDetection) - { - m_trackerSettings.m_minStaticTime = m_minStaticTime; - m_trackerSettings.m_maxStaticTime = 10; - m_trackerSettings.m_maximumAllowedSkippedFrames = cvRound(m_trackerSettings.m_minStaticTime * m_fps); // Maximum allowed skipped frames - m_trackerSettings.m_maxTraceLength = 2 * m_trackerSettings.m_maximumAllowedSkippedFrames; // Maximum trace length - } - else - { - m_trackerSettings.m_maximumAllowedSkippedFrames = cvRound(2 * m_fps); // Maximum allowed skipped frames - m_trackerSettings.m_maxTraceLength = cvRound(4 * m_fps); // Maximum trace length - } - } - - m_tracker = std::make_unique(m_trackerSettings); - return true; - } - - /// - /// \brief DrawData - /// \param frame - /// \param tracks - /// \param framesCounter - /// \param currTime - /// - void DrawData(cv::Mat frame, const std::vector& tracks, int framesCounter, int currTime) - { - if (m_showLogs) - std::cout << "Frame " << framesCounter << ": tracks = " << tracks.size() << ", time = " << currTime << std::endl; - - for (const auto& track : tracks) - { - if (track.m_isStatic) - { - DrawTrack(frame, 1, track, false, framesCounter); - - std::string label = "abandoned " + std::to_string(track.m_ID); - int baseLine = 0; - cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); - - cv::Rect brect = track.m_rrect.boundingRect(); - if (brect.x < 0) - { - brect.width = std::min(brect.width, frame.cols - 1); - brect.x = 0; - } - else if (brect.x + brect.width >= 
frame.cols) - { - brect.x = std::max(0, frame.cols - brect.width - 1); - brect.width = std::min(brect.width, frame.cols - 1); - } - if (brect.y - labelSize.height < 0) - { - brect.height = std::min(brect.height, frame.rows - 1); - brect.y = labelSize.height; - } - else if (brect.y + brect.height >= frame.rows) - { - brect.y = std::max(0, frame.rows - brect.height - 1); - brect.height = std::min(brect.height, frame.rows - 1); - } - DrawFilledRect(frame, cv::Rect(cv::Point(brect.x, brect.y - labelSize.height), cv::Size(labelSize.width, labelSize.height + baseLine)), cv::Scalar(255, 0, 255), 150); - cv::putText(frame, label, brect.tl(), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0)); - } - else - { - if (track.IsRobust(cvRound(m_fps / 4), // Minimal trajectory size - 0.7f, // Minimal ratio raw_trajectory_points / trajectory_lenght - cv::Size2f(0.1f, 8.0f))) // Min and max ratio: width / height - DrawTrack(frame, 1, track, true, framesCounter); - } - } - m_detector->CalcMotionMap(frame); - } - -private: - int m_minObjWidth = 8; - int m_minStaticTime = 5; -}; - -// ---------------------------------------------------------------------- - -/// -/// \brief The FaceDetectorExample class -/// -class FaceDetectorExample : public VideoExample -{ -public: - FaceDetectorExample(const cv::CommandLineParser& parser) - : - VideoExample(parser) - { - } - -protected: - /// - /// \brief InitDetector - /// \param frame - /// \return - /// - bool InitDetector(cv::UMat frame) - { -#ifdef _WIN32 - std::string pathToModel = "../../data/"; -#else - std::string pathToModel = "../data/"; -#endif - - config_t config; - config.emplace("cascadeFileName", pathToModel + "haarcascade_frontalface_alt2.xml"); - m_detector = std::unique_ptr(CreateDetector(tracking::Detectors::Face_HAAR, config, frame)); - if (m_detector.get()) - { - m_detector->SetMinObjectSize(cv::Size(frame.cols / 20, frame.rows / 20)); - return true; - } - return false; - } - /// - /// \brief InitTracker - /// \param frame - 
/// \return - /// - bool InitTracker(cv::UMat frame) - { - if (!m_trackerSettingsLoaded) - { - m_trackerSettings.SetDistance(tracking::DistJaccard); - m_trackerSettings.m_kalmanType = tracking::KalmanUnscented; - m_trackerSettings.m_filterGoal = tracking::FilterRect; - m_trackerSettings.m_lostTrackType = tracking::TrackCSRT; // Use visual objects tracker for collisions resolving. Used if m_filterGoal == tracking::FilterRect - m_trackerSettings.m_matchType = tracking::MatchHungrian; - m_trackerSettings.m_dt = 0.3f; // Delta time for Kalman filter - m_trackerSettings.m_accelNoiseMag = 0.1f; // Accel noise magnitude for Kalman filter - m_trackerSettings.m_distThres = 0.8f; // Distance threshold between region and object on two frames - m_trackerSettings.m_minAreaRadiusPix = frame.rows / 20.f; - m_trackerSettings.m_maximumAllowedSkippedFrames = cvRound(m_fps / 2); // Maximum allowed skipped frames - m_trackerSettings.m_maxTraceLength = cvRound(5 * m_fps); // Maximum trace length - } - m_tracker = std::make_unique(m_trackerSettings); - - return true; - } - - /// - /// \brief DrawData - /// \param frame - /// \param tracks - /// \param framesCounter - /// \param currTime - /// - void DrawData(cv::Mat frame, const std::vector& tracks, int framesCounter, int currTime) - { - if (m_showLogs) - std::cout << "Frame " << framesCounter << ": tracks = " << tracks.size() << ", time = " << currTime << std::endl; - - for (const auto& track : tracks) - { - if (track.IsRobust(8, // Minimal trajectory size - 0.4f, // Minimal ratio raw_trajectory_points / trajectory_lenght - cv::Size2f(0.1f, 8.0f))) // Min and max ratio: width / height - DrawTrack(frame, 1, track, true, framesCounter); - } - m_detector->CalcMotionMap(frame); - } -}; - -// ---------------------------------------------------------------------- - -/// -/// \brief The PedestrianDetectorExample class -/// -class PedestrianDetectorExample : public VideoExample -{ -public: - PedestrianDetectorExample(const 
cv::CommandLineParser& parser) - : - VideoExample(parser) - { - } - -protected: - /// - /// \brief InitDetector - /// \param frame - /// \return - /// - bool InitDetector(cv::UMat frame) - { - tracking::Detectors detectorType = tracking::Detectors::Pedestrian_C4; // tracking::Detectors::Pedestrian_HOG; - -#ifdef _WIN32 - std::string pathToModel = "../../data/"; -#else - std::string pathToModel = "../data/"; -#endif - - config_t config; - config.emplace("detectorType", (detectorType == tracking::Pedestrian_HOG) ? "HOG" : "C4"); - config.emplace("cascadeFileName1", pathToModel + "combined.txt.model"); - config.emplace("cascadeFileName2", pathToModel + "combined.txt.model_"); - m_detector = std::unique_ptr(CreateDetector(detectorType, config, frame)); - if (m_detector.get()) - { - m_detector->SetMinObjectSize(cv::Size(frame.cols / 20, frame.rows / 20)); - return true; - } - return false; - } - /// - /// \brief InitTracker - /// \param frame - /// \return - /// - bool InitTracker(cv::UMat frame) - { - if (!m_trackerSettingsLoaded) - { - m_trackerSettings.SetDistance(tracking::DistRects); - m_trackerSettings.m_kalmanType = tracking::KalmanLinear; - m_trackerSettings.m_filterGoal = tracking::FilterRect; - m_trackerSettings.m_lostTrackType = tracking::TrackCSRT; // Use visual objects tracker for collisions resolving. 
Used if m_filterGoal == tracking::FilterRect - m_trackerSettings.m_matchType = tracking::MatchHungrian; - m_trackerSettings.m_dt = 0.3f; // Delta time for Kalman filter - m_trackerSettings.m_accelNoiseMag = 0.1f; // Accel noise magnitude for Kalman filter - m_trackerSettings.m_distThres = 0.8f; // Distance threshold between region and object on two frames - m_trackerSettings.m_minAreaRadiusPix = frame.rows / 20.f; - m_trackerSettings.m_maximumAllowedSkippedFrames = cvRound(m_fps); // Maximum allowed skipped frames - m_trackerSettings.m_maxTraceLength = cvRound(5 * m_fps); // Maximum trace length - } - m_tracker = std::make_unique(m_trackerSettings); - - return true; - } - - /// - /// \brief DrawData - /// \param frame - /// \param tracks - /// \param framesCounter - /// \param currTime - /// - void DrawData(cv::Mat frame, const std::vector& tracks, int framesCounter, int currTime) - { - if (m_showLogs) - std::cout << "Frame " << framesCounter << ": tracks = " << tracks.size() << ", time = " << currTime << std::endl; - - for (const auto& track : tracks) - { - if (track.IsRobust(cvRound(m_fps / 2), // Minimal trajectory size - 0.4f, // Minimal ratio raw_trajectory_points / trajectory_lenght - cv::Size2f(0.1f, 8.0f))) // Min and max ratio: width / height - DrawTrack(frame, 1, track, true, framesCounter); - } - m_detector->CalcMotionMap(frame); - } -}; // ---------------------------------------------------------------------- /// /// \brief The OpenCVDNNExample class /// -class OpenCVDNNExample : public VideoExample +class OpenCVDNNExample final : public VideoExample { public: OpenCVDNNExample(const cv::CommandLineParser& parser) - : - VideoExample(parser) + : VideoExample(parser) { } @@ -411,69 +26,90 @@ class OpenCVDNNExample : public VideoExample /// \param frame /// \return /// - bool InitDetector(cv::UMat frame) + bool InitDetector(cv::UMat frame) override { config_t config; - + if (!m_trackerSettingsLoaded) + { #ifdef _WIN32 - std::string pathToModel = 
"../../data/"; + std::string pathToModel = "../../data/"; #else - std::string pathToModel = "../data/"; + std::string pathToModel = "../data/"; #endif - enum class NNModels - { - TinyYOLOv3 = 0, - YOLOv3, - YOLOv4, - TinyYOLOv4, - MobileNetSSD - }; - NNModels usedModel = NNModels::MobileNetSSD; - switch (usedModel) + enum class NNModels + { + TinyYOLOv3 = 0, + YOLOv3, + YOLOv4, + TinyYOLOv4, + MobileNetSSD + }; + NNModels usedModel = NNModels::MobileNetSSD; + switch (usedModel) + { + case NNModels::TinyYOLOv3: + config.emplace("modelConfiguration", pathToModel + "yolov3-tiny.cfg"); + config.emplace("modelBinary", pathToModel + "yolov3-tiny.weights"); + config.emplace("classNames", pathToModel + "coco.names"); + config.emplace("confidenceThreshold", "0.5"); + break; + + case NNModels::YOLOv3: + config.emplace("modelConfiguration", pathToModel + "yolov3.cfg"); + config.emplace("modelBinary", pathToModel + "yolov3.weights"); + config.emplace("classNames", pathToModel + "coco.names"); + config.emplace("confidenceThreshold", "0.7"); + break; + + case NNModels::YOLOv4: + config.emplace("modelConfiguration", pathToModel + "yolov4.cfg"); + config.emplace("modelBinary", pathToModel + "yolov4.weights"); + config.emplace("classNames", pathToModel + "coco.names"); + config.emplace("confidenceThreshold", "0.5"); + break; + + case NNModels::TinyYOLOv4: + config.emplace("modelConfiguration", pathToModel + "yolov4-tiny.cfg"); + config.emplace("modelBinary", pathToModel + "yolov4-tiny.weights"); + config.emplace("classNames", pathToModel + "coco.names"); + config.emplace("confidenceThreshold", "0.5"); + break; + + case NNModels::MobileNetSSD: + config.emplace("modelConfiguration", pathToModel + "MobileNetSSD_deploy.prototxt"); + config.emplace("modelBinary", pathToModel + "MobileNetSSD_deploy.caffemodel"); + config.emplace("classNames", pathToModel + "voc.names"); + config.emplace("confidenceThreshold", "0.5"); + break; + } + config.emplace("maxCropRatio", "-1"); + + 
config.emplace("dnnTarget", "DNN_TARGET_CPU"); + config.emplace("dnnBackend", "DNN_BACKEND_DEFAULT"); + } + else { - case NNModels::TinyYOLOv3: - config.emplace("modelConfiguration", pathToModel + "yolov3-tiny.cfg"); - config.emplace("modelBinary", pathToModel + "yolov3-tiny.weights"); - config.emplace("classNames", pathToModel + "coco.names"); - config.emplace("confidenceThreshold", "0.5"); - break; - - case NNModels::YOLOv3: - config.emplace("modelConfiguration", pathToModel + "yolov3.cfg"); - config.emplace("modelBinary", pathToModel + "yolov3.weights"); - config.emplace("classNames", pathToModel + "coco.names"); - config.emplace("confidenceThreshold", "0.7"); - break; - - case NNModels::YOLOv4: - config.emplace("modelConfiguration", pathToModel + "yolov4.cfg"); - config.emplace("modelBinary", pathToModel + "yolov4.weights"); - config.emplace("classNames", pathToModel + "coco.names"); - config.emplace("confidenceThreshold", "0.5"); - break; - - case NNModels::TinyYOLOv4: - config.emplace("modelConfiguration", pathToModel + "yolov4-tiny.cfg"); - config.emplace("modelBinary", pathToModel + "yolov4-tiny.weights"); - config.emplace("classNames", pathToModel + "coco.names"); - config.emplace("confidenceThreshold", "0.5"); - break; - - case NNModels::MobileNetSSD: - config.emplace("modelConfiguration", pathToModel + "MobileNetSSD_deploy.prototxt"); - config.emplace("modelBinary", pathToModel + "MobileNetSSD_deploy.caffemodel"); - config.emplace("classNames", pathToModel + "voc.names"); - config.emplace("confidenceThreshold", "0.5"); - break; + config.emplace("modelConfiguration", m_trackerSettings.m_nnConfig); + config.emplace("modelBinary", m_trackerSettings.m_nnWeights); + config.emplace("confidenceThreshold", std::to_string(m_trackerSettings.m_confidenceThreshold)); + config.emplace("classNames", m_trackerSettings.m_classNames); + config.emplace("maxCropRatio", std::to_string(m_trackerSettings.m_maxCropRatio)); + config.emplace("maxBatch", 
std::to_string(m_trackerSettings.m_maxBatch)); + config.emplace("gpuId", std::to_string(m_trackerSettings.m_gpuId)); + config.emplace("net_type", m_trackerSettings.m_netType); + config.emplace("inference_precision", m_trackerSettings.m_inferencePrecision); + config.emplace("video_memory", std::to_string(m_trackerSettings.m_maxVideoMemory)); + config.emplace("dnnTarget", m_trackerSettings.m_dnnTarget); + config.emplace("dnnBackend", m_trackerSettings.m_dnnBackend); + config.emplace("inWidth", std::to_string(m_trackerSettings.m_inputSize.width)); + config.emplace("inHeight", std::to_string(m_trackerSettings.m_inputSize.height)); + + for (auto wname : m_trackerSettings.m_whiteList) + { + config.emplace("white_list", wname); + } } - config.emplace("maxCropRatio", "-1"); - - config.emplace("dnnTarget", "DNN_TARGET_CPU"); - config.emplace("dnnBackend", "DNN_BACKEND_DEFAULT"); - - m_detector = std::unique_ptr(CreateDetector(tracking::Detectors::DNN_OCV, config, frame)); - if (m_detector.get()) - m_detector->SetMinObjectSize(cv::Size(frame.cols / 40, frame.rows / 40)); + m_detector = BaseDetector::CreateDetector(tracking::Detectors::DNN_OCV, config, frame); return (m_detector.get() != nullptr); } @@ -482,29 +118,29 @@ class OpenCVDNNExample : public VideoExample /// \param frame /// \return /// - bool InitTracker(cv::UMat frame) + bool InitTracker(cv::UMat frame) override { if (!m_trackerSettingsLoaded) { - m_trackerSettings.SetDistance(tracking::DistCenters); - m_trackerSettings.m_kalmanType = tracking::KalmanLinear; - m_trackerSettings.m_filterGoal = tracking::FilterRect; - m_trackerSettings.m_lostTrackType = tracking::TrackCSRT; // Use visual objects tracker for collisions resolving. Used if m_filterGoal == tracking::FilterRect - m_trackerSettings.m_matchType = tracking::MatchHungrian; - m_trackerSettings.m_useAcceleration = false; // Use constant acceleration motion model - m_trackerSettings.m_dt = m_trackerSettings.m_useAcceleration ? 
0.05f : 0.4f; // Delta time for Kalman filter - m_trackerSettings.m_accelNoiseMag = 0.2f; // Accel noise magnitude for Kalman filter - m_trackerSettings.m_distThres = 0.8f; // Distance threshold between region and object on two frames + m_trackerSettings.SetDistance(tracking::DistCenters); + m_trackerSettings.m_kalmanType = tracking::KalmanLinear; + m_trackerSettings.m_filterGoal = tracking::FilterRect; + m_trackerSettings.m_lostTrackType = tracking::TrackCSRT; // Use visual objects tracker for collisions resolving. Used if m_filterGoal == tracking::FilterRect + m_trackerSettings.m_matchType = tracking::MatchHungrian; + m_trackerSettings.m_useAcceleration = false; // Use constant acceleration motion model + m_trackerSettings.m_dt = m_trackerSettings.m_useAcceleration ? 0.05f : 0.4f; // Delta time for Kalman filter + m_trackerSettings.m_accelNoiseMag = 0.2f; // Accel noise magnitude for Kalman filter + m_trackerSettings.m_distThres = 0.8f; // Distance threshold between region and object on two frames #if 0 - m_trackerSettings.m_minAreaRadiusPix = frame.rows / 20.f; + m_trackerSettings.m_minAreaRadiusPix = frame.rows / 20.f; #else - m_trackerSettings.m_minAreaRadiusPix = -1.f; + m_trackerSettings.m_minAreaRadiusPix = -1.f; #endif - m_trackerSettings.m_minAreaRadiusK = 0.8f; - m_trackerSettings.m_maximumAllowedSkippedFrames = cvRound(2 * m_fps); // Maximum allowed skipped frames - m_trackerSettings.m_maxTraceLength = cvRound(2 * m_fps); // Maximum trace length + m_trackerSettings.m_minAreaRadiusK = 0.8f; + m_trackerSettings.m_maximumAllowedLostTime = 2.; // Maximum allowed skipped frames + m_trackerSettings.m_maxTraceLength = 2.; // Maximum trace length } - m_tracker = std::make_unique(m_trackerSettings); + m_tracker = BaseTracker::CreateTracker(m_trackerSettings, m_fps); return true; } @@ -515,10 +151,9 @@ class OpenCVDNNExample : public VideoExample /// \param framesCounter /// \param currTime /// - void DrawData(cv::Mat frame, const std::vector& tracks, int 
framesCounter, int currTime) + void DrawData(cv::Mat frame, const std::vector& tracks, int framesCounter, int currTime) override { - if (m_showLogs) - std::cout << "Frame " << framesCounter << ": tracks = " << tracks.size() << ", time = " << currTime << std::endl; + m_logger->info("Frame {0} ({1}): tracks = {2}, time = {3}", framesCounter, m_framesCount, tracks.size(), currTime); for (const auto& track : tracks) { @@ -526,14 +161,14 @@ class OpenCVDNNExample : public VideoExample 0.5f, // Minimal ratio raw_trajectory_points / trajectory_lenght cv::Size2f(0.1f, 8.0f))) // Min and max ratio: width / height { - DrawTrack(frame, 1, track, false, framesCounter); + DrawTrack(frame, track, false, framesCounter); std::stringstream label; label << TypeConverter::Type2Str(track.m_type) << std::setprecision(2) << ": " << track.m_confidence; int baseLine = 0; - cv::Size labelSize = cv::getTextSize(label.str(), cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); + cv::Size labelSize = cv::getTextSize(label.str(), cv::FONT_HERSHEY_TRIPLEX, 0.5, 1, &baseLine); cv::Rect brect = track.m_rrect.boundingRect(); if (brect.x < 0) @@ -556,8 +191,8 @@ class OpenCVDNNExample : public VideoExample brect.y = std::max(0, frame.rows - brect.height - 1); brect.height = std::min(brect.height, frame.rows - 1); } - DrawFilledRect(frame, cv::Rect(cv::Point(brect.x, brect.y - labelSize.height), cv::Size(labelSize.width, labelSize.height + baseLine)), cv::Scalar(200, 200, 200), 150); - cv::putText(frame, label.str(), brect.tl(), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0)); + //DrawFilledRect(frame, cv::Rect(cv::Point(brect.x, brect.y - labelSize.height), cv::Size(labelSize.width, labelSize.height + baseLine)), cv::Scalar(200, 200, 200), 150); + //cv::putText(frame, label.str(), brect.tl(), cv::FONT_HERSHEY_TRIPLEX, 0.5, cv::Scalar(0, 0, 0)); } } @@ -565,113 +200,242 @@ class OpenCVDNNExample : public VideoExample } }; -#ifdef BUILD_YOLO_LIB // 
---------------------------------------------------------------------- +#ifdef BUILD_ONNX_TENSORRT + /// -/// \brief The YoloDarknetExample class +/// \brief The ONNXTensorRTExample class /// -class YoloDarknetExample : public VideoExample +class ONNXTensorRTExample final : public VideoExample { public: - YoloDarknetExample(const cv::CommandLineParser& parser) - : - VideoExample(parser) + ONNXTensorRTExample(const cv::CommandLineParser& parser) + : VideoExample(parser) { } protected: - /// - /// \brief InitDetector - /// \param frame - /// \return - /// - bool InitDetector(cv::UMat frame) + /// + /// \brief InitDetector + /// \param frame + /// \return + /// + bool InitDetector(cv::UMat frame) override { config_t config; - + if (!m_trackerSettingsLoaded) + { #ifdef _WIN32 - std::string pathToModel = "../../data/"; + std::string pathToModel = "../../data/"; #else - std::string pathToModel = "../data/"; + std::string pathToModel = "../data/"; #endif - size_t maxBatch = 1; - enum class YOLOModels - { - TinyYOLOv3 = 0, - YOLOv3, - YOLOv4, - TinyYOLOv4, - ScaledYOLOv4 - }; - YOLOModels usedModel = YOLOModels::ScaledYOLOv4; - switch (usedModel) - { - case YOLOModels::TinyYOLOv3: - config.emplace("modelConfiguration", pathToModel + "yolov3-tiny.cfg"); - config.emplace("modelBinary", pathToModel + "yolov3-tiny.weights"); - config.emplace("confidenceThreshold", "0.5"); - break; - - case YOLOModels::YOLOv3: - config.emplace("modelConfiguration", pathToModel + "yolov3.cfg"); - config.emplace("modelBinary", pathToModel + "yolov3.weights"); - config.emplace("confidenceThreshold", "0.7"); - break; - - case YOLOModels::YOLOv4: - config.emplace("modelConfiguration", pathToModel + "yolov4.cfg"); - config.emplace("modelBinary", pathToModel + "yolov4.weights"); - config.emplace("confidenceThreshold", "0.5"); - break; - - case YOLOModels::TinyYOLOv4: - config.emplace("modelConfiguration", pathToModel + "yolov4-tiny.cfg"); - config.emplace("modelBinary", pathToModel + 
"yolov4-tiny.weights"); - config.emplace("confidenceThreshold", "0.5"); - maxBatch = 4; - break; - - case YOLOModels::ScaledYOLOv4: - config.emplace("modelConfiguration", pathToModel + "yolov4-csp.cfg"); - config.emplace("modelBinary", pathToModel + "yolov4-csp.weights"); - config.emplace("confidenceThreshold", "0.5"); - maxBatch = 2; - break; - } - if (maxBatch < m_batchSize) - maxBatch = m_batchSize; - config.emplace("maxBatch", std::to_string(m_batchSize)); - config.emplace("classNames", pathToModel + "coco.names"); - config.emplace("maxCropRatio", "-1"); - - config.emplace("white_list", std::to_string((objtype_t)ObjectTypes::obj_person)); - config.emplace("white_list", std::to_string((objtype_t)ObjectTypes::obj_car)); - config.emplace("white_list", std::to_string((objtype_t)ObjectTypes::obj_bicycle)); - config.emplace("white_list", std::to_string((objtype_t)ObjectTypes::obj_motorbike)); - config.emplace("white_list", std::to_string((objtype_t)ObjectTypes::obj_bus)); - config.emplace("white_list", std::to_string((objtype_t)ObjectTypes::obj_truck)); - - m_detector = std::unique_ptr(CreateDetector(tracking::Detectors::Yolo_Darknet, config, frame)); - if (m_detector.get()) + size_t maxBatch = 1; + enum class YOLOModels + { + TinyYOLOv3 = 0, + YOLOv3, + YOLOv4, + TinyYOLOv4, + YOLOv5, + YOLOv6, + YOLOv7, + YOLOv7Mask, + YOLOv8, + YOLOV8_OBB, + YOLOv8Mask, + YOLOv9, + YOLOv10, + YOLOv11, + YOLOv11_OBB, + YOLOv11Mask, + YOLOv12 + }; + YOLOModels usedModel = YOLOModels::YOLOv9; + switch (usedModel) + { + case YOLOModels::TinyYOLOv3: + config.emplace("modelConfiguration", pathToModel + "yolov3-tiny.cfg"); + config.emplace("modelBinary", pathToModel + "yolov3-tiny.weights"); + config.emplace("confidenceThreshold", "0.5"); + config.emplace("inference_precision", "FP32"); + config.emplace("net_type", "YOLOV3"); + maxBatch = 4; + config.emplace("maxCropRatio", "2"); + break; + + case YOLOModels::YOLOv3: + config.emplace("modelConfiguration", pathToModel + "yolov3.cfg"); + 
config.emplace("modelBinary", pathToModel + "yolov3.weights"); + config.emplace("confidenceThreshold", "0.7"); + config.emplace("inference_precision", "FP32"); + config.emplace("net_type", "YOLOV3"); + maxBatch = 2; + config.emplace("maxCropRatio", "-1"); + break; + + case YOLOModels::YOLOv4: + config.emplace("modelConfiguration", pathToModel + "yolov4.cfg"); + config.emplace("modelBinary", pathToModel + "yolov4.weights"); + config.emplace("confidenceThreshold", "0.4"); + config.emplace("inference_precision", "FP32"); + config.emplace("net_type", "YOLOV4"); + maxBatch = 1; + config.emplace("maxCropRatio", "-1"); + break; + + case YOLOModels::TinyYOLOv4: + config.emplace("modelConfiguration", pathToModel + "yolov4-tiny.cfg"); + config.emplace("modelBinary", pathToModel + "yolov4-tiny.weights"); + config.emplace("confidenceThreshold", "0.5"); + config.emplace("inference_precision", "FP32"); + config.emplace("net_type", "YOLOV4_TINY"); + maxBatch = 1; + config.emplace("maxCropRatio", "-1"); + break; + + case YOLOModels::YOLOv5: + config.emplace("modelConfiguration", pathToModel + "yolov5s.cfg"); + config.emplace("modelBinary", pathToModel + "yolov5s.weights"); + config.emplace("confidenceThreshold", "0.5"); + config.emplace("inference_precision", "FP32"); + config.emplace("net_type", "YOLOV5"); + maxBatch = 1; + config.emplace("maxCropRatio", "-1"); + break; + + case YOLOModels::YOLOv6: + config.emplace("modelConfiguration", pathToModel + "yolov6s.onnx"); + config.emplace("modelBinary", pathToModel + "yolov6s.onnx"); + config.emplace("confidenceThreshold", "0.5"); + config.emplace("inference_precision", "FP32"); + config.emplace("net_type", "YOLOV6"); + maxBatch = 1; + config.emplace("maxCropRatio", "-1"); + break; + + case YOLOModels::YOLOv7: + config.emplace("modelConfiguration", pathToModel + "yolov7.onnx"); + config.emplace("modelBinary", pathToModel + "yolov7.onnx"); + config.emplace("confidenceThreshold", "0.2"); + config.emplace("inference_precision", "FP32"); 
+ config.emplace("net_type", "YOLOV7"); + maxBatch = 1; + config.emplace("maxCropRatio", "-1"); + break; + + case YOLOModels::YOLOv7Mask: + config.emplace("modelConfiguration", pathToModel + "yolov7-mask.onnx"); + config.emplace("modelBinary", pathToModel + "yolov7-mask.onnx"); + config.emplace("confidenceThreshold", "0.2"); + config.emplace("inference_precision", "FP32"); + config.emplace("net_type", "YOLOV7Mask"); + maxBatch = 1; + config.emplace("maxCropRatio", "-1"); + break; + + case YOLOModels::YOLOv8: + config.emplace("modelConfiguration", pathToModel + "yolov8s.onnx"); + config.emplace("modelBinary", pathToModel + "yolov8s.onnx"); + config.emplace("confidenceThreshold", "0.2"); + config.emplace("inference_precision", "FP32"); + config.emplace("net_type", "YOLOV8"); + maxBatch = 1; + config.emplace("maxCropRatio", "-1"); + break; + + case YOLOModels::YOLOV8_OBB: + config.emplace("modelConfiguration", pathToModel + "yolov8s-obb.onnx"); + config.emplace("modelBinary", pathToModel + "yolov8s-obb.onnx"); + config.emplace("confidenceThreshold", "0.2"); + config.emplace("inference_precision", "FP16"); + config.emplace("net_type", "YOLOV8_OBB"); + maxBatch = 1; + config.emplace("maxCropRatio", "-1"); + break; + + case YOLOModels::YOLOv8Mask: + config.emplace("modelConfiguration", pathToModel + "yolov8s-seg.onnx"); + config.emplace("modelBinary", pathToModel + "yolov8s-seg.onnx"); + config.emplace("confidenceThreshold", "0.2"); + config.emplace("inference_precision", "FP32"); + config.emplace("net_type", "YOLOV8Mask"); + maxBatch = 1; + config.emplace("maxCropRatio", "-1"); + break; + + case YOLOModels::YOLOv9: + config.emplace("modelConfiguration", pathToModel + "yolov9-c.onnx"); + config.emplace("modelBinary", pathToModel + "yolov9-c.onnx"); + config.emplace("confidenceThreshold", "0.2"); + config.emplace("inference_precision", "FP32"); + config.emplace("net_type", "YOLOV9"); + maxBatch = 1; + config.emplace("maxCropRatio", "-1"); + break; + } + if (maxBatch < 
m_batchSize) + maxBatch = m_batchSize; + config.emplace("maxBatch", std::to_string(maxBatch)); + config.emplace("classNames", pathToModel + "coco.names"); + + //config.emplace("white_list", "person"); + //config.emplace("white_list", "car"); + //config.emplace("white_list", "bicycle"); + //config.emplace("white_list", "motorbike"); + //config.emplace("white_list", "bus"); + //config.emplace("white_list", "truck"); + } + else { - m_detector->SetMinObjectSize(cv::Size(frame.cols / 40, frame.rows / 40)); - return true; + config.emplace("modelConfiguration", m_trackerSettings.m_nnConfig); + config.emplace("modelBinary", m_trackerSettings.m_nnWeights); + config.emplace("confidenceThreshold", std::to_string(m_trackerSettings.m_confidenceThreshold)); + config.emplace("classNames", m_trackerSettings.m_classNames); + config.emplace("maxCropRatio", std::to_string(m_trackerSettings.m_maxCropRatio)); + config.emplace("maxBatch", std::to_string(m_trackerSettings.m_maxBatch)); + config.emplace("gpuId", std::to_string(m_trackerSettings.m_gpuId)); + config.emplace("net_type", m_trackerSettings.m_netType); + config.emplace("inference_precision", m_trackerSettings.m_inferencePrecision); + config.emplace("video_memory", std::to_string(m_trackerSettings.m_maxVideoMemory)); + + for (auto wname : m_trackerSettings.m_whiteList) + { + config.emplace("white_list", wname); + } + + m_logger->info("YoloTensorRTExample:"); + m_logger->info("modelConfiguration: {}", m_trackerSettings.m_nnConfig); + m_logger->info("modelBinary: {}", m_trackerSettings.m_nnWeights); + m_logger->info("confidenceThreshold: {}", m_trackerSettings.m_confidenceThreshold); + m_logger->info("classNames: {}", m_trackerSettings.m_classNames); + m_logger->info("maxCropRatio: {}", m_trackerSettings.m_maxCropRatio); + m_logger->info("maxBatch: {}", m_trackerSettings.m_maxBatch); + m_logger->info("gpuId: {}", m_trackerSettings.m_gpuId); + m_logger->info("net_type: {}", m_trackerSettings.m_netType); + 
m_logger->info("inference_precision: {}", m_trackerSettings.m_inferencePrecision); + m_logger->info("video_memory: {}", m_trackerSettings.m_maxVideoMemory); + for (auto wname : m_trackerSettings.m_whiteList) + { + m_logger->info("white name: {}", wname); + } } - return false; - } - - /// - /// \brief InitTracker - /// \param frame - /// \return - /// - bool InitTracker(cv::UMat frame) + + m_detector = BaseDetector::CreateDetector(tracking::Detectors::ONNX_TensorRT, config, frame); + return (m_detector.get() != nullptr); + } + + /// + /// \brief InitTracker + /// \param frame + /// \return + /// + bool InitTracker(cv::UMat frame) override { if (!m_trackerSettingsLoaded) { - bool useDeepSORT = false; - + bool useDeepSORT = true; if (useDeepSORT) { #ifdef _WIN32 @@ -680,263 +444,44 @@ class YoloDarknetExample : public VideoExample std::string pathToModel = "../data/"; #endif - m_trackerSettings.m_embeddings.emplace_back(pathToModel + "open_model_zoo/person-reidentification-retail-0286/FP16-INT8/person-reidentification-retail-0286.xml", - pathToModel + "open_model_zoo/person-reidentification-retail-0286/FP16-INT8/person-reidentification-retail-0286.bin", - cv::Size(128, 256), - std::vector{ ObjectTypes::obj_person }); - -#if 0 - m_trackerSettings.m_embeddings.emplace_back("", - pathToModel + "open_model_zoo/vehicle-reid-0001/osnet_ain_x1_0_vehicle_reid.onnx", - cv::Size(208, 208), - std::vector{ ObjectTypes::obj_car, ObjectTypes::obj_bus, ObjectTypes::obj_truck }); -#endif + m_trackerSettings.m_embeddings.emplace_back(pathToModel + "reid/osnet_x0_25_msmt17.onnx", pathToModel + "reid/osnet_x0_25_msmt17.onnx", + cv::Size(128, 256), + std::vector{ TypeConverter::Str2Type("person"), TypeConverter::Str2Type("car"), TypeConverter::Str2Type("bus"), TypeConverter::Str2Type("truck"), TypeConverter::Str2Type("vehicle") }); std::array distType{ 0.f, // DistCenters 0.f, // DistRects 0.5f, // DistJaccard 0.f, // DistHist - 0.5f // DistFeatureCos + 0.5f, // DistFeatureCos + 0.f // 
DistMahalanobis }; if (!m_trackerSettings.SetDistances(distType)) - std::cerr << "SetDistances failed! Absolutly summ must be equal 1" << std::endl; + m_logger->error("SetDistances failed! Absolutly summ must be equal 1"); } else { m_trackerSettings.SetDistance(tracking::DistCenters); } - m_trackerSettings.m_kalmanType = tracking::KalmanLinear; - m_trackerSettings.m_filterGoal = tracking::FilterRect; - m_trackerSettings.m_lostTrackType = useDeepSORT ? tracking::TrackNone : tracking::TrackCSRT; // Use visual objects tracker for collisions resolving. Used if m_filterGoal == tracking::FilterRect - m_trackerSettings.m_matchType = tracking::MatchHungrian; - m_trackerSettings.m_useAcceleration = false; // Use constant acceleration motion model - m_trackerSettings.m_dt = m_trackerSettings.m_useAcceleration ? 0.05f : 0.4f; // Delta time for Kalman filter - m_trackerSettings.m_accelNoiseMag = 0.2f; // Accel noise magnitude for Kalman filter - m_trackerSettings.m_distThres = 0.8f; // Distance threshold between region and object on two frames -#if 0 - m_trackerSettings.m_minAreaRadiusPix = frame.rows / 20.f; -#else - m_trackerSettings.m_minAreaRadiusPix = -1.f; -#endif - m_trackerSettings.m_minAreaRadiusK = 0.8f; - m_trackerSettings.m_maximumAllowedSkippedFrames = cvRound(2 * m_fps); // Maximum allowed skipped frames - m_trackerSettings.m_maxTraceLength = cvRound(2 * m_fps); // Maximum trace length - } - m_trackerSettings.AddNearTypes(ObjectTypes::obj_car, ObjectTypes::obj_bus, true); - m_trackerSettings.AddNearTypes(ObjectTypes::obj_car, ObjectTypes::obj_truck, true); - m_trackerSettings.AddNearTypes(ObjectTypes::obj_bus, ObjectTypes::obj_truck, true); - m_trackerSettings.AddNearTypes(ObjectTypes::obj_person, ObjectTypes::obj_bicycle, true); - m_trackerSettings.AddNearTypes(ObjectTypes::obj_person, ObjectTypes::obj_motorbike, true); - - m_tracker = std::make_unique(m_trackerSettings); - - return true; - } - - /// - /// \brief DrawData - /// \param frame - /// \param tracks - 
/// \param framesCounter - /// \param currTime - /// - void DrawData(cv::Mat frame, const std::vector& tracks, int framesCounter, int currTime) - { - if (m_showLogs) - std::cout << "Frame " << framesCounter << ": tracks = " << tracks.size() << ", time = " << currTime << std::endl; - - for (const auto& track : tracks) - { - if (track.IsRobust(3, // Minimal trajectory size - 0.5f, // Minimal ratio raw_trajectory_points / trajectory_lenght - cv::Size2f(0.1f, 8.0f))) // Min and max ratio: width / height - { - DrawTrack(frame, 1, track, false, framesCounter); - - - std::stringstream label; -#if 1 - label << TypeConverter::Type2Str(track.m_type) << std::setprecision(2) << ": " << track.m_confidence; -#else - label << TypeConverter::Type2Str(track.m_type) << " " << std::setprecision(2) << track.m_velocity << ": " << track.m_confidence; -#endif - int baseLine = 0; - cv::Size labelSize = cv::getTextSize(label.str(), cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); - - cv::Rect brect = track.m_rrect.boundingRect(); - if (brect.x < 0) - { - brect.width = std::min(brect.width, frame.cols - 1); - brect.x = 0; - } - else if (brect.x + brect.width >= frame.cols) - { - brect.x = std::max(0, frame.cols - brect.width - 1); - brect.width = std::min(brect.width, frame.cols - 1); - } - if (brect.y - labelSize.height < 0) - { - brect.height = std::min(brect.height, frame.rows - 1); - brect.y = labelSize.height; - } - else if (brect.y + brect.height >= frame.rows) - { - brect.y = std::max(0, frame.rows - brect.height - 1); - brect.height = std::min(brect.height, frame.rows - 1); - } - DrawFilledRect(frame, cv::Rect(cv::Point(brect.x, brect.y - labelSize.height), cv::Size(labelSize.width, labelSize.height + baseLine)), cv::Scalar(200, 200, 200), 150); - cv::putText(frame, label.str(), brect.tl(), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0)); - } - } - - //m_detector->CalcMotionMap(frame); - } -}; - -#endif - -#ifdef BUILD_YOLO_TENSORRT -// 
---------------------------------------------------------------------- - -/// -/// \brief The YoloTensorRTExample class -/// -class YoloTensorRTExample : public VideoExample -{ -public: - YoloTensorRTExample(const cv::CommandLineParser& parser) - : - VideoExample(parser) - { - } - -protected: - /// - /// \brief InitDetector - /// \param frame - /// \return - /// - bool InitDetector(cv::UMat frame) - { - config_t config; - -#ifdef _WIN32 - std::string pathToModel = "../../data/"; -#else - std::string pathToModel = "../data/"; -#endif - size_t maxBatch = 1; - enum class YOLOModels - { - TinyYOLOv3 = 0, - YOLOv3, - YOLOv4, - TinyYOLOv4, - YOLOv5 - }; - YOLOModels usedModel = YOLOModels::YOLOv4; - switch (usedModel) - { - case YOLOModels::TinyYOLOv3: - config.emplace("modelConfiguration", pathToModel + "yolov3-tiny.cfg"); - config.emplace("modelBinary", pathToModel + "yolov3-tiny.weights"); - config.emplace("confidenceThreshold", "0.5"); - config.emplace("inference_precison", "FP32"); - config.emplace("net_type", "YOLOV3_TINY"); - maxBatch = 4; - config.emplace("maxCropRatio", "2"); - break; - - case YOLOModels::YOLOv3: - config.emplace("modelConfiguration", pathToModel + "yolov3.cfg"); - config.emplace("modelBinary", pathToModel + "yolov3.weights"); - config.emplace("confidenceThreshold", "0.7"); - config.emplace("inference_precison", "FP32"); - config.emplace("net_type", "YOLOV3"); - maxBatch = 2; - config.emplace("maxCropRatio", "-1"); - break; - - case YOLOModels::YOLOv4: - config.emplace("modelConfiguration", pathToModel + "yolov4.cfg"); - config.emplace("modelBinary", pathToModel + "yolov4.weights"); - config.emplace("confidenceThreshold", "0.8"); - config.emplace("inference_precison", "FP32"); - config.emplace("net_type", "YOLOV4"); - maxBatch = 1; - config.emplace("maxCropRatio", "-1"); - break; - - case YOLOModels::TinyYOLOv4: - config.emplace("modelConfiguration", pathToModel + "yolov4-tiny.cfg"); - config.emplace("modelBinary", pathToModel + 
"yolov4-tiny.weights"); - config.emplace("confidenceThreshold", "0.5"); - config.emplace("inference_precison", "FP32"); - config.emplace("net_type", "YOLOV4_TINY"); - maxBatch = 4; - config.emplace("maxCropRatio", "1"); - break; - - case YOLOModels::YOLOv5: - config.emplace("modelConfiguration", pathToModel + "yolov5x.cfg"); - config.emplace("modelBinary", pathToModel + "yolov5x.weights"); - config.emplace("confidenceThreshold", "0.5"); - config.emplace("inference_precison", "FP32"); - config.emplace("net_type", "YOLOV5"); - maxBatch = 1; - config.emplace("maxCropRatio", "-1"); - break; - } - if (maxBatch < m_batchSize) - maxBatch = m_batchSize; - config.emplace("maxBatch", std::to_string(m_batchSize)); - config.emplace("classNames", pathToModel + "coco.names"); - - config.emplace("white_list", std::to_string((objtype_t)ObjectTypes::obj_person)); - config.emplace("white_list", std::to_string((objtype_t)ObjectTypes::obj_car)); - config.emplace("white_list", std::to_string((objtype_t)ObjectTypes::obj_bicycle)); - config.emplace("white_list", std::to_string((objtype_t)ObjectTypes::obj_motorbike)); - config.emplace("white_list", std::to_string((objtype_t)ObjectTypes::obj_bus)); - config.emplace("white_list", std::to_string((objtype_t)ObjectTypes::obj_truck)); - - m_detector = std::unique_ptr(CreateDetector(tracking::Detectors::Yolo_TensorRT, config, frame)); - if (m_detector.get()) - { - m_detector->SetMinObjectSize(cv::Size(frame.cols / 40, frame.rows / 40)); - return true; - } - return false; - } - - /// - /// \brief InitTracker - /// \param frame - /// \return - /// - bool InitTracker(cv::UMat frame) - { - if (!m_trackerSettingsLoaded) - { - m_trackerSettings.SetDistance(tracking::DistCenters); + //m_trackerSettings.SetDistance(tracking::DistCenters); m_trackerSettings.m_kalmanType = tracking::KalmanLinear; m_trackerSettings.m_filterGoal = tracking::FilterCenter; - m_trackerSettings.m_lostTrackType = tracking::TrackKCF; // Use visual objects tracker for collisions 
resolving. Used if m_filterGoal == tracking::FilterRect + m_trackerSettings.m_lostTrackType = tracking::TrackKCF; // Use visual objects tracker for collisions resolving. Used if m_filterGoal == tracking::FilterRect m_trackerSettings.m_matchType = tracking::MatchHungrian; m_trackerSettings.m_dt = 0.3f; // Delta time for Kalman filter m_trackerSettings.m_accelNoiseMag = 0.2f; // Accel noise magnitude for Kalman filter m_trackerSettings.m_distThres = 0.8f; // Distance threshold between region and object on two frames m_trackerSettings.m_minAreaRadiusPix = frame.rows / 20.f; - m_trackerSettings.m_maximumAllowedSkippedFrames = cvRound(2 * m_fps); // Maximum allowed skipped frames - m_trackerSettings.m_maxTraceLength = cvRound(5 * m_fps); // Maximum trace length + m_trackerSettings.m_maximumAllowedLostTime = 2.; // Maximum allowed skipped frames + m_trackerSettings.m_maxTraceLength = 5.; // Maximum trace length } - m_trackerSettings.AddNearTypes(ObjectTypes::obj_car, ObjectTypes::obj_bus, false); - m_trackerSettings.AddNearTypes(ObjectTypes::obj_car, ObjectTypes::obj_truck, false); - m_trackerSettings.AddNearTypes(ObjectTypes::obj_person, ObjectTypes::obj_bicycle, true); - m_trackerSettings.AddNearTypes(ObjectTypes::obj_person, ObjectTypes::obj_motorbike, true); + //m_trackerSettings.AddNearTypes(TypeConverter::Str2Type("car"), TypeConverter::Str2Type("bus"), false); + //m_trackerSettings.AddNearTypes(TypeConverter::Str2Type("car"), TypeConverter::Str2Type("truck"), false); + //m_trackerSettings.AddNearTypes(TypeConverter::Str2Type("person"), TypeConverter::Str2Type("bicycle"), true); + //m_trackerSettings.AddNearTypes(TypeConverter::Str2Type("person"), TypeConverter::Str2Type("motorbike"), true); - m_tracker = std::make_unique(m_trackerSettings); + m_tracker = BaseTracker::CreateTracker(m_trackerSettings, m_fps); return true; } @@ -948,24 +493,29 @@ class YoloTensorRTExample : public VideoExample /// \param framesCounter /// \param currTime /// - void DrawData(cv::Mat 
frame, const std::vector& tracks, int framesCounter, int currTime) + void DrawData(cv::Mat frame, const std::vector& tracks, int framesCounter, int currTime) override { - if (m_showLogs) - std::cout << "Frame " << framesCounter << ": tracks = " << tracks.size() << ", time = " << currTime << std::endl; + m_logger->info("Frame {0} ({1}): tracks = {2}, time = {3}", framesCounter, m_framesCount, tracks.size(), currTime); + + static float averFps = 0; + if (averFps == 0) + averFps = 1000.f / currTime; + else + averFps = 0.9f * averFps + 0.1f * (1000.f / currTime); + cv::putText(frame, std::to_string(cvRound(averFps)) + " fps", cv::Point(10, 40), cv::FONT_HERSHEY_TRIPLEX, (frame.cols > 1000) ? 1.5 : 1.0, cv::Scalar(255, 0, 255)); for (const auto& track : tracks) { - if (track.IsRobust(2, // Minimal trajectory size - 0.5f, // Minimal ratio raw_trajectory_points / trajectory_lenght - cv::Size2f(0.1f, 8.0f))) // Min and max ratio: width / height + if (track.IsRobust(2, // Minimal trajectory size + 0.5f, // Minimal ratio raw_trajectory_points / trajectory_lenght + cv::Size2f(0.1f, 8.0f), 2)) // Min and max ratio: width / height { - DrawTrack(frame, 1, track, true, framesCounter); - + DrawTrack(frame, track, true, framesCounter); std::stringstream label; label << TypeConverter::Type2Str(track.m_type) << " " << std::setprecision(2) << track.m_velocity << ": " << track.m_confidence; int baseLine = 0; - cv::Size labelSize = cv::getTextSize(label.str(), cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); + cv::Size labelSize = cv::getTextSize(label.str(), cv::FONT_HERSHEY_TRIPLEX, 0.5, 1, &baseLine); cv::Rect brect = track.m_rrect.boundingRect(); if (brect.x < 0) @@ -988,8 +538,8 @@ class YoloTensorRTExample : public VideoExample brect.y = std::max(0, frame.rows - brect.height - 1); brect.height = std::min(brect.height, frame.rows - 1); } - DrawFilledRect(frame, cv::Rect(cv::Point(brect.x, brect.y - labelSize.height), cv::Size(labelSize.width, labelSize.height + baseLine)), 
cv::Scalar(200, 200, 200), 150); - cv::putText(frame, label.str(), brect.tl(), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0)); + //DrawFilledRect(frame, cv::Rect(cv::Point(brect.x, brect.y - labelSize.height), cv::Size(labelSize.width, labelSize.height + baseLine)), cv::Scalar(200, 200, 200), 150); + //cv::putText(frame, label.str(), brect.tl(), cv::FONT_HERSHEY_TRIPLEX, 0.5, cv::Scalar(0, 0, 0)); } } @@ -997,4 +547,4 @@ class YoloTensorRTExample : public VideoExample } }; -#endif +#endif // BUILD_ONNX_TENSORRT diff --git a/example/main.cpp b/example/main.cpp index 82e68c869..f266bbd6e 100644 --- a/example/main.cpp +++ b/example/main.cpp @@ -1,99 +1,97 @@ -#include "MouseExample.h" -#include "examples.h" - -#include -#include - -// ---------------------------------------------------------------------- - -static void Help() -{ - printf("\nExamples of the Multitarget tracking algorithm\n" - "Usage: \n" - " ./MultitargetTracker [--example]= [--start_frame]= [--end_frame]= [--end_delay]= [--out]= [--show_logs]= [--async]= [--res]= [--settings]= [--batch_size=] \n\n" - "Press:\n" - "\'m\' key for change mode: play|pause. When video is paused you can press any key for get next frame. 
\n\n" - "Press Esc to exit from video \n\n" - ); -} - -const char* keys = -{ - "{ @1 |../data/atrium.avi | movie file | }" - "{ e example |1 | number of example 0 - MouseTracking, 1 - MotionDetector, 2 - FaceDetector, 3 - PedestrianDetector, 4 - OpenCV dnn objects detector, 5 - YOLO Darknet detector, 6 - YOLO TensorRT Detector | }" - "{ sf start_frame |0 | Start a video from this position | }" - "{ ef end_frame |0 | Play a video to this position (if 0 then played to the end of file) | }" - "{ ed end_delay |0 | Delay in milliseconds after video ending | }" - "{ o out | | Name of result video file | }" - "{ sl show_logs |1 | Show Trackers logs | }" - "{ g gpu |0 | Use OpenCL acceleration | }" - "{ a async |1 | Use 2 theads for processing pipeline | }" - "{ r res | | Path to the csv file with tracking result | }" - "{ s settings | | Path to the init file with tracking settings | }" - "{ bs batch_size |1 | Batch size - frames count for processing | }" -}; - -// ---------------------------------------------------------------------- - -int main(int argc, char** argv) -{ - Help(); - - cv::CommandLineParser parser(argc, argv, keys); - - bool useOCL = parser.get("gpu") != 0; - cv::ocl::setUseOpenCL(useOCL); - std::cout << (cv::ocl::useOpenCL() ? 
"OpenCL is enabled" : "OpenCL not used") << std::endl; - - int exampleNum = parser.get("example"); - int asyncPipeline = parser.get("async"); - - std::unique_ptr detector; - - switch (exampleNum) - { - case 0: - MouseTracking(parser); - break; - - case 1: - detector = std::make_unique(parser); - break; - - case 2: - detector = std::make_unique(parser); - break; - - case 3: - detector = std::make_unique(parser); - break; - - case 4: - detector = std::make_unique(parser); - break; - -#ifdef BUILD_YOLO_LIB - case 5: - detector = std::make_unique(parser); - break; -#endif - -#ifdef BUILD_YOLO_TENSORRT - case 6: - detector = std::make_unique(parser); - break; -#endif - - default: - std::cerr << "Wrong example number: " << exampleNum << std::endl; - break; - } - - if (detector.get()) - asyncPipeline ? detector->AsyncProcess() : detector->SyncProcess(); - -#ifndef SILENT_WORK - cv::destroyAllWindows(); -#endif - return 0; -} +#include "MouseExample.h" +#include "examples.h" +#include "MotionDetectorExample.h" + +#ifdef BUILD_CARS_COUNTING +#include "CarsCounting.h" +#endif + +#include +#include + +///---------------------------------------------------------------------- +int main(int argc, char** argv) +{ + const char* keys = + { + "{ @1 |../data/atrium.avi | movie file | }" + "{ e example |1 | number of example 0 - MouseTracking, 1 - MotionDetector, 2 - opencv_dnn detector, 3 - YOLO TensorRT Detector, 4 - Cars counting | }" + "{ sf start_frame |0 | Start a video from this position | }" + "{ ef end_frame |0 | Play a video to this position (if 0 then played to the end of file) | }" + "{ ed end_delay |0 | Delay in milliseconds after video ending | }" + "{ o out | | Name of result video file | }" + "{ show_logs |info | Show Trackers logs: trace, debug, info, warning, error, critical, off | }" + "{ g gpu |0 | Use OpenCL acceleration | }" + "{ a async |1 | Use 2 theads for processing pipeline | }" + "{ r log_res | | Path to the csv file with tracking result | }" + "{ cvat_res 
| | Path to the xml file in cvat format with tracking result | }" + "{ s settings | | Path to the ini file with tracking settings | }" + "{ bs batch_size |1 | Batch size - frames count for processing | }" + "{ wf write_n_frame |1 | Write logs on each N frame: 1 for writing each frame | }" + "{ hm heat_map |0 | For CarsCounting: Draw heat map | }" + "{ geo_bind |geo_bind.ini | For CarsCounting: ini file with geographical binding | }" + "{ contrast_adjustment |0 | Use contrast adjustment for frames before detection | }" + }; + + cv::CommandLineParser parser(argc, argv, keys); + + std::cout << "\nExamples of the Multitarget tracking algorithm\n" + "Usage: \n" + " ./MultitargetTracker [--example]= [--start_frame]= [--end_frame]= [--end_delay]= [--out]= [--show_logs]= [--async]= [--res]= [--settings]= [--batch_size=] \n\n" + "Press:\n" + "\'m\' key for change mode: play|pause. When video is paused you can press any key for get next frame. \n\n" + "Press Esc to exit from video \n" << std::endl; + + parser.printMessage(); + + bool useOCL = parser.get("gpu") != 0; + cv::ocl::setUseOpenCL(useOCL); + std::cout << (cv::ocl::useOpenCL() ? "OpenCL is enabled" : "OpenCL not used") << std::endl; + + int exampleNum = parser.get("example"); + int asyncPipeline = parser.get("async"); + + std::unique_ptr detector; + + switch (exampleNum) + { + case 0: + MouseTracking(parser); + break; + + case 1: + detector = std::make_unique(parser); + break; + + case 2: + detector = std::make_unique(parser); + break; + +#ifdef BUILD_ONNX_TENSORRT + case 3: + detector = std::make_unique(parser); + break; +#endif + +#ifdef BUILD_CARS_COUNTING + case 4: + { + auto carsCounting = new CarsCounting(parser); + detector = std::unique_ptr(carsCounting); + break; + } +#endif + + default: + std::cerr << "Wrong example number: " << exampleNum << std::endl; + break; + } + + if (detector.get()) + asyncPipeline ? 
detector->AsyncProcess() : detector->SyncProcess(); + +#ifndef SILENT_WORK + cv::destroyAllWindows(); +#endif + return 0; +} diff --git a/setup.py b/setup.py new file mode 100644 index 000000000..a5308c628 --- /dev/null +++ b/setup.py @@ -0,0 +1,213 @@ +import os, re, sys, shutil, platform, subprocess + +from setuptools import setup, find_packages, Extension +from setuptools.command.build_ext import build_ext +from setuptools.command.install_lib import install_lib +from setuptools.command.install_scripts import install_scripts +from distutils.command.install_data import install_data +from distutils.version import LooseVersion + +PACKAGE_NAME = "pymtracking" + +class CMakeExtension(Extension): + def __init__(self, name, sourcedir=''): + Extension.__init__(self, name, sources=[]) + self.sourcedir = os.path.abspath(sourcedir) + + +class InstallCMakeLibsData(install_data): + """ + Just a wrapper to get the install data into the egg-info + Listing the installed files in the egg-info guarantees that + all of the package files will be uninstalled when the user + uninstalls your package through pip + """ + def run(self): + """ + Outfiles are the libraries that were built using cmake + """ + # There seems to be no other way to do this; I tried listing the + # libraries during the execution of the InstallCMakeLibs.run() but + # setuptools never tracked them, seems like setuptools wants to + # track the libraries through package data more than anything... 
+ # help would be appriciated + self.outfiles = self.distribution.data_files + +__metaclass__ = type +class InstallCMakeLibs(install_lib, object): + """ + Get the libraries from the parent distribution, use those as the outfiles + Skip building anything; everything is already built, forward libraries to + the installation step + """ + def run(self): + """ + Copy libraries from the bin directory and place them as appropriate + """ + self.announce("Moving library files", level=3) + # We have already built the libraries in the previous build_ext step + self.skip_build = True + if hasattr(self.distribution, 'bin_dir'): + bin_dir = self.distribution.bin_dir + else: + bin_dir = os.path.join(self.build_dir, "Release") + if not os.path.exists(bin_dir): + bin_dir = "build/Release" + self.build_dir = "build/Release" + print("bin_dir:", bin_dir, "build_dir:", self.build_dir) + # Depending on the files that are generated from your cmake + # build chain, you may need to change the below code, such that + # your files are moved to the appropriate location when the installation + # is run + libs = [os.path.join(bin_dir, _lib) for _lib in + os.listdir(bin_dir) if + os.path.isfile(os.path.join(bin_dir, _lib)) and + os.path.splitext(_lib)[1] in [".dll", ".so"] + and not (_lib.startswith("python") or _lib.startswith(PACKAGE_NAME))] + for lib in libs: + shutil.move(lib, os.path.join(self.build_dir, + os.path.basename(lib))) + # Mark the libs for installation, adding them to + # distribution.data_files seems to ensure that setuptools' record + # writer appends them to installed-files.txt in the package's egg-info + # + # Also tried adding the libraries to the distribution.libraries list, + # but that never seemed to add them to the installed-files.txt in the + # egg-info, and the online recommendation seems to be adding libraries + # into eager_resources in the call to setup(), which I think puts them + # in data_files anyways. + # + # What is the best way? 
+ # These are the additional installation files that should be + # included in the package, but are resultant of the cmake build + # step; depending on the files that are generated from your cmake + # build chain, you may need to modify the below code + self.distribution.data_files = [os.path.join(self.install_dir, + os.path.basename(lib)) + for lib in libs] + # Must be forced to run after adding the libs to data_files + self.distribution.run_command("install_data") + super(InstallCMakeLibs, self).run() + +__metaclass__ = type +class InstallCMakeScripts(install_scripts, object): + """ + Install the scripts in the build dir + """ + def run(self): + """ + Copy the required directory to the build directory and super().run() + """ + self.announce("Moving scripts files", level=3) + # Scripts were already built in a previous step + self.skip_build = True + bin_dir = self.distribution.bin_dir + scripts_dirs = [os.path.join(bin_dir, _dir) for _dir in + os.listdir(bin_dir) if + os.path.isdir(os.path.join(bin_dir, _dir))] + for scripts_dir in scripts_dirs: + shutil.move(scripts_dir, + os.path.join(self.build_dir, + os.path.basename(scripts_dir))) + # Mark the scripts for installation, adding them to + # distribution.scripts seems to ensure that the setuptools' record + # writer appends them to installed-files.txt in the package's egg-info + self.distribution.scripts = scripts_dirs + super(InstallCMakeScripts, self).run() + +__metaclass__ = type +class BuildCMakeExt(build_ext, object): + """ + Builds using cmake instead of the python setuptools implicit build + """ + def run(self): + """ + Perform build_cmake before doing the 'normal' stuff + """ + for extension in self.extensions: + self.build_cmake(extension) + super(BuildCMakeExt, self).run() + + def build_cmake(self, extension): + """ + The steps required to build the extension + """ + self.announce("Preparing the build environment", level=3) + build_dir = os.path.join(self.build_temp) + extension_path = 
os.path.abspath(os.path.dirname(self.get_ext_fullpath(extension.name))) + os.makedirs(build_dir) + os.makedirs(extension_path) + python_version = str(sys.version_info[0]) + "." + str(sys.version_info[1]) + + # Now that the necessary directories are created, build + self.announce("Configuring cmake project", level=3) + cmake_args = ['-DPYTHON_EXECUTABLE=' + sys.executable, + '-DUSE_OCV_BGFG=ON', + '-DUSE_OCV_KCF=ON', + '-DSILENT_WORK=ON', + '-DBUILD_EXAMPLES=OFF', + '-DBUILD_ASYNC_DETECTOR=OFF', + '-DBUILD_CARS_COUNTING=OFF', + '-DBUILD_YOLO_LIB=OFF', + '-DBUILD_YOLO_TENSORRT=OFF', + '-DMTRACKER_PYTHON=ON'] + if not os.path.exists(self.build_temp): + os.makedirs(self.build_temp) + self.spawn(['cmake', '-H'+extension.sourcedir, '-B'+self.build_temp]+ cmake_args) + + self.announce("Building binaries", level=3) + self.spawn(["cmake", "--build", self.build_temp, + "--config", "Release", '--', '-j8']) + + # Build finished, now copy the files into the copy directory + # The copy directory is the parent directory of the extension (.pyd) + self.announce("Moving built python module", level=3) + + bin_dir = "build" # self.build_temp + self.distribution.bin_dir = bin_dir + list_bin = os.listdir(bin_dir) + print("bin_dir:", bin_dir, ", extension_path:", extension_path, ", list_bin:", list_bin) + pyd_path = [] + for _pyd in list_bin: + print("_pyd:", _pyd) + if os.path.isfile(os.path.join(bin_dir, _pyd)) and os.path.splitext(_pyd)[0].startswith(PACKAGE_NAME) and os.path.splitext(_pyd)[1] in [".pyd", ".so"]: + pyd_path.append(os.path.join(bin_dir, _pyd)) + print("pyd_path:", pyd_path) + pyd_path = pyd_path[0] + shutil.move(pyd_path, extension_path) + + # After build_ext is run, the following commands will run: + # + # install_lib + # install_scripts + # + # These commands are subclassed above to avoid pitfalls that + # setuptools tries to impose when installing these, as it usually + # wants to build those libs and scripts as well or move them to a + # different place. 
See comments above for additional information + +with open("README.md", "r") as fh: + long_description = fh.read() + +setup( + name=PACKAGE_NAME, + version='1.0.1', + author='Nuzhny007', + author_email='nuzhny@mail.ru', + url='https://github.com/Smorodov/Multitarget-tracker', + license='Apache 2.0', + description='Official Python wrapper for Multitarget-tracker', + long_description=long_description, + long_description_content_type="text/markdown", + ext_modules=[CMakeExtension(name=PACKAGE_NAME, sourcedir='.')], + cmdclass={ + 'build_ext': BuildCMakeExt, + 'install_data': InstallCMakeLibsData, + 'install_lib': InstallCMakeLibs, + #'install_scripts': InstallCMakeScripts + }, + zip_safe=False, + packages=find_packages(), + keywords=['Multitarget-tracker', 'Multiple Object Tracking', 'Computer Vision', 'Machine Learning'], +) + diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index bdb39ce68..5aa83f307 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,2 +1,36 @@ add_subdirectory(Detector) -add_subdirectory(Tracker) \ No newline at end of file +add_subdirectory(Tracker) + +if(MTRACKER_PYTHON) + file(GLOB_RECURSE mtracker_python_src python_bind/*.cpp) + file(GLOB_RECURSE mtracker_python_inc python_bind/*.h) + + include_directories(${CMAKE_SOURCE_DIR}/thirdparty/pybind11/include) + include_directories(${PYTHON_INCLUDE_DIRS}) + include_directories(${NUMPY_INCLUDE_DIR}) + +if (MSVC) + if("${CMAKE_SIZEOF_VOID_P}" STREQUAL "4") + set(BIT_SYSTEM x32) + else() + set(BIT_SYSTEM x64) + endif() + + include_directories(Detector/darknet/3rdparty/include) + link_directories(Detector/darknet/3rdparty/lib/${BIT_SYSTEM}) + set(LIB_PTHREAD pthreadVC2) +else() + set(LIB_PTHREAD pthread) +endif() + + include_directories(mtracking) + + pybind11_add_module(pymtracking ${mtracker_python_src} ${mtracker_python_inc}) + target_link_libraries(pymtracking PRIVATE mtracking mdetection ${OpenCV_LIBS} ${PYTHON_LIBRARY} pybind11::module) + + # set_target_properties(pymtracking 
PROPERTIES SUFFIX "${PYTHON_MODULE_EXTENSION}") + + # target_compile_definitions(pymtracking PRIVATE BGS_PYTHON_SUPPORT=1) + + set_property(TARGET pymtracking PROPERTY OUTPUT_NAME "pymtracking") +endif() diff --git a/src/Detector/BackgroundSubtract.cpp b/src/Detector/BackgroundSubtract.cpp index 494ff7f4b..a4fc689cf 100644 --- a/src/Detector/BackgroundSubtract.cpp +++ b/src/Detector/BackgroundSubtract.cpp @@ -150,14 +150,6 @@ bool BackgroundSubtract::Init(const config_t& config) break; #endif - case ALG_SuBSENSE: - m_modelSuBSENSE = std::make_unique(); // default params - break; - - case ALG_LOBSTER: - m_modelSuBSENSE = std::make_unique(); // default params - break; - case ALG_MOG2: { auto params = std::make_tuple(500, 16, 1); @@ -214,21 +206,13 @@ cv::UMat BackgroundSubtract::GetImg(const cv::UMat& image) if (image.channels() == 1) { cv::UMat newImg; -#if (CV_VERSION_MAJOR < 4) - cv::cvtColor(image, newImg, CV_GRAY2BGR); -#else cv::cvtColor(image, newImg, cv::COLOR_GRAY2BGR); -#endif return newImg; } else if (image.channels() == 3) { cv::UMat newImg; -#if (CV_VERSION_MAJOR < 4) - cv::cvtColor(image, newImg, CV_BGR2GRAY); -#else cv::cvtColor(image, newImg, cv::COLOR_BGR2GRAY); -#endif return newImg; } } @@ -258,19 +242,6 @@ void BackgroundSubtract::Subtract(const cv::UMat& image, cv::UMat& foreground) break; #endif - case ALG_SuBSENSE: - case ALG_LOBSTER: - if (m_rawForeground.size() != image.size() || m_rawForeground.type() != CV_8UC1) - { - m_modelSuBSENSE->initialize(GetImg(image).getMat(cv::ACCESS_READ), cv::Mat()); - m_rawForeground.create(image.size(), CV_8UC1); - } - else - { - m_modelSuBSENSE->apply(GetImg(image), m_rawForeground); - } - break; - case ALG_MOG2: m_modelOCV->apply(GetImg(image), m_rawForeground); cv::threshold(m_rawForeground, m_rawForeground, 200, 255, cv::THRESH_BINARY); diff --git a/src/Detector/BackgroundSubtract.h b/src/Detector/BackgroundSubtract.h index 944d38f0d..31dfe5d97 100644 --- a/src/Detector/BackgroundSubtract.h +++ 
b/src/Detector/BackgroundSubtract.h @@ -1,9 +1,7 @@ #pragma once -#include "defines.h" +#include "mtracking/defines.h" #include "vibe_src/vibe.hpp" -#include "Subsense/BackgroundSubtractorSuBSENSE.h" -#include "Subsense/BackgroundSubtractorLOBSTER.h" #ifdef USE_OCV_BGFG #include @@ -21,8 +19,6 @@ class BackgroundSubtract ALG_MOG, ALG_GMG, ALG_CNT, - ALG_SuBSENSE, - ALG_LOBSTER, ALG_MOG2 }; @@ -41,7 +37,6 @@ class BackgroundSubtract private: std::unique_ptr m_modelVibe; cv::Ptr m_modelOCV; - std::unique_ptr m_modelSuBSENSE; cv::UMat m_rawForeground; diff --git a/src/Detector/BaseDetector.cpp b/src/Detector/BaseDetector.cpp index 8a35ff65e..6c197efff 100644 --- a/src/Detector/BaseDetector.cpp +++ b/src/Detector/BaseDetector.cpp @@ -1,14 +1,9 @@ #include "BaseDetector.h" #include "MotionDetector.h" -#include "FaceDetector.h" -#include "PedestrianDetector.h" #include "OCVDNNDetector.h" -#ifdef BUILD_YOLO_LIB -#include "YoloDarknetDetector.h" -#endif -#ifdef BUILD_YOLO_TENSORRT -#include "YoloTensorRTDetector.h" +#ifdef BUILD_ONNX_TENSORRT +#include "ONNXTensorRTDetector.h" #endif /// @@ -17,68 +12,43 @@ /// \param gray /// \return /// -BaseDetector* CreateDetector( - tracking::Detectors detectorType, - const config_t& config, - cv::UMat& frame - ) +std::unique_ptr BaseDetector::CreateDetector(tracking::Detectors detectorType, + const config_t& config, + const cv::UMat& frame) { - BaseDetector* detector = nullptr; + std::unique_ptr detector; switch (detectorType) { case tracking::Motion_VIBE: - detector = new MotionDetector(BackgroundSubtract::BGFG_ALGS::ALG_VIBE, frame); + detector = std::make_unique(BackgroundSubtract::BGFG_ALGS::ALG_VIBE, frame); break; case tracking::Motion_MOG: - detector = new MotionDetector(BackgroundSubtract::BGFG_ALGS::ALG_MOG, frame); + detector = std::make_unique(BackgroundSubtract::BGFG_ALGS::ALG_MOG, frame); break; case tracking::Motion_GMG: - detector = new MotionDetector(BackgroundSubtract::BGFG_ALGS::ALG_GMG, frame); + detector = 
std::make_unique(BackgroundSubtract::BGFG_ALGS::ALG_GMG, frame); break; case tracking::Motion_CNT: - detector = new MotionDetector(BackgroundSubtract::BGFG_ALGS::ALG_CNT, frame); - break; - - case tracking::Motion_SuBSENSE: - detector = new MotionDetector(BackgroundSubtract::BGFG_ALGS::ALG_SuBSENSE, frame); - break; - - case tracking::Motion_LOBSTER: - detector = new MotionDetector(BackgroundSubtract::BGFG_ALGS::ALG_LOBSTER, frame); + detector = std::make_unique(BackgroundSubtract::BGFG_ALGS::ALG_CNT, frame); break; case tracking::Motion_MOG2: - detector = new MotionDetector(BackgroundSubtract::BGFG_ALGS::ALG_MOG2, frame); - break; - - case tracking::Face_HAAR: - detector = new FaceDetector(frame); - break; - - case tracking::Pedestrian_HOG: - case tracking::Pedestrian_C4: - detector = new PedestrianDetector(frame); + detector = std::make_unique(BackgroundSubtract::BGFG_ALGS::ALG_MOG2, frame); break; +#ifdef USE_OCV_DNN case tracking::DNN_OCV: - detector = new OCVDNNDetector(frame); + detector = std::make_unique(frame); break; - - case tracking::Yolo_Darknet: -#ifdef BUILD_YOLO_LIB - detector = new YoloDarknetDetector(frame); -#else - std::cerr << "Darknet inference engine was not configured in CMake" << std::endl; #endif - break; - case tracking::Yolo_TensorRT: -#ifdef BUILD_YOLO_TENSORRT - detector = new YoloTensorRTDetector(frame); + case tracking::ONNX_TensorRT: +#ifdef BUILD_ONNX_TENSORRT + detector = std::make_unique(frame); #else std::cerr << "TensorRT inference engine was not configured in CMake" << std::endl; #endif @@ -89,9 +59,18 @@ BaseDetector* CreateDetector( } if (!detector->Init(config)) + detector.reset(); + return detector; +} + +/// +std::unique_ptr BaseDetector::CreateDetectorKV(tracking::Detectors detectorType, const KeyVal& config, const cv::Mat& gray) +{ + config_t mconfig; + for (auto kv : config.m_config) { - delete detector; - detector = nullptr; + mconfig.emplace(kv.first, kv.second); } - return detector; + cv::UMat uframe = 
gray.getUMat(cv::ACCESS_READ); + return CreateDetector(detectorType, mconfig, uframe); } diff --git a/src/Detector/BaseDetector.h b/src/Detector/BaseDetector.h index 9f5914ae9..bf8ae8828 100644 --- a/src/Detector/BaseDetector.h +++ b/src/Detector/BaseDetector.h @@ -1,7 +1,21 @@ #pragma once #include -#include "defines.h" +#include "mtracking/defines.h" + +/// +/// \brief The KeyVal struct +/// +struct KeyVal +{ + KeyVal() = default; + void Add(const std::string& key, const std::string& val) + { + m_config.emplace_back(key, val); + } + + std::vector> m_config; +}; /// /// \brief The BaseDetector class @@ -9,6 +23,15 @@ class BaseDetector { public: + /// + /// \brief BaseDetector + /// \param frame + /// + BaseDetector() + { + m_minObjectSize.width = 5; + m_minObjectSize.height = m_minObjectSize.width; + } /// /// \brief BaseDetector /// \param frame @@ -19,6 +42,15 @@ class BaseDetector m_minObjectSize.height = m_minObjectSize.width; } /// + /// \brief BaseDetector + /// \param frame + /// + BaseDetector(const cv::Mat& frame) + { + m_minObjectSize.width = std::max(5, frame.cols / 100); + m_minObjectSize.height = m_minObjectSize.width; + } + /// /// \brief ~BaseDetector /// virtual ~BaseDetector(void) = default; @@ -34,6 +66,11 @@ class BaseDetector /// \param frame /// virtual void Detect(const cv::UMat& frame) = 0; + virtual void DetectMat(cv::Mat frame) + { + cv::UMat um = frame.getUMat(cv::ACCESS_READ); + return Detect(um); + } /// /// \brief Detect @@ -45,19 +82,55 @@ class BaseDetector for (size_t i = 0; i < frames.size(); ++i) { Detect(frames[i]); - auto res = GetDetects(); + const auto& res = GetDetects(); regions[i].assign(std::begin(res), std::end(res)); } } + /// + /// \brief ResetModel + /// \param img + /// \param roiRect + /// + virtual void ResetModel(const cv::UMat& /*img*/, const cv::Rect& /*roiRect*/) + { + } + + /// + /// \brief ResetIgnoreMask + /// + virtual void ResetIgnoreMask() + { + if (!m_ignoreMask.empty()) + m_ignoreMask = 255; + } + /// - 
/// \brief ResetModel + /// \brief UpdateIgnoreMask /// \param img /// \param roiRect /// - virtual void ResetModel(const cv::UMat& /*img*/, const cv::Rect& /*roiRect*/) + virtual void UpdateIgnoreMask(const cv::UMat& img, cv::Rect roiRect) { - } + if (m_ignoreMask.empty()) + m_ignoreMask = cv::Mat(img.size(), CV_8UC1, cv::Scalar(255)); + + auto Clamp = [](int& v, int& size, int hi) + { + if (v < 0) + { + size += v; + v = 0; + } + else if (v + size > hi - 1) + { + size = hi - 1 - v; + } + }; + Clamp(roiRect.x, roiRect.width, m_ignoreMask.cols); + Clamp(roiRect.y, roiRect.height, m_ignoreMask.rows); + m_ignoreMask(roiRect) = 0; + } /// /// \brief CanGrayProcessing @@ -94,16 +167,21 @@ class BaseDetector cv::Mat foreground(m_motionMap.size(), CV_8UC1, cv::Scalar(0, 0, 0)); for (const auto& region : m_regions) { -#if (CV_VERSION_MAJOR < 4) - cv::ellipse(foreground, region.m_rrect, cv::Scalar(255, 255, 255), CV_FILLED); -#else - cv::ellipse(foreground, region.m_rrect, cv::Scalar(255, 255, 255), cv::FILLED); -#endif + if (region.m_boxMask.empty()) + { + cv::ellipse(foreground, region.m_rrect, cv::Scalar(255, 255, 255), cv::FILLED); + } + else + { + cv::Rect brect = Clamp(cv::Rect(region.m_brect.x, region.m_brect.y, region.m_boxMask.cols, region.m_boxMask.rows), foreground.size()); + region.m_boxMask.copyTo(foreground(brect)); + } } - + if (!m_ignoreMask.empty()) + cv::bitwise_and(foreground, m_ignoreMask, foreground); cv::normalize(foreground, m_normFor, 255, 0, cv::NORM_MINMAX, m_motionMap.type()); - double alpha = 0.95; + double alpha = 0.9; cv::addWeighted(m_motionMap, alpha, m_normFor, 1 - alpha, 0, m_motionMap); const int chans = frame.channels(); @@ -123,18 +201,29 @@ class BaseDetector ++moPtr; } } +#if 0 + if (!m_ignoreMask.empty()) + cv::imshow("ignoreMask", m_ignoreMask); +#endif } + /// + static std::unique_ptr CreateDetector(tracking::Detectors detectorType, const config_t& config, const cv::UMat& gray); + static std::unique_ptr 
CreateDetectorKV(tracking::Detectors detectorType, const KeyVal& config, const cv::Mat& gray); + + protected: regions_t m_regions; - cv::Size m_minObjectSize; + cv::Size m_minObjectSize{2, 2}; + + cv::Mat m_ignoreMask; - // Motion map for visualization current detections + // Motion map for visualization current detections cv::Mat m_motionMap; - cv::Mat m_normFor; + cv::Mat m_normFor; - std::set m_classesWhiteList; + std::set m_classesWhiteList; std::vector GetCrops(float maxCropRatio, cv::Size netSize, cv::Size imgSize) const { @@ -205,21 +294,10 @@ class BaseDetector /// objtype_t T2T(size_t typeInd) const { - if (typeInd < m_typesMap.size()) - return m_typesMap[typeInd]; - else - return bad_type; + objtype_t res = (typeInd < m_typesMap.size()) ? m_typesMap[typeInd] : bad_type; + return res; } private: std::vector m_typesMap; }; - - -/// -/// \brief CreateDetector -/// \param detectorType -/// \param gray -/// \return -/// -BaseDetector* CreateDetector(tracking::Detectors detectorType, const config_t& config, cv::UMat& gray); diff --git a/src/Detector/CMakeLists.txt b/src/Detector/CMakeLists.txt index 3ff27b226..ffc367077 100644 --- a/src/Detector/CMakeLists.txt +++ b/src/Detector/CMakeLists.txt @@ -2,45 +2,31 @@ cmake_minimum_required(VERSION 3.9) project(mdetection) - set(detector_sources +set(detector_sources BaseDetector.cpp MotionDetector.cpp BackgroundSubtract.cpp - vibe_src/vibe.cpp - Subsense/BackgroundSubtractorLBSP.cpp - Subsense/BackgroundSubtractorLOBSTER.cpp - Subsense/BackgroundSubtractorSuBSENSE.cpp - Subsense/LBSP.cpp - FaceDetector.cpp - PedestrianDetector.cpp - pedestrians/c4-pedestrian-detector.cpp - OCVDNNDetector.cpp + vibe_src/vibe.cpp) +set(detector_headers BaseDetector.h MotionDetector.h BackgroundSubtract.h - vibe_src/vibe.hpp - Subsense/BackgroundSubtractorLBSP.h - Subsense/BackgroundSubtractorLOBSTER.h - Subsense/BackgroundSubtractorSuBSENSE.h - Subsense/DistanceUtils.h - Subsense/LBSP.h - Subsense/RandUtils.h - FaceDetector.h - 
PedestrianDetector.h - pedestrians/c4-pedestrian-detector.h - OCVDNNDetector.h -) - -if (BUILD_YOLO_LIB) - set(detector_sources ${detector_sources} YoloDarknetDetector.cpp YoloDarknetDetector.h) + vibe_src/vibe.hpp) + +if (BUILD_ONNX_TENSORRT) + set(detector_sources ${detector_sources} ONNXTensorRTDetector.cpp) + set(detector_headers ${detector_headers} ONNXTensorRTDetector.h) endif() -if (BUILD_YOLO_TENSORRT) - set(detector_sources ${detector_sources} YoloTensorRTDetector.cpp YoloTensorRTDetector.h) +option(USE_OCV_DNN "Use OpenCV DNN module?" ON) +if (USE_OCV_DNN) + set(detector_sources ${detector_sources} OCVDNNDetector.cpp) + set(detector_headers ${detector_headers} OCVDNNDetector.h) + add_definitions(-DUSE_OCV_DNN) endif() - SOURCE_GROUP("Detector" FILES ${detector_sources}) +SOURCE_GROUP("Detector" FILES ${detector_sources} ${detector_headers}) include(CheckIncludeFileCXX) check_include_file_cxx(opencv2/bgsegm.hpp HAVE_OPENCV_CONTRIB) @@ -64,10 +50,10 @@ endif(USE_OCV_BGFG) include_directories(${PROJECT_SOURCE_DIR}) include_directories(${PROJECT_SOURCE_DIR}/../src) -include_directories(${PROJECT_SOURCE_DIR}/../common) +include_directories(${PROJECT_SOURCE_DIR}/..) 
if (CMAKE_COMPILER_IS_GNUCXX) - add_library(${PROJECT_NAME} SHARED + add_library(${PROJECT_NAME} SHARED ${detector_sources}) else(CMAKE_COMPILER_IS_GNUCXX) add_library(${PROJECT_NAME} @@ -75,24 +61,23 @@ else(CMAKE_COMPILER_IS_GNUCXX) endif() if (CMAKE_COMPILER_IS_GNUCXX) -set(LIBS - ${OpenCV_LIBS} -# iconv -) + set(LIBS ${OpenCV_LIBS}) else(CMAKE_COMPILER_IS_GNUCXX) -set(LIBS - ${OpenCV_LIBS} -) + set(LIBS ${OpenCV_LIBS}) endif() -if (BUILD_YOLO_LIB) - set(LIBS ${LIBS} yolo_lib) -endif(BUILD_YOLO_LIB) - -if (BUILD_YOLO_TENSORRT) +if (BUILD_ONNX_TENSORRT) set(LIBS ${LIBS} yolo_rt_lib) -endif(BUILD_YOLO_TENSORRT) +endif(BUILD_ONNX_TENSORRT) + +target_link_libraries(${PROJECT_NAME} PRIVATE ${LIBS}) -target_link_libraries(${PROJECT_NAME} ${LIBS}) +set_target_properties(${PROJECT_NAME} PROPERTIES PUBLIC_HEADER "${detector_headers}") +install(TARGETS ${PROJECT_NAME} + EXPORT MTTrackingExports + LIBRARY DESTINATION lib + RUNTIME DESTINATION bin + PUBLIC_HEADER DESTINATION include/${PROJECT_NAME}) +set_target_properties(${PROJECT_NAME} PROPERTIES FOLDER "libs") diff --git a/src/Detector/FaceDetector.cpp b/src/Detector/FaceDetector.cpp deleted file mode 100644 index 63368fe72..000000000 --- a/src/Detector/FaceDetector.cpp +++ /dev/null @@ -1,50 +0,0 @@ -#include "FaceDetector.h" - -/// -/// \brief FaceDetector::FaceDetector -/// \param gray -/// -FaceDetector::FaceDetector(const cv::UMat& gray) - : BaseDetector(gray) -{ -} - -/// -/// \brief FaceDetector::Init -/// \param cascadeFileName -/// \return -/// -bool FaceDetector::Init(const config_t& config) -{ - auto cascadeFileName = config.find("cascadeFileName"); - if (cascadeFileName != config.end() && - (!m_cascade.load(cascadeFileName->second) || m_cascade.empty())) - { - std::cerr << "Cascade " << cascadeFileName->second << " not opened!" 
<< std::endl; - return false; - } - return true; -} - -/// -/// \brief FaceDetector::Detect -/// \param gray -/// -void FaceDetector::Detect(const cv::UMat& gray) -{ - bool findLargestObject = false; - bool filterRects = true; - std::vector faceRects; - m_cascade.detectMultiScale(gray, - faceRects, - 1.1, - (filterRects || findLargestObject) ? 3 : 0, - findLargestObject ? cv::CASCADE_FIND_BIGGEST_OBJECT : 0, - m_minObjectSize, - cv::Size(gray.cols / 2, gray.rows / 2)); - m_regions.clear(); - for (auto rect : faceRects) - { - m_regions.push_back(rect); - } -} diff --git a/src/Detector/FaceDetector.h b/src/Detector/FaceDetector.h deleted file mode 100644 index efb0d72f8..000000000 --- a/src/Detector/FaceDetector.h +++ /dev/null @@ -1,25 +0,0 @@ -#pragma once - -#include "BaseDetector.h" - -/// -/// \brief The FaceDetector class -/// -class FaceDetector : public BaseDetector -{ -public: - FaceDetector(const cv::UMat& gray); - ~FaceDetector(void) = default; - - bool Init(const config_t& config); - - void Detect(const cv::UMat& gray); - - bool CanGrayProcessing() const - { - return true; - } - -private: - cv::CascadeClassifier m_cascade; -}; diff --git a/src/Detector/MotionDetector.cpp b/src/Detector/MotionDetector.cpp index 1654a0cef..cf6f3c26c 100644 --- a/src/Detector/MotionDetector.cpp +++ b/src/Detector/MotionDetector.cpp @@ -5,15 +5,22 @@ /// \param algType /// \param gray /// -MotionDetector::MotionDetector( - BackgroundSubtract::BGFG_ALGS algType, - cv::UMat& gray - ) - : - BaseDetector(gray), - m_algType(algType) +MotionDetector::MotionDetector(BackgroundSubtract::BGFG_ALGS algType, const cv::UMat& gray) + : BaseDetector(gray), m_algType(algType) { - m_fg = gray.clone(); + m_fg.create(gray.size(), CV_8UC1); + m_backgroundSubst = std::make_unique(algType, gray.channels()); +} + +/// +/// \brief MotionDetector::MotionDetector +/// \param algType +/// \param gray +/// +MotionDetector::MotionDetector(BackgroundSubtract::BGFG_ALGS algType, const cv::Mat& gray) + : 
BaseDetector(gray), m_algType(algType) +{ + m_fg.create(gray.size(), CV_8UC1); m_backgroundSubst = std::make_unique(algType, gray.channels()); } @@ -38,13 +45,12 @@ void MotionDetector::DetectContour() { m_regions.clear(); std::vector> contours; - std::vector hierarchy; -#if (CV_VERSION_MAJOR < 4) - cv::findContours(m_fg, contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE, cv::Point()); +#if ((CV_VERSION_MAJOR > 4) || ((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 9))) + cv::findContoursLinkRuns(m_fg, contours); #else - cv::findContours(m_fg, contours, hierarchy, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); + cv::findContours(m_fg, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); #endif - for (size_t i = 0; i < contours.size(); i++) + for (size_t i = 0; i < contours.size(); ++i) { cv::Rect br = cv::boundingRect(contours[i]); @@ -71,8 +77,10 @@ void MotionDetector::DetectContour() void MotionDetector::Detect(const cv::UMat& gray) { m_backgroundSubst->Subtract(gray, m_fg); + if (!m_ignoreMask.empty()) + cv::bitwise_and(m_fg, m_ignoreMask, m_fg); - DetectContour(); + DetectContour(); } /// @@ -117,4 +125,10 @@ void MotionDetector::CalcMotionMap(cv::Mat& frame) ++moPtr; } } + +#if 0 + std::cout << "m_ignoreMask = " << m_ignoreMask.size() << std::endl; + if (!m_ignoreMask.empty()) + cv::imshow("ignoreMask", m_ignoreMask); +#endif } diff --git a/src/Detector/MotionDetector.h b/src/Detector/MotionDetector.h index 5e9535947..f61d4f0f3 100644 --- a/src/Detector/MotionDetector.h +++ b/src/Detector/MotionDetector.h @@ -1,37 +1,38 @@ -#pragma once - -#include "BaseDetector.h" -#include "BackgroundSubtract.h" - -/// -/// \brief The MotionDetector class -/// -class MotionDetector : public BaseDetector -{ -public: - MotionDetector(BackgroundSubtract::BGFG_ALGS algType, cv::UMat& gray); - ~MotionDetector(void) = default; - - bool Init(const config_t& config); - - void Detect(const cv::UMat& gray); - - bool CanGrayProcessing() const - { - 
return true; - } - - void CalcMotionMap(cv::Mat& frame); - - void ResetModel(const cv::UMat& img, const cv::Rect& roiRect); - -private: - void DetectContour(); - - std::unique_ptr m_backgroundSubst; - - cv::UMat m_fg; - - BackgroundSubtract::BGFG_ALGS m_algType = BackgroundSubtract::BGFG_ALGS::ALG_MOG2; - bool m_useRotatedRect = false; -}; +#pragma once + +#include "BaseDetector.h" +#include "BackgroundSubtract.h" + +/// +/// \brief The MotionDetector class +/// +class MotionDetector : public BaseDetector +{ +public: + MotionDetector(BackgroundSubtract::BGFG_ALGS algType, const cv::UMat& gray); + MotionDetector(BackgroundSubtract::BGFG_ALGS algType, const cv::Mat& gray); + ~MotionDetector(void) = default; + + bool Init(const config_t& config) override; + + void Detect(const cv::UMat& gray) override; + + bool CanGrayProcessing() const override + { + return true; + } + + void CalcMotionMap(cv::Mat& frame) override; + + void ResetModel(const cv::UMat& img, const cv::Rect& roiRect) override; + +private: + void DetectContour(); + + std::unique_ptr m_backgroundSubst; + + cv::UMat m_fg; + + BackgroundSubtract::BGFG_ALGS m_algType = BackgroundSubtract::BGFG_ALGS::ALG_MOG2; + bool m_useRotatedRect = false; +}; diff --git a/src/Detector/OCVDNNDetector.cpp b/src/Detector/OCVDNNDetector.cpp index 7fdf7e643..8151c6be6 100644 --- a/src/Detector/OCVDNNDetector.cpp +++ b/src/Detector/OCVDNNDetector.cpp @@ -1,312 +1,1227 @@ -#include -#include "OCVDNNDetector.h" -#include "nms.h" - -/// -/// \brief OCVDNNDetector::OCVDNNDetector -/// \param gray -/// -OCVDNNDetector::OCVDNNDetector(const cv::UMat& colorFrame) - : BaseDetector(colorFrame) - -{ - m_classNames = { "background", - "aeroplane", "bicycle", "bird", "boat", - "bottle", "bus", "car", "cat", "chair", - "cow", "diningtable", "dog", "horse", - "motorbike", "person", "pottedplant", - "sheep", "sofa", "train", "tvmonitor" }; -} - -/// -/// \brief OCVDNNDetector::Init -/// \return -/// -bool OCVDNNDetector::Init(const config_t& 
config) -{ -#if (((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR >= 2)) || (CV_VERSION_MAJOR > 4)) - std::map dictTargets; - dictTargets[cv::dnn::DNN_TARGET_CPU] = "DNN_TARGET_CPU"; - dictTargets[cv::dnn::DNN_TARGET_OPENCL] = "DNN_TARGET_OPENCL"; - dictTargets[cv::dnn::DNN_TARGET_OPENCL_FP16] = "DNN_TARGET_OPENCL_FP16"; - dictTargets[cv::dnn::DNN_TARGET_MYRIAD] = "DNN_TARGET_MYRIAD"; - dictTargets[cv::dnn::DNN_TARGET_CUDA] = "DNN_TARGET_CUDA"; - dictTargets[cv::dnn::DNN_TARGET_CUDA_FP16] = "DNN_TARGET_CUDA_FP16"; - - std::map dictBackends; - dictBackends[cv::dnn::DNN_BACKEND_DEFAULT] = "DNN_BACKEND_DEFAULT"; - dictBackends[cv::dnn::DNN_BACKEND_HALIDE] = "DNN_BACKEND_HALIDE"; - dictBackends[cv::dnn::DNN_BACKEND_INFERENCE_ENGINE] = "DNN_BACKEND_INFERENCE_ENGINE"; - dictBackends[cv::dnn::DNN_BACKEND_OPENCV] = "DNN_BACKEND_OPENCV"; - dictBackends[cv::dnn::DNN_BACKEND_VKCOM] = "DNN_BACKEND_VKCOM"; - dictBackends[cv::dnn::DNN_BACKEND_CUDA] = "DNN_BACKEND_CUDA"; - dictBackends[1000000] = "DNN_BACKEND_INFERENCE_ENGINE_NGRAPH"; - dictBackends[1000000 + 1] = "DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019"; - - std::cout << "Avaible pairs for Target - backend:" << std::endl; - std::vector> pairs = cv::dnn::getAvailableBackends(); - for (auto p : pairs) - { - std::cout << dictBackends[p.first] << " (" << p.first << ") - " << dictTargets[p.second] << " (" << p.second << ")" << std::endl; - } -#endif - - auto modelConfiguration = config.find("modelConfiguration"); - auto modelBinary = config.find("modelBinary"); - if (modelConfiguration != config.end() && modelBinary != config.end()) - m_net = cv::dnn::readNet(modelConfiguration->second, modelBinary->second, ""); - - auto dnnTarget = config.find("dnnTarget"); - if (dnnTarget != config.end()) - { - std::map targets; - targets["DNN_TARGET_CPU"] = cv::dnn::DNN_TARGET_CPU; - targets["DNN_TARGET_OPENCL"] = cv::dnn::DNN_TARGET_OPENCL; -#if (CV_VERSION_MAJOR >= 4) - targets["DNN_TARGET_OPENCL_FP16"] = cv::dnn::DNN_TARGET_OPENCL_FP16; - 
targets["DNN_TARGET_MYRIAD"] = cv::dnn::DNN_TARGET_MYRIAD; -#endif -#if (((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR >= 2)) || (CV_VERSION_MAJOR > 4)) - targets["DNN_TARGET_CUDA"] = cv::dnn::DNN_TARGET_CUDA; - targets["DNN_TARGET_CUDA_FP16"] = cv::dnn::DNN_TARGET_CUDA_FP16; -#endif - std::cout << "Trying to set target " << dnnTarget->second << "... "; - auto target = targets.find(dnnTarget->second); - if (target != std::end(targets)) - { - std::cout << "Succeded!" << std::endl; - m_net.setPreferableTarget(target->second); - } - else - { - std::cout << "Failed" << std::endl; - } - } - -#if (CV_VERSION_MAJOR >= 4) - auto dnnBackend = config.find("dnnBackend"); - if (dnnBackend != config.end()) - { - std::map backends; - backends["DNN_BACKEND_DEFAULT"] = cv::dnn::DNN_BACKEND_DEFAULT; - backends["DNN_BACKEND_HALIDE"] = cv::dnn::DNN_BACKEND_HALIDE; - backends["DNN_BACKEND_INFERENCE_ENGINE"] = cv::dnn::DNN_BACKEND_INFERENCE_ENGINE; - backends["DNN_BACKEND_OPENCV"] = cv::dnn::DNN_BACKEND_OPENCV; - backends["DNN_BACKEND_VKCOM"] = cv::dnn::DNN_BACKEND_VKCOM; -#if (((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR >= 2)) || (CV_VERSION_MAJOR > 4)) - backends["DNN_BACKEND_CUDA"] = cv::dnn::DNN_BACKEND_CUDA; -#endif - std::cout << "Trying to set backend " << dnnBackend->second << "... "; - auto backend = backends.find(dnnBackend->second); - if (backend != std::end(backends)) - { - std::cout << "Succeded!" << std::endl; - m_net.setPreferableBackend(backend->second); - } - else - { - std::cout << "Failed" << std::endl; - } - } -#endif - - auto classNames = config.find("classNames"); - if (classNames != config.end()) - { - std::ifstream classNamesFile(classNames->second); - if (classNamesFile.is_open()) - { - m_classNames.clear(); - std::string className; - for (; std::getline(classNamesFile, className); ) - { - m_classNames.push_back(className); - } - if (!FillTypesMap(m_classNames)) - { - std::cout << "Unknown types in class names!" 
<< std::endl; - assert(0); - } - } - } - - m_classesWhiteList.clear(); - auto whiteRange = config.equal_range("white_list"); - for (auto it = whiteRange.first; it != whiteRange.second; ++it) - { - m_classesWhiteList.insert(std::stoi(it->second)); - } - - auto confidenceThreshold = config.find("confidenceThreshold"); - if (confidenceThreshold != config.end()) - m_confidenceThreshold = std::stof(confidenceThreshold->second); - - auto nmsThreshold = config.find("nmsThreshold"); - if (nmsThreshold != config.end()) - m_nmsThreshold = std::stof(nmsThreshold->second); - - auto swapRB = config.find("swapRB"); - if (swapRB != config.end()) - m_swapRB = std::stoi(swapRB->second) != 0; - - auto maxCropRatio = config.find("maxCropRatio"); - if (maxCropRatio != config.end()) - m_maxCropRatio = std::stof(maxCropRatio->second); - - auto inWidth = config.find("inWidth"); - if (inWidth != config.end()) - m_inWidth = std::stoi(inWidth->second); - - auto inHeight = config.find("inHeight"); - if (inHeight != config.end()) - m_inHeight = std::stoi(inHeight->second); - - if (!m_net.empty()) - { - m_outNames = m_net.getUnconnectedOutLayersNames(); - m_outLayers = m_net.getUnconnectedOutLayers(); - m_outLayerType = m_net.getLayer(m_outLayers[0])->type; - - std::vector outputs; - std::vector internals; - m_net.getLayerShapes(cv::dnn::MatShape(), 0, outputs, internals); - std::cout << "getLayerShapes: outputs (" << outputs.size() << ") = " << (outputs.size() > 0 ? outputs[0].size() : 0) << ", internals (" << internals.size() << ") = " << (internals.size() > 0 ? 
internals[0].size() : 0) << std::endl; - if (outputs.size() && outputs[0].size() > 3) - { - std::cout << "outputs = [" << outputs[0][0] << ", " << outputs[0][1] << ", " << outputs[0][2] << ", " << outputs[0][3] << "], internals = [" << internals[0][0] << ", " << internals[0][1] << ", " << internals[0][2] << ", " << internals[0][3] << "]" << std::endl; - - m_inWidth = outputs[0][2]; - m_inHeight = outputs[0][3]; - } - } - m_WHRatio = static_cast(m_inWidth) / static_cast(m_inHeight); - - return !m_net.empty(); -} - -/// -/// \brief OCVDNNDetector::Detect -/// \param gray -/// -void OCVDNNDetector::Detect(const cv::UMat& colorFrame) -{ - m_regions.clear(); - - if (m_maxCropRatio <= 0) - { - DetectInCrop(colorFrame, cv::Rect(0, 0, colorFrame.cols, colorFrame.rows), m_regions); - } - else - { - std::vector crops = GetCrops(m_maxCropRatio, cv::Size(m_inWidth, m_inHeight), colorFrame.size()); - regions_t tmpRegions; - for (size_t i = 0; i < crops.size(); ++i) - { - const auto& crop = crops[i]; - //std::cout << "Crop " << i << ": " << crop << std::endl; - DetectInCrop(colorFrame, crop, tmpRegions); - } - - if (crops.size() > 1) - { - nms3(tmpRegions, m_regions, m_nmsThreshold, - [](const CRegion& reg) { return reg.m_brect; }, - [](const CRegion& reg) { return reg.m_confidence; }, - [](const CRegion& reg) { return reg.m_type; }, - 0, 0.f); - //std::cout << "nms for " << tmpRegions.size() << " objects - result " << m_regions.size() << std::endl; - } - } -} - -/// -/// \brief OCVDNNDetector::DetectInCrop -/// \param colorFrame -/// \param crop -/// \param tmpRegions -/// -void OCVDNNDetector::DetectInCrop(const cv::UMat& colorFrame, const cv::Rect& crop, regions_t& tmpRegions) -{ - //Convert Mat to batch of images - cv::dnn::blobFromImage(cv::UMat(colorFrame, crop), m_inputBlob, 1.0, cv::Size(m_inWidth, m_inHeight), m_meanVal, m_swapRB, false, CV_8U); - - m_net.setInput(m_inputBlob, "", m_inScaleFactor, m_meanVal); //set the network input - - if 
(m_net.getLayer(0)->outputNameToIndex("im_info") != -1) // Faster-RCNN or R-FCN - { - //cv::resize(frame, frame, cv::Size(m_inWidth, m_inHeight)); - cv::Mat imInfo = (cv::Mat_(1, 3) << m_inHeight, m_inWidth, 1.6f); - m_net.setInput(imInfo, "im_info"); - } - - std::vector detections; - m_net.forward(detections, m_outNames); //compute output - - if (m_outLayerType == "DetectionOutput") - { - // Network produces output blob with a shape 1x1xNx7 where N is a number of detections and an every detection is a vector of values - // [batchId, classId, confidence, left, top, right, bottom] - CV_Assert(detections.size() > 0); - for (size_t k = 0; k < detections.size(); ++k) - { - const float* data = reinterpret_cast(detections[k].data); - for (size_t i = 0; i < detections[k].total(); i += 7) - { - float confidence = data[i + 2]; - if (confidence > m_confidenceThreshold) - { - int left = (int)data[i + 3]; - int top = (int)data[i + 4]; - int right = (int)data[i + 5]; - int bottom = (int)data[i + 6]; - int width = right - left + 1; - int height = bottom - top + 1; - if (width <= 2 || height <= 2) - { - left = (int)(data[i + 3] * crop.width); - top = (int)(data[i + 4] * crop.height); - right = (int)(data[i + 5] * crop.width); - bottom = (int)(data[i + 6] * crop.height); - width = right - left + 1; - height = bottom - top + 1; - } - size_t objectClass = (int)(data[i + 1]) - 1; - if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(objectClass)) != std::end(m_classesWhiteList)) - tmpRegions.emplace_back(cv::Rect(left + crop.x, top + crop.y, width, height), T2T(objectClass), confidence); - } - } - } - } - else if (m_outLayerType == "Region") - { - for (size_t i = 0; i < detections.size(); ++i) - { - // Network produces output blob with a shape NxC where N is a number of detected objects and C is a number of classes + 4 where the first 4 - // numbers are [center_x, center_y, width, height] - const float* data = reinterpret_cast(detections[i].data); - for (int j = 0; j < 
detections[i].rows; ++j, data += detections[i].cols) - { - cv::Mat scores = detections[i].row(j).colRange(5, detections[i].cols); - cv::Point classIdPoint; - double confidence = 0; - minMaxLoc(scores, 0, &confidence, 0, &classIdPoint); - if (confidence > m_confidenceThreshold) - { - int centerX = (int)(data[0] * crop.width); - int centerY = (int)(data[1] * crop.height); - int width = (int)(data[2] * crop.width); - int height = (int)(data[3] * crop.height); - int left = centerX - width / 2; - int top = centerY - height / 2; - - if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(classIdPoint.x)) != std::end(m_classesWhiteList)) - tmpRegions.emplace_back(cv::Rect(left + crop.x, top + crop.y, width, height), T2T(classIdPoint.x), static_cast(confidence)); - } - } - } - } - else - { - CV_Error(cv::Error::StsNotImplemented, "Unknown output layer type: " + m_outLayerType); - } -} +#include +#include "OCVDNNDetector.h" +#include "mtracking/nms.h" + +/// +/// \brief OCVDNNDetector::OCVDNNDetector +/// \param colorFrame +/// +OCVDNNDetector::OCVDNNDetector(const cv::UMat& colorFrame) + : BaseDetector(colorFrame) + +{ + m_classNames = { "background", + "aeroplane", "bicycle", "bird", "boat", + "bottle", "bus", "car", "cat", "chair", + "cow", "diningtable", "dog", "horse", + "motorbike", "person", "pottedplant", + "sheep", "sofa", "train", "tvmonitor" }; +} + +/// +/// \brief OCVDNNDetector::OCVDNNDetector +/// \param colorFrame +/// +OCVDNNDetector::OCVDNNDetector(const cv::Mat& colorFrame) + : BaseDetector(colorFrame) + +{ + m_classNames = { "background", + "aeroplane", "bicycle", "bird", "boat", + "bottle", "bus", "car", "cat", "chair", + "cow", "diningtable", "dog", "horse", + "motorbike", "person", "pottedplant", + "sheep", "sofa", "train", "tvmonitor" }; +} + +/// +/// \brief OCVDNNDetector::Init +/// \return +/// +bool OCVDNNDetector::Init(const config_t& config) +{ +#if (((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR >= 2)) || (CV_VERSION_MAJOR > 4)) + 
std::map dictTargets; + dictTargets[cv::dnn::DNN_TARGET_CPU] = "DNN_TARGET_CPU"; + dictTargets[cv::dnn::DNN_TARGET_OPENCL] = "DNN_TARGET_OPENCL"; + dictTargets[cv::dnn::DNN_TARGET_OPENCL_FP16] = "DNN_TARGET_OPENCL_FP16"; + dictTargets[cv::dnn::DNN_TARGET_MYRIAD] = "DNN_TARGET_MYRIAD"; + dictTargets[cv::dnn::DNN_TARGET_CUDA] = "DNN_TARGET_CUDA"; + dictTargets[cv::dnn::DNN_TARGET_CUDA_FP16] = "DNN_TARGET_CUDA_FP16"; +#if (((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR >= 10)) || (CV_VERSION_MAJOR > 4)) + dictTargets[cv::dnn::DNN_TARGET_HDDL] = "DNN_TARGET_HDDL"; + dictTargets[cv::dnn::DNN_TARGET_NPU] = "DNN_TARGET_NPU"; + dictTargets[cv::dnn::DNN_TARGET_CPU_FP16] = "DNN_TARGET_CPU_FP16"; +#endif + + std::map dictBackends; + dictBackends[cv::dnn::DNN_BACKEND_DEFAULT] = "DNN_BACKEND_DEFAULT"; + dictBackends[cv::dnn::DNN_BACKEND_INFERENCE_ENGINE] = "DNN_BACKEND_INFERENCE_ENGINE"; + dictBackends[cv::dnn::DNN_BACKEND_OPENCV] = "DNN_BACKEND_OPENCV"; + dictBackends[cv::dnn::DNN_BACKEND_VKCOM] = "DNN_BACKEND_VKCOM"; + dictBackends[cv::dnn::DNN_BACKEND_CUDA] = "DNN_BACKEND_CUDA"; +#if (((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR >= 10)) || (CV_VERSION_MAJOR > 4)) + dictBackends[cv::dnn::DNN_BACKEND_WEBNN] = "DNN_BACKEND_WEBNN"; + dictBackends[cv::dnn::DNN_BACKEND_TIMVX] = "DNN_BACKEND_TIMVX"; + dictBackends[cv::dnn::DNN_BACKEND_CANN] = "DNN_BACKEND_CANN"; +#endif + dictBackends[1000000] = "DNN_BACKEND_INFERENCE_ENGINE_NGRAPH"; + dictBackends[1000000 + 1] = "DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019"; + + std::cout << "Avaible pairs for Target - backend:" << std::endl; + std::vector> pairs = cv::dnn::getAvailableBackends(); + for (auto p : pairs) + { + std::cout << dictBackends[p.first] << " (" << p.first << ") - " << dictTargets[p.second] << " (" << p.second << ")" << std::endl; + } +#endif + + auto modelConfiguration = config.find("modelConfiguration"); + auto modelBinary = config.find("modelBinary"); + if (modelConfiguration != config.end() && modelBinary != 
config.end()) + m_net = cv::dnn::readNet(modelConfiguration->second, modelBinary->second, ""); + + auto dnnTarget = config.find("dnnTarget"); + if (dnnTarget != config.end()) + { + std::map targets; + targets["DNN_TARGET_CPU"] = cv::dnn::DNN_TARGET_CPU; + targets["DNN_TARGET_OPENCL"] = cv::dnn::DNN_TARGET_OPENCL; +#if (CV_VERSION_MAJOR >= 4) + targets["DNN_TARGET_OPENCL_FP16"] = cv::dnn::DNN_TARGET_OPENCL_FP16; + targets["DNN_TARGET_MYRIAD"] = cv::dnn::DNN_TARGET_MYRIAD; +#endif +#if (((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR >= 2)) || (CV_VERSION_MAJOR > 4)) + targets["DNN_TARGET_CUDA"] = cv::dnn::DNN_TARGET_CUDA; + targets["DNN_TARGET_CUDA_FP16"] = cv::dnn::DNN_TARGET_CUDA_FP16; +#endif +#if (CV_VERSION_MAJOR > 4) + targets["DNN_TARGET_HDDL"] = cv::dnn::DNN_TARGET_HDDL; + targets["DNN_TARGET_NPU"] = cv::dnn::DNN_TARGET_NPU; + targets["DNN_TARGET_CPU_FP16"] = cv::dnn::DNN_TARGET_CPU_FP16; +#endif + + std::cout << "Trying to set target " << dnnTarget->second << "... "; + auto target = targets.find(dnnTarget->second); + if (target != std::end(targets)) + { + std::cout << "Succeded!" 
<< std::endl; + m_net.setPreferableTarget(target->second); + } + else + { + std::cout << "Failed" << std::endl; + } + } + +#if (CV_VERSION_MAJOR >= 4) + auto dnnBackend = config.find("dnnBackend"); + if (dnnBackend != config.end()) + { + std::map backends; + backends["DNN_BACKEND_DEFAULT"] = cv::dnn::DNN_BACKEND_DEFAULT; + backends["DNN_BACKEND_INFERENCE_ENGINE"] = cv::dnn::DNN_BACKEND_INFERENCE_ENGINE; + backends["DNN_BACKEND_OPENCV"] = cv::dnn::DNN_BACKEND_OPENCV; + backends["DNN_BACKEND_VKCOM"] = cv::dnn::DNN_BACKEND_VKCOM; +#if (((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR >= 2)) || (CV_VERSION_MAJOR > 4)) + backends["DNN_BACKEND_CUDA"] = cv::dnn::DNN_BACKEND_CUDA; +#endif +#if (CV_VERSION_MAJOR > 4) + backends["DNN_BACKEND_WEBNN"] = cv::dnn::DNN_BACKEND_WEBNN; + backends["DNN_BACKEND_TIMVX"] = cv::dnn::DNN_BACKEND_TIMVX; + backends["DNN_BACKEND_CANN"] = cv::dnn::DNN_BACKEND_CANN; +#endif + + std::cout << "Trying to set backend " << dnnBackend->second << "... "; + auto backend = backends.find(dnnBackend->second); + if (backend != std::end(backends)) + { + std::cout << "Succeded!" 
<< std::endl; + m_net.setPreferableBackend(backend->second); + } + else + { + std::cout << "Failed" << std::endl; + } + } +#endif + + auto net_type = config.find("net_type"); + if (net_type != config.end()) + { + std::map dictNetType; + dictNetType["YOLOV3"] = ModelType::YOLOV3; + dictNetType["YOLOV4"] = ModelType::YOLOV4; + dictNetType["YOLOV4_TINY"] = ModelType::YOLOV4_TINY; + dictNetType["YOLOV5"] = ModelType::YOLOV5; + dictNetType["YOLOV5_OBB"] = ModelType::YOLOV5_OBB; + dictNetType["YOLOV5Mask"] = ModelType::YOLOV5Mask; + dictNetType["YOLOV6"] = ModelType::YOLOV6; + dictNetType["YOLOV7"] = ModelType::YOLOV7; + dictNetType["YOLOV7Mask"] = ModelType::YOLOV7Mask; + dictNetType["YOLOV8"] = ModelType::YOLOV8; + dictNetType["YOLOV8_OBB"] = ModelType::YOLOV8_OBB; + dictNetType["YOLOV8Mask"] = ModelType::YOLOV8Mask; + dictNetType["YOLOV9"] = ModelType::YOLOV9; + dictNetType["YOLOV10"] = ModelType::YOLOV10; + dictNetType["YOLOV11"] = ModelType::YOLOV11; + dictNetType["YOLOV11_OBB"] = ModelType::YOLOV11_OBB; + dictNetType["YOLOV11Mask"] = ModelType::YOLOV11Mask; + dictNetType["YOLOV12"] = ModelType::YOLOV12; + dictNetType["RFDETR"] = ModelType::RFDETR; + dictNetType["RFDETR_IS"] = ModelType::RFDETR_IS; + dictNetType["DFINE"] = ModelType::DFINE; + dictNetType["YOLOV13"] = ModelType::YOLOV13; + dictNetType["DFINE_IS"] = ModelType::DFINE_IS; + dictNetType["YOLOV26"] = ModelType::YOLOV26; + dictNetType["YOLOV26_OBB"] = ModelType::YOLOV26_OBB; + dictNetType["YOLOV26Mask"] = ModelType::YOLOV26Mask; + + auto netType = dictNetType.find(net_type->second); + if (netType != dictNetType.end()) + m_netType = netType->second; + else + { + assert(netType == dictNetType.end()); + std::cerr << "net_type = " << net_type->second << ", " << (int)m_netType << std::endl; + } + + std::cout << "net_type = " << net_type->second << ", " << (int)m_netType << std::endl; + } + + auto classNames = config.find("classNames"); + if (classNames != config.end()) + { + std::ifstream 
classNamesFile(classNames->second); + if (classNamesFile.is_open()) + { + m_classNames.clear(); + std::string className; + for (; std::getline(classNamesFile, className); ) + { + className.erase(className.find_last_not_of(" \t\n\r\f\v") + 1); + m_classNames.push_back(className); + } + if (!FillTypesMap(m_classNames)) + { + std::cout << "Unknown types in class names!" << std::endl; + assert(0); + } + } + } + + m_classesWhiteList.clear(); + auto whiteRange = config.equal_range("white_list"); + for (auto it = whiteRange.first; it != whiteRange.second; ++it) + { + m_classesWhiteList.insert(TypeConverter::Str2Type(it->second)); + } + + auto confidenceThreshold = config.find("confidenceThreshold"); + if (confidenceThreshold != config.end()) + m_confidenceThreshold = std::stof(confidenceThreshold->second); + + auto nmsThreshold = config.find("nmsThreshold"); + if (nmsThreshold != config.end()) + m_nmsThreshold = std::stof(nmsThreshold->second); + + auto swapRB = config.find("swapRB"); + if (swapRB != config.end()) + m_swapRB = std::stoi(swapRB->second) != 0; + + auto maxCropRatio = config.find("maxCropRatio"); + if (maxCropRatio != config.end()) + m_maxCropRatio = std::stof(maxCropRatio->second); + + auto inWidth = config.find("inWidth"); + if (inWidth != config.end()) + m_inWidth = std::stoi(inWidth->second); + + auto inHeight = config.find("inHeight"); + if (inHeight != config.end()) + m_inHeight = std::stoi(inHeight->second); + + if (!m_net.empty()) + { + m_outNames = m_net.getUnconnectedOutLayersNames(); + m_outLayers = m_net.getUnconnectedOutLayers(); + assert(!m_outLayers.empty()); + + m_outLayerTypes.clear(); + for (auto it : m_outLayers) + { + m_outLayerTypes.push_back(m_net.getLayer(it)->type); + } + + std::cout << "outNames: "; + for (auto it : m_outNames) + { + std::cout << it << " | "; + } + std::cout << std::endl; + + std::cout << "outLayerType: "; + for (auto it : m_outLayerTypes) + { + std::cout << it << " | "; + } + std::cout << std::endl; + +#if 
(CV_VERSION_MAJOR < 5) + std::vector outputs; + std::vector internals; + m_net.getLayerShapes(cv::dnn::MatShape(), 0, outputs, internals); +#else + std::vector outputs; + std::vector internals; + m_net.getLayerShapes(cv::MatShape(), CV_32F, 0, outputs, internals); +#endif + std::cout << "getLayerShapes: outputs (" << outputs.size() << ") = " << (outputs.size() > 0 ? outputs[0].size() : 0) << ", internals (" << internals.size() << ") = " << (internals.size() > 0 ? internals[0].size() : 0) << std::endl; + if (outputs.size() && outputs[0].size() > 3) + { + std::cout << "outputs: "; + for (size_t i = 0; i < outputs.size(); ++i) + { +#if (CV_VERSION_MAJOR < 5) + std::cout << i << ": ["; + for (size_t j = 0; j < outputs[i].size(); ++j) + { + std::cout << outputs[i][j] << " "; + } + std::cout << "]"; +#else + std::cout << i << ": " << outputs[i].str(); +#endif + } + std::cout << std::endl; + + std::cout << "internals: "; + for (size_t i = 0; i < internals.size(); ++i) + { +#if (CV_VERSION_MAJOR < 5) + std::cout << i << ": ["; + for (size_t j = 0; j < internals[i].size(); ++j) + { + std::cout << internals[i][j] << " "; + } + std::cout << "]"; +#else + std::cout << i << ": " << internals[i].str(); +#endif + } + std::cout << std::endl; + + if (!m_inWidth || !m_inHeight) + { + m_inWidth = outputs[0][2]; + m_inHeight = outputs[0][3]; + } + } + } + if (!m_inWidth || !m_inHeight) + { + m_inWidth = 608; + m_inHeight = 608; + } + m_WHRatio = static_cast(m_inWidth) / static_cast(m_inHeight); + + std::cout << "input size: " << cv::Size(m_inWidth, m_inHeight) << ", m_WHRatio = " << m_WHRatio << std::endl; + + return !m_net.empty(); +} + +/// +/// \brief OCVDNNDetector::Detect +/// \param gray +/// +void OCVDNNDetector::Detect(const cv::UMat& colorFrame) +{ + m_regions.clear(); + + regions_t tmpRegions; + if (m_maxCropRatio <= 0) + { + DetectInCrop(colorFrame, cv::Rect(0, 0, colorFrame.cols, colorFrame.rows), tmpRegions); + } + else + { + std::vector crops = GetCrops(m_maxCropRatio, 
cv::Size(m_inWidth, m_inHeight), colorFrame.size()); + for (size_t i = 0; i < crops.size(); ++i) + { + const auto& crop = crops[i]; + //std::cout << "Crop " << i << ": " << crop << std::endl; + DetectInCrop(colorFrame, crop, tmpRegions); + } + } + nms3(tmpRegions, m_regions, m_nmsThreshold, + [](const CRegion& reg) { return reg.m_brect; }, + [](const CRegion& reg) { return reg.m_confidence; }, + [](const CRegion& reg) { return reg.m_type; }, + 0, static_cast(0)); +} + +/// +/// \brief OCVDNNDetector::DetectInCrop +/// \param colorFrame +/// \param crop +/// \param tmpRegions +/// +void OCVDNNDetector::DetectInCrop(const cv::UMat& colorFrame, const cv::Rect& crop, regions_t& tmpRegions) +{ + //Convert Mat to batch of images + cv::dnn::blobFromImage(colorFrame(crop), m_inputBlob, 1.0, cv::Size(m_inWidth, m_inHeight), m_meanVal, m_swapRB, false, CV_8U); + + m_net.setInput(m_inputBlob, "", m_inScaleFactor, m_meanVal); //set the network input + + if (m_net.getLayer(0)->outputNameToIndex("im_info") != -1) // Faster-RCNN or R-FCN + { + //cv::resize(frame, frame, cv::Size(m_inWidth, m_inHeight)); + cv::Mat imInfo = (cv::Mat_(1, 3) << m_inHeight, m_inWidth, 1.6f); + m_net.setInput(imInfo, "im_info"); + } + + std::vector detections; + m_net.forward(detections, m_outNames); //compute output + + switch (m_netType) + { + case ModelType::YOLOV5: + ParseYOLOv5(crop, detections, tmpRegions); + break; + case ModelType::YOLOV8: + ParseYOLOv8(crop, detections, tmpRegions); + break; + case ModelType::YOLOV9: + ParseYOLOv9(crop, detections, tmpRegions); + break; + case ModelType::YOLOV10: + ParseYOLOv10(crop, detections, tmpRegions); + break; + case ModelType::YOLOV11: + ParseYOLOv11(crop, detections, tmpRegions); + break; + case ModelType::YOLOV12: + ParseYOLOv11(crop, detections, tmpRegions); + break; + + case ModelType::YOLOV5_OBB: + case ModelType::YOLOV8_OBB: + case ModelType::YOLOV11_OBB: + ParseYOLOv5_8_11_obb(crop, detections, tmpRegions); + break; + + case 
ModelType::YOLOV5Mask: + case ModelType::YOLOV8Mask: + case ModelType::YOLOV11Mask: + ParseYOLOv5_8_11_seg(crop, detections, tmpRegions); + break; + + case ModelType::RFDETR: + ParseRFDETR(crop, detections, tmpRegions); + break; + + case ModelType::RFDETR_IS: + ParseRFDETR_IS(crop, detections, tmpRegions); + break; + + case ModelType::DFINE: + ParseDFINE(crop, detections, tmpRegions); + break; + + case ModelType::YOLOV13: + ParseYOLOv11(crop, detections, tmpRegions); + break; + + case ModelType::DFINE_IS: + ParseDFINE_IS(crop, detections, tmpRegions); + break; + + case ModelType::YOLOV26: + ParseYOLOv26(crop, detections, tmpRegions); + break; + + case ModelType::YOLOV26_OBB: + ParseYOLOv26_obb(crop, detections, tmpRegions); + break; + + case ModelType::YOLOV26Mask: + ParseYOLOv26_seg(crop, detections, tmpRegions); + break; + + default: + ParseOldYOLO(crop, detections, tmpRegions); + break; + } +} + +/// +/// \brief OCVDNNDetector::ParseOldYOLO +/// \param crop +/// \param detections +/// \param tmpRegions +/// +void OCVDNNDetector::ParseOldYOLO(const cv::Rect& crop, const std::vector& detections, regions_t& tmpRegions) +{ + if (m_outLayerTypes[0] == "DetectionOutput") + { + // Network produces output blob with a shape 1x1xNx7 where N is a number of detections and an every detection is a vector of values + // [batchId, classId, confidence, left, top, right, bottom] + CV_Assert(detections.size() > 0); + for (size_t k = 0; k < detections.size(); ++k) + { + const float* data = reinterpret_cast(detections[k].data); + for (size_t i = 0; i < detections[k].total(); i += 7) + { + float confidence = data[i + 2]; + if (confidence > m_confidenceThreshold) + { + int left = (int)data[i + 3]; + int top = (int)data[i + 4]; + int right = (int)data[i + 5]; + int bottom = (int)data[i + 6]; + int width = right - left + 1; + int height = bottom - top + 1; + if (width <= 2 || height <= 2) + { + left = cvRound(data[i + 3] * crop.width); + top = cvRound(data[i + 4] * crop.height); + right 
= cvRound(data[i + 5] * crop.width); + bottom = cvRound(data[i + 6] * crop.height); + width = right - left + 1; + height = bottom - top + 1; + } + size_t objectClass = (int)(data[i + 1]) - 1; + if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(objectClass)) != std::end(m_classesWhiteList)) + tmpRegions.emplace_back(cv::Rect(left + crop.x, top + crop.y, width, height), T2T(objectClass), confidence); + } + } + } + } + else if (m_outLayerTypes[0] == "Region") + { + for (size_t i = 0; i < detections.size(); ++i) + { + // Network produces output blob with a shape NxC where N is a number of detected objects and C is a number of classes + 4 where the first 4 + // numbers are [center_x, center_y, width, height] + const float* data = reinterpret_cast(detections[i].data); + for (int j = 0; j < detections[i].rows; ++j, data += detections[i].cols) + { + cv::Mat scores = detections[i].row(j).colRange(5, detections[i].cols); + cv::Point classIdPoint; + double confidence = 0; + cv::minMaxLoc(scores, 0, &confidence, 0, &classIdPoint); + if (confidence > m_confidenceThreshold) + { + int centerX = cvRound(data[0] * crop.width); + int centerY = cvRound(data[1] * crop.height); + int width = cvRound(data[2] * crop.width); + int height = cvRound(data[3] * crop.height); + int left = centerX - width / 2; + int top = centerY - height / 2; + + if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(classIdPoint.x)) != std::end(m_classesWhiteList)) + tmpRegions.emplace_back(cv::Rect(left + crop.x, top + crop.y, width, height), T2T(classIdPoint.x), static_cast(confidence)); + } + } + } + } + else + { + CV_Error(cv::Error::StsNotImplemented, "OCVDNNDetector::ParseOldYOLO: Unknown output layer type: " + m_outLayerTypes[0] + ", net type " + std::to_string((int)m_netType)); + } +} + +/// +/// \brief OCVDNNDetector::ParseYOLOv5 +/// \param crop +/// \param detections +/// \param tmpRegions +/// +void OCVDNNDetector::ParseYOLOv5(const cv::Rect& crop, std::vector& detections, 
regions_t& tmpRegions) +{ + int rows = detections[0].size[1]; + int dimensions = detections[0].size[2]; + + // yolov5 has an output of shape (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c]) + // yolov8 has an output of shape (batchSize, 84, 8400) (Num classes + box[x,y,w,h]) + if (dimensions > rows) // Check if the shape[2] is more than shape[1] (yolov8) + { + rows = detections[0].size[2]; + dimensions = detections[0].size[1]; + + detections[0] = detections[0].reshape(1, dimensions); + cv::transpose(detections[0], detections[0]); + } + float* data = (float*)detections[0].data; + + float x_factor = crop.width / static_cast(m_inWidth); + float y_factor = crop.height / static_cast(m_inHeight); + + for (int i = 0; i < rows; ++i) + { + float confidence = data[4]; + + if (confidence >= m_confidenceThreshold) + { + float* classes_scores = data + 5; + + cv::Mat scores(1, static_cast(m_classNames.size()), CV_32FC1, classes_scores); + cv::Point class_id; + double maxClassScore = 0; + cv::minMaxLoc(scores, 0, &maxClassScore, 0, &class_id); + + if (maxClassScore > m_confidenceThreshold) + { + float x = data[0]; + float y = data[1]; + float w = data[2]; + float h = data[3]; + + int left = cvRound((x - 0.5f * w) * x_factor); + int top = cvRound((y - 0.5f * h) * y_factor); + + int width = cvRound(w * x_factor); + int height = cvRound(h * y_factor); + + if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(class_id.x)) != std::end(m_classesWhiteList)) + tmpRegions.emplace_back(cv::Rect(left + crop.x, top + crop.y, width, height), T2T(class_id.x), static_cast(maxClassScore)); + } + } + data += dimensions; + } +} + +/// +/// \brief OCVDNNDetector::ParseYOLOv8 +/// \param crop +/// \param detections +/// \param tmpRegions +/// +void OCVDNNDetector::ParseYOLOv8(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions) +{ + int rows = detections[0].size[1]; + int dimensions = detections[0].size[2]; + + // yolov5 has an output of shape (batchSize, 
25200, 85) (Num classes + box[x,y,w,h] + confidence[c]) + // yolov8 has an output of shape (batchSize, 84, 8400) (Num classes + box[x,y,w,h]) + if (dimensions > rows) // Check if the shape[2] is more than shape[1] (yolov8) + { + rows = detections[0].size[2]; + dimensions = detections[0].size[1]; + + detections[0] = detections[0].reshape(1, dimensions); + cv::transpose(detections[0], detections[0]); + } + float* data = (float*)detections[0].data; + + float x_factor = crop.width / static_cast(m_inWidth); + float y_factor = crop.height / static_cast(m_inHeight); + + for (int i = 0; i < rows; ++i) + { + float* classes_scores = data + 4; + + cv::Mat scores(1, static_cast(m_classNames.size()), CV_32FC1, classes_scores); + cv::Point class_id; + double maxClassScore = 0; + cv::minMaxLoc(scores, 0, &maxClassScore, 0, &class_id); + + if (maxClassScore > m_confidenceThreshold) + { + float x = data[0]; + float y = data[1]; + float w = data[2]; + float h = data[3]; + + int left = cvRound((x - 0.5f * w) * x_factor); + int top = cvRound((y - 0.5f * h) * y_factor); + + int width = cvRound(w * x_factor); + int height = cvRound(h * y_factor); + + if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(class_id.x)) != std::end(m_classesWhiteList)) + tmpRegions.emplace_back(cv::Rect(left + crop.x, top + crop.y, width, height), T2T(class_id.x), static_cast(maxClassScore)); + } + data += dimensions; + } +} + +/// +/// \brief OCVDNNDetector::ParseYOLOv9 +/// \param crop +/// \param detections +/// \param tmpRegions +/// +void OCVDNNDetector::ParseYOLOv9(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions) +{ + int rows = detections[0].size[1]; + int dimensions = detections[0].size[2]; + + // yolov5 has an output of shape (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c]) + // yolov8 has an output of shape (batchSize, 84, 8400) (Num classes + box[x,y,w,h]) + if (dimensions > rows) // Check if the shape[2] is more than shape[1] (yolov8) + { + rows = 
detections[0].size[2]; + dimensions = detections[0].size[1]; + + detections[0] = detections[0].reshape(1, dimensions); + cv::transpose(detections[0], detections[0]); + } + float* data = (float*)detections[0].data; + + float x_factor = crop.width / static_cast(m_inWidth); + float y_factor = crop.height / static_cast(m_inHeight); + + for (int i = 0; i < rows; ++i) + { + float* classes_scores = data + 4; + + cv::Mat scores(1, static_cast(m_classNames.size()), CV_32FC1, classes_scores); + cv::Point class_id; + double maxClassScore = 0; + cv::minMaxLoc(scores, 0, &maxClassScore, 0, &class_id); + + if (maxClassScore > m_confidenceThreshold) + { + float x = data[0]; + float y = data[1]; + float w = data[2]; + float h = data[3]; + + int left = cvRound((x - 0.5f * w) * x_factor); + int top = cvRound((y - 0.5f * h) * y_factor); + + int width = cvRound(w * x_factor); + int height = cvRound(h * y_factor); + + if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(class_id.x)) != std::end(m_classesWhiteList)) + tmpRegions.emplace_back(cv::Rect(left + crop.x, top + crop.y, width, height), T2T(class_id.x), static_cast(maxClassScore)); + } + data += dimensions; + } +} + +/// +/// \brief OCVDNNDetector::ParseYOLOv10 +/// \param crop +/// \param detections +/// \param tmpRegions +/// +void OCVDNNDetector::ParseYOLOv10(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions) +{ + int rows = detections[0].size[1]; + int dimensions = detections[0].size[2]; + + // yolov5 has an output of shape (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c]) + // yolov8 has an output of shape (batchSize, 84, 8400) (Num classes + box[x,y,w,h]) + if (dimensions > rows) // Check if the shape[2] is more than shape[1] (yolov8) + { + rows = detections[0].size[2]; + dimensions = detections[0].size[1]; + + detections[0] = detections[0].reshape(1, dimensions); + cv::transpose(detections[0], detections[0]); + } + float* data = (float*)detections[0].data; + + float x_factor = 
crop.width / static_cast(m_inWidth); + float y_factor = crop.height / static_cast(m_inHeight); + + for (int i = 0; i < rows; ++i) + { + int left = cvRound(x_factor * data[0]); + int top = cvRound(y_factor * data[1]); + int width = cvRound(x_factor * (data[2] - data[0])); + int height = cvRound(y_factor * (data[3] - data[1])); + float confidence = data[4]; + int classId = cvRound(data[5]); + + if (confidence >= m_confidenceThreshold) + { + if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(classId)) != std::end(m_classesWhiteList)) + tmpRegions.emplace_back(cv::Rect(left + crop.x, top + crop.y, width, height), T2T(classId), confidence); + } + data += dimensions; + } +} + +/// +/// \brief OCVDNNDetector::ParseYOLOv11 +/// \param crop +/// \param detections +/// \param tmpRegions +/// +void OCVDNNDetector::ParseYOLOv11(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions) +{ + int rows = detections[0].size[1]; + int dimensions = detections[0].size[2]; + + // yolov5 has an output of shape (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c]) + // yolov8 has an output of shape (batchSize, 84, 8400) (Num classes + box[x,y,w,h]) + if (dimensions > rows) // Check if the shape[2] is more than shape[1] (yolov8) + { + rows = detections[0].size[2]; + dimensions = detections[0].size[1]; + + detections[0] = detections[0].reshape(1, dimensions); + cv::transpose(detections[0], detections[0]); + } + float* data = (float*)detections[0].data; + + float x_factor = crop.width / static_cast(m_inWidth); + float y_factor = crop.height / static_cast(m_inHeight); + + for (int i = 0; i < rows; ++i) + { + float* classes_scores = data + 4; + + cv::Mat scores(1, static_cast(m_classNames.size()), CV_32FC1, classes_scores); + cv::Point class_id; + double maxClassScore = 0; + cv::minMaxLoc(scores, 0, &maxClassScore, 0, &class_id); + + if (maxClassScore > m_confidenceThreshold) + { + float x = data[0]; + float y = data[1]; + float w = data[2]; + float h = 
data[3]; + + int left = cvRound((x - 0.5f * w) * x_factor); + int top = cvRound((y - 0.5f * h) * y_factor); + + int width = cvRound(w * x_factor); + int height = cvRound(h * y_factor); + + if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(class_id.x)) != std::end(m_classesWhiteList)) + tmpRegions.emplace_back(cv::Rect(left + crop.x, top + crop.y, width, height), T2T(class_id.x), static_cast(maxClassScore)); + } + data += dimensions; + } +} + +/// +/// \brief OCVDNNDetector::ParseYOLOv5_8_11_obb +/// \param crop +/// \param detections +/// \param tmpRegions +/// +void OCVDNNDetector::ParseYOLOv5_8_11_obb(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions) +{ + int rows = detections[0].size[1]; + int dimensions = detections[0].size[2]; + + // yolov5 has an output of shape (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c]) + // yolov8 has an output of shape (batchSize, 84, 8400) (Num classes + box[x,y,w,h]) + if (dimensions > rows) // Check if the shape[2] is more than shape[1] (yolov8) + { + rows = detections[0].size[2]; + dimensions = detections[0].size[1]; + + detections[0] = detections[0].reshape(1, dimensions); + cv::transpose(detections[0], detections[0]); + } + float* data = (float*)detections[0].data; + + float x_factor = crop.width / static_cast(m_inWidth); + float y_factor = crop.height / static_cast(m_inHeight); + + for (int i = 0; i < rows; ++i) + { + float* classes_scores = data + 4; + + cv::Mat scores(1, static_cast(m_classNames.size()), CV_32FC1, classes_scores); + cv::Point class_id; + double maxClassScore = 0; + cv::minMaxLoc(scores, 0, &maxClassScore, 0, &class_id); + + if (maxClassScore > m_confidenceThreshold) + { + float x = data[0] * x_factor + crop.x; + float y = data[1] * y_factor + crop.y; + float w = data[2] * x_factor; + float h = data[3] * y_factor; + float angle = 180.f * data[4 + scores.cols] / static_cast(M_PI); + + if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(class_id.x)) 
!= std::end(m_classesWhiteList)) + tmpRegions.emplace_back(cv::RotatedRect(cv::Point2f(x, y), cv::Size2f(w, h), angle), T2T(class_id.x), static_cast(maxClassScore)); + } + data += dimensions; + } +} + +/// +/// \brief OCVDNNDetector::ParseYOLOv5_8_11_seg +/// \param crop +/// \param detections +/// \param tmpRegions +/// +void OCVDNNDetector::ParseYOLOv5_8_11_seg(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions) +{ + int rows = detections[0].size[1]; + int dimensions = detections[0].size[2]; + + // yolov5 has an output of shape (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c]) + // yolov8 has an output of shape (batchSize, 84, 8400) (Num classes + box[x,y,w,h]) + if (dimensions > rows) // Check if the shape[2] is more than shape[1] (yolov8) + { + rows = detections[0].size[2]; + dimensions = detections[0].size[1]; + + detections[0] = detections[0].reshape(1, dimensions); + cv::transpose(detections[0], detections[0]); + } + float* data = (float*)detections[0].data; + + float x_factor = crop.width / static_cast(m_inWidth); + float y_factor = crop.height / static_cast(m_inHeight); + + for (int i = 0; i < rows; ++i) + { + float* classes_scores = data + 4; + + cv::Mat scores(1, static_cast(m_classNames.size()), CV_32FC1, classes_scores); + cv::Point class_id; + double maxClassScore = 0; + cv::minMaxLoc(scores, 0, &maxClassScore, 0, &class_id); + + if (maxClassScore > m_confidenceThreshold) + { + float x = data[0] * x_factor + crop.x; + float y = data[1] * y_factor + crop.y; + float w = data[2] * x_factor; + float h = data[3] * y_factor; + //float angle = 180.f * data[4 + scores.cols] / M_PI; + + if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(class_id.x)) != std::end(m_classesWhiteList)) + tmpRegions.emplace_back(cv::RotatedRect(cv::Point2f(x, y), cv::Size2f(w, h), 0), T2T(class_id.x), static_cast(maxClassScore)); + } + data += dimensions; + } +} + +/// +/// \brief OCVDNNDetector::ParseRFDETR +/// \param crop +/// 
\param detections +/// \param tmpRegions +/// +void OCVDNNDetector::ParseRFDETR(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions) +{ + int rows = detections[0].size[1]; + int dimensionsDets = detections[0].size[2]; + int dimensionsLabels = detections[1].size[2]; + + //0: name: input, size : 1x3x560x560 + //1: name: dets, size : 1x300x4 + //2: name: labels, size : 1x300x91 + + float* dets = (float*)detections[0].data; + float* labels = (float*)detections[1].data; + + float x_factor = crop.width / static_cast(m_inWidth); + float y_factor = crop.height / static_cast(m_inHeight); + + auto L2Conf = [](float v) + { + return 1.f / (1.f + std::exp(-v)); + }; + + for (int i = 0; i < rows; ++i) + { + float maxClassScore = L2Conf(labels[0]); + size_t classId = 0; + for (size_t cli = 1; cli < static_cast(dimensionsLabels); ++cli) + { + auto conf = L2Conf(labels[cli]); + if (maxClassScore < conf) + { + maxClassScore = conf; + classId = cli; + } + } + if (classId > 0) + --classId; + + if (maxClassScore > m_confidenceThreshold) + { + float x = dets[0]; + float y = dets[1]; + float w = dets[2]; + float h = dets[3]; + + int left = cvRound((x - 0.5f * w) * x_factor); + int top = cvRound((y - 0.5f * h) * y_factor); + + int width = cvRound(w * x_factor); + int height = cvRound(h * y_factor); + + if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(classId)) != std::end(m_classesWhiteList)) + tmpRegions.emplace_back(cv::Rect(left + crop.x, top + crop.y, width, height), T2T(classId), static_cast(maxClassScore)); + } + dets += dimensionsDets; + labels += dimensionsLabels; + } +} + +/// +/// \brief OCVDNNDetector::ParseRFDETR_IS +/// \param crop +/// \param detections +/// \param tmpRegions +/// +void OCVDNNDetector::ParseRFDETR_IS(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions) +{ + int rows = detections[0].size[1]; + int dimensionsDets = detections[0].size[2]; + int dimensionsLabels = detections[1].size[2]; + + //0: name: input, size : 
1x3x560x560 + //1: name: dets, size : 1x300x4 + //2: name: labels, size : 1x300x91 + + float* dets = (float*)detections[0].data; + float* labels = (float*)detections[1].data; + + float x_factor = crop.width / static_cast(m_inWidth); + float y_factor = crop.height / static_cast(m_inHeight); + + auto L2Conf = [](float v) + { + return 1.f / (1.f + std::exp(-v)); + }; + + for (int i = 0; i < rows; ++i) + { + float maxClassScore = L2Conf(labels[0]); + size_t classId = 0; + for (size_t cli = 1; cli < static_cast(dimensionsLabels); ++cli) + { + auto conf = L2Conf(labels[cli]); + if (maxClassScore < conf) + { + maxClassScore = conf; + classId = cli; + } + } + if (classId > 0) + --classId; + + if (maxClassScore > m_confidenceThreshold) + { + float x = dets[0]; + float y = dets[1]; + float w = dets[2]; + float h = dets[3]; + + int left = cvRound((x - 0.5f * w) * x_factor); + int top = cvRound((y - 0.5f * h) * y_factor); + + int width = cvRound(w * x_factor); + int height = cvRound(h * y_factor); + + if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(classId)) != std::end(m_classesWhiteList)) + tmpRegions.emplace_back(cv::Rect(left + crop.x, top + crop.y, width, height), T2T(classId), static_cast(maxClassScore)); + } + dets += dimensionsDets; + labels += dimensionsLabels; + } +} + +/// +/// \brief OCVDNNDetector::ParseDFINE +/// \param crop +/// \param detections +/// \param tmpRegions +/// +void OCVDNNDetector::ParseDFINE(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions) +{ + int rows = detections[0].size[1]; + + //0: name: images, size : 1x3x640x640 + //1: name: orig_target_sizes, size : 1x2 + //2: name: labels, size : 1x300 + //3: name: boxes, size : 1x300x4 + //4: name: scores, size : 1x300 + + int64_t* labels = (int64_t*)detections[0].data; + float* dets = (float*)detections[1].data; + float* scores = (float*)detections[2].data; + + float x_factor = crop.width / static_cast(m_inWidth); + float y_factor = crop.height / 
static_cast(m_inHeight); + + for (int i = 0; i < rows; ++i) + { + float maxClassScore = scores[i]; + size_t classId = labels[i]; + + if (maxClassScore > m_confidenceThreshold) + { + float x = dets[4 * i + 0]; + float y = dets[4 * i + 1]; + float w = dets[4 * i + 2] - x; + float h = dets[4 * i + 3] - y; + + int left = cvRound(x * x_factor); + int top = cvRound(y * y_factor); + + int width = cvRound(w * x_factor); + int height = cvRound(h * y_factor); + + if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(classId)) != std::end(m_classesWhiteList)) + tmpRegions.emplace_back(cv::Rect(left + crop.x, top + crop.y, width, height), T2T(classId), static_cast(maxClassScore)); + } + } +} + +/// +/// \brief OCVDNNDetector::ParseDFINE_IS +/// \param crop +/// \param detections +/// \param tmpRegions +/// +void OCVDNNDetector::ParseDFINE_IS(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions) +{ + assert(0); +} + +/// +/// \brief OCVDNNDetector::ParseYOLOv26 +/// \param crop +/// \param detections +/// \param tmpRegions +/// +void OCVDNNDetector::ParseYOLOv26(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions) +{ + int rows = detections[0].size[1]; + + //0: name: images, size: 1x3x640x640 + //1: name: output0, size: 1x300x6 + + float* dets = (float*)detections[0].data; + + float x_factor = crop.width / static_cast(m_inWidth); + float y_factor = crop.height / static_cast(m_inHeight); + + //std::cout << "detections: " << rows << std::endl; + + for (int i = 0; i < rows; ++i) + { + auto ind = 6 * i; + + float maxClassScore = dets[ind + 4]; + size_t classId = static_cast(dets[ind + 5]); + + if (maxClassScore > m_confidenceThreshold) + { + float x = dets[ind + 0]; + float y = dets[ind + 1]; + float w = dets[ind + 2] - x; + float h = dets[ind + 3] - y; + + int left = cvRound(x * x_factor); + int top = cvRound(y * y_factor); + + int width = cvRound(w * x_factor); + int height = cvRound(h * y_factor); + + //std::cout << "ind: " << ind << ", 
score = " << maxClassScore << ", class = " << classId << ", rect = " << cv::Rect(left, top, width, height) << std::endl; + + if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(classId)) != std::end(m_classesWhiteList)) + tmpRegions.emplace_back(cv::Rect(left + crop.x, top + crop.y, width, height), T2T(classId), static_cast(maxClassScore)); + } + } +} + +/// +/// \brief OCVDNNDetector::ParseYOLOv26_obb +/// \param crop +/// \param detections +/// \param tmpRegions +/// +void OCVDNNDetector::ParseYOLOv26_obb(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions) +{ + int rows = detections[0].size[1]; + + //0: name: images, size: 1x3x1024x1024 + //1: name: output0, size: 1x300x7 + + float* dets = (float*)detections[0].data; + + float x_factor = crop.width / static_cast(m_inWidth); + float y_factor = crop.height / static_cast(m_inHeight); + + //std::cout << "detections: " << rows << std::endl; + + for (int i = 0; i < rows; ++i) + { + auto ind = 7 * i; + + float maxClassScore = dets[ind + 4]; + size_t classId = static_cast(dets[ind + 5]); + + if (maxClassScore > m_confidenceThreshold) + { + float x = dets[ind + 0] * x_factor; + float y = dets[ind + 1] * y_factor; + float w = dets[ind + 2] * x_factor; + float h = dets[ind + 3] * y_factor; + float angle = 180.f * dets[ind + 6] / static_cast(M_PI); + + //std::cout << "ind: " << ind << ", score = " << maxClassScore << ", class = " << classId << ", rect = " << cv::Rect(left, top, width, height) << std::endl; + + if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(classId)) != std::end(m_classesWhiteList)) + tmpRegions.emplace_back(cv::RotatedRect(cv::Point2f(x + crop.x, y + crop.y), cv::Size2f(w, h), angle), T2T(classId), static_cast(maxClassScore)); + } + } +} + +/// +/// \brief OCVDNNDetector::ParseYOLOv26_seg +/// \param crop +/// \param detections +/// \param tmpRegions +/// +void OCVDNNDetector::ParseYOLOv26_seg(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions) +{ 
+ int rows = detections[0].size[1]; + + //0: name: images, size: 1x3x640x640 + //1: name: output0, size: 1x300x38 + //2: name: output1, size: 1x32x160x160 + + float* dets = (float*)detections[0].data; + + float x_factor = crop.width / static_cast(m_inWidth); + float y_factor = crop.height / static_cast(m_inHeight); + + //std::cout << "detections: " << rows << std::endl; + + for (int i = 0; i < rows; ++i) + { + auto ind = 38 * i; + + float maxClassScore = dets[ind + 4]; + size_t classId = static_cast(dets[ind + 5]); + + if (maxClassScore > m_confidenceThreshold) + { + float x = dets[ind + 0]; + float y = dets[ind + 1]; + float w = dets[ind + 2] - x; + float h = dets[ind + 3] - y; + + int left = cvRound(x * x_factor); + int top = cvRound(y * y_factor); + + int width = cvRound(w * x_factor); + int height = cvRound(h * y_factor); + + //std::cout << "ind: " << ind << ", score = " << maxClassScore << ", class = " << classId << ", rect = " << cv::Rect(left, top, width, height) << std::endl; + + if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(classId)) != std::end(m_classesWhiteList)) + tmpRegions.emplace_back(cv::Rect(left + crop.x, top + crop.y, width, height), T2T(classId), static_cast(maxClassScore)); + } + } +} diff --git a/src/Detector/OCVDNNDetector.h b/src/Detector/OCVDNNDetector.h index 0ceb4bdfa..3a55dd675 100644 --- a/src/Detector/OCVDNNDetector.h +++ b/src/Detector/OCVDNNDetector.h @@ -1,46 +1,102 @@ -#pragma once - -#include "BaseDetector.h" - -#include -#include - -/// -/// \brief The OCVDNNDetector class -/// -class OCVDNNDetector : public BaseDetector -{ -public: - OCVDNNDetector(const cv::UMat& colorFrame); - ~OCVDNNDetector(void) = default; - - bool Init(const config_t& config); - - void Detect(const cv::UMat& colorFrame); - - bool CanGrayProcessing() const - { - return false; - } - -private: - cv::dnn::Net m_net; - - void DetectInCrop(const cv::UMat& colorFrame, const cv::Rect& crop, regions_t& tmpRegions); - - int m_inWidth = 608; - int 
m_inHeight = 608; - - float m_WHRatio = 1.f; - float m_inScaleFactor = 0.003921f; - float m_meanVal = 0.f; - float m_confidenceThreshold = 0.24f; - float m_nmsThreshold = 0.4f; - bool m_swapRB = false; - float m_maxCropRatio = 2.0f; - std::vector m_classNames; - std::vector m_outNames; - std::vector m_outLayers; - std::string m_outLayerType; - cv::UMat m_inputBlob; -}; +#pragma once + +#ifdef USE_OCV_DNN + +#include "BaseDetector.h" + +#include +#include + +/// +/// \brief The OCVDNNDetector class +/// +class OCVDNNDetector final : public BaseDetector +{ +public: + OCVDNNDetector(const cv::UMat& colorFrame); + OCVDNNDetector(const cv::Mat& colorFrame); + ~OCVDNNDetector(void) = default; + + bool Init(const config_t& config) override; + + void Detect(const cv::UMat& colorFrame) override; + + bool CanGrayProcessing() const override + { + return false; + } + +private: + enum class ModelType + { + Unknown, + YOLOV3, + YOLOV3_TINY, + YOLOV4, + YOLOV4_TINY, + YOLOV5, + YOLOV5_OBB, + YOLOV5Mask, + YOLOV6, + YOLOV7, + YOLOV7Mask, + YOLOV8, + YOLOV8_OBB, + YOLOV8Mask, + YOLOV9, + YOLOV10, + YOLOV11, + YOLOV11_OBB, + YOLOV11Mask, + YOLOV12, + RFDETR, + RFDETR_IS, + DFINE, + YOLOV13, + DFINE_IS, + YOLOV26, + YOLOV26_OBB, + YOLOV26Mask + }; + + cv::dnn::Net m_net; + + void DetectInCrop(const cv::UMat& colorFrame, const cv::Rect& crop, regions_t& tmpRegions); + + int m_inWidth = 608; + int m_inHeight = 608; + + float m_WHRatio = 1.f; + double m_inScaleFactor = 0.003921; // 1 / 255 + //double m_inScaleFactor = 1.0; + cv::Scalar m_meanVal = {0, 0, 0}; + float m_confidenceThreshold = 0.24f; + track_t m_nmsThreshold = static_cast(0.4); + bool m_swapRB = true; + float m_maxCropRatio = 2.0f; + ModelType m_netType = ModelType::Unknown; + std::vector m_classNames; + std::vector m_outNames; + std::vector m_outLayers; + std::vector m_outLayerTypes; + cv::UMat m_inputBlob; + + void ParseOldYOLO(const cv::Rect& crop, const std::vector& detections, regions_t& tmpRegions); + + void 
ParseYOLOv5(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); + void ParseYOLOv8(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); + void ParseYOLOv9(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); + void ParseYOLOv10(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); + void ParseYOLOv11(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); + void ParseYOLOv5_8_11_obb(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); + void ParseYOLOv5_8_11_seg(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); + void ParseRFDETR(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); + void ParseRFDETR_IS(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); + void ParseDFINE(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); + void ParseDFINE_IS(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); + void ParseYOLOv26(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); + void ParseYOLOv26_obb(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); + void ParseYOLOv26_seg(const cv::Rect& crop, std::vector& detections, regions_t& tmpRegions); +}; + +#endif diff --git a/src/Detector/ONNXTensorRTDetector.cpp b/src/Detector/ONNXTensorRTDetector.cpp new file mode 100644 index 000000000..b0a734a77 --- /dev/null +++ b/src/Detector/ONNXTensorRTDetector.cpp @@ -0,0 +1,340 @@ +#include +#include "ONNXTensorRTDetector.h" +#include "mtracking/nms.h" + +/// +/// \brief ONNXTensorRTDetector::ONNXTensorRTDetector +/// \param colorFrame +/// +ONNXTensorRTDetector::ONNXTensorRTDetector(const cv::UMat& colorFrame) + : BaseDetector(colorFrame) +{ + m_localConfig.m_calibrationImageListFileTxt = ""; + m_localConfig.m_inferencePrecision = tensor_rt::FP32; + m_localConfig.m_netType = tensor_rt::YOLOV4; + m_localConfig.m_detectThresh = 0.5f; + 
m_localConfig.m_gpuInd = 0; +} + +/// +/// \brief ONNXTensorRTDetector::ONNXTensorRTDetector +/// \param colorFrame +/// +ONNXTensorRTDetector::ONNXTensorRTDetector(const cv::Mat& colorFrame) + : BaseDetector(colorFrame) +{ + m_localConfig.m_calibrationImageListFileTxt = ""; + m_localConfig.m_inferencePrecision = tensor_rt::FP32; + m_localConfig.m_netType = tensor_rt::YOLOV4; + m_localConfig.m_detectThresh = 0.5f; + m_localConfig.m_gpuInd = 0; +} + +/// +/// \brief ONNXTensorRTDetector::Init +/// \return +/// +bool ONNXTensorRTDetector::Init(const config_t& config) +{ + //std::cout << "YoloTensorRTDetector::Init" << std::endl; + + m_detector.reset(); + + auto modelConfiguration = config.find("modelConfiguration"); + auto modelBinary = config.find("modelBinary"); + if (modelConfiguration == config.end() || modelBinary == config.end()) + return false; + + auto confidenceThreshold = config.find("confidenceThreshold"); + if (confidenceThreshold != config.end()) + m_localConfig.m_detectThresh = std::stof(confidenceThreshold->second); + + auto gpuId = config.find("gpuId"); + if (gpuId != config.end()) + m_localConfig.m_gpuInd = std::max(0, std::stoi(gpuId->second)); + + auto maxBatch = config.find("maxBatch"); + if (maxBatch != config.end()) + m_batchSize = std::max(1, std::stoi(maxBatch->second)); + m_localConfig.m_batchSize = static_cast(m_batchSize); + + auto videoMemory = config.find("video_memory"); + if (videoMemory != config.end()) + m_localConfig.m_videoMemory = std::max(0, std::stoul(videoMemory->second)); + + m_localConfig.m_fileModelCfg = modelConfiguration->second; + m_localConfig.m_fileModelWeights = modelBinary->second; + + auto inference_precision = config.find("inference_precision"); + if (inference_precision != config.end()) + { + std::map dictPrecision; + dictPrecision["INT8"] = tensor_rt::INT8; + dictPrecision["FP16"] = tensor_rt::FP16; + dictPrecision["FP32"] = tensor_rt::FP32; + dictPrecision["FP8"] = tensor_rt::FP8; + auto precision = 
dictPrecision.find(inference_precision->second); + if (precision != dictPrecision.end()) + m_localConfig.m_inferencePrecision = precision->second; + } + + auto net_type = config.find("net_type"); + if (net_type != config.end()) + { + std::map dictNetType; + dictNetType["YOLOV3"] = tensor_rt::YOLOV3; + dictNetType["YOLOV4"] = tensor_rt::YOLOV4; + dictNetType["YOLOV4_TINY"] = tensor_rt::YOLOV4_TINY; + dictNetType["YOLOV5"] = tensor_rt::YOLOV5; + dictNetType["YOLOV6"] = tensor_rt::YOLOV6; + dictNetType["YOLOV7"] = tensor_rt::YOLOV7; + dictNetType["YOLOV7Mask"] = tensor_rt::YOLOV7Mask; + dictNetType["YOLOV8"] = tensor_rt::YOLOV8; + dictNetType["YOLOV8_OBB"] = tensor_rt::YOLOV8_OBB; + dictNetType["YOLOV8Mask"] = tensor_rt::YOLOV8Mask; + dictNetType["YOLOV9"] = tensor_rt::YOLOV9; + dictNetType["YOLOV10"] = tensor_rt::YOLOV10; + dictNetType["YOLOV11"] = tensor_rt::YOLOV11; + dictNetType["YOLOV11_OBB"] = tensor_rt::YOLOV11_OBB; + dictNetType["YOLOV11Mask"] = tensor_rt::YOLOV11Mask; + dictNetType["YOLOV12"] = tensor_rt::YOLOV12; + dictNetType["RFDETR"] = tensor_rt::RFDETR; + dictNetType["RFDETR_IS"] = tensor_rt::RFDETR_IS; + dictNetType["DFINE"] = tensor_rt::DFINE; + dictNetType["YOLOV13"] = tensor_rt::YOLOV13; + dictNetType["DFINE_IS"] = tensor_rt::DFINE_IS; + dictNetType["YOLOV26"] = tensor_rt::YOLOV26; + dictNetType["YOLOV26_OBB"] = tensor_rt::YOLOV26_OBB; + dictNetType["YOLOV26Mask"] = tensor_rt::YOLOV26Mask; + + auto netType = dictNetType.find(net_type->second); + if (netType != dictNetType.end()) + m_localConfig.m_netType = netType->second; + else + { + assert(netType == dictNetType.end()); + std::cerr << "net_type = " << net_type->second << ", " << (int)m_localConfig.m_netType << std::endl; + } + } + + auto classNames = config.find("classNames"); + if (classNames != config.end()) + { + std::ifstream classNamesFile(classNames->second); + if (classNamesFile.is_open()) + { + m_classNames.clear(); + std::string className; + for (; std::getline(classNamesFile, className); 
) + { + className.erase(className.find_last_not_of(" \t\n\r\f\v") + 1); + m_classNames.push_back(className); + } + if (!FillTypesMap(m_classNames)) + { + std::cout << "Unknown types in class names!" << std::endl; + assert(0); + } + } + else + { + std::cout << "File with class names can not be opened!" << std::endl; + assert(0); + } + } + + m_classesWhiteList.clear(); + auto whiteRange = config.equal_range("white_list"); + for (auto it = whiteRange.first; it != whiteRange.second; ++it) + { + m_classesWhiteList.insert(TypeConverter::Str2Type(it->second)); + } + + auto maxCropRatio = config.find("maxCropRatio"); + if (maxCropRatio != config.end()) + m_maxCropRatio = std::stof(maxCropRatio->second); + + + std::cout << "YoloTensorRTDetector::Init: tensor_rt::Detector" << std::endl; + m_detector = std::make_unique(); + if (m_detector) + m_detector->Init(m_localConfig); + + std::cout << "YoloTensorRTDetector::Init: Detector created = " << (m_detector.get() != nullptr) << std::endl; + + return m_detector.get() != nullptr; +} + +/// +/// \brief ONNXTensorRTDetector::Detect +/// \param gray +/// +void ONNXTensorRTDetector::Detect(const cv::UMat& colorFrame) +{ + m_regions.clear(); + cv::Mat colorMat = colorFrame.getMat(cv::ACCESS_READ); + +#define DRAW_MASK 0 +#if DRAW_MASK + cv::Mat img = colorMat.clone(); + std::vector color; + srand(time(0)); + for (int i = 0; i < m_classNames.size(); i++) + { + int b = rand() % 256; + int g = rand() % 256; + int r = rand() % 256; + color.emplace_back(b, g, r); + } + cv::Mat mask = img.clone(); +#endif + + if (m_maxCropRatio <= 0) + { + std::vector batch = { colorMat }; + std::vector detects; + m_detector->Detect(batch, detects); + for (const tensor_rt::BatchResult& dets : detects) + { + for (const tensor_rt::Result& bbox : dets) + { + if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(bbox.m_id)) != std::end(m_classesWhiteList)) + { + m_regions.emplace_back(bbox.m_rrect, bbox.m_brect, T2T(bbox.m_id), bbox.m_prob, 
bbox.m_boxMask); + + //std::cout << "YoloTensorRTDetector::Detect: bbox.m_rrect " << bbox.m_rrect.center << ", " << bbox.m_rrect.angle << ", " << bbox.m_rrect.size << std::endl; + //std::cout << "YoloTensorRTDetector::Detect: m_regions.back().m_rrect " << m_regions.back().m_rrect.center << ", " << m_regions.back().m_rrect.angle << ", " << m_regions.back().m_rrect.size << std::endl; +#if DRAW_MASK + rectangle(img, bbox.m_brect, color[bbox.m_id], 2, 8); + mask(bbox.m_brect).setTo(color[bbox.m_id], bbox.m_boxMask); +#endif + } + } + } +#if DRAW_MASK + cv::addWeighted(img, 0.5, mask, 0.5, 0, img); + cv::imshow("mask", mask); + cv::waitKey(1); +#endif + } + else + { + std::vector crops = GetCrops(m_maxCropRatio, m_detector->GetInputSize(), colorMat.size()); + //std::cout << "Image on " << crops.size() << " crops with size " << crops.front().size() << ", input size " << m_detector->GetInputSize() << ", batch " << m_batchSize << ", frame " << colorMat.size() << std::endl; + regions_t tmpRegions; + std::vector batch; + batch.reserve(m_batchSize); + for (size_t i = 0; i < crops.size(); i += m_batchSize) + { + size_t batchSize = std::min(static_cast(m_batchSize), crops.size() - i); + batch.clear(); + for (size_t j = 0; j < batchSize; ++j) + { + batch.emplace_back(colorMat, crops[i + j]); + } + std::vector detects; + m_detector->Detect(batch, detects); + + for (size_t j = 0; j < batchSize; ++j) + { + const auto& crop = crops[i + j]; + //std::cout << "batch " << (i / batchSize) << ", crop " << (i + j) << ": " << crop << std::endl; + + for (const tensor_rt::Result& bbox : detects[j]) + { + if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(bbox.m_id)) != std::end(m_classesWhiteList)) + { + cv::RotatedRect newRRect(bbox.m_rrect); + newRRect.center.x += crop.x; + newRRect.center.y += crop.y; + tmpRegions.emplace_back(newRRect, T2T(bbox.m_id), bbox.m_prob); + } + } + } + } + + if (crops.size() > 1) + { + nms3(tmpRegions, m_regions, static_cast(0.4), + [](const CRegion& 
reg) { return reg.m_brect; }, + [](const CRegion& reg) { return reg.m_confidence; }, + [](const CRegion& reg) { return reg.m_type; }, + 0, static_cast(0)); + //std::cout << "nms for " << tmpRegions.size() << " objects - result " << m_regions.size() << std::endl; + } + } +} + +/// +/// \brief ONNXTensorRTDetector::Detect +/// \param frames +/// \param regions +/// +void ONNXTensorRTDetector::Detect(const std::vector& frames, std::vector& regions) +{ + if (frames.size() == 1) + { + Detect(frames.front()); + regions[0].assign(std::begin(m_regions), std::end(m_regions)); + } + else + { + std::vector batch; + for (const auto& frame : frames) + { + batch.emplace_back(frame.getMat(cv::ACCESS_READ)); + } + + std::vector detects; + m_detector->Detect(batch, detects); + for (size_t i = 0; i < detects.size(); ++i) + { + const tensor_rt::BatchResult& dets = detects[i]; + for (const tensor_rt::Result& bbox : dets) + { + if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(bbox.m_id)) != std::end(m_classesWhiteList)) + regions[i].emplace_back(bbox.m_rrect, T2T(bbox.m_id), bbox.m_prob); + } + } + m_regions.assign(std::begin(regions.back()), std::end(regions.back())); + } +} + +/// +/// \brief CalcMotionMap +/// \param frame +/// +void ONNXTensorRTDetector::CalcMotionMap(cv::Mat& frame) +{ + if (m_localConfig.m_netType == tensor_rt::YOLOV7Mask + || m_localConfig.m_netType == tensor_rt::YOLOV8Mask + || m_localConfig.m_netType == tensor_rt::YOLOV11Mask + || m_localConfig.m_netType == tensor_rt::YOLOV26Mask) + { + static std::vector color; + if (color.empty()) + { + srand((unsigned int)time(0)); + for (int i = 0; i < m_classNames.size(); i++) + { + int b = rand() % 256; + int g = rand() % 256; + int r = rand() % 256; + color.emplace_back(b, g, r); + } + } + cv::Mat mask = frame.clone(); + + for (const auto& region : m_regions) + { + //cv::rectangle(frame, region.m_brect, color[region.m_type], 2, 8); + if (!region.m_boxMask.empty()) + 
mask(region.m_brect).setTo(color[region.m_type], region.m_boxMask); + } + cv::addWeighted(frame, 0.5, mask, 0.5, 0, frame); + } + else + { + BaseDetector::CalcMotionMap(frame); + } +} diff --git a/src/Detector/ONNXTensorRTDetector.h b/src/Detector/ONNXTensorRTDetector.h new file mode 100644 index 000000000..39c5fd711 --- /dev/null +++ b/src/Detector/ONNXTensorRTDetector.h @@ -0,0 +1,36 @@ +#pragma once + +#include "BaseDetector.h" +#include "tensorrt_onnx/class_detector.h" + +/// +/// \brief The ONNXTensorRTDetector class +/// +class ONNXTensorRTDetector final : public BaseDetector +{ +public: + ONNXTensorRTDetector(const cv::UMat& colorFrame); + ONNXTensorRTDetector(const cv::Mat& colorFrame); + ~ONNXTensorRTDetector(void) = default; + + bool Init(const config_t& config) override; + + void Detect(const cv::UMat& colorFrame) override; + void Detect(const std::vector& frames, std::vector& regions) override; + + bool CanGrayProcessing() const override + { + return false; + } + + void CalcMotionMap(cv::Mat& frame); + +private: + std::unique_ptr m_detector; + + float m_maxCropRatio = 3.0f; + std::vector m_classNames; + + tensor_rt::Config m_localConfig; + size_t m_batchSize = 1; +}; diff --git a/src/Detector/PedestrianDetector.cpp b/src/Detector/PedestrianDetector.cpp deleted file mode 100644 index 2728abeea..000000000 --- a/src/Detector/PedestrianDetector.cpp +++ /dev/null @@ -1,96 +0,0 @@ -#include "PedestrianDetector.h" -#include "nms.h" - -/// -/// \brief PedestrianDetector::PedestrianDetector -/// \param gray -/// -PedestrianDetector::PedestrianDetector(const cv::UMat& gray) - : - BaseDetector(gray), - m_scannerC4(HUMAN_height, HUMAN_width, HUMAN_xdiv, HUMAN_ydiv, 256, 0.8) -{ -} - -/// -/// \brief PedestrianDetector::Init -/// \param cascadeFileName -/// \return -/// -bool PedestrianDetector::Init(const config_t& config) -{ - auto detectorType = config.find("detectorType"); - - if (detectorType == config.end()) - { - m_detectorType = HOG; - } - else - { - 
m_detectorType = (detectorType->second == "HOG") ? HOG : C4; - } - - switch (m_detectorType) - { - case HOG: - m_hog.setSVMDetector(cv::HOGDescriptor::getDefaultPeopleDetector()); - return true; - - case C4: - { - auto cascadeFileName1 = config.find("cascadeFileName1"); - auto cascadeFileName2 = config.find("cascadeFileName2"); - if (cascadeFileName1 == config.end() || cascadeFileName2 == config.end()) - { - return false; - } - else - { - LoadCascade(cascadeFileName1->second, cascadeFileName2->second, m_scannerC4); - return true; - } - } - - default: - return false; - } - - return false; -} - -/// -/// \brief PedestrianDetector::Detect -/// \param gray -/// -void PedestrianDetector::Detect(const cv::UMat& gray) -{ - std::vector foundRects; - std::vector filteredRects; - - int neighbors = 0; - if (m_detectorType == HOG) - { - m_hog.detectMultiScale(gray, foundRects, 0, cv::Size(8, 8), cv::Size(32, 32), 1.05, 4, false); - } - else - { - IntImage original; - original.Load(gray.getMat(cv::ACCESS_READ)); - - m_scannerC4.FastScan(original, foundRects, 2); - neighbors = 1; - } - - nms(foundRects, filteredRects, 0.3f, neighbors); - - m_regions.clear(); - for (auto rect : filteredRects) - { - rect.x += cvRound(rect.width * 0.1f); - rect.width = cvRound(rect.width * 0.8f); - rect.y += cvRound(rect.height * 0.07f); - rect.height = cvRound(rect.height * 0.8f); - - m_regions.push_back(rect); - } -} diff --git a/src/Detector/PedestrianDetector.h b/src/Detector/PedestrianDetector.h deleted file mode 100644 index f6a392aa2..000000000 --- a/src/Detector/PedestrianDetector.h +++ /dev/null @@ -1,48 +0,0 @@ -#pragma once - -#include "BaseDetector.h" -#include "pedestrians/c4-pedestrian-detector.h" - -/// -/// \brief The PedestrianDetector class -/// -class PedestrianDetector : public BaseDetector -{ -public: - enum DetectorTypes - { - HOG, - C4 - }; - - PedestrianDetector(const cv::UMat& gray); - ~PedestrianDetector(void) = default; - - bool Init(const config_t& config); - - void 
Detect(const cv::UMat& gray); - - bool CanGrayProcessing() const - { - return true; - } - -private: - DetectorTypes m_detectorType = HOG; - - /// - /// \brief m_hog - /// HOG detector - /// - cv::HOGDescriptor m_hog; - - /// - /// \brief m_scannerC4 - /// C4 detector - /// - DetectionScanner m_scannerC4; - static const int HUMAN_height = 108; - static const int HUMAN_width = 36; - static const int HUMAN_xdiv = 9; - static const int HUMAN_ydiv = 4; -}; diff --git a/src/Detector/Subsense/BackgroundSubtractorLBSP.cpp b/src/Detector/Subsense/BackgroundSubtractorLBSP.cpp deleted file mode 100644 index 11d3f87f3..000000000 --- a/src/Detector/Subsense/BackgroundSubtractorLBSP.cpp +++ /dev/null @@ -1,59 +0,0 @@ -#include "BackgroundSubtractorLBSP.h" -#include "DistanceUtils.h" -#include "RandUtils.h" -#include -#include -#include - -// local define used to determine the default median blur kernel size -#define DEFAULT_MEDIAN_BLUR_KERNEL_SIZE (9) - -BackgroundSubtractorLBSP::BackgroundSubtractorLBSP(float fRelLBSPThreshold, size_t nLBSPThresholdOffset) - : m_nImgChannels(0) - ,m_nImgType(0) - ,m_nLBSPThresholdOffset(nLBSPThresholdOffset) - ,m_fRelLBSPThreshold(fRelLBSPThreshold) - ,m_nTotPxCount(0) - ,m_nTotRelevantPxCount(0) - ,m_nFrameIndex(SIZE_MAX) - ,m_nFramesSinceLastReset(0) - ,m_nModelResetCooldown(0) - ,m_aPxIdxLUT(nullptr) - ,m_aPxInfoLUT(nullptr) - ,m_nDefaultMedianBlurKernelSize(DEFAULT_MEDIAN_BLUR_KERNEL_SIZE) - ,m_bInitialized(false) - ,m_bAutoModelResetEnabled(true) - ,m_bUsingMovingCamera(false) - ,nDebugCoordX(0),nDebugCoordY(0) { - CV_Assert(m_fRelLBSPThreshold>=0); -} - -/// -void BackgroundSubtractorLBSP::initialize(const cv::Mat& oInitImg) -{ - this->initialize(oInitImg,cv::Mat()); -} - -//cv::Algorithm* BackgroundSubtractorLBSP::info() const { -// return nullptr; -//} - -cv::Mat BackgroundSubtractorLBSP::getROICopy() const { - return m_oROI.clone(); -} - -void BackgroundSubtractorLBSP::setROI(cv::Mat& oROI) { - LBSP::validateROI(oROI); - 
CV_Assert(cv::countNonZero(oROI)>0); - if(m_bInitialized) { - cv::Mat oLatestBackgroundImage; - getBackgroundImage(oLatestBackgroundImage); - initialize(oLatestBackgroundImage,oROI); - } - else - m_oROI = oROI.clone(); -} - -void BackgroundSubtractorLBSP::setAutomaticModelReset(bool bVal) { - m_bAutoModelResetEnabled = bVal; -} diff --git a/src/Detector/Subsense/BackgroundSubtractorLBSP.h b/src/Detector/Subsense/BackgroundSubtractorLBSP.h deleted file mode 100644 index 647db5c49..000000000 --- a/src/Detector/Subsense/BackgroundSubtractorLBSP.h +++ /dev/null @@ -1,85 +0,0 @@ -#pragma once - -#include -#include "LBSP.h" - -/*! - Local Binary Similarity Pattern (LBSP)-based change detection algorithm (abstract version/base class). - - For more details on the different parameters, see P.-L. St-Charles and G.-A. Bilodeau, "Improving Background - Subtraction using Local Binary Similarity Patterns", in WACV 2014, or G.-A. Bilodeau et al, "Change Detection - in Feature Space Using Local Binary Similarity Patterns", in CRV 2013. - - This algorithm is currently NOT thread-safe. - */ -class BackgroundSubtractorLBSP : public cv::BackgroundSubtractor - { -public: - //! full constructor - BackgroundSubtractorLBSP(float fRelLBSPThreshold, size_t nLBSPThresholdOffset=0); - //! default destructor - virtual ~BackgroundSubtractorLBSP() = default; - //! (re)initiaization method; needs to be called before starting background subtraction - virtual void initialize(const cv::Mat& oInitImg); - //! (re)initiaization method; needs to be called before starting background subtraction - virtual void initialize(const cv::Mat& oInitImg, const cv::Mat& oROI)=0; - //! primary model update function; the learning param is used to override the internal learning speed (ignored when <= 0) - virtual void operator()(cv::InputArray image, cv::OutputArray fgmask, double learningRate=0)=0; - //! unused, always returns nullptr - //virtual cv::Algorithm* info() const; - //! 
returns a copy of the ROI used for descriptor extraction - virtual cv::Mat getROICopy() const; - //! sets the ROI to be used for descriptor extraction (note: this function will reinit the model and return the usable ROI) - virtual void setROI(cv::Mat& oROI); - //! turns automatic model reset on or off - void setAutomaticModelReset(bool); - -protected: - struct PxInfoBase { - int nImgCoord_Y; - int nImgCoord_X; - size_t nModelIdx; - }; - //! background model ROI used for LBSP descriptor extraction (specific to the input image size) - cv::Mat m_oROI; - //! input image size - cv::Size m_oImgSize; - //! input image channel size - size_t m_nImgChannels; - //! input image type - int m_nImgType; - //! LBSP internal threshold offset value, used to reduce texture noise in dark regions - const size_t m_nLBSPThresholdOffset; - //! LBSP relative internal threshold (kept here since we don't keep an LBSP object) - const float m_fRelLBSPThreshold; - //! total number of pixels (depends on the input frame size) & total number of relevant pixels - size_t m_nTotPxCount, m_nTotRelevantPxCount; - //! current frame index, frame count since last model reset & model reset cooldown counters - size_t m_nFrameIndex, m_nFramesSinceLastReset, m_nModelResetCooldown; - //! pre-allocated internal LBSP threshold values LUT for all possible 8-bit intensities - size_t m_anLBSPThreshold_8bitLUT[UCHAR_MAX+1]; - //! internal pixel index LUT for all relevant analysis regions (based on the provided ROI) - size_t* m_aPxIdxLUT; - //! internal pixel info LUT for all possible pixel indexes - PxInfoBase* m_aPxInfoLUT; - //! default kernel size for median blur post-proc filtering - const int m_nDefaultMedianBlurKernelSize; - //! specifies whether the algorithm is fully initialized or not - bool m_bInitialized = false; - //! specifies whether automatic model resets are enabled or not - bool m_bAutoModelResetEnabled; - //! 
specifies whether the camera is considered moving or not - bool m_bUsingMovingCamera; - //! copy of latest pixel intensities (used when refreshing model) - cv::Mat m_oLastColorFrame; - //! copy of latest descriptors (used when refreshing model) - cv::Mat m_oLastDescFrame; - //! the foreground mask generated by the method at [t-1] - cv::Mat m_oLastFGMask; - -public: - // ######## DEBUG PURPOSES ONLY ########## - int nDebugCoordX, nDebugCoordY; - std::string sDebugName; -}; - diff --git a/src/Detector/Subsense/BackgroundSubtractorLOBSTER.cpp b/src/Detector/Subsense/BackgroundSubtractorLOBSTER.cpp deleted file mode 100644 index 933f962fb..000000000 --- a/src/Detector/Subsense/BackgroundSubtractorLOBSTER.cpp +++ /dev/null @@ -1,337 +0,0 @@ -#include "BackgroundSubtractorLOBSTER.h" -#include "DistanceUtils.h" -#include "RandUtils.h" -#include -#include - -BackgroundSubtractorLOBSTER::BackgroundSubtractorLOBSTER(float fRelLBSPThreshold, - size_t nLBSPThresholdOffset, - size_t nDescDistThreshold, - size_t nColorDistThreshold, - size_t nBGSamples, - size_t nRequiredBGSamples) - : BackgroundSubtractorLBSP(fRelLBSPThreshold,nLBSPThresholdOffset) - ,m_nColorDistThreshold(nColorDistThreshold) - ,m_nDescDistThreshold(nDescDistThreshold) - ,m_nBGSamples(nBGSamples) - ,m_nRequiredBGSamples(nRequiredBGSamples) - { - CV_Assert(m_nRequiredBGSamples<=m_nBGSamples); - m_bAutoModelResetEnabled = false; // @@@@@@ not supported here for now -} - -BackgroundSubtractorLOBSTER::~BackgroundSubtractorLOBSTER() { - if(m_aPxIdxLUT) - delete[] m_aPxIdxLUT; - if(m_aPxInfoLUT) - delete[] m_aPxInfoLUT; -} - -void BackgroundSubtractorLOBSTER::initialize(const cv::Mat& oInitImg, const cv::Mat& oROI) { - CV_Assert(!oInitImg.empty() && oInitImg.cols>0 && oInitImg.rows>0); - CV_Assert(oInitImg.isContinuous()); - CV_Assert(oInitImg.type()==CV_8UC1 || oInitImg.type()==CV_8UC3); - if(oInitImg.type()==CV_8UC3) { - std::vector voInitImgChannels; - cv::split(oInitImg,voInitImgChannels); - 
if(!cv::countNonZero((voInitImgChannels[0]!=voInitImgChannels[1])|(voInitImgChannels[2]!=voInitImgChannels[1]))) - std::cout << std::endl << "\tBackgroundSubtractorLOBSTER : Warning, grayscale images should always be passed in CV_8UC1 format for optimal performance." << std::endl; - } - cv::Mat oNewBGROI; - if(oROI.empty() && (m_oROI.empty() || oROI.size()!=oInitImg.size())) { - oNewBGROI.create(oInitImg.size(),CV_8UC1); - oNewBGROI = cv::Scalar_(UCHAR_MAX); - } - else if(oROI.empty()) - oNewBGROI = m_oROI; - else { - CV_Assert(oROI.size()==oInitImg.size() && oROI.type()==CV_8UC1); - CV_Assert(cv::countNonZero((oROI0))==0); - oNewBGROI = oROI.clone(); - } - LBSP::validateROI(oNewBGROI); - const size_t nROIPxCount = (size_t)cv::countNonZero(oNewBGROI); - CV_Assert(nROIPxCount>0); - m_oROI = oNewBGROI; - m_oImgSize = oInitImg.size(); - m_nImgType = oInitImg.type(); - m_nImgChannels = oInitImg.channels(); - m_nTotPxCount = m_oImgSize.area(); - m_nTotRelevantPxCount = nROIPxCount; - m_nFrameIndex = 0; - m_nFramesSinceLastReset = 0; - m_nModelResetCooldown = 0; - m_oLastFGMask.create(m_oImgSize,CV_8UC1); - m_oLastFGMask = cv::Scalar_(0); - m_oLastColorFrame.create(m_oImgSize,CV_8UC((int)m_nImgChannels)); - m_oLastColorFrame = cv::Scalar_::all(0); - m_oLastDescFrame.create(m_oImgSize,CV_16UC((int)m_nImgChannels)); - m_oLastDescFrame = cv::Scalar_::all(0); - m_voBGColorSamples.resize(m_nBGSamples); - m_voBGDescSamples.resize(m_nBGSamples); - for(size_t s=0; s::all(0); - m_voBGDescSamples[s].create(m_oImgSize,CV_16UC((int)m_nImgChannels)); - m_voBGDescSamples[s] = cv::Scalar_::all(0); - } - if(m_aPxIdxLUT) - delete[] m_aPxIdxLUT; - if(m_aPxInfoLUT) - delete[] m_aPxInfoLUT; - m_aPxIdxLUT = new size_t[m_nTotRelevantPxCount]; - m_aPxInfoLUT = new PxInfoBase[m_nTotPxCount]; - if(m_nImgChannels==1) { - CV_Assert(m_oLastColorFrame.step.p[0]==(size_t)m_oImgSize.width && m_oLastColorFrame.step.p[1]==1); - CV_Assert(m_oLastDescFrame.step.p[0]==m_oLastColorFrame.step.p[0]*2 && 
m_oLastDescFrame.step.p[1]==m_oLastColorFrame.step.p[1]*2); - for(size_t t=0; t<=UCHAR_MAX; ++t) - m_anLBSPThreshold_8bitLUT[t] = cv::saturate_cast((t*m_fRelLBSPThreshold+m_nLBSPThresholdOffset)/2); - for(size_t nPxIter=0, nModelIter=0; nPxIter(t*m_fRelLBSPThreshold+m_nLBSPThresholdOffset); - for(size_t nPxIter=0, nModelIter=0; nPxIter0.0f && fSamplesRefreshFrac<=1.0f); - const size_t nModelsToRefresh = fSamplesRefreshFrac<1.0f?(size_t)(fSamplesRefreshFrac*m_nBGSamples):m_nBGSamples; - const size_t nRefreshStartPos = fSamplesRefreshFrac<1.0f?rand()%m_nBGSamples:0; - if(m_nImgChannels==1) { - for(size_t nModelIter=0; nModelIter0); - cv::Mat oInputImg = _image.getMat(); - CV_Assert(oInputImg.type()==m_nImgType && oInputImg.size()==m_oImgSize); - CV_Assert(oInputImg.isContinuous()); - _fgmask.create(m_oImgSize,CV_8UC1); - cv::Mat oCurrFGMask = _fgmask.getMat(); - oCurrFGMask = cv::Scalar_(0); - const size_t nLearningRate = (size_t)ceil(learningRate); - if(m_nImgChannels==1) { - for(size_t nModelIter=0; nModelIterm_nColorDistThreshold/2) - goto failedcheck1ch; - LBSP::computeGrayscaleDescriptor(oInputImg,nBGColor,nCurrImgCoord_X,nCurrImgCoord_Y,m_anLBSPThreshold_8bitLUT[nBGColor],nCurrInputDesc); - const size_t nDescDist = hdist(nCurrInputDesc,*((ushort*)(m_voBGDescSamples[nModelIdx].data+nDescIter))); - if(nDescDist>m_nDescDistThreshold) - goto failedcheck1ch; - nGoodSamplesCount++; - } - failedcheck1ch: - nModelIdx++; - } - if(nGoodSamplesCount(nSampleImgCoord_Y,nSampleImgCoord_X); - LBSP::computeGrayscaleDescriptor(oInputImg,nCurrColor,nCurrImgCoord_X,nCurrImgCoord_Y,m_anLBSPThreshold_8bitLUT[nCurrColor],nRandInputDesc); - m_voBGColorSamples[nSampleModelIdx].at(nSampleImgCoord_Y,nSampleImgCoord_X) = nCurrColor; - } - } - } - } - else { //m_nImgChannels==3 - const size_t nCurrDescDistThreshold = m_nDescDistThreshold*3; - const size_t nCurrColorDistThreshold = m_nColorDistThreshold*3; - const size_t nCurrSCDescDistThreshold = nCurrDescDistThreshold/2; - const size_t 
nCurrSCColorDistThreshold = nCurrColorDistThreshold/2; - const size_t desc_row_step = m_voBGDescSamples[0].step.p[0]; - const size_t img_row_step = m_voBGColorSamples[0].step.p[0]; - for(size_t nModelIter=0; nModelIternCurrSCColorDistThreshold) - goto failedcheck3ch; - LBSP::computeSingleRGBDescriptor(oInputImg,anBGColor[c],nCurrImgCoord_X,nCurrImgCoord_Y,c,m_anLBSPThreshold_8bitLUT[anBGColor[c]],anCurrInputDesc[c]); - const size_t nDescDist = hdist(anCurrInputDesc[c],anBGDesc[c]); - if(nDescDist>nCurrSCDescDistThreshold) - goto failedcheck3ch; - nTotColorDist += nColorDist; - nTotDescDist += nDescDist; - } - if(nTotDescDist<=nCurrDescDistThreshold && nTotColorDist<=nCurrColorDistThreshold) - nGoodSamplesCount++; - failedcheck3ch: - nModelIdx++; - } - if(nGoodSamplesCount 0 (smaller values == faster adaptation) - virtual void operator()(cv::InputArray image, cv::OutputArray fgmask, double learningRate=BGSLOBSTER_DEFAULT_LEARNING_RATE); - //! returns a copy of the latest reconstructed background image - void getBackgroundImage(cv::OutputArray backgroundImage) const; - //! returns a copy of the latest reconstructed background descriptors image - virtual void getBackgroundDescriptorsImage(cv::OutputArray backgroundDescImage) const; - //! compute foreground mask - virtual void apply(cv::InputArray image, cv::OutputArray fgmask, double learningRateOverride=BGSLOBSTER_DEFAULT_LEARNING_RATE); - -protected: - //! absolute color distance threshold - const size_t m_nColorDistThreshold; - //! absolute descriptor distance threshold - const size_t m_nDescDistThreshold; - //! number of different samples per pixel/block to be taken from input frames to build the background model - const size_t m_nBGSamples; - //! number of similar samples needed to consider the current pixel/block as 'background' - const size_t m_nRequiredBGSamples; - //! background model pixel intensity samples - std::vector m_voBGColorSamples; - //! 
background model descriptors samples - std::vector m_voBGDescSamples; -}; - diff --git a/src/Detector/Subsense/BackgroundSubtractorSuBSENSE.cpp b/src/Detector/Subsense/BackgroundSubtractorSuBSENSE.cpp deleted file mode 100644 index 90a3b5d3c..000000000 --- a/src/Detector/Subsense/BackgroundSubtractorSuBSENSE.cpp +++ /dev/null @@ -1,744 +0,0 @@ -#include "BackgroundSubtractorSuBSENSE.h" -#include "DistanceUtils.h" -#include "RandUtils.h" -#include -#include - -/* - * - * Intrinsic parameters for our method are defined here; tuning these for better - * performance should not be required in most cases -- although improvements in - * very specific scenarios are always possible. - * - */ -//! defines the threshold value(s) used to detect long-term ghosting and trigger the fast edge-based absorption heuristic -#define GHOSTDET_D_MAX (0.010f) // defines 'negligible' change here -#define GHOSTDET_S_MIN (0.995f) // defines the required minimum local foreground saturation value -//! parameter used to scale dynamic distance threshold adjustments ('R(x)') -#define FEEDBACK_R_VAR (0.01f) -//! parameters used to adjust the variation step size of 'v(x)' -#define FEEDBACK_V_INCR (1.000f) -#define FEEDBACK_V_DECR (0.100f) -//! parameters used to scale dynamic learning rate adjustments ('T(x)') -#define FEEDBACK_T_DECR (0.2500f) -#define FEEDBACK_T_INCR (0.5000f) -#define FEEDBACK_T_LOWER (2.0000f) -#define FEEDBACK_T_UPPER (256.00f) -//! parameters used to define 'unstable' regions, based on segm noise/bg dynamics and local dist threshold values -#define UNSTABLE_REG_RATIO_MIN (0.100f) -#define UNSTABLE_REG_RDIST_MIN (3.000f) -//! parameters used to scale the relative LBSP intensity threshold used for internal comparisons -#define LBSPDESC_NONZERO_RATIO_MIN (0.100f) -#define LBSPDESC_NONZERO_RATIO_MAX (0.500f) -//! 
parameters used to define model reset/learning rate boosts in our frame-level component -#define FRAMELEVEL_MIN_COLOR_DIFF_THRESHOLD (m_nMinColorDistThreshold/2) -#define FRAMELEVEL_ANALYSIS_DOWNSAMPLE_RATIO (8) - -// local define used to display debug information -#define DISPLAY_SUBSENSE_DEBUG_INFO 0 -// local define used to specify the default frame size (320x240 = QVGA) -#define DEFAULT_FRAME_SIZE cv::Size(320,240) -// local define used to specify the color dist threshold offset used for unstable regions -#define STAB_COLOR_DIST_OFFSET (m_nMinColorDistThreshold/5) -// local define used to specify the desc dist threshold offset used for unstable regions -#define UNSTAB_DESC_DIST_OFFSET (m_nDescDistThresholdOffset) - -static const size_t s_nColorMaxDataRange_1ch = UCHAR_MAX; -static const size_t s_nDescMaxDataRange_1ch = LBSP::DESC_SIZE*8; -static const size_t s_nColorMaxDataRange_3ch = s_nColorMaxDataRange_1ch*3; -static const size_t s_nDescMaxDataRange_3ch = s_nDescMaxDataRange_1ch*3; - -BackgroundSubtractorSuBSENSE::BackgroundSubtractorSuBSENSE(float fRelLBSPThreshold, - size_t nDescDistThresholdOffset, - size_t nMinColorDistThreshold, - size_t nBGSamples, - size_t nRequiredBGSamples, - size_t nSamplesForMovingAvgs) - : BackgroundSubtractorLBSP(fRelLBSPThreshold) - ,m_nMinColorDistThreshold(nMinColorDistThreshold) - ,m_nDescDistThresholdOffset(nDescDistThresholdOffset) - ,m_nBGSamples(nBGSamples) - ,m_nRequiredBGSamples(nRequiredBGSamples) - ,m_nSamplesForMovingAvgs(nSamplesForMovingAvgs) - ,m_fLastNonZeroDescRatio(0.0f) - ,m_bLearningRateScalingEnabled(true) - ,m_fCurrLearningRateLowerCap(FEEDBACK_T_LOWER) - ,m_fCurrLearningRateUpperCap(FEEDBACK_T_UPPER) - ,m_nMedianBlurKernelSize(m_nDefaultMedianBlurKernelSize) - ,m_bUse3x3Spread(true) - ,m_defaultMorphologyKernel(cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3))) { - CV_Assert(m_nBGSamples>0 && m_nRequiredBGSamples<=m_nBGSamples); - CV_Assert(m_nMinColorDistThreshold>=STAB_COLOR_DIST_OFFSET); -} - 
-BackgroundSubtractorSuBSENSE::~BackgroundSubtractorSuBSENSE() { - if(m_aPxIdxLUT) - delete[] m_aPxIdxLUT; - if(m_aPxInfoLUT) - delete[] m_aPxInfoLUT; -} - -void BackgroundSubtractorSuBSENSE::initialize(const cv::Mat& oInitImg, const cv::Mat& oROI) { - // == init - CV_Assert(!oInitImg.empty() && oInitImg.cols>0 && oInitImg.rows>0); - CV_Assert(oInitImg.isContinuous()); - CV_Assert(oInitImg.type()==CV_8UC3 || oInitImg.type()==CV_8UC1); - if(oInitImg.type()==CV_8UC3) { - std::vector voInitImgChannels; - cv::split(oInitImg,voInitImgChannels); - if(!cv::countNonZero((voInitImgChannels[0]!=voInitImgChannels[1])|(voInitImgChannels[2]!=voInitImgChannels[1]))) - std::cout << std::endl << "\tBackgroundSubtractorSuBSENSE : Warning, grayscale images should always be passed in CV_8UC1 format for optimal performance." << std::endl; - } - cv::Mat oNewBGROI; - if(oROI.empty() && (m_oROI.empty() || oROI.size()!=oInitImg.size())) { - oNewBGROI.create(oInitImg.size(),CV_8UC1); - oNewBGROI = cv::Scalar_(UCHAR_MAX); - } - else if(oROI.empty()) - oNewBGROI = m_oROI; - else { - CV_Assert(oROI.size()==oInitImg.size() && oROI.type()==CV_8UC1); - CV_Assert(cv::countNonZero((oROI0))==0); - oNewBGROI = oROI.clone(); - cv::Mat oTempROI; - cv::dilate(oNewBGROI,oTempROI,m_defaultMorphologyKernel,cv::Point(-1,-1),LBSP::PATCH_SIZE/2); - cv::bitwise_or(oNewBGROI,oTempROI/2,oNewBGROI); - } - const size_t nOrigROIPxCount = (size_t)cv::countNonZero(oNewBGROI); - CV_Assert(nOrigROIPxCount>0); - LBSP::validateROI(oNewBGROI); - const size_t nFinalROIPxCount = (size_t)cv::countNonZero(oNewBGROI); - CV_Assert(nFinalROIPxCount>0); - m_oROI = oNewBGROI; - m_oImgSize = oInitImg.size(); - m_nImgType = oInitImg.type(); - m_nImgChannels = oInitImg.channels(); - m_nTotPxCount = m_oImgSize.area(); - m_nTotRelevantPxCount = nFinalROIPxCount; - m_nFrameIndex = 0; - m_nFramesSinceLastReset = 0; - m_nModelResetCooldown = 0; - m_fLastNonZeroDescRatio = 0.0f; - const int nTotImgPixels = 
m_oImgSize.height*m_oImgSize.width; - if(nOrigROIPxCount>=m_nTotPxCount/2 && (int)m_nTotPxCount>=DEFAULT_FRAME_SIZE.area()) { - m_bLearningRateScalingEnabled = true; - m_bAutoModelResetEnabled = true; - m_bUse3x3Spread = !(nTotImgPixels>DEFAULT_FRAME_SIZE.area()*2); - const int nRawMedianBlurKernelSize = std::min((int)floor((float)nTotImgPixels/DEFAULT_FRAME_SIZE.area()+0.5f)+m_nDefaultMedianBlurKernelSize,14); - m_nMedianBlurKernelSize = (nRawMedianBlurKernelSize%2)?nRawMedianBlurKernelSize:nRawMedianBlurKernelSize-1; - m_fCurrLearningRateLowerCap = FEEDBACK_T_LOWER; - m_fCurrLearningRateUpperCap = FEEDBACK_T_UPPER; - } - else { - m_bLearningRateScalingEnabled = false; - m_bAutoModelResetEnabled = false; - m_bUse3x3Spread = true; - m_nMedianBlurKernelSize = m_nDefaultMedianBlurKernelSize; - m_fCurrLearningRateLowerCap = FEEDBACK_T_LOWER*2; - m_fCurrLearningRateUpperCap = FEEDBACK_T_UPPER*2; - } - m_oUpdateRateFrame.create(m_oImgSize,CV_32FC1); - m_oUpdateRateFrame = cv::Scalar(m_fCurrLearningRateLowerCap); - m_oDistThresholdFrame.create(m_oImgSize,CV_32FC1); - m_oDistThresholdFrame = cv::Scalar(1.0f); - m_oVariationModulatorFrame.create(m_oImgSize,CV_32FC1); - m_oVariationModulatorFrame = cv::Scalar(10.0f); // should always be >= FEEDBACK_V_DECR - m_oMeanLastDistFrame.create(m_oImgSize,CV_32FC1); - m_oMeanLastDistFrame = cv::Scalar(0.0f); - m_oMeanMinDistFrame_LT.create(m_oImgSize,CV_32FC1); - m_oMeanMinDistFrame_LT = cv::Scalar(0.0f); - m_oMeanMinDistFrame_ST.create(m_oImgSize,CV_32FC1); - m_oMeanMinDistFrame_ST = cv::Scalar(0.0f); - m_oDownSampledFrameSize = cv::Size(m_oImgSize.width/FRAMELEVEL_ANALYSIS_DOWNSAMPLE_RATIO,m_oImgSize.height/FRAMELEVEL_ANALYSIS_DOWNSAMPLE_RATIO); - m_oMeanDownSampledLastDistFrame_LT.create(m_oDownSampledFrameSize,CV_32FC((int)m_nImgChannels)); - m_oMeanDownSampledLastDistFrame_LT = cv::Scalar(0.0f); - m_oMeanDownSampledLastDistFrame_ST.create(m_oDownSampledFrameSize,CV_32FC((int)m_nImgChannels)); - m_oMeanDownSampledLastDistFrame_ST 
= cv::Scalar(0.0f); - m_oMeanRawSegmResFrame_LT.create(m_oImgSize,CV_32FC1); - m_oMeanRawSegmResFrame_LT = cv::Scalar(0.0f); - m_oMeanRawSegmResFrame_ST.create(m_oImgSize,CV_32FC1); - m_oMeanRawSegmResFrame_ST = cv::Scalar(0.0f); - m_oMeanFinalSegmResFrame_LT.create(m_oImgSize,CV_32FC1); - m_oMeanFinalSegmResFrame_LT = cv::Scalar(0.0f); - m_oMeanFinalSegmResFrame_ST.create(m_oImgSize,CV_32FC1); - m_oMeanFinalSegmResFrame_ST = cv::Scalar(0.0f); - m_oUnstableRegionMask.create(m_oImgSize,CV_8UC1); - m_oUnstableRegionMask = cv::Scalar_(0); - m_oBlinksFrame.create(m_oImgSize,CV_8UC1); - m_oBlinksFrame = cv::Scalar_(0); - m_oDownSampledFrame_MotionAnalysis.create(m_oDownSampledFrameSize,CV_8UC((int)m_nImgChannels)); - m_oDownSampledFrame_MotionAnalysis = cv::Scalar_::all(0); - m_oLastColorFrame.create(m_oImgSize,CV_8UC((int)m_nImgChannels)); - m_oLastColorFrame = cv::Scalar_::all(0); - m_oLastDescFrame.create(m_oImgSize,CV_16UC((int)m_nImgChannels)); - m_oLastDescFrame = cv::Scalar_::all(0); - m_oLastRawFGMask.create(m_oImgSize,CV_8UC1); - m_oLastRawFGMask = cv::Scalar_(0); - m_oLastFGMask.create(m_oImgSize,CV_8UC1); - m_oLastFGMask = cv::Scalar_(0); - m_oLastFGMask_dilated.create(m_oImgSize,CV_8UC1); - m_oLastFGMask_dilated = cv::Scalar_(0); - m_oLastFGMask_dilated_inverted.create(m_oImgSize,CV_8UC1); - m_oLastFGMask_dilated_inverted = cv::Scalar_(0); - m_oFGMask_FloodedHoles.create(m_oImgSize,CV_8UC1); - m_oFGMask_FloodedHoles = cv::Scalar_(0); - m_oFGMask_PreFlood.create(m_oImgSize,CV_8UC1); - m_oFGMask_PreFlood = cv::Scalar_(0); - m_oCurrRawFGBlinkMask.create(m_oImgSize,CV_8UC1); - m_oCurrRawFGBlinkMask = cv::Scalar_(0); - m_oLastRawFGBlinkMask.create(m_oImgSize,CV_8UC1); - m_oLastRawFGBlinkMask = cv::Scalar_(0); - m_voBGColorSamples.resize(m_nBGSamples); - m_voBGDescSamples.resize(m_nBGSamples); - for(size_t s=0; s::all(0); - m_voBGDescSamples[s].create(m_oImgSize,CV_16UC((int)m_nImgChannels)); - m_voBGDescSamples[s] = cv::Scalar_::all(0); - } - if(m_aPxIdxLUT) - 
delete[] m_aPxIdxLUT; - if(m_aPxInfoLUT) - delete[] m_aPxInfoLUT; - m_aPxIdxLUT = new size_t[m_nTotRelevantPxCount]; - m_aPxInfoLUT = new PxInfoBase[m_nTotPxCount]; - if(m_nImgChannels==1) { - CV_Assert(m_oLastColorFrame.step.p[0]==(size_t)m_oImgSize.width && m_oLastColorFrame.step.p[1]==1); - CV_Assert(m_oLastDescFrame.step.p[0]==m_oLastColorFrame.step.p[0]*2 && m_oLastDescFrame.step.p[1]==m_oLastColorFrame.step.p[1]*2); - for(size_t t=0; t<=UCHAR_MAX; ++t) - m_anLBSPThreshold_8bitLUT[t] = cv::saturate_cast((m_nLBSPThresholdOffset+t*m_fRelLBSPThreshold)/3); - for(size_t nPxIter=0, nModelIter=0; nPxIter(m_nLBSPThresholdOffset+t*m_fRelLBSPThreshold); - for(size_t nPxIter=0, nModelIter=0; nPxIter0.0f && fSamplesRefreshFrac<=1.0f); - const size_t nModelsToRefresh = fSamplesRefreshFrac<1.0f?(size_t)(fSamplesRefreshFrac*m_nBGSamples):m_nBGSamples; - const size_t nRefreshStartPos = fSamplesRefreshFrac<1.0f?rand()%m_nBGSamples:0; - if(m_nImgChannels==1) { - for(size_t nModelIter=0; nModelIter(oCurrFGMask.cols) * static_cast(oCurrFGMask.rows)); - size_t nNonZeroDescCount = 0; - const float fRollAvgFactor_LT = 1.0f/std::min(++m_nFrameIndex,m_nSamplesForMovingAvgs); - const float fRollAvgFactor_ST = 1.0f/std::min(m_nFrameIndex,m_nSamplesForMovingAvgs/4); - if(m_nImgChannels==1) { - for(size_t nModelIter=0; nModelIterUNSTABLE_REG_RDIST_MIN || (*pfCurrMeanRawSegmRes_LT-*pfCurrMeanFinalSegmRes_LT)>UNSTABLE_REG_RATIO_MIN || (*pfCurrMeanRawSegmRes_ST-*pfCurrMeanFinalSegmRes_ST)>UNSTABLE_REG_RATIO_MIN)?1:0; - size_t nGoodSamplesCount=0, nSampleIdx=0; - while(nGoodSamplesCountnCurrColorDistThreshold) - goto failedcheck1ch; - const ushort& nBGIntraDesc = *((ushort*)(m_voBGDescSamples[nSampleIdx].data+nDescIter)); - const size_t nIntraDescDist = hdist(nCurrIntraDesc,nBGIntraDesc); - LBSP::computeGrayscaleDescriptor(oInputImg,nBGColor,nCurrImgCoord_X,nCurrImgCoord_Y,m_anLBSPThreshold_8bitLUT[nBGColor],nCurrInterDesc); - const size_t nInterDescDist = hdist(nCurrInterDesc,nBGIntraDesc); 
- const size_t nDescDist = (nIntraDescDist+nInterDescDist)/2; - if(nDescDist>nCurrDescDistThreshold) - goto failedcheck1ch; - const size_t nSumDist = std::min((nDescDist/4)*(s_nColorMaxDataRange_1ch/s_nDescMaxDataRange_1ch)+nColorDist,s_nColorMaxDataRange_1ch); - if(nSumDist>nCurrColorDistThreshold) - goto failedcheck1ch; - if(nMinDescDist>nDescDist) - nMinDescDist = nDescDist; - if(nMinSumDist>nSumDist) - nMinSumDist = nSumDist; - nGoodSamplesCount++; - } - failedcheck1ch: - nSampleIdx++; - } - const float fNormalizedLastDist = ((float)L1dist(nLastColor,nCurrColor)/s_nColorMaxDataRange_1ch+(float)hdist(nLastIntraDesc,nCurrIntraDesc)/s_nDescMaxDataRange_1ch)/2; - *pfCurrMeanLastDist = (*pfCurrMeanLastDist)*(1.0f-fRollAvgFactor_ST) + fNormalizedLastDist*fRollAvgFactor_ST; - if(nGoodSamplesCount0?(size_t)ceil(learningRateOverride):(size_t)ceil(*pfCurrLearningRate); - if((rand()%nLearningRate)==0) { - const size_t s_rand = rand()%m_nBGSamples; - *((ushort*)(m_voBGDescSamples[s_rand].data+nDescIter)) = nCurrIntraDesc; - m_voBGColorSamples[s_rand].data[nPxIter] = nCurrColor; - } - int nSampleImgCoord_Y, nSampleImgCoord_X; - const bool bCurrUsing3x3Spread = m_bUse3x3Spread && !m_oUnstableRegionMask.data[nPxIter]; - if(bCurrUsing3x3Spread) - getRandNeighborPosition_3x3(nSampleImgCoord_X,nSampleImgCoord_Y,nCurrImgCoord_X,nCurrImgCoord_Y,LBSP::PATCH_SIZE/2,m_oImgSize); - else - getRandNeighborPosition_5x5(nSampleImgCoord_X,nSampleImgCoord_Y,nCurrImgCoord_X,nCurrImgCoord_Y,LBSP::PATCH_SIZE/2,m_oImgSize); - const size_t n_rand = rand(); - const size_t idx_rand_uchar = m_oImgSize.width*nSampleImgCoord_Y + nSampleImgCoord_X; - const size_t idx_rand_flt32 = idx_rand_uchar*4; - const float fRandMeanLastDist = *((float*)(m_oMeanLastDistFrame.data+idx_rand_flt32)); - const float fRandMeanRawSegmRes = *((float*)(m_oMeanRawSegmResFrame_ST.data+idx_rand_flt32)); - if((n_rand%(bCurrUsing3x3Spread?nLearningRate:(nLearningRate/2+1)))==0 - || (fRandMeanRawSegmRes>GHOSTDET_S_MIN && 
fRandMeanLastDistm_fCurrLearningRateLowerCap) - *pfCurrLearningRate -= FEEDBACK_T_DECR*(*pfCurrVariationFactor)/std::max(*pfCurrMeanMinDist_LT,*pfCurrMeanMinDist_ST); - if((*pfCurrLearningRate)m_fCurrLearningRateUpperCap) - *pfCurrLearningRate = m_fCurrLearningRateUpperCap; - if(std::max(*pfCurrMeanMinDist_LT,*pfCurrMeanMinDist_ST)>UNSTABLE_REG_RATIO_MIN && m_oBlinksFrame.data[nPxIter]) - (*pfCurrVariationFactor) += FEEDBACK_V_INCR; - else if((*pfCurrVariationFactor)>FEEDBACK_V_DECR) { - (*pfCurrVariationFactor) -= m_oLastFGMask.data[nPxIter]?FEEDBACK_V_DECR/4:m_oUnstableRegionMask.data[nPxIter]?FEEDBACK_V_DECR/2:FEEDBACK_V_DECR; - if((*pfCurrVariationFactor)=2) - ++nNonZeroDescCount; - nLastIntraDesc = nCurrIntraDesc; - nLastColor = nCurrColor; - } - } - else { //m_nImgChannels==3 - for(size_t nModelIter=0; nModelIterUNSTABLE_REG_RDIST_MIN || (*pfCurrMeanRawSegmRes_LT-*pfCurrMeanFinalSegmRes_LT)>UNSTABLE_REG_RATIO_MIN || (*pfCurrMeanRawSegmRes_ST-*pfCurrMeanFinalSegmRes_ST)>UNSTABLE_REG_RATIO_MIN)?1:0; - size_t nGoodSamplesCount=0, nSampleIdx=0; - while(nGoodSamplesCountnCurrSCColorDistThreshold) - goto failedcheck3ch; - const size_t nIntraDescDist = hdist(anCurrIntraDesc[c],anBGIntraDesc[c]); - LBSP::computeSingleRGBDescriptor(oInputImg,anBGColor[c],nCurrImgCoord_X,nCurrImgCoord_Y,c,m_anLBSPThreshold_8bitLUT[anBGColor[c]],anCurrInterDesc[c]); - const size_t nInterDescDist = hdist(anCurrInterDesc[c],anBGIntraDesc[c]); - const size_t nDescDist = (nIntraDescDist+nInterDescDist)/2; - const size_t nSumDist = std::min((nDescDist/2)*(s_nColorMaxDataRange_1ch/s_nDescMaxDataRange_1ch)+nColorDist,s_nColorMaxDataRange_1ch); - if(nSumDist>nCurrSCColorDistThreshold) - goto failedcheck3ch; - nTotDescDist += nDescDist; - nTotSumDist += nSumDist; - } - if(nTotDescDist>nCurrTotDescDistThreshold || nTotSumDist>nCurrTotColorDistThreshold) - goto failedcheck3ch; - if(nMinTotDescDist>nTotDescDist) - nMinTotDescDist = nTotDescDist; - if(nMinTotSumDist>nTotSumDist) - nMinTotSumDist = 
nTotSumDist; - nGoodSamplesCount++; - failedcheck3ch: - nSampleIdx++; - } - const float fNormalizedLastDist = ((float)L1dist<3>(anLastColor,anCurrColor)/s_nColorMaxDataRange_3ch+(float)hdist<3>(anLastIntraDesc,anCurrIntraDesc)/s_nDescMaxDataRange_3ch)/2; - *pfCurrMeanLastDist = (*pfCurrMeanLastDist)*(1.0f-fRollAvgFactor_ST) + fNormalizedLastDist*fRollAvgFactor_ST; - if(nGoodSamplesCount0?(size_t)ceil(learningRateOverride):(size_t)ceil(*pfCurrLearningRate); - if((rand()%nLearningRate)==0) { - const size_t s_rand = rand()%m_nBGSamples; - for(size_t c=0; c<3; ++c) { - *((ushort*)(m_voBGDescSamples[s_rand].data+nDescIterRGB+2*c)) = anCurrIntraDesc[c]; - *(m_voBGColorSamples[s_rand].data+nPxIterRGB+c) = anCurrColor[c]; - } - } - int nSampleImgCoord_Y, nSampleImgCoord_X; - const bool bCurrUsing3x3Spread = m_bUse3x3Spread && !m_oUnstableRegionMask.data[nPxIter]; - if(bCurrUsing3x3Spread) - getRandNeighborPosition_3x3(nSampleImgCoord_X,nSampleImgCoord_Y,nCurrImgCoord_X,nCurrImgCoord_Y,LBSP::PATCH_SIZE/2,m_oImgSize); - else - getRandNeighborPosition_5x5(nSampleImgCoord_X,nSampleImgCoord_Y,nCurrImgCoord_X,nCurrImgCoord_Y,LBSP::PATCH_SIZE/2,m_oImgSize); - const size_t n_rand = rand(); - const size_t idx_rand_uchar = m_oImgSize.width*nSampleImgCoord_Y + nSampleImgCoord_X; - const size_t idx_rand_flt32 = idx_rand_uchar*4; - const float fRandMeanLastDist = *((float*)(m_oMeanLastDistFrame.data+idx_rand_flt32)); - const float fRandMeanRawSegmRes = *((float*)(m_oMeanRawSegmResFrame_ST.data+idx_rand_flt32)); - if((n_rand%(bCurrUsing3x3Spread?nLearningRate:(nLearningRate/2+1)))==0 - || (fRandMeanRawSegmRes>GHOSTDET_S_MIN && fRandMeanLastDistm_fCurrLearningRateLowerCap) - *pfCurrLearningRate -= FEEDBACK_T_DECR*(*pfCurrVariationFactor)/std::max(*pfCurrMeanMinDist_LT,*pfCurrMeanMinDist_ST); - if((*pfCurrLearningRate)m_fCurrLearningRateUpperCap) - *pfCurrLearningRate = m_fCurrLearningRateUpperCap; - if(std::max(*pfCurrMeanMinDist_LT,*pfCurrMeanMinDist_ST)>UNSTABLE_REG_RATIO_MIN && 
m_oBlinksFrame.data[nPxIter]) - (*pfCurrVariationFactor) += FEEDBACK_V_INCR; - else if((*pfCurrVariationFactor)>FEEDBACK_V_DECR) { - (*pfCurrVariationFactor) -= m_oLastFGMask.data[nPxIter]?FEEDBACK_V_DECR/4:m_oUnstableRegionMask.data[nPxIter]?FEEDBACK_V_DECR/2:FEEDBACK_V_DECR; - if((*pfCurrVariationFactor)(anCurrIntraDesc)>=4) - ++nNonZeroDescCount; - for(size_t c=0; c<3; ++c) { - anLastIntraDesc[c] = anCurrIntraDesc[c]; - anLastColor[c] = anCurrColor[c]; - } - } - } -#if DISPLAY_SUBSENSE_DEBUG_INFO - std::cout << std::endl; - cv::Point dbgpt(nDebugCoordX,nDebugCoordY); - cv::Mat oMeanMinDistFrameNormalized; m_oMeanMinDistFrame_ST.copyTo(oMeanMinDistFrameNormalized); - cv::circle(oMeanMinDistFrameNormalized,dbgpt,5,cv::Scalar(1.0f)); - cv::resize(oMeanMinDistFrameNormalized,oMeanMinDistFrameNormalized,DEFAULT_FRAME_SIZE); - cv::imshow("d_min(x)",oMeanMinDistFrameNormalized); - std::cout << std::fixed << std::setprecision(5) << " d_min(" << dbgpt << ") = " << m_oMeanMinDistFrame_ST.at(dbgpt) << std::endl; - cv::Mat oMeanLastDistFrameNormalized; m_oMeanLastDistFrame.copyTo(oMeanLastDistFrameNormalized); - cv::circle(oMeanLastDistFrameNormalized,dbgpt,5,cv::Scalar(1.0f)); - cv::resize(oMeanLastDistFrameNormalized,oMeanLastDistFrameNormalized,DEFAULT_FRAME_SIZE); - cv::imshow("d_last(x)",oMeanLastDistFrameNormalized); - std::cout << std::fixed << std::setprecision(5) << " d_last(" << dbgpt << ") = " << m_oMeanLastDistFrame.at(dbgpt) << std::endl; - cv::Mat oMeanRawSegmResFrameNormalized; m_oMeanRawSegmResFrame_ST.copyTo(oMeanRawSegmResFrameNormalized); - cv::circle(oMeanRawSegmResFrameNormalized,dbgpt,5,cv::Scalar(1.0f)); - cv::resize(oMeanRawSegmResFrameNormalized,oMeanRawSegmResFrameNormalized,DEFAULT_FRAME_SIZE); - cv::imshow("s_avg(x)",oMeanRawSegmResFrameNormalized); - std::cout << std::fixed << std::setprecision(5) << " s_avg(" << dbgpt << ") = " << m_oMeanRawSegmResFrame_ST.at(dbgpt) << std::endl; - cv::Mat oMeanFinalSegmResFrameNormalized; 
m_oMeanFinalSegmResFrame_ST.copyTo(oMeanFinalSegmResFrameNormalized); - cv::circle(oMeanFinalSegmResFrameNormalized,dbgpt,5,cv::Scalar(1.0f)); - cv::resize(oMeanFinalSegmResFrameNormalized,oMeanFinalSegmResFrameNormalized,DEFAULT_FRAME_SIZE); - cv::imshow("z_avg(x)",oMeanFinalSegmResFrameNormalized); - std::cout << std::fixed << std::setprecision(5) << " z_avg(" << dbgpt << ") = " << m_oMeanFinalSegmResFrame_ST.at(dbgpt) << std::endl; - cv::Mat oDistThresholdFrameNormalized; m_oDistThresholdFrame.convertTo(oDistThresholdFrameNormalized,CV_32FC1,0.25f,-0.25f); - cv::circle(oDistThresholdFrameNormalized,dbgpt,5,cv::Scalar(1.0f)); - cv::resize(oDistThresholdFrameNormalized,oDistThresholdFrameNormalized,DEFAULT_FRAME_SIZE); - cv::imshow("r(x)",oDistThresholdFrameNormalized); - std::cout << std::fixed << std::setprecision(5) << " r(" << dbgpt << ") = " << m_oDistThresholdFrame.at(dbgpt) << std::endl; - cv::Mat oVariationModulatorFrameNormalized; cv::normalize(m_oVariationModulatorFrame,oVariationModulatorFrameNormalized,0,255,cv::NORM_MINMAX,CV_8UC1); - cv::circle(oVariationModulatorFrameNormalized,dbgpt,5,cv::Scalar(255)); - cv::resize(oVariationModulatorFrameNormalized,oVariationModulatorFrameNormalized,DEFAULT_FRAME_SIZE); - cv::imshow("v(x)",oVariationModulatorFrameNormalized); - std::cout << std::fixed << std::setprecision(5) << " v(" << dbgpt << ") = " << m_oVariationModulatorFrame.at(dbgpt) << std::endl; - cv::Mat oUpdateRateFrameNormalized; m_oUpdateRateFrame.convertTo(oUpdateRateFrameNormalized,CV_32FC1,1.0f/FEEDBACK_T_UPPER,-FEEDBACK_T_LOWER/FEEDBACK_T_UPPER); - cv::circle(oUpdateRateFrameNormalized,dbgpt,5,cv::Scalar(1.0f)); - cv::resize(oUpdateRateFrameNormalized,oUpdateRateFrameNormalized,DEFAULT_FRAME_SIZE); - cv::imshow("t(x)",oUpdateRateFrameNormalized); - std::cout << std::fixed << std::setprecision(5) << " t(" << dbgpt << ") = " << m_oUpdateRateFrame.at(dbgpt) << std::endl; -#endif //DISPLAY_SUBSENSE_DEBUG_INFO - 
cv::bitwise_xor(oCurrFGMask,m_oLastRawFGMask,m_oCurrRawFGBlinkMask); - cv::bitwise_or(m_oCurrRawFGBlinkMask,m_oLastRawFGBlinkMask,m_oBlinksFrame); - m_oCurrRawFGBlinkMask.copyTo(m_oLastRawFGBlinkMask); - oCurrFGMask.copyTo(m_oLastRawFGMask); - cv::morphologyEx(oCurrFGMask,m_oFGMask_PreFlood,cv::MORPH_CLOSE, m_defaultMorphologyKernel); - m_oFGMask_PreFlood.copyTo(m_oFGMask_FloodedHoles); - cv::floodFill(m_oFGMask_FloodedHoles,cv::Point(0,0),UCHAR_MAX); - cv::bitwise_not(m_oFGMask_FloodedHoles,m_oFGMask_FloodedHoles); - cv::erode(m_oFGMask_PreFlood,m_oFGMask_PreFlood,m_defaultMorphologyKernel,cv::Point(-1,-1),3); - cv::bitwise_or(oCurrFGMask,m_oFGMask_FloodedHoles,oCurrFGMask); - cv::bitwise_or(oCurrFGMask,m_oFGMask_PreFlood,oCurrFGMask); - cv::medianBlur(oCurrFGMask,m_oLastFGMask,m_nMedianBlurKernelSize); - cv::dilate(m_oLastFGMask,m_oLastFGMask_dilated,m_defaultMorphologyKernel,cv::Point(-1,-1),3); - cv::bitwise_and(m_oBlinksFrame,m_oLastFGMask_dilated_inverted,m_oBlinksFrame); - cv::bitwise_not(m_oLastFGMask_dilated,m_oLastFGMask_dilated_inverted); - cv::bitwise_and(m_oBlinksFrame,m_oLastFGMask_dilated_inverted,m_oBlinksFrame); - m_oLastFGMask.copyTo(oCurrFGMask); - cv::addWeighted(m_oMeanFinalSegmResFrame_LT,(1.0f-fRollAvgFactor_LT),m_oLastFGMask,(1.0/UCHAR_MAX)*fRollAvgFactor_LT,0,m_oMeanFinalSegmResFrame_LT,CV_32F); - cv::addWeighted(m_oMeanFinalSegmResFrame_ST,(1.0f-fRollAvgFactor_ST),m_oLastFGMask,(1.0/UCHAR_MAX)*fRollAvgFactor_ST,0,m_oMeanFinalSegmResFrame_ST,CV_32F); - const float fCurrNonZeroDescRatio = (float)nNonZeroDescCount/m_nTotRelevantPxCount; - if(fCurrNonZeroDescRatiocv::saturate_cast(m_nLBSPThresholdOffset+ceil(t*m_fRelLBSPThreshold/4))) - --m_anLBSPThreshold_8bitLUT[t]; - } - else if(fCurrNonZeroDescRatio>LBSPDESC_NONZERO_RATIO_MAX && m_fLastNonZeroDescRatio>LBSPDESC_NONZERO_RATIO_MAX) { - for(size_t t=0; t<=UCHAR_MAX; ++t) - if(m_anLBSPThreshold_8bitLUT[t](m_nLBSPThresholdOffset+UCHAR_MAX*m_fRelLBSPThreshold)) - ++m_anLBSPThreshold_8bitLUT[t]; 
- } - m_fLastNonZeroDescRatio = fCurrNonZeroDescRatio; - if(m_bLearningRateScalingEnabled) { - cv::resize(oInputImg,m_oDownSampledFrame_MotionAnalysis,m_oDownSampledFrameSize,0,0,cv::INTER_AREA); - cv::accumulateWeighted(m_oDownSampledFrame_MotionAnalysis,m_oMeanDownSampledLastDistFrame_LT,fRollAvgFactor_LT); - cv::accumulateWeighted(m_oDownSampledFrame_MotionAnalysis,m_oMeanDownSampledLastDistFrame_ST,fRollAvgFactor_ST); - size_t nTotColorDiff = 0; - for(int i=0; i1000) - m_bAutoModelResetEnabled = false; - else if(fCurrColorDiffRatio>=FRAMELEVEL_MIN_COLOR_DIFF_THRESHOLD && m_nModelResetCooldown==0) { - m_nFramesSinceLastReset = 0; - refreshModel(0.1f); // reset 10% of the bg model - m_nModelResetCooldown = m_nSamplesForMovingAvgs/4; - m_oUpdateRateFrame = cv::Scalar(1.0f); - } - else - ++m_nFramesSinceLastReset; - } - else if(fCurrColorDiffRatio>=FRAMELEVEL_MIN_COLOR_DIFF_THRESHOLD*2) { - m_nFramesSinceLastReset = 0; - m_bAutoModelResetEnabled = true; - } - if(fCurrColorDiffRatio>=FRAMELEVEL_MIN_COLOR_DIFF_THRESHOLD/2) { - m_fCurrLearningRateLowerCap = (float)std::max((int)FEEDBACK_T_LOWER>>(int)(fCurrColorDiffRatio/2),1); - m_fCurrLearningRateUpperCap = (float)std::max((int)FEEDBACK_T_UPPER>>(int)(fCurrColorDiffRatio/2),1); - } - else { - m_fCurrLearningRateLowerCap = FEEDBACK_T_LOWER; - m_fCurrLearningRateUpperCap = FEEDBACK_T_UPPER; - } - if(m_nModelResetCooldown>0) - --m_nModelResetCooldown; - } -} - -void BackgroundSubtractorSuBSENSE::getBackgroundImage(cv::OutputArray backgroundImage) const { - CV_Assert(m_bInitialized); - cv::Mat oAvgBGImg = cv::Mat::zeros(m_oImgSize,CV_32FC((int)m_nImgChannels)); - for(size_t s=0; s m_voBGColorSamples; - //! background model descriptors samples - std::vector m_voBGDescSamples; - - //! per-pixel update rates ('T(x)' in PBAS, which contains pixel-level 'sigmas', as referred to in ViBe) - cv::Mat m_oUpdateRateFrame; - //! 
per-pixel distance thresholds (equivalent to 'R(x)' in PBAS, but used as a relative value to determine both intensity and descriptor variation thresholds) - cv::Mat m_oDistThresholdFrame; - //! per-pixel distance variation modulators ('v(x)', relative value used to modulate 'R(x)' and 'T(x)' variations) - cv::Mat m_oVariationModulatorFrame; - //! per-pixel mean distances between consecutive frames ('D_last(x)', used to detect ghosts and high variation regions in the sequence) - cv::Mat m_oMeanLastDistFrame; - //! per-pixel mean minimal distances from the model ('D_min(x)' in PBAS, used to control variation magnitude and direction of 'T(x)' and 'R(x)') - cv::Mat m_oMeanMinDistFrame_LT, m_oMeanMinDistFrame_ST; - //! per-pixel mean downsampled distances between consecutive frames (used to analyze camera movement and control max learning rates globally) - cv::Mat m_oMeanDownSampledLastDistFrame_LT, m_oMeanDownSampledLastDistFrame_ST; - //! per-pixel mean raw segmentation results (used to detect unstable segmentation regions) - cv::Mat m_oMeanRawSegmResFrame_LT, m_oMeanRawSegmResFrame_ST; - //! per-pixel mean raw segmentation results (used to detect unstable segmentation regions) - cv::Mat m_oMeanFinalSegmResFrame_LT, m_oMeanFinalSegmResFrame_ST; - //! a lookup map used to keep track of unstable regions (based on segm. noise & local dist. thresholds) - cv::Mat m_oUnstableRegionMask; - //! per-pixel blink detection map ('Z(x)') - cv::Mat m_oBlinksFrame; - //! pre-allocated matrix used to downsample the input frame when needed - cv::Mat m_oDownSampledFrame_MotionAnalysis; - //! the foreground mask generated by the method at [t-1] (without post-proc, used for blinking px detection) - cv::Mat m_oLastRawFGMask; - - //! 
pre-allocated CV_8UC1 matrices used to speed up morph ops - cv::Mat m_oFGMask_PreFlood; - cv::Mat m_oFGMask_FloodedHoles; - cv::Mat m_oLastFGMask_dilated; - cv::Mat m_oLastFGMask_dilated_inverted; - cv::Mat m_oCurrRawFGBlinkMask; - cv::Mat m_oLastRawFGBlinkMask; - - //! default kernel for morphology operations - cv::Mat m_defaultMorphologyKernel; -}; - diff --git a/src/Detector/Subsense/DistanceUtils.h b/src/Detector/Subsense/DistanceUtils.h deleted file mode 100644 index 54d1cecd9..000000000 --- a/src/Detector/Subsense/DistanceUtils.h +++ /dev/null @@ -1,316 +0,0 @@ -#pragma once - -#include - -//! computes the L1 distance between two integer values -template static inline typename std::enable_if::value,size_t>::type L1dist(T a, T b) { - return (size_t)abs((int)a-b); -} - -//! computes the L1 distance between two float values -template static inline typename std::enable_if::value,float>::type L1dist(T a, T b) { - return fabs((float)a-(float)b); -} - -//! computes the L1 distance between two generic arrays -template static inline auto L1dist(const T* a, const T* b) -> decltype(L1dist(*a,*b)) { - decltype(L1dist(*a,*b)) oResult = 0; - for(size_t c=0; c static inline auto L1dist(const T* a, const T* b, size_t nElements, const uchar* m=NULL) -> decltype(L1dist(a,b)) { - decltype(L1dist(a,b)) oResult = 0; - size_t nTotElements = nElements*nChannels; - if(m) { - for(size_t n=0,i=0; n(a+n,b+n); - } - else { - for(size_t n=0; n(a+n,b+n); - } - return oResult; -} - -//! computes the L1 distance between two generic arrays -template static inline auto L1dist(const T* a, const T* b, size_t nElements, size_t nChannels, const uchar* m=NULL) -> decltype(L1dist<3>(a,b,nElements,m)) { - CV_Assert(nChannels>0 && nChannels<=4); - switch(nChannels) { - case 1: return L1dist<1>(a,b,nElements,m); - case 2: return L1dist<2>(a,b,nElements,m); - case 3: return L1dist<3>(a,b,nElements,m); - case 4: return L1dist<4>(a,b,nElements,m); - default: return 0; - } -} - -//! 
computes the L1 distance between two opencv vectors -template static inline auto L1dist_(const cv::Vec& a, const cv::Vec& b) -> decltype(L1dist((T*)(0),(T*)(0))) { - T a_array[nChannels], b_array[nChannels]; - for(size_t c=0; c(a_array,b_array); -} - -/////////////////////////////////////////////////////////////////////////////////////////////////// - -//! computes the squared L2 distance between two generic variables -template static inline auto L2sqrdist(T a, T b) -> decltype(L1dist(a,b)) { - auto oResult = L1dist(a,b); - return oResult*oResult; -} - -//! computes the squared L2 distance between two generic arrays -template static inline auto L2sqrdist(const T* a, const T* b) -> decltype(L2sqrdist(*a,*b)) { - decltype(L2sqrdist(*a,*b)) oResult = 0; - for(size_t c=0; c static inline auto L2sqrdist(const T* a, const T* b, size_t nElements, const uchar* m=NULL) -> decltype(L2sqrdist(a,b)) { - decltype(L2sqrdist(a,b)) oResult = 0; - size_t nTotElements = nElements*nChannels; - if(m) { - for(size_t n=0,i=0; n(a+n,b+n); - } - else { - for(size_t n=0; n(a+n,b+n); - } - return oResult; -} - -//! computes the squared L2 distance between two generic arrays -template static inline auto L2sqrdist(const T* a, const T* b, size_t nElements, size_t nChannels, const uchar* m=NULL) -> decltype(L2sqrdist<3>(a,b,nElements,m)) { - CV_Assert(nChannels>0 && nChannels<=4); - switch(nChannels) { - case 1: return L2sqrdist<1>(a,b,nElements,m); - case 2: return L2sqrdist<2>(a,b,nElements,m); - case 3: return L2sqrdist<3>(a,b,nElements,m); - case 4: return L2sqrdist<4>(a,b,nElements,m); - default: return 0; - } -} - -//! computes the squared L2 distance between two opencv vectors -template static inline auto L2sqrdist_(const cv::Vec& a, const cv::Vec& b) -> decltype(L2sqrdist((T*)(0),(T*)(0))) { - T a_array[nChannels], b_array[nChannels]; - for(size_t c=0; c(a_array,b_array); -} - -//! 
computes the L2 distance between two generic arrays -template static inline float L2dist(const T* a, const T* b) { - decltype(L2sqrdist(*a,*b)) oResult = 0; - for(size_t c=0; c static inline float L2dist(const T* a, const T* b, size_t nElements, const uchar* m=NULL) { - decltype(L2sqrdist(a,b)) oResult = 0; - size_t nTotElements = nElements*nChannels; - if(m) { - for(size_t n=0,i=0; n(a+n,b+n); - } - else { - for(size_t n=0; n(a+n,b+n); - } - return sqrt((float)oResult); -} - -//! computes the squared L2 distance between two generic arrays -template static inline float L2dist(const T* a, const T* b, size_t nElements, size_t nChannels, const uchar* m=NULL) { - CV_Assert(nChannels>0 && nChannels<=4); - switch(nChannels) { - case 1: return L2dist<1>(a,b,nElements,m); - case 2: return L2dist<2>(a,b,nElements,m); - case 3: return L2dist<3>(a,b,nElements,m); - case 4: return L2dist<4>(a,b,nElements,m); - default: return 0; - } -} - -//! computes the L2 distance between two opencv vectors -template static inline float L2dist_(const cv::Vec& a, const cv::Vec& b) { - T a_array[nChannels], b_array[nChannels]; - for(size_t c=0; c(a_array,b_array); -} - -/////////////////////////////////////////////////////////////////////////////////////////////////// - -//! 
computes the color distortion between two integer arrays -template static inline typename std::enable_if::value,size_t>::type cdist(const T* curr, const T* bg) { - static_assert(nChannels>1,"cdist: requires more than one channel"); - size_t curr_sqr = 0; - bool bSkip = true; - for(size_t c=0; c static inline typename std::enable_if::value,float>::type cdist(const T* curr, const T* bg) { - static_assert(nChannels>1,"cdist: requires more than one channel"); - float curr_sqr = 0; - bool bSkip = true; - for(size_t c=0; c static inline auto cdist(const T* a, const T* b, size_t nElements, const uchar* m=NULL) -> decltype(cdist(a,b)) { - decltype(cdist(a,b)) oResult = 0; - size_t nTotElements = nElements*nChannels; - if(m) { - for(size_t n=0,i=0; n(a+n,b+n); - } - else { - for(size_t n=0; n(a+n,b+n); - } - return oResult; -} - -//! computes the color distortion between two generic arrays -template static inline auto cdist(const T* a, const T* b, size_t nElements, size_t nChannels, const uchar* m=NULL) -> decltype(cdist<3>(a,b,nElements,m)) { - CV_Assert(nChannels>1 && nChannels<=4); - switch(nChannels) { - case 2: return cdist<2>(a,b,nElements,m); - case 3: return cdist<3>(a,b,nElements,m); - case 4: return cdist<4>(a,b,nElements,m); - default: return 0; - } -} - -//! computes the color distortion between two opencv vectors -template static inline auto cdist_(const cv::Vec& a, const cv::Vec& b) -> decltype(cdist((T*)(0),(T*)(0))) { - T a_array[nChannels], b_array[nChannels]; - for(size_t c=0; c(a_array,b_array); -} - -//! computes a color distortion-distance mix using two generic distances -template static inline T cmixdist(T oL1Distance, T oCDistortion) { - return (oL1Distance/2+oCDistortion*4); -} - -//! 
computes a color distoirtion-distance mix using two generic arrays -template static inline typename std::enable_if::value,size_t>::type cmixdist(const T* curr, const T* bg) { - return cmixdist(L1dist(curr,bg),cdist(curr,bg)); -} - -template static inline typename std::enable_if::value,float>::type cmixdist(const T* curr, const T* bg) { - return cmixdist(L1dist(curr,bg),cdist(curr,bg)); -} - -/////////////////////////////////////////////////////////////////////////////////////////////////// - -//! popcount LUT for 8-bit vectors -static const uchar popcount_LUT8[256] = { - 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8, -}; - -//! computes the population count of an N-byte vector using an 8-bit popcount LUT -template static inline size_t popcount(T x) { - size_t nBytes = sizeof(T); - size_t nResult = 0; - for(size_t l=0; l>l*8)]; - return nResult; -} - -//! computes the hamming distance between two N-byte vectors using an 8-bit popcount LUT -template static inline size_t hdist(T a, T b) { - return popcount(a^b); -} - -//! computes the gradient magnitude distance between two N-byte vectors using an 8-bit popcount LUT -template static inline size_t gdist(T a, T b) { - return L1dist(popcount(a),popcount(b)); -} - -//! 
computes the population count of a (nChannels*N)-byte vector using an 8-bit popcount LUT -template static inline size_t popcount(const T* x) { - size_t nBytes = sizeof(T); - size_t nResult = 0; - for(size_t c=0; c>l*8)]; - return nResult; -} - -//! computes the hamming distance between two (nChannels*N)-byte vectors using an 8-bit popcount LUT -template static inline size_t hdist(const T* a, const T* b) { - T xor_array[nChannels]; - for(size_t c=0; c(xor_array); -} - -//! computes the gradient magnitude distance between two (nChannels*N)-byte vectors using an 8-bit popcount LUT -template static inline size_t gdist(const T* a, const T* b) { - return L1dist(popcount(a),popcount(b)); -} diff --git a/src/Detector/Subsense/LBSP.cpp b/src/Detector/Subsense/LBSP.cpp deleted file mode 100644 index 6c8f65574..000000000 --- a/src/Detector/Subsense/LBSP.cpp +++ /dev/null @@ -1,316 +0,0 @@ -#include "LBSP.h" - -LBSP::LBSP(size_t nThreshold) - : m_bOnlyUsingAbsThreshold(true) - ,m_fRelThreshold(0) // unused - ,m_nThreshold(nThreshold) - ,m_oRefImage() {} - -LBSP::LBSP(float fRelThreshold, size_t nThresholdOffset) - : m_bOnlyUsingAbsThreshold(false) - ,m_fRelThreshold(fRelThreshold) - ,m_nThreshold(nThresholdOffset) - ,m_oRefImage() { - CV_Assert(m_fRelThreshold>=0); -} - -void LBSP::read(const cv::FileNode& /*fn*/) { - // ... = fn["..."]; -} - -void LBSP::write(cv::FileStorage& /*fs*/) const { - //fs << "..." 
<< ...; -} - -void LBSP::setReference(const cv::Mat& img) { - CV_DbgAssert(img.empty() || img.type()==CV_8UC1 || img.type()==CV_8UC3); - m_oRefImage = img; -} - -int LBSP::descriptorSize() const { - return DESC_SIZE; -} - -int LBSP::descriptorType() const { - return CV_16U; -} - -bool LBSP::isUsingRelThreshold() const { - return !m_bOnlyUsingAbsThreshold; -} - -float LBSP::getRelThreshold() const { - return m_fRelThreshold; -} - -size_t LBSP::getAbsThreshold() const { - return m_nThreshold; -} - -static inline void lbsp_computeImpl( const cv::Mat& oInputImg, - const cv::Mat& oRefImg, - const std::vector& voKeyPoints, - cv::Mat& oDesc, - size_t _t) { - CV_DbgAssert(oRefImg.empty() || (oRefImg.size==oInputImg.size && oRefImg.type()==oInputImg.type())); - CV_DbgAssert(oInputImg.type()==CV_8UC1 || oInputImg.type()==CV_8UC3); - CV_DbgAssert(LBSP::DESC_SIZE==2); // @@@ also relies on a constant desc size - const size_t nChannels = (size_t)oInputImg.channels(); - const size_t _step_row = oInputImg.step.p[0]; - const uchar* _data = oInputImg.data; - const uchar* _refdata = oRefImg.empty()?oInputImg.data:oRefImg.data; - const size_t nKeyPoints = voKeyPoints.size(); - if(nChannels==1) { - oDesc.create((int)nKeyPoints,1,CV_16UC1); - for(size_t k=0; k((int)k); - #include "LBSP_16bits_dbcross_1ch.i" - } - } - else { //nChannels==3 - oDesc.create((int)nKeyPoints,1,CV_16UC3); - for(size_t k=0; k& voKeyPoints, - cv::Mat& oDesc, - float fThreshold, - size_t nThresholdOffset) { - CV_DbgAssert(oRefImg.empty() || (oRefImg.size==oInputImg.size && oRefImg.type()==oInputImg.type())); - CV_DbgAssert(oInputImg.type()==CV_8UC1 || oInputImg.type()==CV_8UC3); - CV_DbgAssert(LBSP::DESC_SIZE==2); // @@@ also relies on a constant desc size - CV_DbgAssert(fThreshold>=0); - const size_t nChannels = (size_t)oInputImg.channels(); - const size_t _step_row = oInputImg.step.p[0]; - const uchar* _data = oInputImg.data; - const uchar* _refdata = oRefImg.empty()?oInputImg.data:oRefImg.data; - const size_t 
nKeyPoints = voKeyPoints.size(); - if(nChannels==1) { - oDesc.create((int)nKeyPoints,1,CV_16UC1); - for(size_t k=0; k((int)k); - const size_t _t = (size_t)(_ref*fThreshold)+nThresholdOffset; - #include "LBSP_16bits_dbcross_1ch.i" - } - } - else { //nChannels==3 - oDesc.create((int)nKeyPoints,1,CV_16UC3); - for(size_t k=0; k& voKeyPoints, - cv::Mat& oDesc, - size_t _t) { - CV_DbgAssert(oRefImg.empty() || (oRefImg.size==oInputImg.size && oRefImg.type()==oInputImg.type())); - CV_DbgAssert(oInputImg.type()==CV_8UC1 || oInputImg.type()==CV_8UC3); - CV_DbgAssert(LBSP::DESC_SIZE==2); // @@@ also relies on a constant desc size - const size_t nChannels = (size_t)oInputImg.channels(); - const size_t _step_row = oInputImg.step.p[0]; - const uchar* _data = oInputImg.data; - const uchar* _refdata = oRefImg.empty()?oInputImg.data:oRefImg.data; - const size_t nKeyPoints = voKeyPoints.size(); - if(nChannels==1) { - oDesc.create(oInputImg.size(),CV_16UC1); - for(size_t k=0; k(_y,_x); - #include "LBSP_16bits_dbcross_1ch.i" - } - } - else { //nChannels==3 - oDesc.create(oInputImg.size(),CV_16UC3); - for(size_t k=0; k& voKeyPoints, - cv::Mat& oDesc, - float fThreshold, - size_t nThresholdOffset) { - CV_DbgAssert(oRefImg.empty() || (oRefImg.size==oInputImg.size && oRefImg.type()==oInputImg.type())); - CV_DbgAssert(oInputImg.type()==CV_8UC1 || oInputImg.type()==CV_8UC3); - CV_DbgAssert(LBSP::DESC_SIZE==2); // @@@ also relies on a constant desc size - CV_DbgAssert(fThreshold>=0); - const size_t nChannels = (size_t)oInputImg.channels(); - const size_t _step_row = oInputImg.step.p[0]; - const uchar* _data = oInputImg.data; - const uchar* _refdata = oRefImg.empty()?oInputImg.data:oRefImg.data; - const size_t nKeyPoints = voKeyPoints.size(); - if(nChannels==1) { - oDesc.create(oInputImg.size(),CV_16UC1); - for(size_t k=0; k(_y,_x); - const size_t _t = (size_t)(_ref*fThreshold)+nThresholdOffset; - #include "LBSP_16bits_dbcross_1ch.i" - } - } - else { //nChannels==3 - 
oDesc.create(oInputImg.size(),CV_16UC3); - for(size_t k=0; k& voKeypoints, cv::Mat& oDescriptors) const { - CV_Assert(!oImage.empty()); - cv::KeyPointsFilter::runByImageBorder(voKeypoints,oImage.size(),PATCH_SIZE/2); - cv::KeyPointsFilter::runByKeypointSize(voKeypoints,std::numeric_limits::epsilon()); - if(voKeypoints.empty()) { - oDescriptors.release(); - return; - } - if(m_bOnlyUsingAbsThreshold) - lbsp_computeImpl2(oImage,m_oRefImage,voKeypoints,oDescriptors,m_nThreshold); - else - lbsp_computeImpl2(oImage,m_oRefImage,voKeypoints,oDescriptors,m_fRelThreshold,m_nThreshold); -} - -void LBSP::compute2(const std::vector& voImageCollection, std::vector >& vvoPointCollection, std::vector& voDescCollection) const { - CV_Assert(voImageCollection.size() == vvoPointCollection.size()); - voDescCollection.resize(voImageCollection.size()); - for(size_t i=0; i& voKeypoints, cv::Mat& oDescriptors) const { - CV_Assert(!oImage.empty()); - cv::KeyPointsFilter::runByImageBorder(voKeypoints,oImage.size(),PATCH_SIZE/2); - cv::KeyPointsFilter::runByKeypointSize(voKeypoints,std::numeric_limits::epsilon()); - if(voKeypoints.empty()) { - oDescriptors.release(); - return; - } - if(m_bOnlyUsingAbsThreshold) - lbsp_computeImpl(oImage,m_oRefImage,voKeypoints,oDescriptors,m_nThreshold); - else - lbsp_computeImpl(oImage,m_oRefImage,voKeypoints,oDescriptors,m_fRelThreshold,m_nThreshold); -} - -void LBSP::reshapeDesc(cv::Size oSize, const std::vector& voKeypoints, const cv::Mat& oDescriptors, cv::Mat& oOutput) { - CV_DbgAssert(!voKeypoints.empty()); - CV_DbgAssert(!oDescriptors.empty() && oDescriptors.cols==1); - CV_DbgAssert(oSize.width>0 && oSize.height>0); - CV_DbgAssert(DESC_SIZE==2); // @@@ also relies on a constant desc size - CV_DbgAssert(oDescriptors.type()==CV_16UC1 || oDescriptors.type()==CV_16UC3); - const size_t nChannels = (size_t)oDescriptors.channels(); - const size_t nKeyPoints = voKeypoints.size(); - if(nChannels==1) { - oOutput.create(oSize,CV_16UC1); - oOutput = 
cv::Scalar_(0); - for(size_t k=0; k(voKeypoints[k].pt) = oDescriptors.at((int)k); - } - else { //nChannels==3 - oOutput.create(oSize,CV_16UC3); - oOutput = cv::Scalar_(0,0,0); - for(size_t k=0; k(i,j) = (uchar)(fScaleFactor*hdist(desc1_ptr[j],desc2_ptr[j])); - } - } - else { //nChannels==3 - if(bForceMergeChannels) - oOutput.create(oDesc1.size(),CV_8UC1); - else - oOutput.create(oDesc1.size(),CV_8UC3); - oOutput = cv::Scalar::all(0); - for(int i=0; i& voKeypoints, cv::Size oImgSize) { - cv::KeyPointsFilter::runByImageBorder(voKeypoints,oImgSize,PATCH_SIZE/2); -} - -void LBSP::validateROI(cv::Mat& oROI) { - CV_Assert(!oROI.empty() && oROI.type()==CV_8UC1); - cv::Mat oROI_new(oROI.size(),CV_8UC1,cv::Scalar_(0)); - const size_t nBorderSize = PATCH_SIZE/2; - const cv::Rect nROI_inner(nBorderSize,nBorderSize,oROI.cols-nBorderSize*2,oROI.rows-nBorderSize*2); - cv::Mat(oROI,nROI_inner).copyTo(cv::Mat(oROI_new,nROI_inner)); - oROI = oROI_new; -} diff --git a/src/Detector/Subsense/LBSP.h b/src/Detector/Subsense/LBSP.h deleted file mode 100644 index a78d7123d..000000000 --- a/src/Detector/Subsense/LBSP.h +++ /dev/null @@ -1,118 +0,0 @@ -#pragma once - -#include -#include -#include "DistanceUtils.h" - -/*! - Local Binary Similarity Pattern (LBSP) feature extractor - - Note 1: both grayscale and RGB/BGR images may be used with this extractor. - Note 2: using LBSP::compute2(...) is logically equivalent to using LBSP::compute(...) followed by LBSP::reshapeDesc(...). - - For more details on the different parameters, see G.-A. Bilodeau et al, "Change Detection in Feature Space Using Local - Binary Similarity Patterns", in CRV 2013. - - This algorithm is currently NOT thread-safe. - */ -class LBSP : public cv::DescriptorExtractor -{ -public: - //! constructor 1, threshold = absolute intensity 'similarity' threshold used when computing comparisons - LBSP(size_t nThreshold); - //! 
constructor 2, threshold = relative intensity 'similarity' threshold used when computing comparisons - LBSP(float fRelThreshold, size_t nThresholdOffset=0); - //! default destructor - virtual ~LBSP() = default; - //! loads extractor params from the specified file node @@@@ not impl - virtual void read(const cv::FileNode&); - //! writes extractor params to the specified file storage @@@@ not impl - virtual void write(cv::FileStorage&) const; - //! sets the 'reference' image to be used for inter-frame comparisons (note: if no image is set or if the image is empty, the algorithm will default back to intra-frame comparisons) - virtual void setReference(const cv::Mat&); - //! returns the current descriptor size, in bytes - virtual int descriptorSize() const; - //! returns the current descriptor data type - virtual int descriptorType() const; - //! returns whether this extractor is using a relative threshold or not - virtual bool isUsingRelThreshold() const; - //! returns the current relative threshold used for comparisons (-1 = invalid/not used) - virtual float getRelThreshold() const; - //! returns the current absolute threshold used for comparisons (-1 = invalid/not used) - virtual size_t getAbsThreshold() const; - - //! similar to DescriptorExtractor::compute(const cv::Mat& image, ...), but in this case, the descriptors matrix has the same shape as the input matrix (possibly slower, but the result can be displayed) - void compute2(const cv::Mat& oImage, std::vector& voKeypoints, cv::Mat& oDescriptors) const; - //! batch version of LBSP::compute2(const cv::Mat& image, ...), also similar to DescriptorExtractor::compute(const std::vector& imageCollection, ...) - void compute2(const std::vector& voImageCollection, std::vector >& vvoPointCollection, std::vector& voDescCollection) const; - - //! 
utility function, shortcut/lightweight/direct single-point LBSP computation function for extra flexibility (1-channel version) - inline static void computeGrayscaleDescriptor(const cv::Mat& oInputImg, const uchar _ref, const int _x, const int _y, const size_t _t, ushort& _res) { - CV_DbgAssert(!oInputImg.empty()); - CV_DbgAssert(oInputImg.type()==CV_8UC1); - CV_DbgAssert(LBSP::DESC_SIZE==2); // @@@ also relies on a constant desc size - CV_DbgAssert(_x>=(int)LBSP::PATCH_SIZE/2 && _y>=(int)LBSP::PATCH_SIZE/2); - CV_DbgAssert(_x=(int)LBSP::PATCH_SIZE/2 && _y>=(int)LBSP::PATCH_SIZE/2); - CV_DbgAssert(_x=(int)LBSP::PATCH_SIZE/2 && _y>=(int)LBSP::PATCH_SIZE/2); - CV_DbgAssert(_x=(int)LBSP::PATCH_SIZE/2 && _y>=(int)LBSP::PATCH_SIZE/2); - CV_DbgAssert(_x& voKeypoints, const cv::Mat& oDescriptors, cv::Mat& oOutput); - //! utility function, used to illustrate the difference between two descriptor images - static void calcDescImgDiff(const cv::Mat& oDesc1, const cv::Mat& oDesc2, cv::Mat& oOutput, bool bForceMergeChannels=false); - //! utility function, used to filter out bad keypoints that would trigger out of bounds error because they're too close to the image border - static void validateKeyPoints(std::vector& voKeypoints, cv::Size oImgSize); - //! utility function, used to filter out bad pixels in a ROI that would trigger out of bounds error because they're too close to the image border - static void validateROI(cv::Mat& oROI); - //! utility, specifies the pixel size of the pattern used (width and height) - static constexpr size_t PATCH_SIZE = 5; - //! utility, specifies the number of bytes per descriptor (should be the same as calling 'descriptorSize()') - static constexpr size_t DESC_SIZE = 2; - -protected: - //! 
classic 'compute' implementation, based on the regular DescriptorExtractor::computeImpl arguments & expected output - virtual void computeImpl(const cv::Mat& oImage, std::vector& voKeypoints, cv::Mat& oDescriptors) const; - - const bool m_bOnlyUsingAbsThreshold; - const float m_fRelThreshold; - const size_t m_nThreshold; - cv::Mat m_oRefImage; -}; diff --git a/src/Detector/Subsense/LBSP_16bits_dbcross_1ch.i b/src/Detector/Subsense/LBSP_16bits_dbcross_1ch.i deleted file mode 100644 index ba5ffd0b1..000000000 --- a/src/Detector/Subsense/LBSP_16bits_dbcross_1ch.i +++ /dev/null @@ -1,44 +0,0 @@ -// note: this is the LBSP 16 bit double-cross single channel pattern as used in -// the original article by G.-A. Bilodeau et al. -// -// O O O 4 .. 3 .. 6 -// O O O .. 15 8 13 .. -// O O X O O => 0 9 X 11 1 -// O O O .. 12 10 14 .. -// O O O 7 .. 2 .. 5 -// -// -// must be defined externally: -// _t (size_t, absolute threshold used for comparisons) -// _ref (uchar, 'central' value used for comparisons) -// _data (uchar*, single-channel data to be covered by the pattern) -// _y (int, pattern rows location in the image data) -// _x (int, pattern cols location in the image data) -// _step_row (size_t, step size between rows, including padding) -// _res (ushort, 16 bit result vector) -// L1dist (function, returns the absolute difference between two uchars) - -#ifdef _val -#error "definitions clash detected" -#else -#define _val(x,y) _data[_step_row*(_y+y)+_x+x] -#endif - -_res = ((L1dist(_val(-1, 1),_ref) > _t) << 15) - + ((L1dist(_val( 1,-1),_ref) > _t) << 14) - + ((L1dist(_val( 1, 1),_ref) > _t) << 13) - + ((L1dist(_val(-1,-1),_ref) > _t) << 12) - + ((L1dist(_val( 1, 0),_ref) > _t) << 11) - + ((L1dist(_val( 0,-1),_ref) > _t) << 10) - + ((L1dist(_val(-1, 0),_ref) > _t) << 9) - + ((L1dist(_val( 0, 1),_ref) > _t) << 8) - + ((L1dist(_val(-2,-2),_ref) > _t) << 7) - + ((L1dist(_val( 2, 2),_ref) > _t) << 6) - + ((L1dist(_val( 2,-2),_ref) > _t) << 5) - + ((L1dist(_val(-2, 2),_ref) > _t) 
<< 4) - + ((L1dist(_val( 0, 2),_ref) > _t) << 3) - + ((L1dist(_val( 0,-2),_ref) > _t) << 2) - + ((L1dist(_val( 2, 0),_ref) > _t) << 1) - + ((L1dist(_val(-2, 0),_ref) > _t)); - -#undef _val diff --git a/src/Detector/Subsense/LBSP_16bits_dbcross_3ch1t.i b/src/Detector/Subsense/LBSP_16bits_dbcross_3ch1t.i deleted file mode 100644 index da0ebf9a2..000000000 --- a/src/Detector/Subsense/LBSP_16bits_dbcross_3ch1t.i +++ /dev/null @@ -1,46 +0,0 @@ -// note: this is the LBSP 16 bit double-cross indiv RGB pattern as used in -// the original article by G.-A. Bilodeau et al. -// -// O O O 4 .. 3 .. 6 -// O O O .. 15 8 13 .. -// O O X O O => 0 9 X 11 1 -// O O O .. 12 10 14 .. -// O O O 7 .. 2 .. 5 -// 3x 3x -// -// must be defined externally: -// _t (size_t, absolute threshold used for comparisons) -// _ref (uchar[3], 'central' values used for comparisons) -// _data (uchar*, triple-channel data to be covered by the pattern) -// _y (int, pattern rows location in the image data) -// _x (int, pattern cols location in the image data) -// _step_row (size_t, step size between rows, including padding) -// _res (ushort[3], 16 bit result vectors vector) -// L1dist (function, returns the absolute difference between two uchars) - -#ifdef _val -#error "definitions clash detected" -#else -#define _val(x,y,n) _data[_step_row*(_y+y)+3*(_x+x)+n] -#endif - -for(int n=0; n<3; ++n) { - _res[n] = ((L1dist(_val(-1, 1, n),_ref[n]) > _t) << 15) - + ((L1dist(_val( 1,-1, n),_ref[n]) > _t) << 14) - + ((L1dist(_val( 1, 1, n),_ref[n]) > _t) << 13) - + ((L1dist(_val(-1,-1, n),_ref[n]) > _t) << 12) - + ((L1dist(_val( 1, 0, n),_ref[n]) > _t) << 11) - + ((L1dist(_val( 0,-1, n),_ref[n]) > _t) << 10) - + ((L1dist(_val(-1, 0, n),_ref[n]) > _t) << 9) - + ((L1dist(_val( 0, 1, n),_ref[n]) > _t) << 8) - + ((L1dist(_val(-2,-2, n),_ref[n]) > _t) << 7) - + ((L1dist(_val( 2, 2, n),_ref[n]) > _t) << 6) - + ((L1dist(_val( 2,-2, n),_ref[n]) > _t) << 5) - + ((L1dist(_val(-2, 2, n),_ref[n]) > _t) << 4) - + ((L1dist(_val( 0, 
2, n),_ref[n]) > _t) << 3) - + ((L1dist(_val( 0,-2, n),_ref[n]) > _t) << 2) - + ((L1dist(_val( 2, 0, n),_ref[n]) > _t) << 1) - + ((L1dist(_val(-2, 0, n),_ref[n]) > _t)); -} - -#undef _val diff --git a/src/Detector/Subsense/LBSP_16bits_dbcross_3ch3t.i b/src/Detector/Subsense/LBSP_16bits_dbcross_3ch3t.i deleted file mode 100644 index 4662367b8..000000000 --- a/src/Detector/Subsense/LBSP_16bits_dbcross_3ch3t.i +++ /dev/null @@ -1,46 +0,0 @@ -// note: this is the LBSP 16 bit double-cross indiv RGB pattern as used in -// the original article by G.-A. Bilodeau et al. -// -// O O O 4 .. 3 .. 6 -// O O O .. 15 8 13 .. -// O O X O O => 0 9 X 11 1 -// O O O .. 12 10 14 .. -// O O O 7 .. 2 .. 5 -// 3x 3x -// -// must be defined externally: -// _t (size_t[3], absolute thresholds used for comparisons) -// _ref (uchar[3], 'central' values used for comparisons) -// _data (uchar*, triple-channel data to be covered by the pattern) -// _y (int, pattern rows location in the image data) -// _x (int, pattern cols location in the image data) -// _step_row (size_t, step size between rows, including padding) -// _res (ushort[3], 16 bit result vectors vector) -// L1dist (function, returns the absolute difference between two uchars) - -#ifdef _val -#error "definitions clash detected" -#else -#define _val(x,y,n) _data[_step_row*(_y+y)+3*(_x+x)+n] -#endif - -for(int n=0; n<3; ++n) { - _res[n] = ((L1dist(_val(-1, 1, n),_ref[n]) > _t[n]) << 15) - + ((L1dist(_val( 1,-1, n),_ref[n]) > _t[n]) << 14) - + ((L1dist(_val( 1, 1, n),_ref[n]) > _t[n]) << 13) - + ((L1dist(_val(-1,-1, n),_ref[n]) > _t[n]) << 12) - + ((L1dist(_val( 1, 0, n),_ref[n]) > _t[n]) << 11) - + ((L1dist(_val( 0,-1, n),_ref[n]) > _t[n]) << 10) - + ((L1dist(_val(-1, 0, n),_ref[n]) > _t[n]) << 9) - + ((L1dist(_val( 0, 1, n),_ref[n]) > _t[n]) << 8) - + ((L1dist(_val(-2,-2, n),_ref[n]) > _t[n]) << 7) - + ((L1dist(_val( 2, 2, n),_ref[n]) > _t[n]) << 6) - + ((L1dist(_val( 2,-2, n),_ref[n]) > _t[n]) << 5) - + ((L1dist(_val(-2, 2, 
n),_ref[n]) > _t[n]) << 4) - + ((L1dist(_val( 0, 2, n),_ref[n]) > _t[n]) << 3) - + ((L1dist(_val( 0,-2, n),_ref[n]) > _t[n]) << 2) - + ((L1dist(_val( 2, 0, n),_ref[n]) > _t[n]) << 1) - + ((L1dist(_val(-2, 0, n),_ref[n]) > _t[n])); -} - -#undef _val diff --git a/src/Detector/Subsense/LBSP_16bits_dbcross_s3ch.i b/src/Detector/Subsense/LBSP_16bits_dbcross_s3ch.i deleted file mode 100644 index 6fdc67606..000000000 --- a/src/Detector/Subsense/LBSP_16bits_dbcross_s3ch.i +++ /dev/null @@ -1,45 +0,0 @@ -// note: this is the LBSP 16 bit double-cross indiv RGB pattern as used in -// the original article by G.-A. Bilodeau et al. -// -// O O O 4 .. 3 .. 6 -// O O O .. 15 8 13 .. -// O O X O O => 0 9 X 11 1 -// O O O .. 12 10 14 .. -// O O O 7 .. 2 .. 5 -// (single/3x) (single/3x) -// -// must be defined externally: -// _t (size_t, absolute threshold used for comparisons) -// _ref (uchar, 'central' value used for comparisons) -// _data (uchar*, triple-channel data to be covered by the pattern) -// _y (int, pattern rows location in the image data) -// _x (int, pattern cols location in the image data) -// _c (size_t, pattern channel location in the image data) -// _step_row (size_t, step size between rows, including padding) -// _res (ushort, 16 bit result vector) -// L1dist (function, returns the absolute difference between two uchars) - -#ifdef _val -#error "definitions clash detected" -#else -#define _val(x,y,n) _data[_step_row*(_y+y)+3*(_x+x)+n] -#endif - -_res = ((L1dist(_val(-1, 1, _c),_ref) > _t) << 15) - + ((L1dist(_val( 1,-1, _c),_ref) > _t) << 14) - + ((L1dist(_val( 1, 1, _c),_ref) > _t) << 13) - + ((L1dist(_val(-1,-1, _c),_ref) > _t) << 12) - + ((L1dist(_val( 1, 0, _c),_ref) > _t) << 11) - + ((L1dist(_val( 0,-1, _c),_ref) > _t) << 10) - + ((L1dist(_val(-1, 0, _c),_ref) > _t) << 9) - + ((L1dist(_val( 0, 1, _c),_ref) > _t) << 8) - + ((L1dist(_val(-2,-2, _c),_ref) > _t) << 7) - + ((L1dist(_val( 2, 2, _c),_ref) > _t) << 6) - + ((L1dist(_val( 2,-2, _c),_ref) > _t) << 5) - + 
((L1dist(_val(-2, 2, _c),_ref) > _t) << 4) - + ((L1dist(_val( 0, 2, _c),_ref) > _t) << 3) - + ((L1dist(_val( 0,-2, _c),_ref) > _t) << 2) - + ((L1dist(_val( 2, 0, _c),_ref) > _t) << 1) - + ((L1dist(_val(-2, 0, _c),_ref) > _t)); - -#undef _val diff --git a/src/Detector/Subsense/LICENSE.txt b/src/Detector/Subsense/LICENSE.txt deleted file mode 100644 index bfe0e9d86..000000000 --- a/src/Detector/Subsense/LICENSE.txt +++ /dev/null @@ -1,26 +0,0 @@ -Copyright (c) 2014, P.-L. St-Charles (pierre-luc.st-charles@polymtl.ca) -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of Ecole Polytechnique de Montreal nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. diff --git a/src/Detector/Subsense/README.txt b/src/Detector/Subsense/README.txt deleted file mode 100644 index a41aabb15..000000000 --- a/src/Detector/Subsense/README.txt +++ /dev/null @@ -1,21 +0,0 @@ -This directory contains a 'cleaned' version of the SuBSENSE method configuration as presented in -the 2014 CVPRW paper 'Flexible Background Subtraction With Self-Balanced Local Sensitivity'. - -The main class used for background subtraction is BackgroundSubtractionSuBSENSE; all other files -contain either dependencies, utilities or interfaces for this method. It is based on OpenCV's -BackgroundSubtractor interface, and has been tested with versions 2.4.5 and 2.4.7. By default, -its constructor uses the parameters suggested in the paper. - - -TL;DR : - -BackgroundSubtractorSuBSENSE bgs(...); -bgs.initialize(...); -for(all frames in the video) { - ... - bgs(input,output); - ... -} - - -See LICENSE.txt for terms of use and contact information. 
diff --git a/src/Detector/Subsense/RandUtils.h b/src/Detector/Subsense/RandUtils.h deleted file mode 100644 index 24ca5f683..000000000 --- a/src/Detector/Subsense/RandUtils.h +++ /dev/null @@ -1,96 +0,0 @@ -#pragma once - -/*// gaussian 3x3 pattern, based on 'floor(fspecial('gaussian', 3, 1)*256)' -static const int s_nSamplesInitPatternWidth = 3; -static const int s_nSamplesInitPatternHeight = 3; -static const int s_nSamplesInitPatternTot = 256; -static const int s_anSamplesInitPattern[s_nSamplesInitPatternHeight][s_nSamplesInitPatternWidth] = { - {19, 32, 19,}, - {32, 52, 32,}, - {19, 32, 19,}, -};*/ - -// gaussian 7x7 pattern, based on 'floor(fspecial('gaussian',7,2)*512)' -static const int s_nSamplesInitPatternWidth = 7; -static const int s_nSamplesInitPatternHeight = 7; -static const int s_nSamplesInitPatternTot = 512; -static const int s_anSamplesInitPattern[s_nSamplesInitPatternHeight][s_nSamplesInitPatternWidth] = { - {2, 4, 6, 7, 6, 4, 2,}, - {4, 8, 12, 14, 12, 8, 4,}, - {6, 12, 21, 25, 21, 12, 6,}, - {7, 14, 25, 28, 25, 14, 7,}, - {6, 12, 21, 25, 21, 12, 6,}, - {4, 8, 12, 14, 12, 8, 4,}, - {2, 4, 6, 7, 6, 4, 2,}, -}; - -//! returns a random init/sampling position for the specified pixel position; also guards against out-of-bounds values via image/border size check. -static inline void getRandSamplePosition(int& x_sample, int& y_sample, const int x_orig, const int y_orig, const int border, const cv::Size& imgsize) { - int r = 1+rand()%s_nSamplesInitPatternTot; - for(x_sample=0; x_sample=imgsize.width-border) - x_sample = imgsize.width-border-1; - if(y_sample=imgsize.height-border) - y_sample = imgsize.height-border-1; -} - -// simple 8-connected (3x3) neighbors pattern -static const int s_anNeighborPatternSize_3x3 = 8; -static const int s_anNeighborPattern_3x3[8][2] = { - {-1, 1}, { 0, 1}, { 1, 1}, - {-1, 0}, { 1, 0}, - {-1,-1}, { 0,-1}, { 1,-1}, -}; - -//! 
returns a random neighbor position for the specified pixel position; also guards against out-of-bounds values via image/border size check. -static inline void getRandNeighborPosition_3x3(int& x_neighbor, int& y_neighbor, const int x_orig, const int y_orig, const int border, const cv::Size& imgsize) { - int r = rand()%s_anNeighborPatternSize_3x3; - x_neighbor = x_orig+s_anNeighborPattern_3x3[r][0]; - y_neighbor = y_orig+s_anNeighborPattern_3x3[r][1]; - if(x_neighbor=imgsize.width-border) - x_neighbor = imgsize.width-border-1; - if(y_neighbor=imgsize.height-border) - y_neighbor = imgsize.height-border-1; -} - -// 5x5 neighbors pattern -static const int s_anNeighborPatternSize_5x5 = 24; -static const int s_anNeighborPattern_5x5[24][2] = { - {-2, 2}, {-1, 2}, { 0, 2}, { 1, 2}, { 2, 2}, - {-2, 1}, {-1, 1}, { 0, 1}, { 1, 1}, { 2, 1}, - {-2, 0}, {-1, 0}, { 1, 0}, { 2, 0}, - {-2,-1}, {-1,-1}, { 0,-1}, { 1,-1}, { 2,-1}, - {-2,-2}, {-1,-2}, { 0,-2}, { 1,-2}, { 2,-2}, -}; - -//! returns a random neighbor position for the specified pixel position; also guards against out-of-bounds values via image/border size check. 
-static inline void getRandNeighborPosition_5x5(int& x_neighbor, int& y_neighbor, const int x_orig, const int y_orig, const int border, const cv::Size& imgsize) { - int r = rand()%s_anNeighborPatternSize_5x5; - x_neighbor = x_orig+s_anNeighborPattern_5x5[r][0]; - y_neighbor = y_orig+s_anNeighborPattern_5x5[r][1]; - if(x_neighbor=imgsize.width-border) - x_neighbor = imgsize.width-border-1; - if(y_neighbor=imgsize.height-border) - y_neighbor = imgsize.height-border-1; -} diff --git a/src/Detector/YoloDarknetDetector.cpp b/src/Detector/YoloDarknetDetector.cpp deleted file mode 100644 index 48170fac2..000000000 --- a/src/Detector/YoloDarknetDetector.cpp +++ /dev/null @@ -1,345 +0,0 @@ -#include -#include "YoloDarknetDetector.h" -#include "nms.h" - -/// -/// \brief YoloDarknetDetector::YoloDarknetDetector -/// \param gray -/// -YoloDarknetDetector::YoloDarknetDetector(const cv::UMat& colorFrame) - : BaseDetector(colorFrame) -{ - m_classNames = { "background", - "aeroplane", "bicycle", "bird", "boat", - "bottle", "bus", "car", "cat", "chair", - "cow", "diningtable", "dog", "horse", - "motorbike", "person", "pottedplant", - "sheep", "sofa", "train", "tvmonitor" }; -} - -/// -/// \brief YoloDarknetDetector::Init -/// \return -/// -bool YoloDarknetDetector::Init(const config_t& config) -{ - m_detector.reset(); - - auto modelConfiguration = config.find("modelConfiguration"); - auto modelBinary = config.find("modelBinary"); - if (modelConfiguration == config.end() || modelBinary == config.end()) - return false; - - int currGPUID = 0; - auto gpuId = config.find("gpuId"); - if (gpuId != config.end()) - currGPUID = std::max(0, std::stoi(gpuId->second)); - - auto maxBatch = config.find("maxBatch"); - if (maxBatch != config.end()) - m_batchSize = std::max(1, std::stoi(maxBatch->second)); - - m_detector = std::make_unique(modelConfiguration->second, modelBinary->second, currGPUID, static_cast(m_batchSize)); - m_detector->nms = 0.2f; - - auto classNames = config.find("classNames"); 
- if (classNames != config.end()) - { - std::ifstream classNamesFile(classNames->second); - if (classNamesFile.is_open()) - { - m_classNames.clear(); - std::string className; - for (; std::getline(classNamesFile, className); ) - { - className.erase(className.find_last_not_of(" \t\n\r\f\v") + 1); - m_classNames.push_back(className); - } - if (!FillTypesMap(m_classNames)) - { - std::cout << "Unknown types in class names!" << std::endl; - assert(0); - } - } - } - - auto confidenceThreshold = config.find("confidenceThreshold"); - if (confidenceThreshold != config.end()) - m_confidenceThreshold = std::stof(confidenceThreshold->second); - - auto maxCropRatio = config.find("maxCropRatio"); - if (maxCropRatio != config.end()) - m_maxCropRatio = std::stof(maxCropRatio->second); - - m_classesWhiteList.clear(); - auto whiteRange = config.equal_range("white_list"); - for (auto it = whiteRange.first; it != whiteRange.second; ++it) - { - m_classesWhiteList.insert(std::stoi(it->second)); - } - - bool correct = m_detector.get() != nullptr; - - m_netSize = cv::Size(m_detector->get_net_width(), m_detector->get_net_height()); - - return correct; -} - -/// -/// \brief YoloDarknetDetector::Detect -/// \param gray -/// -void YoloDarknetDetector::Detect(const cv::UMat& colorFrame) -{ - m_regions.clear(); - cv::Mat colorMat = colorFrame.getMat(cv::ACCESS_READ); - - if (m_maxCropRatio <= 0) - { - Detect(colorMat, m_regions); - } - else - { - std::vector crops = GetCrops(m_maxCropRatio, m_netSize, colorMat.size()); - regions_t tmpRegions; - if (m_batchSize > 1) - { - std::vector batch; - batch.reserve(m_batchSize); - - for (size_t i = 0; i < crops.size(); i += m_batchSize) - { - size_t batchSize = std::min(static_cast(m_batchSize), crops.size() - i); - batch.clear(); - for (size_t j = 0; j < batchSize; ++j) - { - batch.emplace_back(colorMat, crops[i + j]); - } - - image_t detImage; - FillBatchImg(batch, detImage); - std::vector> result_vec = m_detector->detectBatch(detImage, 
static_cast(batchSize), m_netSize.width, m_netSize.height, m_confidenceThreshold); - - const float wk = static_cast(crops[i].width) / m_netSize.width; - const float hk = static_cast(crops[i].height) / m_netSize.height; - for (size_t j = 0; j < batchSize; ++j) - { - for (const auto& bbox : result_vec[j]) - { - if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(bbox.obj_id)) != std::end(m_classesWhiteList)) - tmpRegions.emplace_back(cv::Rect(crops[i + j].x + cvRound(wk * bbox.x), crops[i + j].y + cvRound(hk * bbox.y), - cvRound(wk * bbox.w), cvRound(hk * bbox.h)), - T2T(bbox.obj_id), bbox.prob); - } - } - } - } - else - { - for (size_t i = 0; i < crops.size(); ++i) - { - const auto& crop = crops[i]; - //std::cout << "Crop " << i << ": " << crop << std::endl; - DetectInCrop(colorMat, crop, tmpRegions); - } - } - - if (crops.size() > 1 || m_batchSize > 1) - { - nms3(tmpRegions, m_regions, 0.4f, - [](const CRegion& reg) { return reg.m_brect; }, - [](const CRegion& reg) { return reg.m_confidence; }, - [](const CRegion& reg) { return reg.m_type; }, - 0, 0.f); - //std::cout << "nms for " << tmpRegions.size() << " objects - result " << m_regions.size() << std::endl; - } - } - //std::cout << "Finally " << m_regions.size() << " objects, " << colorMat.u->refcount << ", " << colorMat.u->urefcount << std::endl; -} - -/// -/// \brief YoloDarknetDetector::DetectInCrop -/// \param colorFrame -/// \param crop -/// \param tmpRegions -/// -void YoloDarknetDetector::DetectInCrop(const cv::Mat& colorFrame, const cv::Rect& crop, regions_t& tmpRegions) -{ - if (crop.width == m_netSize.width && crop.height == m_netSize.height) - m_tmpImg = colorFrame(crop); - else - cv::resize(colorFrame(crop), m_tmpImg, m_netSize, 0, 0, cv::INTER_LINEAR); - - image_t detImage; - FillImg(detImage); - - std::vector detects = m_detector->detect(detImage, m_confidenceThreshold, false); - - float wk = (float)crop.width / detImage.w; - float hk = (float)crop.height / detImage.h; - - for (const 
bbox_t& bbox : detects) - { - if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(bbox.obj_id)) != std::end(m_classesWhiteList)) - tmpRegions.emplace_back(cv::Rect(cvRound(wk * bbox.x) + crop.x, cvRound(hk * bbox.y) + crop.y, cvRound(wk * bbox.w), cvRound(hk * bbox.h)), T2T(bbox.obj_id), bbox.prob); - } - if (crop.width == m_netSize.width && crop.height == m_netSize.height) - m_tmpImg.release(); - //std::cout << "Detected " << detects.size() << " objects" << std::endl; -} - -/// -/// \brief YoloDarknetDetector::Detect -/// \param colorFrame -/// \param crop -/// \param tmpRegions -/// -void YoloDarknetDetector::Detect(const cv::Mat& colorFrame, regions_t& tmpRegions) -{ - if (colorFrame.cols == m_netSize.width && colorFrame.rows == m_netSize.height) - m_tmpImg = colorFrame; - else - cv::resize(colorFrame, m_tmpImg, m_netSize, 0, 0, cv::INTER_LINEAR); - - image_t detImage; - FillImg(detImage); - - std::vector detects = m_detector->detect(detImage, m_confidenceThreshold, false); - - float wk = (float)colorFrame.cols / detImage.w; - float hk = (float)colorFrame.rows / detImage.h; - - for (const bbox_t& bbox : detects) - { - if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(bbox.obj_id)) != std::end(m_classesWhiteList)) - tmpRegions.emplace_back(cv::Rect(cvRound(wk * bbox.x), cvRound(hk * bbox.y), cvRound(wk * bbox.w), cvRound(hk * bbox.h)), T2T(bbox.obj_id), bbox.prob); - } - //std::cout << "Detected " << detects.size() << " objects" << std::endl; -} - -/// -/// \brief YoloDarknetDetector::FillImg -/// \param detImage -/// -void YoloDarknetDetector::FillImg(image_t& detImage) -{ - detImage.w = m_tmpImg.cols; - detImage.h = m_tmpImg.rows; - detImage.c = m_tmpImg.channels(); - assert(detImage.c == 3); - size_t newSize = static_cast(detImage.w * detImage.h * detImage.c); - if (newSize != m_tmpBuf.size()) - m_tmpBuf.resize(newSize); - detImage.data = &m_tmpBuf[0]; - - const int h = detImage.h; - const int w = detImage.w; - constexpr float knorm = 1.f 
/ 255.f; - for (int y = 0; y < h; ++y) - { - for (int c = 0; c < 3; ++c) - { - const unsigned char *data = m_tmpImg.ptr(y) + 2 - c; - float* fdata = detImage.data + static_cast(c * w * h) + static_cast(y * w); - for (int x = 0; x < w; ++x) - { - *fdata = knorm * data[0]; - ++fdata; - data += 3; - } - } - } -} - -/// -/// \brief YoloDarknetDetector::FillBatchImg -/// \param batch -/// \param detImage -/// -void YoloDarknetDetector::FillBatchImg(const std::vector& batch, image_t& detImage) -{ - detImage.w = m_netSize.width; - detImage.h = m_netSize.height; - detImage.c = 3; - assert(detImage.c == 3); - size_t imgSize = static_cast(detImage.w * detImage.h * detImage.c); - size_t newSize = batch.size() * imgSize; - if (newSize > m_tmpBuf.size()) - m_tmpBuf.resize(newSize); - detImage.data = &m_tmpBuf[0]; - - for (size_t i = 0; i < batch.size(); ++i) - { - if (batch[i].cols == m_netSize.width && batch[i].rows == m_netSize.height) - m_tmpImg = batch[i]; - else - cv::resize(batch[i], m_tmpImg, m_netSize, 0, 0, cv::INTER_LINEAR); - - float* fImgStart = detImage.data + i * imgSize; - - const int h = m_tmpImg.rows; - const int w = m_tmpImg.cols; - constexpr float knorm = 1.f / 255.f; - for (int y = 0; y < h; ++y) - { - for (int c = 0; c < 3; ++c) - { - const unsigned char* data = m_tmpImg.ptr(y) + 2 - c; - float* fdata = fImgStart + static_cast(c * w * h) + static_cast(y * w); - for (int x = 0; x < w; ++x) - { - *fdata = knorm * data[0]; - ++fdata; - data += 3; - } - } - } - } -} - -/// -/// \brief YoloDarknetDetector::Detect -/// \param frames -/// \param regions -/// -void YoloDarknetDetector::Detect(const std::vector& frames, std::vector& regions) -{ - if (frames.size() == 1) - { - Detect(frames[0].getMat(cv::ACCESS_READ), regions[0]); - } - else - { - std::vector batch; - for (const auto& frame : frames) - { - batch.emplace_back(frame.getMat(cv::ACCESS_READ)); - } - - image_t detImage; - FillBatchImg(batch, detImage); - std::vector> result_vec = 
m_detector->detectBatch(detImage, static_cast(frames.size()), m_netSize.width, m_netSize.height, m_confidenceThreshold); - - regions_t tmpRegions; - tmpRegions.reserve(result_vec[0].size() + 16); - float wk = static_cast(frames[0].cols) / m_netSize.width; - float hk = static_cast(frames[0].rows) / m_netSize.height; - for (size_t i = 0; i < regions.size(); ++i) - { - tmpRegions.clear(); - for (const auto& bbox : result_vec[i]) - { - if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(bbox.obj_id)) != std::end(m_classesWhiteList)) - tmpRegions.emplace_back(cv::Rect(cvRound(wk * bbox.x), cvRound(hk * bbox.y), cvRound(wk * bbox.w), cvRound(hk * bbox.h)), T2T(bbox.obj_id), bbox.prob); - } - - nms3(tmpRegions, regions[i], 0.4f, - [](const CRegion& reg) { return reg.m_brect; }, - [](const CRegion& reg) { return reg.m_confidence; }, - [](const CRegion& reg) { return reg.m_type; }, - 0, 0.f); - } - - m_regions.assign(std::begin(regions.back()), std::end(regions.back())); - } -} diff --git a/src/Detector/YoloDarknetDetector.h b/src/Detector/YoloDarknetDetector.h deleted file mode 100644 index 3d476813c..000000000 --- a/src/Detector/YoloDarknetDetector.h +++ /dev/null @@ -1,47 +0,0 @@ -#pragma once - -#include "BaseDetector.h" - -#include "darknet/include/yolo_v2_class.hpp" -// You only look once (YOLO)-Detector (https://arxiv.org/abs/1612.08242) to detect objects -// Models can be downloaded here: https://pjreddie.com/darknet/yolo/ -// Default network is 416x416 -// Class names can be downloaded here: https://github.com/pjreddie/darknet/tree/master/data - - -/// -/// \brief The YoloDarknetDetector class -/// -class YoloDarknetDetector : public BaseDetector -{ -public: - YoloDarknetDetector(const cv::UMat& colorFrame); - ~YoloDarknetDetector(void) = default; - - bool Init(const config_t& config); - - void Detect(const cv::UMat& colorFrame); - void Detect(const std::vector& frames, std::vector& regions); - - bool CanGrayProcessing() const - { - return false; - } - 
-private: - std::unique_ptr m_detector; - - float m_confidenceThreshold = 0.5f; - float m_maxCropRatio = 3.0f; - size_t m_batchSize = 1; - std::vector m_classNames; - cv::Size m_netSize; - - void DetectInCrop(const cv::Mat& colorFrame, const cv::Rect& crop, regions_t& tmpRegions); - void Detect(const cv::Mat& colorFrame, regions_t& tmpRegions); - void FillImg(image_t& detImage); - void FillBatchImg(const std::vector& batch, image_t& detImage); - - cv::Mat m_tmpImg; - std::vector m_tmpBuf; -}; diff --git a/src/Detector/YoloTensorRTDetector.cpp b/src/Detector/YoloTensorRTDetector.cpp deleted file mode 100644 index 8b7208962..000000000 --- a/src/Detector/YoloTensorRTDetector.cpp +++ /dev/null @@ -1,210 +0,0 @@ -#include -#include "YoloTensorRTDetector.h" -#include "nms.h" - -/// -/// \brief YoloTensorRTDetector::YoloTensorRTDetector -/// \param gray -/// -YoloTensorRTDetector::YoloTensorRTDetector(const cv::UMat& colorFrame) - : BaseDetector(colorFrame) -{ - m_classNames = { "background", - "aeroplane", "bicycle", "bird", "boat", - "bottle", "bus", "car", "cat", "chair", - "cow", "diningtable", "dog", "horse", - "motorbike", "person", "pottedplant", - "sheep", "sofa", "train", "tvmonitor" }; - - m_localConfig.calibration_image_list_file_txt = ""; - m_localConfig.inference_precison = tensor_rt::FP32; - m_localConfig.net_type = tensor_rt::YOLOV4; - m_localConfig.detect_thresh = 0.5f; - m_localConfig.gpu_id = 0; -} - -/// -/// \brief YoloDarknetDetector::Init -/// \return -/// -bool YoloTensorRTDetector::Init(const config_t& config) -{ - m_detector.reset(); - - auto modelConfiguration = config.find("modelConfiguration"); - auto modelBinary = config.find("modelBinary"); - if (modelConfiguration == config.end() || modelBinary == config.end()) - return false; - - auto confidenceThreshold = config.find("confidenceThreshold"); - if (confidenceThreshold != config.end()) - m_localConfig.detect_thresh = std::stof(confidenceThreshold->second); - - auto gpuId = 
config.find("gpuId"); - if (gpuId != config.end()) - m_localConfig.gpu_id = std::max(0, std::stoi(gpuId->second)); - - auto maxBatch = config.find("maxBatch"); - if (maxBatch != config.end()) - m_batchSize = std::max(1, std::stoi(maxBatch->second)); - m_localConfig.batch_size = static_cast(m_batchSize); - - m_localConfig.file_model_cfg = modelConfiguration->second; - m_localConfig.file_model_weights = modelBinary->second; - - auto inference_precison = config.find("inference_precison"); - if (inference_precison != config.end()) - { - std::map dictPrecison; - dictPrecison["INT8"] = tensor_rt::INT8; - dictPrecison["FP16"] = tensor_rt::FP16; - dictPrecison["FP32"] = tensor_rt::FP32; - auto precison = dictPrecison.find(inference_precison->second); - if (precison != dictPrecison.end()) - m_localConfig.inference_precison = precison->second; - } - - auto net_type = config.find("net_type"); - if (net_type != config.end()) - { - std::map dictNetType; - dictNetType["YOLOV2"] = tensor_rt::YOLOV2; - dictNetType["YOLOV3"] = tensor_rt::YOLOV3; - dictNetType["YOLOV2_TINY"] = tensor_rt::YOLOV2_TINY; - dictNetType["YOLOV3_TINY"] = tensor_rt::YOLOV3_TINY; - dictNetType["YOLOV4"] = tensor_rt::YOLOV4; - dictNetType["YOLOV4_TINY"] = tensor_rt::YOLOV4_TINY; - dictNetType["YOLOV5"] = tensor_rt::YOLOV5; - - auto netType = dictNetType.find(net_type->second); - if (netType != dictNetType.end()) - m_localConfig.net_type = netType->second; - } - - auto classNames = config.find("classNames"); - if (classNames != config.end()) - { - std::ifstream classNamesFile(classNames->second); - if (classNamesFile.is_open()) - { - m_classNames.clear(); - std::string className; - for (; std::getline(classNamesFile, className); ) - { - m_classNames.push_back(className); - } - if (!FillTypesMap(m_classNames)) - { - std::cout << "Unknown types in class names!" 
<< std::endl; - assert(0); - } - } - } - - m_classesWhiteList.clear(); - auto whiteRange = config.equal_range("white_list"); - for (auto it = whiteRange.first; it != whiteRange.second; ++it) - { - m_classesWhiteList.insert(std::stoi(it->second)); - } - - auto maxCropRatio = config.find("maxCropRatio"); - if (maxCropRatio != config.end()) - m_maxCropRatio = std::stof(maxCropRatio->second); - - m_detector = std::make_unique(); - m_detector->init(m_localConfig); - return m_detector.get() != nullptr; -} - -/// -/// \brief YoloTensorRTDetector::Detect -/// \param gray -/// -void YoloTensorRTDetector::Detect(const cv::UMat& colorFrame) -{ - m_regions.clear(); - cv::Mat colorMat = colorFrame.getMat(cv::ACCESS_READ); - - if (m_maxCropRatio <= 0) - { - std::vector batch = { colorMat }; - std::vector detects; - m_detector->detect(batch, detects); - for (const tensor_rt::BatchResult& dets : detects) - { - for (const tensor_rt::Result& bbox : dets) - { - if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(bbox.id)) != std::end(m_classesWhiteList)) - m_regions.emplace_back(bbox.rect, T2T(bbox.id), bbox.prob); - } - } - } - else - { - std::vector crops = GetCrops(m_maxCropRatio, m_detector->get_input_size(), colorMat.size()); - regions_t tmpRegions; - std::vector batch; - batch.reserve(m_batchSize); - for (size_t i = 0; i < crops.size(); i += m_batchSize) - { - size_t batchSize = std::min(static_cast(m_batchSize), crops.size() - i); - batch.clear(); - for (size_t j = 0; j < batchSize; ++j) - { - batch.emplace_back(colorMat, crops[i + j]); - } - std::vector detects; - m_detector->detect(batch, detects); - - for (size_t j = 0; j < batchSize; ++j) - { - const auto& crop = crops[i + j]; - //std::cout << "Crop " << (i + j) << ": " << crop << std::endl; - - for (const tensor_rt::Result& bbox : detects[j]) - { - if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(bbox.id)) != std::end(m_classesWhiteList)) - tmpRegions.emplace_back(cv::Rect(bbox.rect.x + crop.x, 
bbox.rect.y + crop.y, bbox.rect.width, bbox.rect.height), T2T(bbox.id), bbox.prob); - } - } - } - - if (crops.size() > 1) - { - nms3(tmpRegions, m_regions, 0.4f, - [](const CRegion& reg) { return reg.m_brect; }, - [](const CRegion& reg) { return reg.m_confidence; }, - [](const CRegion& reg) { return reg.m_type; }, - 0, 0.f); - //std::cout << "nms for " << tmpRegions.size() << " objects - result " << m_regions.size() << std::endl; - } - } -} - -/// -/// \brief YoloTensorRTDetector::Detect -/// \param frames -/// \param regions -/// -void YoloTensorRTDetector::Detect(const std::vector& frames, std::vector& regions) -{ - std::vector batch; - for (const auto& frame : frames) - { - batch.emplace_back(frame.getMat(cv::ACCESS_READ)); - } - - std::vector detects; - m_detector->detect(batch, detects); - for (size_t i = 0; i < detects.size(); ++i) - { - const tensor_rt::BatchResult& dets = detects[i]; - for (const tensor_rt::Result& bbox : dets) - { - if (m_classesWhiteList.empty() || m_classesWhiteList.find(T2T(bbox.id)) != std::end(m_classesWhiteList)) - regions[i].emplace_back(bbox.rect, T2T(bbox.id), bbox.prob); - } - } - m_regions.assign(std::begin(regions.back()), std::end(regions.back())); -} diff --git a/src/Detector/YoloTensorRTDetector.h b/src/Detector/YoloTensorRTDetector.h deleted file mode 100644 index c1fa44fa9..000000000 --- a/src/Detector/YoloTensorRTDetector.h +++ /dev/null @@ -1,33 +0,0 @@ -#pragma once - -#include "BaseDetector.h" -#include "tensorrt_yolo/class_detector.h" - -/// -/// \brief The YoloTensorRTDetector class -/// -class YoloTensorRTDetector : public BaseDetector -{ -public: - YoloTensorRTDetector(const cv::UMat& colorFrame); - ~YoloTensorRTDetector(void) = default; - - bool Init(const config_t& config); - - void Detect(const cv::UMat& colorFrame); - void Detect(const std::vector& frames, std::vector& regions); - - bool CanGrayProcessing() const - { - return false; - } - -private: - std::unique_ptr m_detector; - - float m_maxCropRatio = 3.0f; 
- std::vector m_classNames; - - tensor_rt::Config m_localConfig; - size_t m_batchSize = 1; -}; diff --git a/src/Detector/darknet/3rdparty/dll/x64/pthreadGC2.dll b/src/Detector/darknet/3rdparty/dll/x64/pthreadGC2.dll deleted file mode 100644 index 841d4a216..000000000 Binary files a/src/Detector/darknet/3rdparty/dll/x64/pthreadGC2.dll and /dev/null differ diff --git a/src/Detector/darknet/3rdparty/dll/x64/pthreadVC2.dll b/src/Detector/darknet/3rdparty/dll/x64/pthreadVC2.dll deleted file mode 100644 index 165b4d26e..000000000 Binary files a/src/Detector/darknet/3rdparty/dll/x64/pthreadVC2.dll and /dev/null differ diff --git a/src/Detector/darknet/3rdparty/dll/x86/pthreadGC2.dll b/src/Detector/darknet/3rdparty/dll/x86/pthreadGC2.dll deleted file mode 100644 index 67b9289df..000000000 Binary files a/src/Detector/darknet/3rdparty/dll/x86/pthreadGC2.dll and /dev/null differ diff --git a/src/Detector/darknet/3rdparty/dll/x86/pthreadGCE2.dll b/src/Detector/darknet/3rdparty/dll/x86/pthreadGCE2.dll deleted file mode 100644 index 9e18ea24b..000000000 Binary files a/src/Detector/darknet/3rdparty/dll/x86/pthreadGCE2.dll and /dev/null differ diff --git a/src/Detector/darknet/3rdparty/dll/x86/pthreadVC2.dll b/src/Detector/darknet/3rdparty/dll/x86/pthreadVC2.dll deleted file mode 100644 index fcb5d9dcc..000000000 Binary files a/src/Detector/darknet/3rdparty/dll/x86/pthreadVC2.dll and /dev/null differ diff --git a/src/Detector/darknet/3rdparty/dll/x86/pthreadVCE2.dll b/src/Detector/darknet/3rdparty/dll/x86/pthreadVCE2.dll deleted file mode 100644 index 9d148cc0d..000000000 Binary files a/src/Detector/darknet/3rdparty/dll/x86/pthreadVCE2.dll and /dev/null differ diff --git a/src/Detector/darknet/3rdparty/dll/x86/pthreadVSE2.dll b/src/Detector/darknet/3rdparty/dll/x86/pthreadVSE2.dll deleted file mode 100644 index 8129116fd..000000000 Binary files a/src/Detector/darknet/3rdparty/dll/x86/pthreadVSE2.dll and /dev/null differ diff --git a/src/Detector/darknet/3rdparty/include/pthread.h 
b/src/Detector/darknet/3rdparty/include/pthread.h deleted file mode 100644 index b4072f72c..000000000 --- a/src/Detector/darknet/3rdparty/include/pthread.h +++ /dev/null @@ -1,1368 +0,0 @@ -/* This is an implementation of the threads API of POSIX 1003.1-2001. - * - * -------------------------------------------------------------------------- - * - * Pthreads-win32 - POSIX Threads Library for Win32 - * Copyright(C) 1998 John E. Bossom - * Copyright(C) 1999,2005 Pthreads-win32 contributors - * - * Contact Email: rpj@callisto.canberra.edu.au - * - * The current list of contributors is contained - * in the file CONTRIBUTORS included with the source - * code distribution. The list can also be seen at the - * following World Wide Web location: - * http://sources.redhat.com/pthreads-win32/contributors.html - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library in the file COPYING.LIB; - * if not, write to the Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - */ - -#if !defined( PTHREAD_H ) -#define PTHREAD_H - -/* - * See the README file for an explanation of the pthreads-win32 version - * numbering scheme and how the DLL is named etc. - */ -#define PTW32_VERSION 2,9,1,0 -#define PTW32_VERSION_STRING "2, 9, 1, 0\0" - -/* There are three implementations of cancel cleanup. 
- * Note that pthread.h is included in both application - * compilation units and also internally for the library. - * The code here and within the library aims to work - * for all reasonable combinations of environments. - * - * The three implementations are: - * - * WIN32 SEH - * C - * C++ - * - * Please note that exiting a push/pop block via - * "return", "exit", "break", or "continue" will - * lead to different behaviour amongst applications - * depending upon whether the library was built - * using SEH, C++, or C. For example, a library built - * with SEH will call the cleanup routine, while both - * C++ and C built versions will not. - */ - -/* - * Define defaults for cleanup code. - * Note: Unless the build explicitly defines one of the following, then - * we default to standard C style cleanup. This style uses setjmp/longjmp - * in the cancelation and thread exit implementations and therefore won't - * do stack unwinding if linked to applications that have it (e.g. - * C++ apps). This is currently consistent with most/all commercial Unix - * POSIX threads implementations. - */ -#if !defined( __CLEANUP_SEH ) && !defined( __CLEANUP_CXX ) && !defined( __CLEANUP_C ) -# define __CLEANUP_C -#endif - -#if defined( __CLEANUP_SEH ) && ( !defined( _MSC_VER ) && !defined(PTW32_RC_MSC)) -#error ERROR [__FILE__, line __LINE__]: SEH is not supported for this compiler. -#endif - -/* - * Stop here if we are being included by the resource compiler. 
- */ -#if !defined(RC_INVOKED) - -#undef PTW32_LEVEL - -#if defined(_POSIX_SOURCE) -#define PTW32_LEVEL 0 -/* Early POSIX */ -#endif - -#if defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 199309 -#undef PTW32_LEVEL -#define PTW32_LEVEL 1 -/* Include 1b, 1c and 1d */ -#endif - -#if defined(INCLUDE_NP) -#undef PTW32_LEVEL -#define PTW32_LEVEL 2 -/* Include Non-Portable extensions */ -#endif - -#define PTW32_LEVEL_MAX 3 - -#if ( defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 200112 ) || !defined(PTW32_LEVEL) -#define PTW32_LEVEL PTW32_LEVEL_MAX -/* Include everything */ -#endif - -#if defined(_UWIN) -# define HAVE_STRUCT_TIMESPEC 1 -# define HAVE_SIGNAL_H 1 -# undef HAVE_PTW32_CONFIG_H -# pragma comment(lib, "pthread") -#endif - -/* - * ------------------------------------------------------------- - * - * - * Module: pthread.h - * - * Purpose: - * Provides an implementation of PThreads based upon the - * standard: - * - * POSIX 1003.1-2001 - * and - * The Single Unix Specification version 3 - * - * (these two are equivalent) - * - * in order to enhance code portability between Windows, - * various commercial Unix implementations, and Linux. - * - * See the ANNOUNCE file for a full list of conforming - * routines and defined constants, and a list of missing - * routines and constants not defined in this implementation. - * - * Authors: - * There have been many contributors to this library. - * The initial implementation was contributed by - * John Bossom, and several others have provided major - * sections or revisions of parts of the implementation. - * Often significant effort has been contributed to - * find and fix important bugs and other problems to - * improve the reliability of the library, which sometimes - * is not reflected in the amount of code which changed as - * result. - * As much as possible, the contributors are acknowledged - * in the ChangeLog file in the source code distribution - * where their changes are noted in detail. 
- * - * Contributors are listed in the CONTRIBUTORS file. - * - * As usual, all bouquets go to the contributors, and all - * brickbats go to the project maintainer. - * - * Maintainer: - * The code base for this project is coordinated and - * eventually pre-tested, packaged, and made available by - * - * Ross Johnson - * - * QA Testers: - * Ultimately, the library is tested in the real world by - * a host of competent and demanding scientists and - * engineers who report bugs and/or provide solutions - * which are then fixed or incorporated into subsequent - * versions of the library. Each time a bug is fixed, a - * test case is written to prove the fix and ensure - * that later changes to the code don't reintroduce the - * same error. The number of test cases is slowly growing - * and therefore so is the code reliability. - * - * Compliance: - * See the file ANNOUNCE for the list of implemented - * and not-implemented routines and defined options. - * Of course, these are all defined is this file as well. - * - * Web site: - * The source code and other information about this library - * are available from - * - * http://sources.redhat.com/pthreads-win32/ - * - * ------------------------------------------------------------- - */ - -/* Try to avoid including windows.h */ -#if (defined(__MINGW64__) || defined(__MINGW32__)) && defined(__cplusplus) -#define PTW32_INCLUDE_WINDOWS_H -#endif - -#if defined(PTW32_INCLUDE_WINDOWS_H) -#include -#endif - -#if defined(_MSC_VER) && _MSC_VER < 1300 || defined(__DMC__) -/* - * VC++6.0 or early compiler's header has no DWORD_PTR type. 
- */ -typedef unsigned long DWORD_PTR; -typedef unsigned long ULONG_PTR; -#endif -/* - * ----------------- - * autoconf switches - * ----------------- - */ - -#if defined(HAVE_PTW32_CONFIG_H) -#include "config.h" -#endif /* HAVE_PTW32_CONFIG_H */ - -#if !defined(NEED_FTIME) -#include -#else /* NEED_FTIME */ -/* use native WIN32 time API */ -#endif /* NEED_FTIME */ - -#if defined(HAVE_SIGNAL_H) -#include -#endif /* HAVE_SIGNAL_H */ - -#include - -/* - * Boolean values to make us independent of system includes. - */ -enum { - PTW32_FALSE = 0, - PTW32_TRUE = (! PTW32_FALSE) -}; - -/* - * This is a duplicate of what is in the autoconf config.h, - * which is only used when building the pthread-win32 libraries. - */ - -#if !defined(PTW32_CONFIG_H) -# if defined(WINCE) -# define NEED_ERRNO -# define NEED_SEM -# endif -# if defined(__MINGW64__) -# define HAVE_STRUCT_TIMESPEC -# define HAVE_MODE_T -# elif defined(_UWIN) || defined(__MINGW32__) -# define HAVE_MODE_T -# endif -#endif - -/* - * - */ - -#if PTW32_LEVEL >= PTW32_LEVEL_MAX -#if defined(NEED_ERRNO) -#include "need_errno.h" -#else -#include -#endif -#endif /* PTW32_LEVEL >= PTW32_LEVEL_MAX */ - -/* - * Several systems don't define some error numbers. - */ -#if !defined(ENOTSUP) -# define ENOTSUP 48 /* This is the value in Solaris. */ -#endif - -#if !defined(ETIMEDOUT) -# define ETIMEDOUT 10060 /* Same as WSAETIMEDOUT */ -#endif - -#if !defined(ENOSYS) -# define ENOSYS 140 /* Semi-arbitrary value */ -#endif - -#if !defined(EDEADLK) -# if defined(EDEADLOCK) -# define EDEADLK EDEADLOCK -# else -# define EDEADLK 36 /* This is the value in MSVC. */ -# endif -#endif - -/* POSIX 2008 - related to robust mutexes */ -#if !defined(EOWNERDEAD) -# define EOWNERDEAD 43 -#endif -#if !defined(ENOTRECOVERABLE) -# define ENOTRECOVERABLE 44 -#endif - -#include - -/* - * To avoid including windows.h we define only those things that we - * actually need from it. 
- */ -#if !defined(PTW32_INCLUDE_WINDOWS_H) -#if !defined(HANDLE) -# define PTW32__HANDLE_DEF -# define HANDLE void * -#endif -#if !defined(DWORD) -# define PTW32__DWORD_DEF -# define DWORD unsigned long -#endif -#endif - -#if !defined(HAVE_STRUCT_TIMESPEC) -#define HAVE_STRUCT_TIMESPEC -#if !defined(_TIMESPEC_DEFINED) -#define _TIMESPEC_DEFINED -struct timespec { - time_t tv_sec; - long tv_nsec; -}; -#endif /* _TIMESPEC_DEFINED */ -#endif /* HAVE_STRUCT_TIMESPEC */ - -#if !defined(SIG_BLOCK) -#define SIG_BLOCK 0 -#endif /* SIG_BLOCK */ - -#if !defined(SIG_UNBLOCK) -#define SIG_UNBLOCK 1 -#endif /* SIG_UNBLOCK */ - -#if !defined(SIG_SETMASK) -#define SIG_SETMASK 2 -#endif /* SIG_SETMASK */ - -#if defined(__cplusplus) -extern "C" -{ -#endif /* __cplusplus */ - -/* - * ------------------------------------------------------------- - * - * POSIX 1003.1-2001 Options - * ========================= - * - * Options are normally set in , which is not provided - * with pthreads-win32. - * - * For conformance with the Single Unix Specification (version 3), all of the - * options below are defined, and have a value of either -1 (not supported) - * or 200112L (supported). - * - * These options can neither be left undefined nor have a value of 0, because - * either indicates that sysconf(), which is not implemented, may be used at - * runtime to check the status of the option. - * - * _POSIX_THREADS (== 200112L) - * If == 200112L, you can use threads - * - * _POSIX_THREAD_ATTR_STACKSIZE (== 200112L) - * If == 200112L, you can control the size of a thread's - * stack - * pthread_attr_getstacksize - * pthread_attr_setstacksize - * - * _POSIX_THREAD_ATTR_STACKADDR (== -1) - * If == 200112L, you can allocate and control a thread's - * stack. 
If not supported, the following functions - * will return ENOSYS, indicating they are not - * supported: - * pthread_attr_getstackaddr - * pthread_attr_setstackaddr - * - * _POSIX_THREAD_PRIORITY_SCHEDULING (== -1) - * If == 200112L, you can use realtime scheduling. - * This option indicates that the behaviour of some - * implemented functions conforms to the additional TPS - * requirements in the standard. E.g. rwlocks favour - * writers over readers when threads have equal priority. - * - * _POSIX_THREAD_PRIO_INHERIT (== -1) - * If == 200112L, you can create priority inheritance - * mutexes. - * pthread_mutexattr_getprotocol + - * pthread_mutexattr_setprotocol + - * - * _POSIX_THREAD_PRIO_PROTECT (== -1) - * If == 200112L, you can create priority ceiling mutexes - * Indicates the availability of: - * pthread_mutex_getprioceiling - * pthread_mutex_setprioceiling - * pthread_mutexattr_getprioceiling - * pthread_mutexattr_getprotocol + - * pthread_mutexattr_setprioceiling - * pthread_mutexattr_setprotocol + - * - * _POSIX_THREAD_PROCESS_SHARED (== -1) - * If set, you can create mutexes and condition - * variables that can be shared with another - * process.If set, indicates the availability - * of: - * pthread_mutexattr_getpshared - * pthread_mutexattr_setpshared - * pthread_condattr_getpshared - * pthread_condattr_setpshared - * - * _POSIX_THREAD_SAFE_FUNCTIONS (== 200112L) - * If == 200112L you can use the special *_r library - * functions that provide thread-safe behaviour - * - * _POSIX_READER_WRITER_LOCKS (== 200112L) - * If == 200112L, you can use read/write locks - * - * _POSIX_SPIN_LOCKS (== 200112L) - * If == 200112L, you can use spin locks - * - * _POSIX_BARRIERS (== 200112L) - * If == 200112L, you can use barriers - * - * + These functions provide both 'inherit' and/or - * 'protect' protocol, based upon these macro - * settings. 
- * - * ------------------------------------------------------------- - */ - -/* - * POSIX Options - */ -#undef _POSIX_THREADS -#define _POSIX_THREADS 200809L - -#undef _POSIX_READER_WRITER_LOCKS -#define _POSIX_READER_WRITER_LOCKS 200809L - -#undef _POSIX_SPIN_LOCKS -#define _POSIX_SPIN_LOCKS 200809L - -#undef _POSIX_BARRIERS -#define _POSIX_BARRIERS 200809L - -#undef _POSIX_THREAD_SAFE_FUNCTIONS -#define _POSIX_THREAD_SAFE_FUNCTIONS 200809L - -#undef _POSIX_THREAD_ATTR_STACKSIZE -#define _POSIX_THREAD_ATTR_STACKSIZE 200809L - -/* - * The following options are not supported - */ -#undef _POSIX_THREAD_ATTR_STACKADDR -#define _POSIX_THREAD_ATTR_STACKADDR -1 - -#undef _POSIX_THREAD_PRIO_INHERIT -#define _POSIX_THREAD_PRIO_INHERIT -1 - -#undef _POSIX_THREAD_PRIO_PROTECT -#define _POSIX_THREAD_PRIO_PROTECT -1 - -/* TPS is not fully supported. */ -#undef _POSIX_THREAD_PRIORITY_SCHEDULING -#define _POSIX_THREAD_PRIORITY_SCHEDULING -1 - -#undef _POSIX_THREAD_PROCESS_SHARED -#define _POSIX_THREAD_PROCESS_SHARED -1 - - -/* - * POSIX 1003.1-2001 Limits - * =========================== - * - * These limits are normally set in , which is not provided with - * pthreads-win32. - * - * PTHREAD_DESTRUCTOR_ITERATIONS - * Maximum number of attempts to destroy - * a thread's thread-specific data on - * termination (must be at least 4) - * - * PTHREAD_KEYS_MAX - * Maximum number of thread-specific data keys - * available per process (must be at least 128) - * - * PTHREAD_STACK_MIN - * Minimum supported stack size for a thread - * - * PTHREAD_THREADS_MAX - * Maximum number of threads supported per - * process (must be at least 64). - * - * SEM_NSEMS_MAX - * The maximum number of semaphores a process can have. - * (must be at least 256) - * - * SEM_VALUE_MAX - * The maximum value a semaphore can have. 
- * (must be at least 32767) - * - */ -#undef _POSIX_THREAD_DESTRUCTOR_ITERATIONS -#define _POSIX_THREAD_DESTRUCTOR_ITERATIONS 4 - -#undef PTHREAD_DESTRUCTOR_ITERATIONS -#define PTHREAD_DESTRUCTOR_ITERATIONS _POSIX_THREAD_DESTRUCTOR_ITERATIONS - -#undef _POSIX_THREAD_KEYS_MAX -#define _POSIX_THREAD_KEYS_MAX 128 - -#undef PTHREAD_KEYS_MAX -#define PTHREAD_KEYS_MAX _POSIX_THREAD_KEYS_MAX - -#undef PTHREAD_STACK_MIN -#define PTHREAD_STACK_MIN 0 - -#undef _POSIX_THREAD_THREADS_MAX -#define _POSIX_THREAD_THREADS_MAX 64 - - /* Arbitrary value */ -#undef PTHREAD_THREADS_MAX -#define PTHREAD_THREADS_MAX 2019 - -#undef _POSIX_SEM_NSEMS_MAX -#define _POSIX_SEM_NSEMS_MAX 256 - - /* Arbitrary value */ -#undef SEM_NSEMS_MAX -#define SEM_NSEMS_MAX 1024 - -#undef _POSIX_SEM_VALUE_MAX -#define _POSIX_SEM_VALUE_MAX 32767 - -#undef SEM_VALUE_MAX -#define SEM_VALUE_MAX INT_MAX - - -#if defined(__GNUC__) && !defined(__declspec) -# error Please upgrade your GNU compiler to one that supports __declspec. -#endif - -/* - * When building the library, you should define PTW32_BUILD so that - * the variables/functions are exported correctly. When using the library, - * do NOT define PTW32_BUILD, and then the variables/functions will - * be imported correctly. - */ -#if !defined(PTW32_STATIC_LIB) -# if defined(PTW32_BUILD) -# define PTW32_DLLPORT __declspec (dllexport) -# else -# define PTW32_DLLPORT __declspec (dllimport) -# endif -#else -# define PTW32_DLLPORT -#endif - -/* - * The Open Watcom C/C++ compiler uses a non-standard calling convention - * that passes function args in registers unless __cdecl is explicitly specified - * in exposed function prototypes. - * - * We force all calls to cdecl even though this could slow Watcom code down - * slightly. If you know that the Watcom compiler will be used to build both - * the DLL and application, then you can probably define this as a null string. - * Remember that pthread.h (this file) is used for both the DLL and application builds. 
- */ -#define PTW32_CDECL __cdecl - -#if defined(_UWIN) && PTW32_LEVEL >= PTW32_LEVEL_MAX -# include -#else -/* - * Generic handle type - intended to extend uniqueness beyond - * that available with a simple pointer. It should scale for either - * IA-32 or IA-64. - */ -typedef struct { - void * p; /* Pointer to actual object */ - unsigned int x; /* Extra information - reuse count etc */ -} ptw32_handle_t; - -typedef ptw32_handle_t pthread_t; -typedef struct pthread_attr_t_ * pthread_attr_t; -typedef struct pthread_once_t_ pthread_once_t; -typedef struct pthread_key_t_ * pthread_key_t; -typedef struct pthread_mutex_t_ * pthread_mutex_t; -typedef struct pthread_mutexattr_t_ * pthread_mutexattr_t; -typedef struct pthread_cond_t_ * pthread_cond_t; -typedef struct pthread_condattr_t_ * pthread_condattr_t; -#endif -typedef struct pthread_rwlock_t_ * pthread_rwlock_t; -typedef struct pthread_rwlockattr_t_ * pthread_rwlockattr_t; -typedef struct pthread_spinlock_t_ * pthread_spinlock_t; -typedef struct pthread_barrier_t_ * pthread_barrier_t; -typedef struct pthread_barrierattr_t_ * pthread_barrierattr_t; - -/* - * ==================== - * ==================== - * POSIX Threads - * ==================== - * ==================== - */ - -enum { -/* - * pthread_attr_{get,set}detachstate - */ - PTHREAD_CREATE_JOINABLE = 0, /* Default */ - PTHREAD_CREATE_DETACHED = 1, - -/* - * pthread_attr_{get,set}inheritsched - */ - PTHREAD_INHERIT_SCHED = 0, - PTHREAD_EXPLICIT_SCHED = 1, /* Default */ - -/* - * pthread_{get,set}scope - */ - PTHREAD_SCOPE_PROCESS = 0, - PTHREAD_SCOPE_SYSTEM = 1, /* Default */ - -/* - * pthread_setcancelstate paramters - */ - PTHREAD_CANCEL_ENABLE = 0, /* Default */ - PTHREAD_CANCEL_DISABLE = 1, - -/* - * pthread_setcanceltype parameters - */ - PTHREAD_CANCEL_ASYNCHRONOUS = 0, - PTHREAD_CANCEL_DEFERRED = 1, /* Default */ - -/* - * pthread_mutexattr_{get,set}pshared - * pthread_condattr_{get,set}pshared - */ - PTHREAD_PROCESS_PRIVATE = 0, - 
PTHREAD_PROCESS_SHARED = 1, - -/* - * pthread_mutexattr_{get,set}robust - */ - PTHREAD_MUTEX_STALLED = 0, /* Default */ - PTHREAD_MUTEX_ROBUST = 1, - -/* - * pthread_barrier_wait - */ - PTHREAD_BARRIER_SERIAL_THREAD = -1 -}; - -/* - * ==================== - * ==================== - * Cancelation - * ==================== - * ==================== - */ -#define PTHREAD_CANCELED ((void *)(size_t) -1) - - -/* - * ==================== - * ==================== - * Once Key - * ==================== - * ==================== - */ -#define PTHREAD_ONCE_INIT { PTW32_FALSE, 0, 0, 0} - -struct pthread_once_t_ -{ - int done; /* indicates if user function has been executed */ - void * lock; - int reserved1; - int reserved2; -}; - - -/* - * ==================== - * ==================== - * Object initialisers - * ==================== - * ==================== - */ -#define PTHREAD_MUTEX_INITIALIZER ((pthread_mutex_t)(size_t) -1) -#define PTHREAD_RECURSIVE_MUTEX_INITIALIZER ((pthread_mutex_t)(size_t) -2) -#define PTHREAD_ERRORCHECK_MUTEX_INITIALIZER ((pthread_mutex_t)(size_t) -3) - -/* - * Compatibility with LinuxThreads - */ -#define PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP PTHREAD_RECURSIVE_MUTEX_INITIALIZER -#define PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP PTHREAD_ERRORCHECK_MUTEX_INITIALIZER - -#define PTHREAD_COND_INITIALIZER ((pthread_cond_t)(size_t) -1) - -#define PTHREAD_RWLOCK_INITIALIZER ((pthread_rwlock_t)(size_t) -1) - -#define PTHREAD_SPINLOCK_INITIALIZER ((pthread_spinlock_t)(size_t) -1) - - -/* - * Mutex types. 
- */ -enum -{ - /* Compatibility with LinuxThreads */ - PTHREAD_MUTEX_FAST_NP, - PTHREAD_MUTEX_RECURSIVE_NP, - PTHREAD_MUTEX_ERRORCHECK_NP, - PTHREAD_MUTEX_TIMED_NP = PTHREAD_MUTEX_FAST_NP, - PTHREAD_MUTEX_ADAPTIVE_NP = PTHREAD_MUTEX_FAST_NP, - /* For compatibility with POSIX */ - PTHREAD_MUTEX_NORMAL = PTHREAD_MUTEX_FAST_NP, - PTHREAD_MUTEX_RECURSIVE = PTHREAD_MUTEX_RECURSIVE_NP, - PTHREAD_MUTEX_ERRORCHECK = PTHREAD_MUTEX_ERRORCHECK_NP, - PTHREAD_MUTEX_DEFAULT = PTHREAD_MUTEX_NORMAL -}; - - -typedef struct ptw32_cleanup_t ptw32_cleanup_t; - -#if defined(_MSC_VER) -/* Disable MSVC 'anachronism used' warning */ -#pragma warning( disable : 4229 ) -#endif - -typedef void (* PTW32_CDECL ptw32_cleanup_callback_t)(void *); - -#if defined(_MSC_VER) -#pragma warning( default : 4229 ) -#endif - -struct ptw32_cleanup_t -{ - ptw32_cleanup_callback_t routine; - void *arg; - struct ptw32_cleanup_t *prev; -}; - -#if defined(__CLEANUP_SEH) - /* - * WIN32 SEH version of cancel cleanup. - */ - -#define pthread_cleanup_push( _rout, _arg ) \ - { \ - ptw32_cleanup_t _cleanup; \ - \ - _cleanup.routine = (ptw32_cleanup_callback_t)(_rout); \ - _cleanup.arg = (_arg); \ - __try \ - { \ - -#define pthread_cleanup_pop( _execute ) \ - } \ - __finally \ - { \ - if( _execute || AbnormalTermination()) \ - { \ - (*(_cleanup.routine))( _cleanup.arg ); \ - } \ - } \ - } - -#else /* __CLEANUP_SEH */ - -#if defined(__CLEANUP_C) - - /* - * C implementation of PThreads cancel cleanup - */ - -#define pthread_cleanup_push( _rout, _arg ) \ - { \ - ptw32_cleanup_t _cleanup; \ - \ - ptw32_push_cleanup( &_cleanup, (ptw32_cleanup_callback_t) (_rout), (_arg) ); \ - -#define pthread_cleanup_pop( _execute ) \ - (void) ptw32_pop_cleanup( _execute ); \ - } - -#else /* __CLEANUP_C */ - -#if defined(__CLEANUP_CXX) - - /* - * C++ version of cancel cleanup. - * - John E. Bossom. 
- */ - - class PThreadCleanup { - /* - * PThreadCleanup - * - * Purpose - * This class is a C++ helper class that is - * used to implement pthread_cleanup_push/ - * pthread_cleanup_pop. - * The destructor of this class automatically - * pops the pushed cleanup routine regardless - * of how the code exits the scope - * (i.e. such as by an exception) - */ - ptw32_cleanup_callback_t cleanUpRout; - void * obj; - int executeIt; - - public: - PThreadCleanup() : - cleanUpRout( 0 ), - obj( 0 ), - executeIt( 0 ) - /* - * No cleanup performed - */ - { - } - - PThreadCleanup( - ptw32_cleanup_callback_t routine, - void * arg ) : - cleanUpRout( routine ), - obj( arg ), - executeIt( 1 ) - /* - * Registers a cleanup routine for 'arg' - */ - { - } - - ~PThreadCleanup() - { - if ( executeIt && ((void *) cleanUpRout != (void *) 0) ) - { - (void) (*cleanUpRout)( obj ); - } - } - - void execute( int exec ) - { - executeIt = exec; - } - }; - - /* - * C++ implementation of PThreads cancel cleanup; - * This implementation takes advantage of a helper - * class who's destructor automatically calls the - * cleanup routine if we exit our scope weirdly - */ -#define pthread_cleanup_push( _rout, _arg ) \ - { \ - PThreadCleanup cleanup((ptw32_cleanup_callback_t)(_rout), \ - (void *) (_arg) ); - -#define pthread_cleanup_pop( _execute ) \ - cleanup.execute( _execute ); \ - } - -#else - -#error ERROR [__FILE__, line __LINE__]: Cleanup type undefined. 
- -#endif /* __CLEANUP_CXX */ - -#endif /* __CLEANUP_C */ - -#endif /* __CLEANUP_SEH */ - -/* - * =============== - * =============== - * Methods - * =============== - * =============== - */ - -/* - * PThread Attribute Functions - */ -PTW32_DLLPORT int PTW32_CDECL pthread_attr_init (pthread_attr_t * attr); - -PTW32_DLLPORT int PTW32_CDECL pthread_attr_destroy (pthread_attr_t * attr); - -PTW32_DLLPORT int PTW32_CDECL pthread_attr_getdetachstate (const pthread_attr_t * attr, - int *detachstate); - -PTW32_DLLPORT int PTW32_CDECL pthread_attr_getstackaddr (const pthread_attr_t * attr, - void **stackaddr); - -PTW32_DLLPORT int PTW32_CDECL pthread_attr_getstacksize (const pthread_attr_t * attr, - size_t * stacksize); - -PTW32_DLLPORT int PTW32_CDECL pthread_attr_setdetachstate (pthread_attr_t * attr, - int detachstate); - -PTW32_DLLPORT int PTW32_CDECL pthread_attr_setstackaddr (pthread_attr_t * attr, - void *stackaddr); - -PTW32_DLLPORT int PTW32_CDECL pthread_attr_setstacksize (pthread_attr_t * attr, - size_t stacksize); - -PTW32_DLLPORT int PTW32_CDECL pthread_attr_getschedparam (const pthread_attr_t *attr, - struct sched_param *param); - -PTW32_DLLPORT int PTW32_CDECL pthread_attr_setschedparam (pthread_attr_t *attr, - const struct sched_param *param); - -PTW32_DLLPORT int PTW32_CDECL pthread_attr_setschedpolicy (pthread_attr_t *, - int); - -PTW32_DLLPORT int PTW32_CDECL pthread_attr_getschedpolicy (const pthread_attr_t *, - int *); - -PTW32_DLLPORT int PTW32_CDECL pthread_attr_setinheritsched(pthread_attr_t * attr, - int inheritsched); - -PTW32_DLLPORT int PTW32_CDECL pthread_attr_getinheritsched(const pthread_attr_t * attr, - int * inheritsched); - -PTW32_DLLPORT int PTW32_CDECL pthread_attr_setscope (pthread_attr_t *, - int); - -PTW32_DLLPORT int PTW32_CDECL pthread_attr_getscope (const pthread_attr_t *, - int *); - -/* - * PThread Functions - */ -PTW32_DLLPORT int PTW32_CDECL pthread_create (pthread_t * tid, - const pthread_attr_t * attr, - void *(PTW32_CDECL 
*start) (void *), - void *arg); - -PTW32_DLLPORT int PTW32_CDECL pthread_detach (pthread_t tid); - -PTW32_DLLPORT int PTW32_CDECL pthread_equal (pthread_t t1, - pthread_t t2); - -PTW32_DLLPORT void PTW32_CDECL pthread_exit (void *value_ptr); - -PTW32_DLLPORT int PTW32_CDECL pthread_join (pthread_t thread, - void **value_ptr); - -PTW32_DLLPORT pthread_t PTW32_CDECL pthread_self (void); - -PTW32_DLLPORT int PTW32_CDECL pthread_cancel (pthread_t thread); - -PTW32_DLLPORT int PTW32_CDECL pthread_setcancelstate (int state, - int *oldstate); - -PTW32_DLLPORT int PTW32_CDECL pthread_setcanceltype (int type, - int *oldtype); - -PTW32_DLLPORT void PTW32_CDECL pthread_testcancel (void); - -PTW32_DLLPORT int PTW32_CDECL pthread_once (pthread_once_t * once_control, - void (PTW32_CDECL *init_routine) (void)); - -#if PTW32_LEVEL >= PTW32_LEVEL_MAX -PTW32_DLLPORT ptw32_cleanup_t * PTW32_CDECL ptw32_pop_cleanup (int execute); - -PTW32_DLLPORT void PTW32_CDECL ptw32_push_cleanup (ptw32_cleanup_t * cleanup, - ptw32_cleanup_callback_t routine, - void *arg); -#endif /* PTW32_LEVEL >= PTW32_LEVEL_MAX */ - -/* - * Thread Specific Data Functions - */ -PTW32_DLLPORT int PTW32_CDECL pthread_key_create (pthread_key_t * key, - void (PTW32_CDECL *destructor) (void *)); - -PTW32_DLLPORT int PTW32_CDECL pthread_key_delete (pthread_key_t key); - -PTW32_DLLPORT int PTW32_CDECL pthread_setspecific (pthread_key_t key, - const void *value); - -PTW32_DLLPORT void * PTW32_CDECL pthread_getspecific (pthread_key_t key); - - -/* - * Mutex Attribute Functions - */ -PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_init (pthread_mutexattr_t * attr); - -PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_destroy (pthread_mutexattr_t * attr); - -PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_getpshared (const pthread_mutexattr_t - * attr, - int *pshared); - -PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_setpshared (pthread_mutexattr_t * attr, - int pshared); - -PTW32_DLLPORT int PTW32_CDECL 
pthread_mutexattr_settype (pthread_mutexattr_t * attr, int kind); -PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_gettype (const pthread_mutexattr_t * attr, int *kind); - -PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_setrobust( - pthread_mutexattr_t *attr, - int robust); -PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_getrobust( - const pthread_mutexattr_t * attr, - int * robust); - -/* - * Barrier Attribute Functions - */ -PTW32_DLLPORT int PTW32_CDECL pthread_barrierattr_init (pthread_barrierattr_t * attr); - -PTW32_DLLPORT int PTW32_CDECL pthread_barrierattr_destroy (pthread_barrierattr_t * attr); - -PTW32_DLLPORT int PTW32_CDECL pthread_barrierattr_getpshared (const pthread_barrierattr_t - * attr, - int *pshared); - -PTW32_DLLPORT int PTW32_CDECL pthread_barrierattr_setpshared (pthread_barrierattr_t * attr, - int pshared); - -/* - * Mutex Functions - */ -PTW32_DLLPORT int PTW32_CDECL pthread_mutex_init (pthread_mutex_t * mutex, - const pthread_mutexattr_t * attr); - -PTW32_DLLPORT int PTW32_CDECL pthread_mutex_destroy (pthread_mutex_t * mutex); - -PTW32_DLLPORT int PTW32_CDECL pthread_mutex_lock (pthread_mutex_t * mutex); - -PTW32_DLLPORT int PTW32_CDECL pthread_mutex_timedlock(pthread_mutex_t * mutex, - const struct timespec *abstime); - -PTW32_DLLPORT int PTW32_CDECL pthread_mutex_trylock (pthread_mutex_t * mutex); - -PTW32_DLLPORT int PTW32_CDECL pthread_mutex_unlock (pthread_mutex_t * mutex); - -PTW32_DLLPORT int PTW32_CDECL pthread_mutex_consistent (pthread_mutex_t * mutex); - -/* - * Spinlock Functions - */ -PTW32_DLLPORT int PTW32_CDECL pthread_spin_init (pthread_spinlock_t * lock, int pshared); - -PTW32_DLLPORT int PTW32_CDECL pthread_spin_destroy (pthread_spinlock_t * lock); - -PTW32_DLLPORT int PTW32_CDECL pthread_spin_lock (pthread_spinlock_t * lock); - -PTW32_DLLPORT int PTW32_CDECL pthread_spin_trylock (pthread_spinlock_t * lock); - -PTW32_DLLPORT int PTW32_CDECL pthread_spin_unlock (pthread_spinlock_t * lock); - -/* - * Barrier Functions - */ 
-PTW32_DLLPORT int PTW32_CDECL pthread_barrier_init (pthread_barrier_t * barrier, - const pthread_barrierattr_t * attr, - unsigned int count); - -PTW32_DLLPORT int PTW32_CDECL pthread_barrier_destroy (pthread_barrier_t * barrier); - -PTW32_DLLPORT int PTW32_CDECL pthread_barrier_wait (pthread_barrier_t * barrier); - -/* - * Condition Variable Attribute Functions - */ -PTW32_DLLPORT int PTW32_CDECL pthread_condattr_init (pthread_condattr_t * attr); - -PTW32_DLLPORT int PTW32_CDECL pthread_condattr_destroy (pthread_condattr_t * attr); - -PTW32_DLLPORT int PTW32_CDECL pthread_condattr_getpshared (const pthread_condattr_t * attr, - int *pshared); - -PTW32_DLLPORT int PTW32_CDECL pthread_condattr_setpshared (pthread_condattr_t * attr, - int pshared); - -/* - * Condition Variable Functions - */ -PTW32_DLLPORT int PTW32_CDECL pthread_cond_init (pthread_cond_t * cond, - const pthread_condattr_t * attr); - -PTW32_DLLPORT int PTW32_CDECL pthread_cond_destroy (pthread_cond_t * cond); - -PTW32_DLLPORT int PTW32_CDECL pthread_cond_wait (pthread_cond_t * cond, - pthread_mutex_t * mutex); - -PTW32_DLLPORT int PTW32_CDECL pthread_cond_timedwait (pthread_cond_t * cond, - pthread_mutex_t * mutex, - const struct timespec *abstime); - -PTW32_DLLPORT int PTW32_CDECL pthread_cond_signal (pthread_cond_t * cond); - -PTW32_DLLPORT int PTW32_CDECL pthread_cond_broadcast (pthread_cond_t * cond); - -/* - * Scheduling - */ -PTW32_DLLPORT int PTW32_CDECL pthread_setschedparam (pthread_t thread, - int policy, - const struct sched_param *param); - -PTW32_DLLPORT int PTW32_CDECL pthread_getschedparam (pthread_t thread, - int *policy, - struct sched_param *param); - -PTW32_DLLPORT int PTW32_CDECL pthread_setconcurrency (int); - -PTW32_DLLPORT int PTW32_CDECL pthread_getconcurrency (void); - -/* - * Read-Write Lock Functions - */ -PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_init(pthread_rwlock_t *lock, - const pthread_rwlockattr_t *attr); - -PTW32_DLLPORT int PTW32_CDECL 
pthread_rwlock_destroy(pthread_rwlock_t *lock); - -PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_tryrdlock(pthread_rwlock_t *); - -PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_trywrlock(pthread_rwlock_t *); - -PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_rdlock(pthread_rwlock_t *lock); - -PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_timedrdlock(pthread_rwlock_t *lock, - const struct timespec *abstime); - -PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_wrlock(pthread_rwlock_t *lock); - -PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_timedwrlock(pthread_rwlock_t *lock, - const struct timespec *abstime); - -PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_unlock(pthread_rwlock_t *lock); - -PTW32_DLLPORT int PTW32_CDECL pthread_rwlockattr_init (pthread_rwlockattr_t * attr); - -PTW32_DLLPORT int PTW32_CDECL pthread_rwlockattr_destroy (pthread_rwlockattr_t * attr); - -PTW32_DLLPORT int PTW32_CDECL pthread_rwlockattr_getpshared (const pthread_rwlockattr_t * attr, - int *pshared); - -PTW32_DLLPORT int PTW32_CDECL pthread_rwlockattr_setpshared (pthread_rwlockattr_t * attr, - int pshared); - -#if PTW32_LEVEL >= PTW32_LEVEL_MAX - 1 - -/* - * Signal Functions. Should be defined in but MSVC and MinGW32 - * already have signal.h that don't define these. - */ -PTW32_DLLPORT int PTW32_CDECL pthread_kill(pthread_t thread, int sig); - -/* - * Non-portable functions - */ - -/* - * Compatibility with Linux. 
- */ -PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_setkind_np(pthread_mutexattr_t * attr, - int kind); -PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_getkind_np(pthread_mutexattr_t * attr, - int *kind); - -/* - * Possibly supported by other POSIX threads implementations - */ -PTW32_DLLPORT int PTW32_CDECL pthread_delay_np (struct timespec * interval); -PTW32_DLLPORT int PTW32_CDECL pthread_num_processors_np(void); -PTW32_DLLPORT unsigned __int64 PTW32_CDECL pthread_getunique_np(pthread_t thread); - -/* - * Useful if an application wants to statically link - * the lib rather than load the DLL at run-time. - */ -PTW32_DLLPORT int PTW32_CDECL pthread_win32_process_attach_np(void); -PTW32_DLLPORT int PTW32_CDECL pthread_win32_process_detach_np(void); -PTW32_DLLPORT int PTW32_CDECL pthread_win32_thread_attach_np(void); -PTW32_DLLPORT int PTW32_CDECL pthread_win32_thread_detach_np(void); - -/* - * Features that are auto-detected at load/run time. - */ -PTW32_DLLPORT int PTW32_CDECL pthread_win32_test_features_np(int); -enum ptw32_features { - PTW32_SYSTEM_INTERLOCKED_COMPARE_EXCHANGE = 0x0001, /* System provides it. */ - PTW32_ALERTABLE_ASYNC_CANCEL = 0x0002 /* Can cancel blocked threads. */ -}; - -/* - * Register a system time change with the library. - * Causes the library to perform various functions - * in response to the change. Should be called whenever - * the application's top level window receives a - * WM_TIMECHANGE message. It can be passed directly to - * pthread_create() as a new thread if desired. - */ -PTW32_DLLPORT void * PTW32_CDECL pthread_timechange_handler_np(void *); - -#endif /*PTW32_LEVEL >= PTW32_LEVEL_MAX - 1 */ - -#if PTW32_LEVEL >= PTW32_LEVEL_MAX - -/* - * Returns the Win32 HANDLE for the POSIX thread. - */ -PTW32_DLLPORT HANDLE PTW32_CDECL pthread_getw32threadhandle_np(pthread_t thread); -/* - * Returns the win32 thread ID for POSIX thread. 
- */ -PTW32_DLLPORT DWORD PTW32_CDECL pthread_getw32threadid_np (pthread_t thread); - - -/* - * Protected Methods - * - * This function blocks until the given WIN32 handle - * is signaled or pthread_cancel had been called. - * This function allows the caller to hook into the - * PThreads cancel mechanism. It is implemented using - * - * WaitForMultipleObjects - * - * on 'waitHandle' and a manually reset WIN32 Event - * used to implement pthread_cancel. The 'timeout' - * argument to TimedWait is simply passed to - * WaitForMultipleObjects. - */ -PTW32_DLLPORT int PTW32_CDECL pthreadCancelableWait (HANDLE waitHandle); -PTW32_DLLPORT int PTW32_CDECL pthreadCancelableTimedWait (HANDLE waitHandle, - DWORD timeout); - -#endif /* PTW32_LEVEL >= PTW32_LEVEL_MAX */ - -/* - * Thread-Safe C Runtime Library Mappings. - */ -#if !defined(_UWIN) -# if defined(NEED_ERRNO) - PTW32_DLLPORT int * PTW32_CDECL _errno( void ); -# else -# if !defined(errno) -# if (defined(_MT) || defined(_DLL)) - __declspec(dllimport) extern int * __cdecl _errno(void); -# define errno (*_errno()) -# endif -# endif -# endif -#endif - -/* - * Some compiler environments don't define some things. - */ -#if defined(__BORLANDC__) -# define _ftime ftime -# define _timeb timeb -#endif - -#if defined(__cplusplus) - -/* - * Internal exceptions - */ -class ptw32_exception {}; -class ptw32_exception_cancel : public ptw32_exception {}; -class ptw32_exception_exit : public ptw32_exception {}; - -#endif - -#if PTW32_LEVEL >= PTW32_LEVEL_MAX - -/* FIXME: This is only required if the library was built using SEH */ -/* - * Get internal SEH tag - */ -PTW32_DLLPORT DWORD PTW32_CDECL ptw32_get_exception_services_code(void); - -#endif /* PTW32_LEVEL >= PTW32_LEVEL_MAX */ - -#if !defined(PTW32_BUILD) - -#if defined(__CLEANUP_SEH) - -/* - * Redefine the SEH __except keyword to ensure that applications - * propagate our internal exceptions up to the library's internal handlers. 
- */ -#define __except( E ) \ - __except( ( GetExceptionCode() == ptw32_get_exception_services_code() ) \ - ? EXCEPTION_CONTINUE_SEARCH : ( E ) ) - -#endif /* __CLEANUP_SEH */ - -#if defined(__CLEANUP_CXX) - -/* - * Redefine the C++ catch keyword to ensure that applications - * propagate our internal exceptions up to the library's internal handlers. - */ -#if defined(_MSC_VER) - /* - * WARNING: Replace any 'catch( ... )' with 'PtW32CatchAll' - * if you want Pthread-Win32 cancelation and pthread_exit to work. - */ - -#if !defined(PtW32NoCatchWarn) - -#pragma message("Specify \"/DPtW32NoCatchWarn\" compiler flag to skip this message.") -#pragma message("------------------------------------------------------------------") -#pragma message("When compiling applications with MSVC++ and C++ exception handling:") -#pragma message(" Replace any 'catch( ... )' in routines called from POSIX threads") -#pragma message(" with 'PtW32CatchAll' or 'CATCHALL' if you want POSIX thread") -#pragma message(" cancelation and pthread_exit to work. For example:") -#pragma message("") -#pragma message(" #if defined(PtW32CatchAll)") -#pragma message(" PtW32CatchAll") -#pragma message(" #else") -#pragma message(" catch(...)") -#pragma message(" #endif") -#pragma message(" {") -#pragma message(" /* Catchall block processing */") -#pragma message(" }") -#pragma message("------------------------------------------------------------------") - -#endif - -#define PtW32CatchAll \ - catch( ptw32_exception & ) { throw; } \ - catch( ... ) - -#else /* _MSC_VER */ - -#define catch( E ) \ - catch( ptw32_exception & ) { throw; } \ - catch( E ) - -#endif /* _MSC_VER */ - -#endif /* __CLEANUP_CXX */ - -#endif /* ! PTW32_BUILD */ - -#if defined(__cplusplus) -} /* End of extern "C" */ -#endif /* __cplusplus */ - -#if defined(PTW32__HANDLE_DEF) -# undef HANDLE -#endif -#if defined(PTW32__DWORD_DEF) -# undef DWORD -#endif - -#undef PTW32_LEVEL -#undef PTW32_LEVEL_MAX - -#endif /* ! 
RC_INVOKED */ - -#endif /* PTHREAD_H */ diff --git a/src/Detector/darknet/3rdparty/include/sched.h b/src/Detector/darknet/3rdparty/include/sched.h deleted file mode 100644 index f36a97a66..000000000 --- a/src/Detector/darknet/3rdparty/include/sched.h +++ /dev/null @@ -1,183 +0,0 @@ -/* - * Module: sched.h - * - * Purpose: - * Provides an implementation of POSIX realtime extensions - * as defined in - * - * POSIX 1003.1b-1993 (POSIX.1b) - * - * -------------------------------------------------------------------------- - * - * Pthreads-win32 - POSIX Threads Library for Win32 - * Copyright(C) 1998 John E. Bossom - * Copyright(C) 1999,2005 Pthreads-win32 contributors - * - * Contact Email: rpj@callisto.canberra.edu.au - * - * The current list of contributors is contained - * in the file CONTRIBUTORS included with the source - * code distribution. The list can also be seen at the - * following World Wide Web location: - * http://sources.redhat.com/pthreads-win32/contributors.html - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library in the file COPYING.LIB; - * if not, write to the Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - */ -#if !defined(_SCHED_H) -#define _SCHED_H - -#undef PTW32_SCHED_LEVEL - -#if defined(_POSIX_SOURCE) -#define PTW32_SCHED_LEVEL 0 -/* Early POSIX */ -#endif - -#if defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 199309 -#undef PTW32_SCHED_LEVEL -#define PTW32_SCHED_LEVEL 1 -/* Include 1b, 1c and 1d */ -#endif - -#if defined(INCLUDE_NP) -#undef PTW32_SCHED_LEVEL -#define PTW32_SCHED_LEVEL 2 -/* Include Non-Portable extensions */ -#endif - -#define PTW32_SCHED_LEVEL_MAX 3 - -#if ( defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 200112 ) || !defined(PTW32_SCHED_LEVEL) -#define PTW32_SCHED_LEVEL PTW32_SCHED_LEVEL_MAX -/* Include everything */ -#endif - - -#if defined(__GNUC__) && !defined(__declspec) -# error Please upgrade your GNU compiler to one that supports __declspec. -#endif - -/* - * When building the library, you should define PTW32_BUILD so that - * the variables/functions are exported correctly. When using the library, - * do NOT define PTW32_BUILD, and then the variables/functions will - * be imported correctly. - */ -#if !defined(PTW32_STATIC_LIB) -# if defined(PTW32_BUILD) -# define PTW32_DLLPORT __declspec (dllexport) -# else -# define PTW32_DLLPORT __declspec (dllimport) -# endif -#else -# define PTW32_DLLPORT -#endif - -/* - * This is a duplicate of what is in the autoconf config.h, - * which is only used when building the pthread-win32 libraries. 
- */ - -#if !defined(PTW32_CONFIG_H) -# if defined(WINCE) -# define NEED_ERRNO -# define NEED_SEM -# endif -# if defined(__MINGW64__) -# define HAVE_STRUCT_TIMESPEC -# define HAVE_MODE_T -# elif defined(_UWIN) || defined(__MINGW32__) -# define HAVE_MODE_T -# endif -#endif - -/* - * - */ - -#if PTW32_SCHED_LEVEL >= PTW32_SCHED_LEVEL_MAX -#if defined(NEED_ERRNO) -#include "need_errno.h" -#else -#include -#endif -#endif /* PTW32_SCHED_LEVEL >= PTW32_SCHED_LEVEL_MAX */ - -#if (defined(__MINGW64__) || defined(__MINGW32__)) || defined(_UWIN) -# if PTW32_SCHED_LEVEL >= PTW32_SCHED_LEVEL_MAX -/* For pid_t */ -# include -/* Required by Unix 98 */ -# include -# else - typedef int pid_t; -# endif -#else - typedef int pid_t; -#endif - -/* Thread scheduling policies */ - -enum { - SCHED_OTHER = 0, - SCHED_FIFO, - SCHED_RR, - SCHED_MIN = SCHED_OTHER, - SCHED_MAX = SCHED_RR -}; - -struct sched_param { - int sched_priority; -}; - -#if defined(__cplusplus) -extern "C" -{ -#endif /* __cplusplus */ - -PTW32_DLLPORT int __cdecl sched_yield (void); - -PTW32_DLLPORT int __cdecl sched_get_priority_min (int policy); - -PTW32_DLLPORT int __cdecl sched_get_priority_max (int policy); - -PTW32_DLLPORT int __cdecl sched_setscheduler (pid_t pid, int policy); - -PTW32_DLLPORT int __cdecl sched_getscheduler (pid_t pid); - -/* - * Note that this macro returns ENOTSUP rather than - * ENOSYS as might be expected. However, returning ENOSYS - * should mean that sched_get_priority_{min,max} are - * not implemented as well as sched_rr_get_interval. - * This is not the case, since we just don't support - * round-robin scheduling. Therefore I have chosen to - * return the same value as sched_setscheduler when - * SCHED_RR is passed to it. 
- */ -#define sched_rr_get_interval(_pid, _interval) \ - ( errno = ENOTSUP, (int) -1 ) - - -#if defined(__cplusplus) -} /* End of extern "C" */ -#endif /* __cplusplus */ - -#undef PTW32_SCHED_LEVEL -#undef PTW32_SCHED_LEVEL_MAX - -#endif /* !_SCHED_H */ - diff --git a/src/Detector/darknet/3rdparty/include/semaphore.h b/src/Detector/darknet/3rdparty/include/semaphore.h deleted file mode 100644 index c6e9407e2..000000000 --- a/src/Detector/darknet/3rdparty/include/semaphore.h +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Module: semaphore.h - * - * Purpose: - * Semaphores aren't actually part of the PThreads standard. - * They are defined by the POSIX Standard: - * - * POSIX 1003.1b-1993 (POSIX.1b) - * - * -------------------------------------------------------------------------- - * - * Pthreads-win32 - POSIX Threads Library for Win32 - * Copyright(C) 1998 John E. Bossom - * Copyright(C) 1999,2005 Pthreads-win32 contributors - * - * Contact Email: rpj@callisto.canberra.edu.au - * - * The current list of contributors is contained - * in the file CONTRIBUTORS included with the source - * code distribution. The list can also be seen at the - * following World Wide Web location: - * http://sources.redhat.com/pthreads-win32/contributors.html - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library in the file COPYING.LIB; - * if not, write to the Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - */ -#if !defined( SEMAPHORE_H ) -#define SEMAPHORE_H - -#undef PTW32_SEMAPHORE_LEVEL - -#if defined(_POSIX_SOURCE) -#define PTW32_SEMAPHORE_LEVEL 0 -/* Early POSIX */ -#endif - -#if defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 199309 -#undef PTW32_SEMAPHORE_LEVEL -#define PTW32_SEMAPHORE_LEVEL 1 -/* Include 1b, 1c and 1d */ -#endif - -#if defined(INCLUDE_NP) -#undef PTW32_SEMAPHORE_LEVEL -#define PTW32_SEMAPHORE_LEVEL 2 -/* Include Non-Portable extensions */ -#endif - -#define PTW32_SEMAPHORE_LEVEL_MAX 3 - -#if !defined(PTW32_SEMAPHORE_LEVEL) -#define PTW32_SEMAPHORE_LEVEL PTW32_SEMAPHORE_LEVEL_MAX -/* Include everything */ -#endif - -#if defined(__GNUC__) && ! defined (__declspec) -# error Please upgrade your GNU compiler to one that supports __declspec. -#endif - -/* - * When building the library, you should define PTW32_BUILD so that - * the variables/functions are exported correctly. When using the library, - * do NOT define PTW32_BUILD, and then the variables/functions will - * be imported correctly. - */ -#if !defined(PTW32_STATIC_LIB) -# if defined(PTW32_BUILD) -# define PTW32_DLLPORT __declspec (dllexport) -# else -# define PTW32_DLLPORT __declspec (dllimport) -# endif -#else -# define PTW32_DLLPORT -#endif - -/* - * This is a duplicate of what is in the autoconf config.h, - * which is only used when building the pthread-win32 libraries. 
- */ - -#if !defined(PTW32_CONFIG_H) -# if defined(WINCE) -# define NEED_ERRNO -# define NEED_SEM -# endif -# if defined(__MINGW64__) -# define HAVE_STRUCT_TIMESPEC -# define HAVE_MODE_T -# elif defined(_UWIN) || defined(__MINGW32__) -# define HAVE_MODE_T -# endif -#endif - -/* - * - */ - -#if PTW32_SEMAPHORE_LEVEL >= PTW32_SEMAPHORE_LEVEL_MAX -#if defined(NEED_ERRNO) -#include "need_errno.h" -#else -#include -#endif -#endif /* PTW32_SEMAPHORE_LEVEL >= PTW32_SEMAPHORE_LEVEL_MAX */ - -#define _POSIX_SEMAPHORES - -#if defined(__cplusplus) -extern "C" -{ -#endif /* __cplusplus */ - -#if !defined(HAVE_MODE_T) -typedef unsigned int mode_t; -#endif - - -typedef struct sem_t_ * sem_t; - -PTW32_DLLPORT int __cdecl sem_init (sem_t * sem, - int pshared, - unsigned int value); - -PTW32_DLLPORT int __cdecl sem_destroy (sem_t * sem); - -PTW32_DLLPORT int __cdecl sem_trywait (sem_t * sem); - -PTW32_DLLPORT int __cdecl sem_wait (sem_t * sem); - -PTW32_DLLPORT int __cdecl sem_timedwait (sem_t * sem, - const struct timespec * abstime); - -PTW32_DLLPORT int __cdecl sem_post (sem_t * sem); - -PTW32_DLLPORT int __cdecl sem_post_multiple (sem_t * sem, - int count); - -PTW32_DLLPORT int __cdecl sem_open (const char * name, - int oflag, - mode_t mode, - unsigned int value); - -PTW32_DLLPORT int __cdecl sem_close (sem_t * sem); - -PTW32_DLLPORT int __cdecl sem_unlink (const char * name); - -PTW32_DLLPORT int __cdecl sem_getvalue (sem_t * sem, - int * sval); - -#if defined(__cplusplus) -} /* End of extern "C" */ -#endif /* __cplusplus */ - -#undef PTW32_SEMAPHORE_LEVEL -#undef PTW32_SEMAPHORE_LEVEL_MAX - -#endif /* !SEMAPHORE_H */ diff --git a/src/Detector/darknet/3rdparty/lib/x64/libpthreadGC2.a b/src/Detector/darknet/3rdparty/lib/x64/libpthreadGC2.a deleted file mode 100644 index 430162364..000000000 Binary files a/src/Detector/darknet/3rdparty/lib/x64/libpthreadGC2.a and /dev/null differ diff --git a/src/Detector/darknet/3rdparty/lib/x64/pthreadVC2.lib 
b/src/Detector/darknet/3rdparty/lib/x64/pthreadVC2.lib deleted file mode 100644 index 1b07e0e9a..000000000 Binary files a/src/Detector/darknet/3rdparty/lib/x64/pthreadVC2.lib and /dev/null differ diff --git a/src/Detector/darknet/3rdparty/lib/x86/libpthreadGC2.a b/src/Detector/darknet/3rdparty/lib/x86/libpthreadGC2.a deleted file mode 100644 index df211759f..000000000 Binary files a/src/Detector/darknet/3rdparty/lib/x86/libpthreadGC2.a and /dev/null differ diff --git a/src/Detector/darknet/3rdparty/lib/x86/libpthreadGCE2.a b/src/Detector/darknet/3rdparty/lib/x86/libpthreadGCE2.a deleted file mode 100644 index 9c56202c5..000000000 Binary files a/src/Detector/darknet/3rdparty/lib/x86/libpthreadGCE2.a and /dev/null differ diff --git a/src/Detector/darknet/3rdparty/lib/x86/pthreadVC2.lib b/src/Detector/darknet/3rdparty/lib/x86/pthreadVC2.lib deleted file mode 100644 index c20ee200d..000000000 Binary files a/src/Detector/darknet/3rdparty/lib/x86/pthreadVC2.lib and /dev/null differ diff --git a/src/Detector/darknet/3rdparty/lib/x86/pthreadVCE2.lib b/src/Detector/darknet/3rdparty/lib/x86/pthreadVCE2.lib deleted file mode 100644 index 7f05317ba..000000000 Binary files a/src/Detector/darknet/3rdparty/lib/x86/pthreadVCE2.lib and /dev/null differ diff --git a/src/Detector/darknet/3rdparty/lib/x86/pthreadVSE2.lib b/src/Detector/darknet/3rdparty/lib/x86/pthreadVSE2.lib deleted file mode 100644 index 3f3335d46..000000000 Binary files a/src/Detector/darknet/3rdparty/lib/x86/pthreadVSE2.lib and /dev/null differ diff --git a/src/Detector/darknet/3rdparty/stb/include/stb_image.h b/src/Detector/darknet/3rdparty/stb/include/stb_image.h deleted file mode 100644 index a056138dd..000000000 --- a/src/Detector/darknet/3rdparty/stb/include/stb_image.h +++ /dev/null @@ -1,7187 +0,0 @@ -/* stb_image - v2.16 - public domain image loader - http://nothings.org/stb_image.h - no warranty implied; use at your own risk - - Do this: - #define STB_IMAGE_IMPLEMENTATION - before you include this file in 
*one* C or C++ file to create the implementation. - - // i.e. it should look like this: - #include ... - #include ... - #include ... - #define STB_IMAGE_IMPLEMENTATION - #include "stb_image.h" - - You can #define STBI_ASSERT(x) before the #include to avoid using assert.h. - And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free - - - QUICK NOTES: - Primarily of interest to game developers and other people who can - avoid problematic images and only need the trivial interface - - JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib) - PNG 1/2/4/8/16-bit-per-channel - - TGA (not sure what subset, if a subset) - BMP non-1bpp, non-RLE - PSD (composited view only, no extra channels, 8/16 bit-per-channel) - - GIF (*comp always reports as 4-channel) - HDR (radiance rgbE format) - PIC (Softimage PIC) - PNM (PPM and PGM binary only) - - Animated GIF still needs a proper API, but here's one way to do it: - http://gist.github.com/urraka/685d9a6340b26b830d49 - - - decode from memory or through FILE (define STBI_NO_STDIO to remove code) - - decode from arbitrary I/O callbacks - - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON) - - Full documentation under "DOCUMENTATION" below. - - -LICENSE - - See end of file for license information. 
- -RECENT REVISION HISTORY: - - 2.16 (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes - 2.15 (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC - 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs - 2.13 (2016-12-04) experimental 16-bit API, only for PNG so far; fixes - 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes - 2.11 (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64 - RGB-format JPEG; remove white matting in PSD; - allocate large structures on the stack; - correct channel count for PNG & BMP - 2.10 (2016-01-22) avoid warning introduced in 2.09 - 2.09 (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED - - See end of file for full revision history. - - - ============================ Contributors ========================= - - Image formats Extensions, features - Sean Barrett (jpeg, png, bmp) Jetro Lauha (stbi_info) - Nicolas Schulz (hdr, psd) Martin "SpartanJ" Golini (stbi_info) - Jonathan Dummer (tga) James "moose2000" Brown (iPhone PNG) - Jean-Marc Lienher (gif) Ben "Disch" Wenger (io callbacks) - Tom Seddon (pic) Omar Cornut (1/2/4-bit PNG) - Thatcher Ulrich (psd) Nicolas Guillemot (vertical flip) - Ken Miller (pgm, ppm) Richard Mitton (16-bit PSD) - github:urraka (animated gif) Junggon Kim (PNM comments) - Daniel Gibson (16-bit TGA) - socks-the-fox (16-bit PNG) - Jeremy Sawicki (handle all ImageNet JPGs) - Optimizations & bugfixes - Fabian "ryg" Giesen - Arseny Kapoulkine - John-Mark Allen - - Bug & warning fixes - Marc LeBlanc David Woo Guillaume George Martins Mozeiko - Christpher Lloyd Jerry Jansson Joseph Thomson Phil Jordan - Dave Moore Roy Eltham Hayaki Saito Nathan Reed - Won Chun Luke Graham Johan Duparc Nick Verigakis - the Horde3D community Thomas Ruf Ronny Chevalier Baldur Karlsson - Janez Zemva John Bartholomew Michal Cichon github:rlyeh - Jonathan Blow Ken Hamada Tero Hanninen github:romigrou - Laurent Gomila Cort Stratton Sergio 
Gonzalez github:svdijk - Aruelien Pocheville Thibault Reuille Cass Everitt github:snagar - Ryamond Barbiero Paul Du Bois Engin Manap github:Zelex - Michaelangel007@github Philipp Wiesemann Dale Weiler github:grim210 - Oriol Ferrer Mesia Josh Tobin Matthew Gregan github:sammyhw - Blazej Dariusz Roszkowski Gregory Mullen github:phprus - Christian Floisand Kevin Schmidt github:poppolopoppo -*/ - -#ifndef STBI_INCLUDE_STB_IMAGE_H -#define STBI_INCLUDE_STB_IMAGE_H - -// DOCUMENTATION -// -// Limitations: -// - no 16-bit-per-channel PNG -// - no 12-bit-per-channel JPEG -// - no JPEGs with arithmetic coding -// - no 1-bit BMP -// - GIF always returns *comp=4 -// -// Basic usage (see HDR discussion below for HDR usage): -// int x,y,n; -// unsigned char *data = stbi_load(filename, &x, &y, &n, 0); -// // ... process data if not NULL ... -// // ... x = width, y = height, n = # 8-bit components per pixel ... -// // ... replace '0' with '1'..'4' to force that many components per pixel -// // ... but 'n' will always be the number that it would have been if you said 0 -// stbi_image_free(data) -// -// Standard parameters: -// int *x -- outputs image width in pixels -// int *y -- outputs image height in pixels -// int *channels_in_file -- outputs # of image components in image file -// int desired_channels -- if non-zero, # of image components requested in result -// -// The return value from an image loader is an 'unsigned char *' which points -// to the pixel data, or NULL on an allocation failure or if the image is -// corrupt or invalid. The pixel data consists of *y scanlines of *x pixels, -// with each pixel consisting of N interleaved 8-bit components; the first -// pixel pointed to is top-left-most in the image. There is no padding between -// image scanlines or between pixels, regardless of format. The number of -// components N is 'desired_channels' if desired_channels is non-zero, or -// *channels_in_file otherwise. 
If desired_channels is non-zero, -// *channels_in_file has the number of components that _would_ have been -// output otherwise. E.g. if you set desired_channels to 4, you will always -// get RGBA output, but you can check *channels_in_file to see if it's trivially -// opaque because e.g. there were only 3 channels in the source image. -// -// An output image with N components has the following components interleaved -// in this order in each pixel: -// -// N=#comp components -// 1 grey -// 2 grey, alpha -// 3 red, green, blue -// 4 red, green, blue, alpha -// -// If image loading fails for any reason, the return value will be NULL, -// and *x, *y, *channels_in_file will be unchanged. The function -// stbi_failure_reason() can be queried for an extremely brief, end-user -// unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS -// to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly -// more user-friendly ones. -// -// Paletted PNG, BMP, GIF, and PIC images are automatically depalettized. -// -// =========================================================================== -// -// Philosophy -// -// stb libraries are designed with the following priorities: -// -// 1. easy to use -// 2. easy to maintain -// 3. good performance -// -// Sometimes I let "good performance" creep up in priority over "easy to maintain", -// and for best performance I may provide less-easy-to-use APIs that give higher -// performance, in addition to the easy to use ones. Nevertheless, it's important -// to keep in mind that from the standpoint of you, a client of this library, -// all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all. -// -// Some secondary priorities arise directly from the first two, some of which -// make more explicit reasons why performance can't be emphasized. 
-// -// - Portable ("ease of use") -// - Small source code footprint ("easy to maintain") -// - No dependencies ("ease of use") -// -// =========================================================================== -// -// I/O callbacks -// -// I/O callbacks allow you to read from arbitrary sources, like packaged -// files or some other source. Data read from callbacks are processed -// through a small internal buffer (currently 128 bytes) to try to reduce -// overhead. -// -// The three functions you must define are "read" (reads some bytes of data), -// "skip" (skips some bytes of data), "eof" (reports if the stream is at the end). -// -// =========================================================================== -// -// SIMD support -// -// The JPEG decoder will try to automatically use SIMD kernels on x86 when -// supported by the compiler. For ARM Neon support, you must explicitly -// request it. -// -// (The old do-it-yourself SIMD API is no longer supported in the current -// code.) -// -// On x86, SSE2 will automatically be used when available based on a run-time -// test; if not, the generic C versions are used as a fall-back. On ARM targets, -// the typical path is to have separate builds for NEON and non-NEON devices -// (at least this is true for iOS and Android). Therefore, the NEON support is -// toggled by a build flag: define STBI_NEON to get NEON loops. -// -// If for some reason you do not want to use any of SIMD code, or if -// you have issues compiling it, you can disable it entirely by -// defining STBI_NO_SIMD. -// -// =========================================================================== -// -// HDR image support (disable by defining STBI_NO_HDR) -// -// stb_image now supports loading HDR images in general, and currently -// the Radiance .HDR file format, although the support is provided -// generically. 
You can still load any file through the existing interface; -// if you attempt to load an HDR file, it will be automatically remapped to -// LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1; -// both of these constants can be reconfigured through this interface: -// -// stbi_hdr_to_ldr_gamma(2.2f); -// stbi_hdr_to_ldr_scale(1.0f); -// -// (note, do not use _inverse_ constants; stbi_image will invert them -// appropriately). -// -// Additionally, there is a new, parallel interface for loading files as -// (linear) floats to preserve the full dynamic range: -// -// float *data = stbi_loadf(filename, &x, &y, &n, 0); -// -// If you load LDR images through this interface, those images will -// be promoted to floating point values, run through the inverse of -// constants corresponding to the above: -// -// stbi_ldr_to_hdr_scale(1.0f); -// stbi_ldr_to_hdr_gamma(2.2f); -// -// Finally, given a filename (or an open file or memory block--see header -// file for details) containing image data, you can query for the "most -// appropriate" interface to use (that is, whether the image is HDR or -// not), using: -// -// stbi_is_hdr(char *filename); -// -// =========================================================================== -// -// iPhone PNG support: -// -// By default we convert iphone-formatted PNGs back to RGB, even though -// they are internally encoded differently. You can disable this conversion -// by by calling stbi_convert_iphone_png_to_rgb(0), in which case -// you will always just get the native iphone "format" through (which -// is BGR stored in RGB). -// -// Call stbi_set_unpremultiply_on_load(1) as well to force a divide per -// pixel to remove any premultiplied alpha *only* if the image file explicitly -// says there's premultiplied data (currently only happens in iPhone images, -// and only if iPhone convert-to-rgb processing is on). 
-// -// =========================================================================== -// -// ADDITIONAL CONFIGURATION -// -// - You can suppress implementation of any of the decoders to reduce -// your code footprint by #defining one or more of the following -// symbols before creating the implementation. -// -// STBI_NO_JPEG -// STBI_NO_PNG -// STBI_NO_BMP -// STBI_NO_PSD -// STBI_NO_TGA -// STBI_NO_GIF -// STBI_NO_HDR -// STBI_NO_PIC -// STBI_NO_PNM (.ppm and .pgm) -// -// - You can request *only* certain decoders and suppress all other ones -// (this will be more forward-compatible, as addition of new decoders -// doesn't require you to disable them explicitly): -// -// STBI_ONLY_JPEG -// STBI_ONLY_PNG -// STBI_ONLY_BMP -// STBI_ONLY_PSD -// STBI_ONLY_TGA -// STBI_ONLY_GIF -// STBI_ONLY_HDR -// STBI_ONLY_PIC -// STBI_ONLY_PNM (.ppm and .pgm) -// -// - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still -// want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB -// - - -#ifndef STBI_NO_STDIO -#include -#endif // STBI_NO_STDIO - -#define STBI_VERSION 1 - -enum -{ - STBI_default = 0, // only used for desired_channels - - STBI_grey = 1, - STBI_grey_alpha = 2, - STBI_rgb = 3, - STBI_rgb_alpha = 4 -}; - -typedef unsigned char stbi_uc; -typedef unsigned short stbi_us; - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef STB_IMAGE_STATIC -#define STBIDEF static -#else -#define STBIDEF extern -#endif - -////////////////////////////////////////////////////////////////////////////// -// -// PRIMARY API - works on images of any type -// - -// -// load image by filename, open file, or memory buffer -// - -typedef struct -{ - int (*read) (void *user,char *data,int size); // fill 'data' with 'size' bytes. 
return number of bytes actually read - void (*skip) (void *user,int n); // skip the next 'n' bytes, or 'unget' the last -n bytes if negative - int (*eof) (void *user); // returns nonzero if we are at end of file/data -} stbi_io_callbacks; - -//////////////////////////////////// -// -// 8-bits-per-channel interface -// - -STBIDEF stbi_uc *stbi_load_from_memory (stbi_uc const *buffer, int len , int *x, int *y, int *channels_in_file, int desired_channels); -STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk , void *user, int *x, int *y, int *channels_in_file, int desired_channels); - -#ifndef STBI_NO_STDIO -STBIDEF stbi_uc *stbi_load (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); -STBIDEF stbi_uc *stbi_load_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); -// for stbi_load_from_file, file pointer is left pointing immediately after image -#endif - -//////////////////////////////////// -// -// 16-bits-per-channel interface -// - -STBIDEF stbi_us *stbi_load_16_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels); -STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels); - -#ifndef STBI_NO_STDIO -STBIDEF stbi_us *stbi_load_16 (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); -STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); -#endif - -//////////////////////////////////// -// -// float-per-channel interface -// -#ifndef STBI_NO_LINEAR - STBIDEF float *stbi_loadf_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels); - STBIDEF float *stbi_loadf_from_callbacks (stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels); - - #ifndef STBI_NO_STDIO - 
STBIDEF float *stbi_loadf (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); - STBIDEF float *stbi_loadf_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); - #endif -#endif - -#ifndef STBI_NO_HDR - STBIDEF void stbi_hdr_to_ldr_gamma(float gamma); - STBIDEF void stbi_hdr_to_ldr_scale(float scale); -#endif // STBI_NO_HDR - -#ifndef STBI_NO_LINEAR - STBIDEF void stbi_ldr_to_hdr_gamma(float gamma); - STBIDEF void stbi_ldr_to_hdr_scale(float scale); -#endif // STBI_NO_LINEAR - -// stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR -STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user); -STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len); -#ifndef STBI_NO_STDIO -STBIDEF int stbi_is_hdr (char const *filename); -STBIDEF int stbi_is_hdr_from_file(FILE *f); -#endif // STBI_NO_STDIO - - -// get a VERY brief reason for failure -// NOT THREADSAFE -STBIDEF const char *stbi_failure_reason (void); - -// free the loaded image -- this is just free() -STBIDEF void stbi_image_free (void *retval_from_stbi_load); - -// get image dimensions & components without fully decoding -STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); -STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp); - -#ifndef STBI_NO_STDIO -STBIDEF int stbi_info (char const *filename, int *x, int *y, int *comp); -STBIDEF int stbi_info_from_file (FILE *f, int *x, int *y, int *comp); - -#endif - - - -// for image formats that explicitly notate that they have premultiplied alpha, -// we just return the colors as stored in the file. set this flag to force -// unpremultiplication. results are undefined if the unpremultiply overflow. 
-STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply); - -// indicate whether we should process iphone images back to canonical format, -// or just pass them through "as-is" -STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert); - -// flip the image vertically, so the first pixel in the output array is the bottom left -STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip); - -// ZLIB client - used by PNG, available for other purposes - -STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen); -STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header); -STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen); -STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); - -STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen); -STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); - - -#ifdef __cplusplus -} -#endif - -// -// -//// end header file ///////////////////////////////////////////////////// -#endif // STBI_INCLUDE_STB_IMAGE_H - -#ifdef STB_IMAGE_IMPLEMENTATION - -#if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \ - || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \ - || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \ - || defined(STBI_ONLY_ZLIB) - #ifndef STBI_ONLY_JPEG - #define STBI_NO_JPEG - #endif - #ifndef STBI_ONLY_PNG - #define STBI_NO_PNG - #endif - #ifndef STBI_ONLY_BMP - #define STBI_NO_BMP - #endif - #ifndef STBI_ONLY_PSD - #define STBI_NO_PSD - #endif - #ifndef STBI_ONLY_TGA - #define STBI_NO_TGA - #endif - #ifndef STBI_ONLY_GIF - #define STBI_NO_GIF - #endif - #ifndef STBI_ONLY_HDR - #define STBI_NO_HDR - #endif - 
#ifndef STBI_ONLY_PIC - #define STBI_NO_PIC - #endif - #ifndef STBI_ONLY_PNM - #define STBI_NO_PNM - #endif -#endif - -#if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB) -#define STBI_NO_ZLIB -#endif - - -#include -#include // ptrdiff_t on osx -#include -#include -#include - -#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) -#include // ldexp -#endif - -#ifndef STBI_NO_STDIO -#include -#endif - -#ifndef STBI_ASSERT -#include -#define STBI_ASSERT(x) assert(x) -#endif - - -#ifndef _MSC_VER - #ifdef __cplusplus - #define stbi_inline inline - #else - #define stbi_inline - #endif -#else - #define stbi_inline __forceinline -#endif - - -#ifdef _MSC_VER -typedef unsigned short stbi__uint16; -typedef signed short stbi__int16; -typedef unsigned int stbi__uint32; -typedef signed int stbi__int32; -#else -#include -typedef uint16_t stbi__uint16; -typedef int16_t stbi__int16; -typedef uint32_t stbi__uint32; -typedef int32_t stbi__int32; -#endif - -// should produce compiler error if size is wrong -typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1]; - -#ifdef _MSC_VER -#define STBI_NOTUSED(v) (void)(v) -#else -#define STBI_NOTUSED(v) (void)sizeof(v) -#endif - -#ifdef _MSC_VER -#define STBI_HAS_LROTL -#endif - -#ifdef STBI_HAS_LROTL - #define stbi_lrot(x,y) _lrotl(x,y) -#else - #define stbi_lrot(x,y) (((x) << (y)) | ((x) >> (32 - (y)))) -#endif - -#if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED)) -// ok -#elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED) -// ok -#else -#error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)." 
-#endif - -#ifndef STBI_MALLOC -#define STBI_MALLOC(sz) malloc(sz) -#define STBI_REALLOC(p,newsz) realloc(p,newsz) -#define STBI_FREE(p) free(p) -#endif - -#ifndef STBI_REALLOC_SIZED -#define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz) -#endif - -// x86/x64 detection -#if defined(__x86_64__) || defined(_M_X64) -#define STBI__X64_TARGET -#elif defined(__i386) || defined(_M_IX86) -#define STBI__X86_TARGET -#endif - -#if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD) -// gcc doesn't support sse2 intrinsics unless you compile with -msse2, -// which in turn means it gets to use SSE2 everywhere. This is unfortunate, -// but previous attempts to provide the SSE2 functions with runtime -// detection caused numerous issues. The way architecture extensions are -// exposed in GCC/Clang is, sadly, not really suited for one-file libs. -// New behavior: if compiled with -msse2, we use SSE2 without any -// detection; if not, we don't use it at all. -#define STBI_NO_SIMD -#endif - -#if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD) -// Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET -// -// 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the -// Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant. -// As a result, enabling SSE2 on 32-bit MinGW is dangerous when not -// simultaneously enabling "-mstackrealign". -// -// See https://github.com/nothings/stb/issues/81 for more information. -// -// So default to no SSE2 on 32-bit MinGW. If you've read this far and added -// -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2. 
-#define STBI_NO_SIMD -#endif - -#if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET)) -#define STBI_SSE2 -#include - -#ifdef _MSC_VER - -#if _MSC_VER >= 1400 // not VC6 -#include // __cpuid -static int stbi__cpuid3(void) -{ - int info[4]; - __cpuid(info,1); - return info[3]; -} -#else -static int stbi__cpuid3(void) -{ - int res; - __asm { - mov eax,1 - cpuid - mov res,edx - } - return res; -} -#endif - -#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name - -static int stbi__sse2_available(void) -{ - int info3 = stbi__cpuid3(); - return ((info3 >> 26) & 1) != 0; -} -#else // assume GCC-style if not VC++ -#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16))) - -static int stbi__sse2_available(void) -{ - // If we're even attempting to compile this on GCC/Clang, that means - // -msse2 is on, which means the compiler is allowed to use SSE2 - // instructions at will, and so are we. - return 1; -} -#endif -#endif - -// ARM NEON -#if defined(STBI_NO_SIMD) && defined(STBI_NEON) -#undef STBI_NEON -#endif - -#ifdef STBI_NEON -#include -// assume GCC or Clang on ARM targets -#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16))) -#endif - -#ifndef STBI_SIMD_ALIGN -#define STBI_SIMD_ALIGN(type, name) type name -#endif - -/////////////////////////////////////////////// -// -// stbi__context struct and start_xxx functions - -// stbi__context structure is our basic context used by all images, so it -// contains all the IO context, plus some basic image information -typedef struct -{ - stbi__uint32 img_x, img_y; - int img_n, img_out_n; - - stbi_io_callbacks io; - void *io_user_data; - - int read_from_callbacks; - int buflen; - stbi_uc buffer_start[128]; - - stbi_uc *img_buffer, *img_buffer_end; - stbi_uc *img_buffer_original, *img_buffer_original_end; -} stbi__context; - - -static void stbi__refill_buffer(stbi__context *s); - -// initialize a memory-decode context -static void 
stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len) -{ - s->io.read = NULL; - s->read_from_callbacks = 0; - s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer; - s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len; -} - -// initialize a callback-based context -static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user) -{ - s->io = *c; - s->io_user_data = user; - s->buflen = sizeof(s->buffer_start); - s->read_from_callbacks = 1; - s->img_buffer_original = s->buffer_start; - stbi__refill_buffer(s); - s->img_buffer_original_end = s->img_buffer_end; -} - -#ifndef STBI_NO_STDIO - -static int stbi__stdio_read(void *user, char *data, int size) -{ - return (int) fread(data,1,size,(FILE*) user); -} - -static void stbi__stdio_skip(void *user, int n) -{ - fseek((FILE*) user, n, SEEK_CUR); -} - -static int stbi__stdio_eof(void *user) -{ - return feof((FILE*) user); -} - -static stbi_io_callbacks stbi__stdio_callbacks = -{ - stbi__stdio_read, - stbi__stdio_skip, - stbi__stdio_eof, -}; - -static void stbi__start_file(stbi__context *s, FILE *f) -{ - stbi__start_callbacks(s, &stbi__stdio_callbacks, (void *) f); -} - -//static void stop_file(stbi__context *s) { } - -#endif // !STBI_NO_STDIO - -static void stbi__rewind(stbi__context *s) -{ - // conceptually rewind SHOULD rewind to the beginning of the stream, - // but we just rewind to the beginning of the initial buffer, because - // we only use it after doing 'test', which only ever looks at at most 92 bytes - s->img_buffer = s->img_buffer_original; - s->img_buffer_end = s->img_buffer_original_end; -} - -enum -{ - STBI_ORDER_RGB, - STBI_ORDER_BGR -}; - -typedef struct -{ - int bits_per_channel; - int num_channels; - int channel_order; -} stbi__result_info; - -#ifndef STBI_NO_JPEG -static int stbi__jpeg_test(stbi__context *s); -static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); -static int 
stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp); -#endif - -#ifndef STBI_NO_PNG -static int stbi__png_test(stbi__context *s); -static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); -static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp); -#endif - -#ifndef STBI_NO_BMP -static int stbi__bmp_test(stbi__context *s); -static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); -static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp); -#endif - -#ifndef STBI_NO_TGA -static int stbi__tga_test(stbi__context *s); -static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); -static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp); -#endif - -#ifndef STBI_NO_PSD -static int stbi__psd_test(stbi__context *s); -static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc); -static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp); -#endif - -#ifndef STBI_NO_HDR -static int stbi__hdr_test(stbi__context *s); -static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); -static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp); -#endif - -#ifndef STBI_NO_PIC -static int stbi__pic_test(stbi__context *s); -static void *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); -static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp); -#endif - -#ifndef STBI_NO_GIF -static int stbi__gif_test(stbi__context *s); -static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); -static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp); -#endif - -#ifndef STBI_NO_PNM -static int stbi__pnm_test(stbi__context *s); -static void 
*stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); -static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp); -#endif - -// this is not threadsafe -static const char *stbi__g_failure_reason; - -STBIDEF const char *stbi_failure_reason(void) -{ - return stbi__g_failure_reason; -} - -static int stbi__err(const char *str) -{ - stbi__g_failure_reason = str; - return 0; -} - -static void *stbi__malloc(size_t size) -{ - return STBI_MALLOC(size); -} - -// stb_image uses ints pervasively, including for offset calculations. -// therefore the largest decoded image size we can support with the -// current code, even on 64-bit targets, is INT_MAX. this is not a -// significant limitation for the intended use case. -// -// we do, however, need to make sure our size calculations don't -// overflow. hence a few helper functions for size calculations that -// multiply integers together, making sure that they're non-negative -// and no overflow occurs. - -// return 1 if the sum is valid, 0 on overflow. -// negative terms are considered invalid. -static int stbi__addsizes_valid(int a, int b) -{ - if (b < 0) return 0; - // now 0 <= b <= INT_MAX, hence also - // 0 <= INT_MAX - b <= INTMAX. - // And "a + b <= INT_MAX" (which might overflow) is the - // same as a <= INT_MAX - b (no overflow) - return a <= INT_MAX - b; -} - -// returns 1 if the product is valid, 0 on overflow. -// negative factors are considered invalid. 
-static int stbi__mul2sizes_valid(int a, int b) -{ - if (a < 0 || b < 0) return 0; - if (b == 0) return 1; // mul-by-0 is always safe - // portable way to check for no overflows in a*b - return a <= INT_MAX/b; -} - -// returns 1 if "a*b + add" has no negative terms/factors and doesn't overflow -static int stbi__mad2sizes_valid(int a, int b, int add) -{ - return stbi__mul2sizes_valid(a, b) && stbi__addsizes_valid(a*b, add); -} - -// returns 1 if "a*b*c + add" has no negative terms/factors and doesn't overflow -static int stbi__mad3sizes_valid(int a, int b, int c, int add) -{ - return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) && - stbi__addsizes_valid(a*b*c, add); -} - -// returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow -static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add) -{ - return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) && - stbi__mul2sizes_valid(a*b*c, d) && stbi__addsizes_valid(a*b*c*d, add); -} - -// mallocs with size overflow checking -static void *stbi__malloc_mad2(int a, int b, int add) -{ - if (!stbi__mad2sizes_valid(a, b, add)) return NULL; - return stbi__malloc(a*b + add); -} - -static void *stbi__malloc_mad3(int a, int b, int c, int add) -{ - if (!stbi__mad3sizes_valid(a, b, c, add)) return NULL; - return stbi__malloc(a*b*c + add); -} - -static void *stbi__malloc_mad4(int a, int b, int c, int d, int add) -{ - if (!stbi__mad4sizes_valid(a, b, c, d, add)) return NULL; - return stbi__malloc(a*b*c*d + add); -} - -// stbi__err - error -// stbi__errpf - error returning pointer to float -// stbi__errpuc - error returning pointer to unsigned char - -#ifdef STBI_NO_FAILURE_STRINGS - #define stbi__err(x,y) 0 -#elif defined(STBI_FAILURE_USERMSG) - #define stbi__err(x,y) stbi__err(y) -#else - #define stbi__err(x,y) stbi__err(x) -#endif - -#define stbi__errpf(x,y) ((float *)(size_t) (stbi__err(x,y)?NULL:NULL)) -#define stbi__errpuc(x,y) ((unsigned char *)(size_t) 
(stbi__err(x,y)?NULL:NULL)) - -STBIDEF void stbi_image_free(void *retval_from_stbi_load) -{ - STBI_FREE(retval_from_stbi_load); -} - -#ifndef STBI_NO_LINEAR -static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp); -#endif - -#ifndef STBI_NO_HDR -static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp); -#endif - -static int stbi__vertically_flip_on_load = 0; - -STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip) -{ - stbi__vertically_flip_on_load = flag_true_if_should_flip; -} - -static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc) -{ - memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields - ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed - ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order - ri->num_channels = 0; - - #ifndef STBI_NO_JPEG - if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri); - #endif - #ifndef STBI_NO_PNG - if (stbi__png_test(s)) return stbi__png_load(s,x,y,comp,req_comp, ri); - #endif - #ifndef STBI_NO_BMP - if (stbi__bmp_test(s)) return stbi__bmp_load(s,x,y,comp,req_comp, ri); - #endif - #ifndef STBI_NO_GIF - if (stbi__gif_test(s)) return stbi__gif_load(s,x,y,comp,req_comp, ri); - #endif - #ifndef STBI_NO_PSD - if (stbi__psd_test(s)) return stbi__psd_load(s,x,y,comp,req_comp, ri, bpc); - #endif - #ifndef STBI_NO_PIC - if (stbi__pic_test(s)) return stbi__pic_load(s,x,y,comp,req_comp, ri); - #endif - #ifndef STBI_NO_PNM - if (stbi__pnm_test(s)) return stbi__pnm_load(s,x,y,comp,req_comp, ri); - #endif - - #ifndef STBI_NO_HDR - if (stbi__hdr_test(s)) { - float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri); - return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp); - } - #endif - - #ifndef STBI_NO_TGA - // test tga last because it's a crappy test! 
- if (stbi__tga_test(s)) - return stbi__tga_load(s,x,y,comp,req_comp, ri); - #endif - - return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt"); -} - -static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels) -{ - int i; - int img_len = w * h * channels; - stbi_uc *reduced; - - reduced = (stbi_uc *) stbi__malloc(img_len); - if (reduced == NULL) return stbi__errpuc("outofmem", "Out of memory"); - - for (i = 0; i < img_len; ++i) - reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling - - STBI_FREE(orig); - return reduced; -} - -static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels) -{ - int i; - int img_len = w * h * channels; - stbi__uint16 *enlarged; - - enlarged = (stbi__uint16 *) stbi__malloc(img_len*2); - if (enlarged == NULL) return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory"); - - for (i = 0; i < img_len; ++i) - enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff - - STBI_FREE(orig); - return enlarged; -} - -static void stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel) -{ - int row; - size_t bytes_per_row = (size_t)w * bytes_per_pixel; - stbi_uc temp[2048]; - stbi_uc *bytes = (stbi_uc *)image; - - for (row = 0; row < (h>>1); row++) { - stbi_uc *row0 = bytes + row*bytes_per_row; - stbi_uc *row1 = bytes + (h - row - 1)*bytes_per_row; - // swap row0 with row1 - size_t bytes_left = bytes_per_row; - while (bytes_left) { - size_t bytes_copy = (bytes_left < sizeof(temp)) ? 
bytes_left : sizeof(temp); - memcpy(temp, row0, bytes_copy); - memcpy(row0, row1, bytes_copy); - memcpy(row1, temp, bytes_copy); - row0 += bytes_copy; - row1 += bytes_copy; - bytes_left -= bytes_copy; - } - } -} - -static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp) -{ - stbi__result_info ri; - void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8); - - if (result == NULL) - return NULL; - - if (ri.bits_per_channel != 8) { - STBI_ASSERT(ri.bits_per_channel == 16); - result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp); - ri.bits_per_channel = 8; - } - - // @TODO: move stbi__convert_format to here - - if (stbi__vertically_flip_on_load) { - int channels = req_comp ? req_comp : *comp; - stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc)); - } - - return (unsigned char *) result; -} - -static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp) -{ - stbi__result_info ri; - void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16); - - if (result == NULL) - return NULL; - - if (ri.bits_per_channel != 16) { - STBI_ASSERT(ri.bits_per_channel == 8); - result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? *comp : req_comp); - ri.bits_per_channel = 16; - } - - // @TODO: move stbi__convert_format16 to here - // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision - - if (stbi__vertically_flip_on_load) { - int channels = req_comp ? req_comp : *comp; - stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16)); - } - - return (stbi__uint16 *) result; -} - -#ifndef STBI_NO_HDR -static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp) -{ - if (stbi__vertically_flip_on_load && result != NULL) { - int channels = req_comp ? 
req_comp : *comp; - stbi__vertical_flip(result, *x, *y, channels * sizeof(float)); - } -} -#endif - -#ifndef STBI_NO_STDIO - -static FILE *stbi__fopen(char const *filename, char const *mode) -{ - FILE *f; -#if defined(_MSC_VER) && _MSC_VER >= 1400 - if (0 != fopen_s(&f, filename, mode)) - f=0; -#else - f = fopen(filename, mode); -#endif - return f; -} - - -STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp) -{ - FILE *f = stbi__fopen(filename, "rb"); - unsigned char *result; - if (!f) return stbi__errpuc("can't fopen", "Unable to open file"); - result = stbi_load_from_file(f,x,y,comp,req_comp); - fclose(f); - return result; -} - -STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) -{ - unsigned char *result; - stbi__context s; - stbi__start_file(&s,f); - result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); - if (result) { - // need to 'unget' all the characters in the IO buffer - fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR); - } - return result; -} - -STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp) -{ - stbi__uint16 *result; - stbi__context s; - stbi__start_file(&s,f); - result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp); - if (result) { - // need to 'unget' all the characters in the IO buffer - fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR); - } - return result; -} - -STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp) -{ - FILE *f = stbi__fopen(filename, "rb"); - stbi__uint16 *result; - if (!f) return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file"); - result = stbi_load_from_file_16(f,x,y,comp,req_comp); - fclose(f); - return result; -} - - -#endif //!STBI_NO_STDIO - -STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels) -{ - stbi__context s; - 
stbi__start_mem(&s,buffer,len); - return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels); -} - -STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels) -{ - stbi__context s; - stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user); - return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels); -} - -STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) -{ - stbi__context s; - stbi__start_mem(&s,buffer,len); - return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); -} - -STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp) -{ - stbi__context s; - stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); - return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); -} - -#ifndef STBI_NO_LINEAR -static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp) -{ - unsigned char *data; - #ifndef STBI_NO_HDR - if (stbi__hdr_test(s)) { - stbi__result_info ri; - float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp, &ri); - if (hdr_data) - stbi__float_postprocess(hdr_data,x,y,comp,req_comp); - return hdr_data; - } - #endif - data = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp); - if (data) - return stbi__ldr_to_hdr(data, *x, *y, req_comp ? 
req_comp : *comp); - return stbi__errpf("unknown image type", "Image not of any known type, or corrupt"); -} - -STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) -{ - stbi__context s; - stbi__start_mem(&s,buffer,len); - return stbi__loadf_main(&s,x,y,comp,req_comp); -} - -STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp) -{ - stbi__context s; - stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); - return stbi__loadf_main(&s,x,y,comp,req_comp); -} - -#ifndef STBI_NO_STDIO -STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp) -{ - float *result; - FILE *f = stbi__fopen(filename, "rb"); - if (!f) return stbi__errpf("can't fopen", "Unable to open file"); - result = stbi_loadf_from_file(f,x,y,comp,req_comp); - fclose(f); - return result; -} - -STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) -{ - stbi__context s; - stbi__start_file(&s,f); - return stbi__loadf_main(&s,x,y,comp,req_comp); -} -#endif // !STBI_NO_STDIO - -#endif // !STBI_NO_LINEAR - -// these is-hdr-or-not is defined independent of whether STBI_NO_LINEAR is -// defined, for API simplicity; if STBI_NO_LINEAR is defined, it always -// reports false! 
- -STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len) -{ - #ifndef STBI_NO_HDR - stbi__context s; - stbi__start_mem(&s,buffer,len); - return stbi__hdr_test(&s); - #else - STBI_NOTUSED(buffer); - STBI_NOTUSED(len); - return 0; - #endif -} - -#ifndef STBI_NO_STDIO -STBIDEF int stbi_is_hdr (char const *filename) -{ - FILE *f = stbi__fopen(filename, "rb"); - int result=0; - if (f) { - result = stbi_is_hdr_from_file(f); - fclose(f); - } - return result; -} - -STBIDEF int stbi_is_hdr_from_file(FILE *f) -{ - #ifndef STBI_NO_HDR - stbi__context s; - stbi__start_file(&s,f); - return stbi__hdr_test(&s); - #else - STBI_NOTUSED(f); - return 0; - #endif -} -#endif // !STBI_NO_STDIO - -STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user) -{ - #ifndef STBI_NO_HDR - stbi__context s; - stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); - return stbi__hdr_test(&s); - #else - STBI_NOTUSED(clbk); - STBI_NOTUSED(user); - return 0; - #endif -} - -#ifndef STBI_NO_LINEAR -static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f; - -STBIDEF void stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; } -STBIDEF void stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; } -#endif - -static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f; - -STBIDEF void stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; } -STBIDEF void stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; } - - -////////////////////////////////////////////////////////////////////////////// -// -// Common code used by all image loaders -// - -enum -{ - STBI__SCAN_load=0, - STBI__SCAN_type, - STBI__SCAN_header -}; - -static void stbi__refill_buffer(stbi__context *s) -{ - int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen); - if (n == 0) { - // at end of file, treat same as if from memory, but need to handle case - // where s->img_buffer isn't pointing to safe memory, e.g. 
0-byte file - s->read_from_callbacks = 0; - s->img_buffer = s->buffer_start; - s->img_buffer_end = s->buffer_start+1; - *s->img_buffer = 0; - } else { - s->img_buffer = s->buffer_start; - s->img_buffer_end = s->buffer_start + n; - } -} - -stbi_inline static stbi_uc stbi__get8(stbi__context *s) -{ - if (s->img_buffer < s->img_buffer_end) - return *s->img_buffer++; - if (s->read_from_callbacks) { - stbi__refill_buffer(s); - return *s->img_buffer++; - } - return 0; -} - -stbi_inline static int stbi__at_eof(stbi__context *s) -{ - if (s->io.read) { - if (!(s->io.eof)(s->io_user_data)) return 0; - // if feof() is true, check if buffer = end - // special case: we've only got the special 0 character at the end - if (s->read_from_callbacks == 0) return 1; - } - - return s->img_buffer >= s->img_buffer_end; -} - -static void stbi__skip(stbi__context *s, int n) -{ - if (n < 0) { - s->img_buffer = s->img_buffer_end; - return; - } - if (s->io.read) { - int blen = (int) (s->img_buffer_end - s->img_buffer); - if (blen < n) { - s->img_buffer = s->img_buffer_end; - (s->io.skip)(s->io_user_data, n - blen); - return; - } - } - s->img_buffer += n; -} - -static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n) -{ - if (s->io.read) { - int blen = (int) (s->img_buffer_end - s->img_buffer); - if (blen < n) { - int res, count; - - memcpy(buffer, s->img_buffer, blen); - - count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen); - res = (count == (n-blen)); - s->img_buffer = s->img_buffer_end; - return res; - } - } - - if (s->img_buffer+n <= s->img_buffer_end) { - memcpy(buffer, s->img_buffer, n); - s->img_buffer += n; - return 1; - } else - return 0; -} - -static int stbi__get16be(stbi__context *s) -{ - int z = stbi__get8(s); - return (z << 8) + stbi__get8(s); -} - -static stbi__uint32 stbi__get32be(stbi__context *s) -{ - stbi__uint32 z = stbi__get16be(s); - return (z << 16) + stbi__get16be(s); -} - -#if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && 
defined(STBI_NO_GIF) -// nothing -#else -static int stbi__get16le(stbi__context *s) -{ - int z = stbi__get8(s); - return z + (stbi__get8(s) << 8); -} -#endif - -#ifndef STBI_NO_BMP -static stbi__uint32 stbi__get32le(stbi__context *s) -{ - stbi__uint32 z = stbi__get16le(s); - return z + (stbi__get16le(s) << 16); -} -#endif - -#define STBI__BYTECAST(x) ((stbi_uc) ((x) & 255)) // truncate int to byte without warnings - - -////////////////////////////////////////////////////////////////////////////// -// -// generic converter from built-in img_n to req_comp -// individual types do this automatically as much as possible (e.g. jpeg -// does all cases internally since it needs to colorspace convert anyway, -// and it never has alpha, so very few cases ). png can automatically -// interleave an alpha=255 channel, but falls back to this for other cases -// -// assume data buffer is malloced, so malloc a new one and free that one -// only failure mode is malloc failing - -static stbi_uc stbi__compute_y(int r, int g, int b) -{ - return (stbi_uc) (((r*77) + (g*150) + (29*b)) >> 8); -} - -static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y) -{ - int i,j; - unsigned char *good; - - if (req_comp == img_n) return data; - STBI_ASSERT(req_comp >= 1 && req_comp <= 4); - - good = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0); - if (good == NULL) { - STBI_FREE(data); - return stbi__errpuc("outofmem", "Out of memory"); - } - - for (j=0; j < (int) y; ++j) { - unsigned char *src = data + j * x * img_n ; - unsigned char *dest = good + j * x * req_comp; - - #define STBI__COMBO(a,b) ((a)*8+(b)) - #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) - // convert source image with img_n components to one with req_comp components; - // avoid switch per pixel, so use switch per scanline and massive macros - switch (STBI__COMBO(img_n, req_comp)) { - STBI__CASE(1,2) { dest[0]=src[0], 
dest[1]=255; } break; - STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; - STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=255; } break; - STBI__CASE(2,1) { dest[0]=src[0]; } break; - STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; - STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; } break; - STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255; } break; - STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); } break; - STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = 255; } break; - STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); } break; - STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = src[3]; } break; - STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; } break; - default: STBI_ASSERT(0); - } - #undef STBI__CASE - } - - STBI_FREE(data); - return good; -} - -static stbi__uint16 stbi__compute_y_16(int r, int g, int b) -{ - return (stbi__uint16) (((r*77) + (g*150) + (29*b)) >> 8); -} - -static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y) -{ - int i,j; - stbi__uint16 *good; - - if (req_comp == img_n) return data; - STBI_ASSERT(req_comp >= 1 && req_comp <= 4); - - good = (stbi__uint16 *) stbi__malloc(req_comp * x * y * 2); - if (good == NULL) { - STBI_FREE(data); - return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory"); - } - - for (j=0; j < (int) y; ++j) { - stbi__uint16 *src = data + j * x * img_n ; - stbi__uint16 *dest = good + j * x * req_comp; - - #define STBI__COMBO(a,b) ((a)*8+(b)) - #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) - // convert source image with img_n components to one with req_comp components; - // avoid switch per pixel, so use switch per scanline and massive macros - switch (STBI__COMBO(img_n, req_comp)) { - STBI__CASE(1,2) { dest[0]=src[0], dest[1]=0xffff; 
} break; - STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; - STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=0xffff; } break; - STBI__CASE(2,1) { dest[0]=src[0]; } break; - STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; - STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; } break; - STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=0xffff; } break; - STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break; - STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = 0xffff; } break; - STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break; - STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = src[3]; } break; - STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; } break; - default: STBI_ASSERT(0); - } - #undef STBI__CASE - } - - STBI_FREE(data); - return good; -} - -#ifndef STBI_NO_LINEAR -static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp) -{ - int i,k,n; - float *output; - if (!data) return NULL; - output = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0); - if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); } - // compute number of non-alpha components - if (comp & 1) n = comp; else n = comp-1; - for (i=0; i < x*y; ++i) { - for (k=0; k < n; ++k) { - output[i*comp + k] = (float) (pow(data[i*comp+k]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale); - } - if (k < comp) output[i*comp + k] = data[i*comp+k]/255.0f; - } - STBI_FREE(data); - return output; -} -#endif - -#ifndef STBI_NO_HDR -#define stbi__float2int(x) ((int) (x)) -static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp) -{ - int i,k,n; - stbi_uc *output; - if (!data) return NULL; - output = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0); - if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); } - // compute number of non-alpha components 
- if (comp & 1) n = comp; else n = comp-1; - for (i=0; i < x*y; ++i) { - for (k=0; k < n; ++k) { - float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f; - if (z < 0) z = 0; - if (z > 255) z = 255; - output[i*comp + k] = (stbi_uc) stbi__float2int(z); - } - if (k < comp) { - float z = data[i*comp+k] * 255 + 0.5f; - if (z < 0) z = 0; - if (z > 255) z = 255; - output[i*comp + k] = (stbi_uc) stbi__float2int(z); - } - } - STBI_FREE(data); - return output; -} -#endif - -////////////////////////////////////////////////////////////////////////////// -// -// "baseline" JPEG/JFIF decoder -// -// simple implementation -// - doesn't support delayed output of y-dimension -// - simple interface (only one output format: 8-bit interleaved RGB) -// - doesn't try to recover corrupt jpegs -// - doesn't allow partial loading, loading multiple at once -// - still fast on x86 (copying globals into locals doesn't help x86) -// - allocates lots of intermediate memory (full size of all components) -// - non-interleaved case requires this anyway -// - allows good upsampling (see next) -// high-quality -// - upsampled channels are bilinearly interpolated, even across blocks -// - quality integer IDCT derived from IJG's 'slow' -// performance -// - fast huffman; reasonable integer IDCT -// - some SIMD kernels for common paths on targets with SSE2/NEON -// - uses a lot of intermediate memory, could cache poorly - -#ifndef STBI_NO_JPEG - -// huffman decoding acceleration -#define FAST_BITS 9 // larger handles more cases; smaller stomps less cache - -typedef struct -{ - stbi_uc fast[1 << FAST_BITS]; - // weirdly, repacking this into AoS is a 10% speed loss, instead of a win - stbi__uint16 code[256]; - stbi_uc values[256]; - stbi_uc size[257]; - unsigned int maxcode[18]; - int delta[17]; // old 'firstsymbol' - old 'firstcode' -} stbi__huffman; - -typedef struct -{ - stbi__context *s; - stbi__huffman huff_dc[4]; - stbi__huffman huff_ac[4]; - stbi__uint16 
dequant[4][64]; - stbi__int16 fast_ac[4][1 << FAST_BITS]; - -// sizes for components, interleaved MCUs - int img_h_max, img_v_max; - int img_mcu_x, img_mcu_y; - int img_mcu_w, img_mcu_h; - -// definition of jpeg image component - struct - { - int id; - int h,v; - int tq; - int hd,ha; - int dc_pred; - - int x,y,w2,h2; - stbi_uc *data; - void *raw_data, *raw_coeff; - stbi_uc *linebuf; - short *coeff; // progressive only - int coeff_w, coeff_h; // number of 8x8 coefficient blocks - } img_comp[4]; - - stbi__uint32 code_buffer; // jpeg entropy-coded buffer - int code_bits; // number of valid bits - unsigned char marker; // marker seen while filling entropy buffer - int nomore; // flag if we saw a marker so must stop - - int progressive; - int spec_start; - int spec_end; - int succ_high; - int succ_low; - int eob_run; - int jfif; - int app14_color_transform; // Adobe APP14 tag - int rgb; - - int scan_n, order[4]; - int restart_interval, todo; - -// kernels - void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]); - void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step); - stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs); -} stbi__jpeg; - -static int stbi__build_huffman(stbi__huffman *h, int *count) -{ - int i,j,k=0,code; - // build size list for each symbol (from JPEG spec) - for (i=0; i < 16; ++i) - for (j=0; j < count[i]; ++j) - h->size[k++] = (stbi_uc) (i+1); - h->size[k] = 0; - - // compute actual symbols (from jpeg spec) - code = 0; - k = 0; - for(j=1; j <= 16; ++j) { - // compute delta to add to code to compute symbol id - h->delta[j] = k - code; - if (h->size[k] == j) { - while (h->size[k] == j) - h->code[k++] = (stbi__uint16) (code++); - if (code-1 >= (1 << j)) return stbi__err("bad code lengths","Corrupt JPEG"); - } - // compute largest code + 1 for this size, preshifted as needed later - h->maxcode[j] = code << (16-j); - code 
<<= 1; - } - h->maxcode[j] = 0xffffffff; - - // build non-spec acceleration table; 255 is flag for not-accelerated - memset(h->fast, 255, 1 << FAST_BITS); - for (i=0; i < k; ++i) { - int s = h->size[i]; - if (s <= FAST_BITS) { - int c = h->code[i] << (FAST_BITS-s); - int m = 1 << (FAST_BITS-s); - for (j=0; j < m; ++j) { - h->fast[c+j] = (stbi_uc) i; - } - } - } - return 1; -} - -// build a table that decodes both magnitude and value of small ACs in -// one go. -static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h) -{ - int i; - for (i=0; i < (1 << FAST_BITS); ++i) { - stbi_uc fast = h->fast[i]; - fast_ac[i] = 0; - if (fast < 255) { - int rs = h->values[fast]; - int run = (rs >> 4) & 15; - int magbits = rs & 15; - int len = h->size[fast]; - - if (magbits && len + magbits <= FAST_BITS) { - // magnitude code followed by receive_extend code - int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits); - int m = 1 << (magbits - 1); - if (k < m) k += (~0U << magbits) + 1; - // if the result is small enough, we can fit it in fast_ac table - if (k >= -128 && k <= 127) - fast_ac[i] = (stbi__int16) ((k << 8) + (run << 4) + (len + magbits)); - } - } - } -} - -static void stbi__grow_buffer_unsafe(stbi__jpeg *j) -{ - do { - int b = j->nomore ? 
0 : stbi__get8(j->s); - if (b == 0xff) { - int c = stbi__get8(j->s); - while (c == 0xff) c = stbi__get8(j->s); // consume fill bytes - if (c != 0) { - j->marker = (unsigned char) c; - j->nomore = 1; - return; - } - } - j->code_buffer |= b << (24 - j->code_bits); - j->code_bits += 8; - } while (j->code_bits <= 24); -} - -// (1 << n) - 1 -static stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535}; - -// decode a jpeg huffman value from the bitstream -stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h) -{ - unsigned int temp; - int c,k; - - if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); - - // look at the top FAST_BITS and determine what symbol ID it is, - // if the code is <= FAST_BITS - c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); - k = h->fast[c]; - if (k < 255) { - int s = h->size[k]; - if (s > j->code_bits) - return -1; - j->code_buffer <<= s; - j->code_bits -= s; - return h->values[k]; - } - - // naive test is to shift the code_buffer down so k bits are - // valid, then test against maxcode. To speed this up, we've - // preshifted maxcode left so that it has (16-k) 0s at the - // end; in other words, regardless of the number of bits, it - // wants to be compared against something shifted to have 16; - // that way we don't need to shift inside the loop. - temp = j->code_buffer >> 16; - for (k=FAST_BITS+1 ; ; ++k) - if (temp < h->maxcode[k]) - break; - if (k == 17) { - // error! 
code not found - j->code_bits -= 16; - return -1; - } - - if (k > j->code_bits) - return -1; - - // convert the huffman code to the symbol id - c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k]; - STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]); - - // convert the id to a symbol - j->code_bits -= k; - j->code_buffer <<= k; - return h->values[c]; -} - -// bias[n] = (-1<code_bits < n) stbi__grow_buffer_unsafe(j); - - sgn = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB - k = stbi_lrot(j->code_buffer, n); - STBI_ASSERT(n >= 0 && n < (int) (sizeof(stbi__bmask)/sizeof(*stbi__bmask))); - j->code_buffer = k & ~stbi__bmask[n]; - k &= stbi__bmask[n]; - j->code_bits -= n; - return k + (stbi__jbias[n] & ~sgn); -} - -// get some unsigned bits -stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n) -{ - unsigned int k; - if (j->code_bits < n) stbi__grow_buffer_unsafe(j); - k = stbi_lrot(j->code_buffer, n); - j->code_buffer = k & ~stbi__bmask[n]; - k &= stbi__bmask[n]; - j->code_bits -= n; - return k; -} - -stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j) -{ - unsigned int k; - if (j->code_bits < 1) stbi__grow_buffer_unsafe(j); - k = j->code_buffer; - j->code_buffer <<= 1; - --j->code_bits; - return k & 0x80000000; -} - -// given a value that's at position X in the zigzag stream, -// where does it appear in the 8x8 matrix coded as row-major? 
-static stbi_uc stbi__jpeg_dezigzag[64+15] = -{ - 0, 1, 8, 16, 9, 2, 3, 10, - 17, 24, 32, 25, 18, 11, 4, 5, - 12, 19, 26, 33, 40, 48, 41, 34, - 27, 20, 13, 6, 7, 14, 21, 28, - 35, 42, 49, 56, 57, 50, 43, 36, - 29, 22, 15, 23, 30, 37, 44, 51, - 58, 59, 52, 45, 38, 31, 39, 46, - 53, 60, 61, 54, 47, 55, 62, 63, - // let corrupt input sample past end - 63, 63, 63, 63, 63, 63, 63, 63, - 63, 63, 63, 63, 63, 63, 63 -}; - -// decode one 64-entry block-- -static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant) -{ - int diff,dc,k; - int t; - - if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); - t = stbi__jpeg_huff_decode(j, hdc); - if (t < 0) return stbi__err("bad huffman code","Corrupt JPEG"); - - // 0 all the ac values now so we can do it 32-bits at a time - memset(data,0,64*sizeof(data[0])); - - diff = t ? stbi__extend_receive(j, t) : 0; - dc = j->img_comp[b].dc_pred + diff; - j->img_comp[b].dc_pred = dc; - data[0] = (short) (dc * dequant[0]); - - // decode AC components, see JPEG spec - k = 1; - do { - unsigned int zig; - int c,r,s; - if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); - c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); - r = fac[c]; - if (r) { // fast-AC path - k += (r >> 4) & 15; // run - s = r & 15; // combined length - j->code_buffer <<= s; - j->code_bits -= s; - // decode into unzigzag'd location - zig = stbi__jpeg_dezigzag[k++]; - data[zig] = (short) ((r >> 8) * dequant[zig]); - } else { - int rs = stbi__jpeg_huff_decode(j, hac); - if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); - s = rs & 15; - r = rs >> 4; - if (s == 0) { - if (rs != 0xf0) break; // end block - k += 16; - } else { - k += r; - // decode into unzigzag'd location - zig = stbi__jpeg_dezigzag[k++]; - data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]); - } - } - } while (k < 64); - return 1; -} - -static int 
stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b) -{ - int diff,dc; - int t; - if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); - - if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); - - if (j->succ_high == 0) { - // first scan for DC coefficient, must be first - memset(data,0,64*sizeof(data[0])); // 0 all the ac values now - t = stbi__jpeg_huff_decode(j, hdc); - diff = t ? stbi__extend_receive(j, t) : 0; - - dc = j->img_comp[b].dc_pred + diff; - j->img_comp[b].dc_pred = dc; - data[0] = (short) (dc << j->succ_low); - } else { - // refinement scan for DC coefficient - if (stbi__jpeg_get_bit(j)) - data[0] += (short) (1 << j->succ_low); - } - return 1; -} - -// @OPTIMIZE: store non-zigzagged during the decode passes, -// and only de-zigzag when dequantizing -static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac) -{ - int k; - if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); - - if (j->succ_high == 0) { - int shift = j->succ_low; - - if (j->eob_run) { - --j->eob_run; - return 1; - } - - k = j->spec_start; - do { - unsigned int zig; - int c,r,s; - if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); - c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); - r = fac[c]; - if (r) { // fast-AC path - k += (r >> 4) & 15; // run - s = r & 15; // combined length - j->code_buffer <<= s; - j->code_bits -= s; - zig = stbi__jpeg_dezigzag[k++]; - data[zig] = (short) ((r >> 8) << shift); - } else { - int rs = stbi__jpeg_huff_decode(j, hac); - if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); - s = rs & 15; - r = rs >> 4; - if (s == 0) { - if (r < 15) { - j->eob_run = (1 << r); - if (r) - j->eob_run += stbi__jpeg_get_bits(j, r); - --j->eob_run; - break; - } - k += 16; - } else { - k += r; - zig = stbi__jpeg_dezigzag[k++]; - data[zig] = (short) (stbi__extend_receive(j,s) << shift); - } - } - } while 
(k <= j->spec_end); - } else { - // refinement scan for these AC coefficients - - short bit = (short) (1 << j->succ_low); - - if (j->eob_run) { - --j->eob_run; - for (k = j->spec_start; k <= j->spec_end; ++k) { - short *p = &data[stbi__jpeg_dezigzag[k]]; - if (*p != 0) - if (stbi__jpeg_get_bit(j)) - if ((*p & bit)==0) { - if (*p > 0) - *p += bit; - else - *p -= bit; - } - } - } else { - k = j->spec_start; - do { - int r,s; - int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh - if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); - s = rs & 15; - r = rs >> 4; - if (s == 0) { - if (r < 15) { - j->eob_run = (1 << r) - 1; - if (r) - j->eob_run += stbi__jpeg_get_bits(j, r); - r = 64; // force end of block - } else { - // r=15 s=0 should write 16 0s, so we just do - // a run of 15 0s and then write s (which is 0), - // so we don't have to do anything special here - } - } else { - if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG"); - // sign bit - if (stbi__jpeg_get_bit(j)) - s = bit; - else - s = -bit; - } - - // advance by r - while (k <= j->spec_end) { - short *p = &data[stbi__jpeg_dezigzag[k++]]; - if (*p != 0) { - if (stbi__jpeg_get_bit(j)) - if ((*p & bit)==0) { - if (*p > 0) - *p += bit; - else - *p -= bit; - } - } else { - if (r == 0) { - *p = (short) s; - break; - } - --r; - } - } - } while (k <= j->spec_end); - } - } - return 1; -} - -// take a -128..127 value and stbi__clamp it and convert to 0..255 -stbi_inline static stbi_uc stbi__clamp(int x) -{ - // trick to use a single test to catch both cases - if ((unsigned int) x > 255) { - if (x < 0) return 0; - if (x > 255) return 255; - } - return (stbi_uc) x; -} - -#define stbi__f2f(x) ((int) (((x) * 4096 + 0.5))) -#define stbi__fsh(x) ((x) << 12) - -// derived from jidctint -- DCT_ISLOW -#define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \ - int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \ - p2 = s2; \ - p3 = s6; \ - p1 = 
(p2+p3) * stbi__f2f(0.5411961f); \ - t2 = p1 + p3*stbi__f2f(-1.847759065f); \ - t3 = p1 + p2*stbi__f2f( 0.765366865f); \ - p2 = s0; \ - p3 = s4; \ - t0 = stbi__fsh(p2+p3); \ - t1 = stbi__fsh(p2-p3); \ - x0 = t0+t3; \ - x3 = t0-t3; \ - x1 = t1+t2; \ - x2 = t1-t2; \ - t0 = s7; \ - t1 = s5; \ - t2 = s3; \ - t3 = s1; \ - p3 = t0+t2; \ - p4 = t1+t3; \ - p1 = t0+t3; \ - p2 = t1+t2; \ - p5 = (p3+p4)*stbi__f2f( 1.175875602f); \ - t0 = t0*stbi__f2f( 0.298631336f); \ - t1 = t1*stbi__f2f( 2.053119869f); \ - t2 = t2*stbi__f2f( 3.072711026f); \ - t3 = t3*stbi__f2f( 1.501321110f); \ - p1 = p5 + p1*stbi__f2f(-0.899976223f); \ - p2 = p5 + p2*stbi__f2f(-2.562915447f); \ - p3 = p3*stbi__f2f(-1.961570560f); \ - p4 = p4*stbi__f2f(-0.390180644f); \ - t3 += p1+p4; \ - t2 += p2+p3; \ - t1 += p2+p4; \ - t0 += p1+p3; - -static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64]) -{ - int i,val[64],*v=val; - stbi_uc *o; - short *d = data; - - // columns - for (i=0; i < 8; ++i,++d, ++v) { - // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing - if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0 - && d[40]==0 && d[48]==0 && d[56]==0) { - // no shortcut 0 seconds - // (1|2|3|4|5|6|7)==0 0 seconds - // all separate -0.047 seconds - // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds - int dcterm = d[0] << 2; - v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm; - } else { - STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56]) - // constants scaled things up by 1<<12; let's bring them back - // down, but keep 2 extra bits of precision - x0 += 512; x1 += 512; x2 += 512; x3 += 512; - v[ 0] = (x0+t3) >> 10; - v[56] = (x0-t3) >> 10; - v[ 8] = (x1+t2) >> 10; - v[48] = (x1-t2) >> 10; - v[16] = (x2+t1) >> 10; - v[40] = (x2-t1) >> 10; - v[24] = (x3+t0) >> 10; - v[32] = (x3-t0) >> 10; - } - } - - for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) { - // no fast case since the first 1D IDCT spread components out - 
STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]) - // constants scaled things up by 1<<12, plus we had 1<<2 from first - // loop, plus horizontal and vertical each scale by sqrt(8) so together - // we've got an extra 1<<3, so 1<<17 total we need to remove. - // so we want to round that, which means adding 0.5 * 1<<17, - // aka 65536. Also, we'll end up with -128 to 127 that we want - // to encode as 0..255 by adding 128, so we'll add that before the shift - x0 += 65536 + (128<<17); - x1 += 65536 + (128<<17); - x2 += 65536 + (128<<17); - x3 += 65536 + (128<<17); - // tried computing the shifts into temps, or'ing the temps to see - // if any were out of range, but that was slower - o[0] = stbi__clamp((x0+t3) >> 17); - o[7] = stbi__clamp((x0-t3) >> 17); - o[1] = stbi__clamp((x1+t2) >> 17); - o[6] = stbi__clamp((x1-t2) >> 17); - o[2] = stbi__clamp((x2+t1) >> 17); - o[5] = stbi__clamp((x2-t1) >> 17); - o[3] = stbi__clamp((x3+t0) >> 17); - o[4] = stbi__clamp((x3-t0) >> 17); - } -} - -#ifdef STBI_SSE2 -// sse2 integer IDCT. not the fastest possible implementation but it -// produces bit-identical results to the generic C version so it's -// fully "transparent". -static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64]) -{ - // This is constructed to match our regular (generic) integer IDCT exactly. 
- __m128i row0, row1, row2, row3, row4, row5, row6, row7; - __m128i tmp; - - // dot product constant: even elems=x, odd elems=y - #define dct_const(x,y) _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y)) - - // out(0) = c0[even]*x + c0[odd]*y (c0, x, y 16-bit, out 32-bit) - // out(1) = c1[even]*x + c1[odd]*y - #define dct_rot(out0,out1, x,y,c0,c1) \ - __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \ - __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \ - __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \ - __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \ - __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \ - __m128i out1##_h = _mm_madd_epi16(c0##hi, c1) - - // out = in << 12 (in 16-bit, out 32-bit) - #define dct_widen(out, in) \ - __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \ - __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4) - - // wide add - #define dct_wadd(out, a, b) \ - __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \ - __m128i out##_h = _mm_add_epi32(a##_h, b##_h) - - // wide sub - #define dct_wsub(out, a, b) \ - __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \ - __m128i out##_h = _mm_sub_epi32(a##_h, b##_h) - - // butterfly a/b, add bias, then shift by "s" and pack - #define dct_bfly32o(out0, out1, a,b,bias,s) \ - { \ - __m128i abiased_l = _mm_add_epi32(a##_l, bias); \ - __m128i abiased_h = _mm_add_epi32(a##_h, bias); \ - dct_wadd(sum, abiased, b); \ - dct_wsub(dif, abiased, b); \ - out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \ - out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \ - } - - // 8-bit interleave step (for transposes) - #define dct_interleave8(a, b) \ - tmp = a; \ - a = _mm_unpacklo_epi8(a, b); \ - b = _mm_unpackhi_epi8(tmp, b) - - // 16-bit interleave step (for transposes) - #define dct_interleave16(a, b) \ - tmp = a; \ - a = _mm_unpacklo_epi16(a, b); \ - b = _mm_unpackhi_epi16(tmp, b) - - #define dct_pass(bias,shift) \ - { \ - 
/* even part */ \ - dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \ - __m128i sum04 = _mm_add_epi16(row0, row4); \ - __m128i dif04 = _mm_sub_epi16(row0, row4); \ - dct_widen(t0e, sum04); \ - dct_widen(t1e, dif04); \ - dct_wadd(x0, t0e, t3e); \ - dct_wsub(x3, t0e, t3e); \ - dct_wadd(x1, t1e, t2e); \ - dct_wsub(x2, t1e, t2e); \ - /* odd part */ \ - dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \ - dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \ - __m128i sum17 = _mm_add_epi16(row1, row7); \ - __m128i sum35 = _mm_add_epi16(row3, row5); \ - dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \ - dct_wadd(x4, y0o, y4o); \ - dct_wadd(x5, y1o, y5o); \ - dct_wadd(x6, y2o, y5o); \ - dct_wadd(x7, y3o, y4o); \ - dct_bfly32o(row0,row7, x0,x7,bias,shift); \ - dct_bfly32o(row1,row6, x1,x6,bias,shift); \ - dct_bfly32o(row2,row5, x2,x5,bias,shift); \ - dct_bfly32o(row3,row4, x3,x4,bias,shift); \ - } - - __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f)); - __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f)); - __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f)); - __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f)); - __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f)); - __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f)); - __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f)); - __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f)); - - // rounding biases in column/row passes, see stbi__idct_block for explanation. 
- __m128i bias_0 = _mm_set1_epi32(512); - __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17)); - - // load - row0 = _mm_load_si128((const __m128i *) (data + 0*8)); - row1 = _mm_load_si128((const __m128i *) (data + 1*8)); - row2 = _mm_load_si128((const __m128i *) (data + 2*8)); - row3 = _mm_load_si128((const __m128i *) (data + 3*8)); - row4 = _mm_load_si128((const __m128i *) (data + 4*8)); - row5 = _mm_load_si128((const __m128i *) (data + 5*8)); - row6 = _mm_load_si128((const __m128i *) (data + 6*8)); - row7 = _mm_load_si128((const __m128i *) (data + 7*8)); - - // column pass - dct_pass(bias_0, 10); - - { - // 16bit 8x8 transpose pass 1 - dct_interleave16(row0, row4); - dct_interleave16(row1, row5); - dct_interleave16(row2, row6); - dct_interleave16(row3, row7); - - // transpose pass 2 - dct_interleave16(row0, row2); - dct_interleave16(row1, row3); - dct_interleave16(row4, row6); - dct_interleave16(row5, row7); - - // transpose pass 3 - dct_interleave16(row0, row1); - dct_interleave16(row2, row3); - dct_interleave16(row4, row5); - dct_interleave16(row6, row7); - } - - // row pass - dct_pass(bias_1, 17); - - { - // pack - __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7 - __m128i p1 = _mm_packus_epi16(row2, row3); - __m128i p2 = _mm_packus_epi16(row4, row5); - __m128i p3 = _mm_packus_epi16(row6, row7); - - // 8bit 8x8 transpose pass 1 - dct_interleave8(p0, p2); // a0e0a1e1... - dct_interleave8(p1, p3); // c0g0c1g1... - - // transpose pass 2 - dct_interleave8(p0, p1); // a0c0e0g0... - dct_interleave8(p2, p3); // b0d0f0h0... - - // transpose pass 3 - dct_interleave8(p0, p2); // a0b0c0d0... - dct_interleave8(p1, p3); // a4b4c4d4... 
- - // store - _mm_storel_epi64((__m128i *) out, p0); out += out_stride; - _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride; - _mm_storel_epi64((__m128i *) out, p2); out += out_stride; - _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride; - _mm_storel_epi64((__m128i *) out, p1); out += out_stride; - _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride; - _mm_storel_epi64((__m128i *) out, p3); out += out_stride; - _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e)); - } - -#undef dct_const -#undef dct_rot -#undef dct_widen -#undef dct_wadd -#undef dct_wsub -#undef dct_bfly32o -#undef dct_interleave8 -#undef dct_interleave16 -#undef dct_pass -} - -#endif // STBI_SSE2 - -#ifdef STBI_NEON - -// NEON integer IDCT. should produce bit-identical -// results to the generic C version. -static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64]) -{ - int16x8_t row0, row1, row2, row3, row4, row5, row6, row7; - - int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f)); - int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f)); - int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f)); - int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f)); - int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f)); - int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f)); - int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f)); - int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f)); - int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f)); - int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f)); - int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f)); - int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f)); - -#define dct_long_mul(out, inq, coeff) \ - int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \ - int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff) - -#define dct_long_mac(out, acc, inq, coeff) \ - int32x4_t out##_l = vmlal_s16(acc##_l, 
vget_low_s16(inq), coeff); \ - int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff) - -#define dct_widen(out, inq) \ - int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \ - int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12) - -// wide add -#define dct_wadd(out, a, b) \ - int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \ - int32x4_t out##_h = vaddq_s32(a##_h, b##_h) - -// wide sub -#define dct_wsub(out, a, b) \ - int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \ - int32x4_t out##_h = vsubq_s32(a##_h, b##_h) - -// butterfly a/b, then shift using "shiftop" by "s" and pack -#define dct_bfly32o(out0,out1, a,b,shiftop,s) \ - { \ - dct_wadd(sum, a, b); \ - dct_wsub(dif, a, b); \ - out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \ - out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \ - } - -#define dct_pass(shiftop, shift) \ - { \ - /* even part */ \ - int16x8_t sum26 = vaddq_s16(row2, row6); \ - dct_long_mul(p1e, sum26, rot0_0); \ - dct_long_mac(t2e, p1e, row6, rot0_1); \ - dct_long_mac(t3e, p1e, row2, rot0_2); \ - int16x8_t sum04 = vaddq_s16(row0, row4); \ - int16x8_t dif04 = vsubq_s16(row0, row4); \ - dct_widen(t0e, sum04); \ - dct_widen(t1e, dif04); \ - dct_wadd(x0, t0e, t3e); \ - dct_wsub(x3, t0e, t3e); \ - dct_wadd(x1, t1e, t2e); \ - dct_wsub(x2, t1e, t2e); \ - /* odd part */ \ - int16x8_t sum15 = vaddq_s16(row1, row5); \ - int16x8_t sum17 = vaddq_s16(row1, row7); \ - int16x8_t sum35 = vaddq_s16(row3, row5); \ - int16x8_t sum37 = vaddq_s16(row3, row7); \ - int16x8_t sumodd = vaddq_s16(sum17, sum35); \ - dct_long_mul(p5o, sumodd, rot1_0); \ - dct_long_mac(p1o, p5o, sum17, rot1_1); \ - dct_long_mac(p2o, p5o, sum35, rot1_2); \ - dct_long_mul(p3o, sum37, rot2_0); \ - dct_long_mul(p4o, sum15, rot2_1); \ - dct_wadd(sump13o, p1o, p3o); \ - dct_wadd(sump24o, p2o, p4o); \ - dct_wadd(sump23o, p2o, p3o); \ - dct_wadd(sump14o, p1o, p4o); \ - dct_long_mac(x4, sump13o, row7, rot3_0); \ - dct_long_mac(x5, sump24o, row5, rot3_1); \ - 
dct_long_mac(x6, sump23o, row3, rot3_2); \ - dct_long_mac(x7, sump14o, row1, rot3_3); \ - dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \ - dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \ - dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \ - dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \ - } - - // load - row0 = vld1q_s16(data + 0*8); - row1 = vld1q_s16(data + 1*8); - row2 = vld1q_s16(data + 2*8); - row3 = vld1q_s16(data + 3*8); - row4 = vld1q_s16(data + 4*8); - row5 = vld1q_s16(data + 5*8); - row6 = vld1q_s16(data + 6*8); - row7 = vld1q_s16(data + 7*8); - - // add DC bias - row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0)); - - // column pass - dct_pass(vrshrn_n_s32, 10); - - // 16bit 8x8 transpose - { -// these three map to a single VTRN.16, VTRN.32, and VSWP, respectively. -// whether compilers actually get this is another story, sadly. -#define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; } -#define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); } -#define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); } - - // pass 1 - dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6 - dct_trn16(row2, row3); - dct_trn16(row4, row5); - dct_trn16(row6, row7); - - // pass 2 - dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4 - dct_trn32(row1, row3); - dct_trn32(row4, row6); - dct_trn32(row5, row7); - - // pass 3 - dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0 - dct_trn64(row1, row5); - dct_trn64(row2, row6); - dct_trn64(row3, row7); - -#undef dct_trn16 -#undef dct_trn32 -#undef dct_trn64 - } - - // row pass - // vrshrn_n_s32 only supports shifts up to 16, we need - // 17. so do a non-rounding shift of 16 first then follow - // up with a rounding shift by 1. 
- dct_pass(vshrn_n_s32, 16); - - { - // pack and round - uint8x8_t p0 = vqrshrun_n_s16(row0, 1); - uint8x8_t p1 = vqrshrun_n_s16(row1, 1); - uint8x8_t p2 = vqrshrun_n_s16(row2, 1); - uint8x8_t p3 = vqrshrun_n_s16(row3, 1); - uint8x8_t p4 = vqrshrun_n_s16(row4, 1); - uint8x8_t p5 = vqrshrun_n_s16(row5, 1); - uint8x8_t p6 = vqrshrun_n_s16(row6, 1); - uint8x8_t p7 = vqrshrun_n_s16(row7, 1); - - // again, these can translate into one instruction, but often don't. -#define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; } -#define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); } -#define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); } - - // sadly can't use interleaved stores here since we only write - // 8 bytes to each scan line! 
- - // 8x8 8-bit transpose pass 1 - dct_trn8_8(p0, p1); - dct_trn8_8(p2, p3); - dct_trn8_8(p4, p5); - dct_trn8_8(p6, p7); - - // pass 2 - dct_trn8_16(p0, p2); - dct_trn8_16(p1, p3); - dct_trn8_16(p4, p6); - dct_trn8_16(p5, p7); - - // pass 3 - dct_trn8_32(p0, p4); - dct_trn8_32(p1, p5); - dct_trn8_32(p2, p6); - dct_trn8_32(p3, p7); - - // store - vst1_u8(out, p0); out += out_stride; - vst1_u8(out, p1); out += out_stride; - vst1_u8(out, p2); out += out_stride; - vst1_u8(out, p3); out += out_stride; - vst1_u8(out, p4); out += out_stride; - vst1_u8(out, p5); out += out_stride; - vst1_u8(out, p6); out += out_stride; - vst1_u8(out, p7); - -#undef dct_trn8_8 -#undef dct_trn8_16 -#undef dct_trn8_32 - } - -#undef dct_long_mul -#undef dct_long_mac -#undef dct_widen -#undef dct_wadd -#undef dct_wsub -#undef dct_bfly32o -#undef dct_pass -} - -#endif // STBI_NEON - -#define STBI__MARKER_none 0xff -// if there's a pending marker from the entropy stream, return that -// otherwise, fetch from the stream and get a marker. if there's no -// marker, return 0xff, which is never a valid marker value -static stbi_uc stbi__get_marker(stbi__jpeg *j) -{ - stbi_uc x; - if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; } - x = stbi__get8(j->s); - if (x != 0xff) return STBI__MARKER_none; - while (x == 0xff) - x = stbi__get8(j->s); // consume repeated 0xff fill bytes - return x; -} - -// in each scan, we'll have scan_n components, and the order -// of the components is specified by order[] -#define STBI__RESTART(x) ((x) >= 0xd0 && (x) <= 0xd7) - -// after a restart interval, stbi__jpeg_reset the entropy decoder and -// the dc prediction -static void stbi__jpeg_reset(stbi__jpeg *j) -{ - j->code_bits = 0; - j->code_buffer = 0; - j->nomore = 0; - j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0; - j->marker = STBI__MARKER_none; - j->todo = j->restart_interval ? 
j->restart_interval : 0x7fffffff; - j->eob_run = 0; - // no more than 1<<31 MCUs if no restart_interal? that's plenty safe, - // since we don't even allow 1<<30 pixels -} - -static int stbi__parse_entropy_coded_data(stbi__jpeg *z) -{ - stbi__jpeg_reset(z); - if (!z->progressive) { - if (z->scan_n == 1) { - int i,j; - STBI_SIMD_ALIGN(short, data[64]); - int n = z->order[0]; - // non-interleaved data, we just need to process one block at a time, - // in trivial scanline order - // number of blocks to do just depends on how many actual "pixels" this - // component has, independent of interleaved MCU blocking and such - int w = (z->img_comp[n].x+7) >> 3; - int h = (z->img_comp[n].y+7) >> 3; - for (j=0; j < h; ++j) { - for (i=0; i < w; ++i) { - int ha = z->img_comp[n].ha; - if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0; - z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data); - // every data block is an MCU, so countdown the restart interval - if (--z->todo <= 0) { - if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); - // if it's NOT a restart, then just bail, so we get corrupt data - // rather than no data - if (!STBI__RESTART(z->marker)) return 1; - stbi__jpeg_reset(z); - } - } - } - return 1; - } else { // interleaved - int i,j,k,x,y; - STBI_SIMD_ALIGN(short, data[64]); - for (j=0; j < z->img_mcu_y; ++j) { - for (i=0; i < z->img_mcu_x; ++i) { - // scan an interleaved mcu... 
process scan_n components in order - for (k=0; k < z->scan_n; ++k) { - int n = z->order[k]; - // scan out an mcu's worth of this component; that's just determined - // by the basic H and V specified for the component - for (y=0; y < z->img_comp[n].v; ++y) { - for (x=0; x < z->img_comp[n].h; ++x) { - int x2 = (i*z->img_comp[n].h + x)*8; - int y2 = (j*z->img_comp[n].v + y)*8; - int ha = z->img_comp[n].ha; - if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0; - z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data); - } - } - } - // after all interleaved components, that's an interleaved MCU, - // so now count down the restart interval - if (--z->todo <= 0) { - if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); - if (!STBI__RESTART(z->marker)) return 1; - stbi__jpeg_reset(z); - } - } - } - return 1; - } - } else { - if (z->scan_n == 1) { - int i,j; - int n = z->order[0]; - // non-interleaved data, we just need to process one block at a time, - // in trivial scanline order - // number of blocks to do just depends on how many actual "pixels" this - // component has, independent of interleaved MCU blocking and such - int w = (z->img_comp[n].x+7) >> 3; - int h = (z->img_comp[n].y+7) >> 3; - for (j=0; j < h; ++j) { - for (i=0; i < w; ++i) { - short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w); - if (z->spec_start == 0) { - if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n)) - return 0; - } else { - int ha = z->img_comp[n].ha; - if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha])) - return 0; - } - // every data block is an MCU, so countdown the restart interval - if (--z->todo <= 0) { - if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); - if (!STBI__RESTART(z->marker)) return 1; - stbi__jpeg_reset(z); - } - } - } - return 1; - } else { // interleaved - int 
i,j,k,x,y; - for (j=0; j < z->img_mcu_y; ++j) { - for (i=0; i < z->img_mcu_x; ++i) { - // scan an interleaved mcu... process scan_n components in order - for (k=0; k < z->scan_n; ++k) { - int n = z->order[k]; - // scan out an mcu's worth of this component; that's just determined - // by the basic H and V specified for the component - for (y=0; y < z->img_comp[n].v; ++y) { - for (x=0; x < z->img_comp[n].h; ++x) { - int x2 = (i*z->img_comp[n].h + x); - int y2 = (j*z->img_comp[n].v + y); - short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w); - if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n)) - return 0; - } - } - } - // after all interleaved components, that's an interleaved MCU, - // so now count down the restart interval - if (--z->todo <= 0) { - if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); - if (!STBI__RESTART(z->marker)) return 1; - stbi__jpeg_reset(z); - } - } - } - return 1; - } - } -} - -static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant) -{ - int i; - for (i=0; i < 64; ++i) - data[i] *= dequant[i]; -} - -static void stbi__jpeg_finish(stbi__jpeg *z) -{ - if (z->progressive) { - // dequantize and idct the data - int i,j,n; - for (n=0; n < z->s->img_n; ++n) { - int w = (z->img_comp[n].x+7) >> 3; - int h = (z->img_comp[n].y+7) >> 3; - for (j=0; j < h; ++j) { - for (i=0; i < w; ++i) { - short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w); - stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]); - z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data); - } - } - } - } -} - -static int stbi__process_marker(stbi__jpeg *z, int m) -{ - int L; - switch (m) { - case STBI__MARKER_none: // no marker found - return stbi__err("expected marker","Corrupt JPEG"); - - case 0xDD: // DRI - specify restart interval - if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG"); - z->restart_interval = 
stbi__get16be(z->s); - return 1; - - case 0xDB: // DQT - define quantization table - L = stbi__get16be(z->s)-2; - while (L > 0) { - int q = stbi__get8(z->s); - int p = q >> 4, sixteen = (p != 0); - int t = q & 15,i; - if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG"); - if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG"); - - for (i=0; i < 64; ++i) - z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s)); - L -= (sixteen ? 129 : 65); - } - return L==0; - - case 0xC4: // DHT - define huffman table - L = stbi__get16be(z->s)-2; - while (L > 0) { - stbi_uc *v; - int sizes[16],i,n=0; - int q = stbi__get8(z->s); - int tc = q >> 4; - int th = q & 15; - if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG"); - for (i=0; i < 16; ++i) { - sizes[i] = stbi__get8(z->s); - n += sizes[i]; - } - L -= 17; - if (tc == 0) { - if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0; - v = z->huff_dc[th].values; - } else { - if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0; - v = z->huff_ac[th].values; - } - for (i=0; i < n; ++i) - v[i] = stbi__get8(z->s); - if (tc != 0) - stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th); - L -= n; - } - return L==0; - } - - // check for comment block or APP blocks - if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) { - L = stbi__get16be(z->s); - if (L < 2) { - if (m == 0xFE) - return stbi__err("bad COM len","Corrupt JPEG"); - else - return stbi__err("bad APP len","Corrupt JPEG"); - } - L -= 2; - - if (m == 0xE0 && L >= 5) { // JFIF APP0 segment - static const unsigned char tag[5] = {'J','F','I','F','\0'}; - int ok = 1; - int i; - for (i=0; i < 5; ++i) - if (stbi__get8(z->s) != tag[i]) - ok = 0; - L -= 5; - if (ok) - z->jfif = 1; - } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment - static const unsigned char tag[6] = {'A','d','o','b','e','\0'}; - int ok = 1; - int i; - for (i=0; i < 6; ++i) - if (stbi__get8(z->s) != tag[i]) - ok = 0; - L -= 6; 
- if (ok) { - stbi__get8(z->s); // version - stbi__get16be(z->s); // flags0 - stbi__get16be(z->s); // flags1 - z->app14_color_transform = stbi__get8(z->s); // color transform - L -= 6; - } - } - - stbi__skip(z->s, L); - return 1; - } - - return stbi__err("unknown marker","Corrupt JPEG"); -} - -// after we see SOS -static int stbi__process_scan_header(stbi__jpeg *z) -{ - int i; - int Ls = stbi__get16be(z->s); - z->scan_n = stbi__get8(z->s); - if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG"); - if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG"); - for (i=0; i < z->scan_n; ++i) { - int id = stbi__get8(z->s), which; - int q = stbi__get8(z->s); - for (which = 0; which < z->s->img_n; ++which) - if (z->img_comp[which].id == id) - break; - if (which == z->s->img_n) return 0; // no match - z->img_comp[which].hd = q >> 4; if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG"); - z->img_comp[which].ha = q & 15; if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG"); - z->order[i] = which; - } - - { - int aa; - z->spec_start = stbi__get8(z->s); - z->spec_end = stbi__get8(z->s); // should be 63, but might be 0 - aa = stbi__get8(z->s); - z->succ_high = (aa >> 4); - z->succ_low = (aa & 15); - if (z->progressive) { - if (z->spec_start > 63 || z->spec_end > 63 || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13) - return stbi__err("bad SOS", "Corrupt JPEG"); - } else { - if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG"); - if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG"); - z->spec_end = 63; - } - } - - return 1; -} - -static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why) -{ - int i; - for (i=0; i < ncomp; ++i) { - if (z->img_comp[i].raw_data) { - STBI_FREE(z->img_comp[i].raw_data); - z->img_comp[i].raw_data = NULL; - z->img_comp[i].data = NULL; - 
} - if (z->img_comp[i].raw_coeff) { - STBI_FREE(z->img_comp[i].raw_coeff); - z->img_comp[i].raw_coeff = 0; - z->img_comp[i].coeff = 0; - } - if (z->img_comp[i].linebuf) { - STBI_FREE(z->img_comp[i].linebuf); - z->img_comp[i].linebuf = NULL; - } - } - return why; -} - -static int stbi__process_frame_header(stbi__jpeg *z, int scan) -{ - stbi__context *s = z->s; - int Lf,p,i,q, h_max=1,v_max=1,c; - Lf = stbi__get16be(s); if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG - p = stbi__get8(s); if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline - s->img_y = stbi__get16be(s); if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG - s->img_x = stbi__get16be(s); if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires - c = stbi__get8(s); - if (c != 3 && c != 1 && c != 4) return stbi__err("bad component count","Corrupt JPEG"); - s->img_n = c; - for (i=0; i < c; ++i) { - z->img_comp[i].data = NULL; - z->img_comp[i].linebuf = NULL; - } - - if (Lf != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG"); - - z->rgb = 0; - for (i=0; i < s->img_n; ++i) { - static unsigned char rgb[3] = { 'R', 'G', 'B' }; - z->img_comp[i].id = stbi__get8(s); - if (s->img_n == 3 && z->img_comp[i].id == rgb[i]) - ++z->rgb; - q = stbi__get8(s); - z->img_comp[i].h = (q >> 4); if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG"); - z->img_comp[i].v = q & 15; if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG"); - z->img_comp[i].tq = stbi__get8(s); if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG"); - } - - if (scan != STBI__SCAN_load) return 1; - - if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode"); - - for (i=0; i < s->img_n; ++i) { - if 
(z->img_comp[i].h > h_max) h_max = z->img_comp[i].h; - if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v; - } - - // compute interleaved mcu info - z->img_h_max = h_max; - z->img_v_max = v_max; - z->img_mcu_w = h_max * 8; - z->img_mcu_h = v_max * 8; - // these sizes can't be more than 17 bits - z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w; - z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h; - - for (i=0; i < s->img_n; ++i) { - // number of effective pixels (e.g. for non-interleaved MCU) - z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max; - z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max; - // to simplify generation, we'll allocate enough memory to decode - // the bogus oversized data from using interleaved MCUs and their - // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't - // discard the extra data until colorspace conversion - // - // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier) - // so these muls can't overflow with 32-bit ints (which we require) - z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8; - z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8; - z->img_comp[i].coeff = 0; - z->img_comp[i].raw_coeff = 0; - z->img_comp[i].linebuf = NULL; - z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15); - if (z->img_comp[i].raw_data == NULL) - return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory")); - // align blocks for idct using mmx/sse - z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15); - if (z->progressive) { - // w2, h2 are multiples of 8 (see above) - z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8; - z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8; - z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15); - if (z->img_comp[i].raw_coeff == NULL) - return stbi__free_jpeg_components(z, i+1, 
stbi__err("outofmem", "Out of memory")); - z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15); - } - } - - return 1; -} - -// use comparisons since in some cases we handle more than one case (e.g. SOF) -#define stbi__DNL(x) ((x) == 0xdc) -#define stbi__SOI(x) ((x) == 0xd8) -#define stbi__EOI(x) ((x) == 0xd9) -#define stbi__SOF(x) ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2) -#define stbi__SOS(x) ((x) == 0xda) - -#define stbi__SOF_progressive(x) ((x) == 0xc2) - -static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan) -{ - int m; - z->jfif = 0; - z->app14_color_transform = -1; // valid values are 0,1,2 - z->marker = STBI__MARKER_none; // initialize cached marker to empty - m = stbi__get_marker(z); - if (!stbi__SOI(m)) return stbi__err("no SOI","Corrupt JPEG"); - if (scan == STBI__SCAN_type) return 1; - m = stbi__get_marker(z); - while (!stbi__SOF(m)) { - if (!stbi__process_marker(z,m)) return 0; - m = stbi__get_marker(z); - while (m == STBI__MARKER_none) { - // some files have extra padding after their blocks, so ok, we'll scan - if (stbi__at_eof(z->s)) return stbi__err("no SOF", "Corrupt JPEG"); - m = stbi__get_marker(z); - } - } - z->progressive = stbi__SOF_progressive(m); - if (!stbi__process_frame_header(z, scan)) return 0; - return 1; -} - -// decode image to YCbCr format -static int stbi__decode_jpeg_image(stbi__jpeg *j) -{ - int m; - for (m = 0; m < 4; m++) { - j->img_comp[m].raw_data = NULL; - j->img_comp[m].raw_coeff = NULL; - } - j->restart_interval = 0; - if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0; - m = stbi__get_marker(j); - while (!stbi__EOI(m)) { - if (stbi__SOS(m)) { - if (!stbi__process_scan_header(j)) return 0; - if (!stbi__parse_entropy_coded_data(j)) return 0; - if (j->marker == STBI__MARKER_none ) { - // handle 0s at the end of image data from IP Kamera 9060 - while (!stbi__at_eof(j->s)) { - int x = stbi__get8(j->s); - if (x == 255) { - j->marker = stbi__get8(j->s); - break; - } - } - // if we 
reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0 - } - } else if (stbi__DNL(m)) { - int Ld = stbi__get16be(j->s); - stbi__uint32 NL = stbi__get16be(j->s); - if (Ld != 4) stbi__err("bad DNL len", "Corrupt JPEG"); - if (NL != j->s->img_y) stbi__err("bad DNL height", "Corrupt JPEG"); - } else { - if (!stbi__process_marker(j, m)) return 0; - } - m = stbi__get_marker(j); - } - if (j->progressive) - stbi__jpeg_finish(j); - return 1; -} - -// static jfif-centered resampling (across block boundaries) - -typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1, - int w, int hs); - -#define stbi__div4(x) ((stbi_uc) ((x) >> 2)) - -static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) -{ - STBI_NOTUSED(out); - STBI_NOTUSED(in_far); - STBI_NOTUSED(w); - STBI_NOTUSED(hs); - return in_near; -} - -static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) -{ - // need to generate two samples vertically for every one in input - int i; - STBI_NOTUSED(hs); - for (i=0; i < w; ++i) - out[i] = stbi__div4(3*in_near[i] + in_far[i] + 2); - return out; -} - -static stbi_uc* stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) -{ - // need to generate two samples horizontally for every one in input - int i; - stbi_uc *input = in_near; - - if (w == 1) { - // if only one sample, can't do any interpolation - out[0] = out[1] = input[0]; - return out; - } - - out[0] = input[0]; - out[1] = stbi__div4(input[0]*3 + input[1] + 2); - for (i=1; i < w-1; ++i) { - int n = 3*input[i]+2; - out[i*2+0] = stbi__div4(n+input[i-1]); - out[i*2+1] = stbi__div4(n+input[i+1]); - } - out[i*2+0] = stbi__div4(input[w-2]*3 + input[w-1] + 2); - out[i*2+1] = input[w-1]; - - STBI_NOTUSED(in_far); - STBI_NOTUSED(hs); - - return out; -} - -#define stbi__div16(x) ((stbi_uc) ((x) >> 4)) - -static stbi_uc *stbi__resample_row_hv_2(stbi_uc 
*out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) -{ - // need to generate 2x2 samples for every one in input - int i,t0,t1; - if (w == 1) { - out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2); - return out; - } - - t1 = 3*in_near[0] + in_far[0]; - out[0] = stbi__div4(t1+2); - for (i=1; i < w; ++i) { - t0 = t1; - t1 = 3*in_near[i]+in_far[i]; - out[i*2-1] = stbi__div16(3*t0 + t1 + 8); - out[i*2 ] = stbi__div16(3*t1 + t0 + 8); - } - out[w*2-1] = stbi__div4(t1+2); - - STBI_NOTUSED(hs); - - return out; -} - -#if defined(STBI_SSE2) || defined(STBI_NEON) -static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) -{ - // need to generate 2x2 samples for every one in input - int i=0,t0,t1; - - if (w == 1) { - out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2); - return out; - } - - t1 = 3*in_near[0] + in_far[0]; - // process groups of 8 pixels for as long as we can. - // note we can't handle the last pixel in a row in this loop - // because we need to handle the filter boundary conditions. - for (; i < ((w-1) & ~7); i += 8) { -#if defined(STBI_SSE2) - // load and perform the vertical filtering pass - // this uses 3*x + y = 4*x + (y - x) - __m128i zero = _mm_setzero_si128(); - __m128i farb = _mm_loadl_epi64((__m128i *) (in_far + i)); - __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i)); - __m128i farw = _mm_unpacklo_epi8(farb, zero); - __m128i nearw = _mm_unpacklo_epi8(nearb, zero); - __m128i diff = _mm_sub_epi16(farw, nearw); - __m128i nears = _mm_slli_epi16(nearw, 2); - __m128i curr = _mm_add_epi16(nears, diff); // current row - - // horizontal filter works the same based on shifted vers of current - // row. "prev" is current row shifted right by 1 pixel; we need to - // insert the previous pixel value (from t1). - // "next" is current row shifted left by 1 pixel, with first pixel - // of next block of 8 pixels added in. 
- __m128i prv0 = _mm_slli_si128(curr, 2); - __m128i nxt0 = _mm_srli_si128(curr, 2); - __m128i prev = _mm_insert_epi16(prv0, t1, 0); - __m128i next = _mm_insert_epi16(nxt0, 3*in_near[i+8] + in_far[i+8], 7); - - // horizontal filter, polyphase implementation since it's convenient: - // even pixels = 3*cur + prev = cur*4 + (prev - cur) - // odd pixels = 3*cur + next = cur*4 + (next - cur) - // note the shared term. - __m128i bias = _mm_set1_epi16(8); - __m128i curs = _mm_slli_epi16(curr, 2); - __m128i prvd = _mm_sub_epi16(prev, curr); - __m128i nxtd = _mm_sub_epi16(next, curr); - __m128i curb = _mm_add_epi16(curs, bias); - __m128i even = _mm_add_epi16(prvd, curb); - __m128i odd = _mm_add_epi16(nxtd, curb); - - // interleave even and odd pixels, then undo scaling. - __m128i int0 = _mm_unpacklo_epi16(even, odd); - __m128i int1 = _mm_unpackhi_epi16(even, odd); - __m128i de0 = _mm_srli_epi16(int0, 4); - __m128i de1 = _mm_srli_epi16(int1, 4); - - // pack and write output - __m128i outv = _mm_packus_epi16(de0, de1); - _mm_storeu_si128((__m128i *) (out + i*2), outv); -#elif defined(STBI_NEON) - // load and perform the vertical filtering pass - // this uses 3*x + y = 4*x + (y - x) - uint8x8_t farb = vld1_u8(in_far + i); - uint8x8_t nearb = vld1_u8(in_near + i); - int16x8_t diff = vreinterpretq_s16_u16(vsubl_u8(farb, nearb)); - int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2)); - int16x8_t curr = vaddq_s16(nears, diff); // current row - - // horizontal filter works the same based on shifted vers of current - // row. "prev" is current row shifted right by 1 pixel; we need to - // insert the previous pixel value (from t1). - // "next" is current row shifted left by 1 pixel, with first pixel - // of next block of 8 pixels added in. 
- int16x8_t prv0 = vextq_s16(curr, curr, 7); - int16x8_t nxt0 = vextq_s16(curr, curr, 1); - int16x8_t prev = vsetq_lane_s16(t1, prv0, 0); - int16x8_t next = vsetq_lane_s16(3*in_near[i+8] + in_far[i+8], nxt0, 7); - - // horizontal filter, polyphase implementation since it's convenient: - // even pixels = 3*cur + prev = cur*4 + (prev - cur) - // odd pixels = 3*cur + next = cur*4 + (next - cur) - // note the shared term. - int16x8_t curs = vshlq_n_s16(curr, 2); - int16x8_t prvd = vsubq_s16(prev, curr); - int16x8_t nxtd = vsubq_s16(next, curr); - int16x8_t even = vaddq_s16(curs, prvd); - int16x8_t odd = vaddq_s16(curs, nxtd); - - // undo scaling and round, then store with even/odd phases interleaved - uint8x8x2_t o; - o.val[0] = vqrshrun_n_s16(even, 4); - o.val[1] = vqrshrun_n_s16(odd, 4); - vst2_u8(out + i*2, o); -#endif - - // "previous" value for next iter - t1 = 3*in_near[i+7] + in_far[i+7]; - } - - t0 = t1; - t1 = 3*in_near[i] + in_far[i]; - out[i*2] = stbi__div16(3*t1 + t0 + 8); - - for (++i; i < w; ++i) { - t0 = t1; - t1 = 3*in_near[i]+in_far[i]; - out[i*2-1] = stbi__div16(3*t0 + t1 + 8); - out[i*2 ] = stbi__div16(3*t1 + t0 + 8); - } - out[w*2-1] = stbi__div4(t1+2); - - STBI_NOTUSED(hs); - - return out; -} -#endif - -static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) -{ - // resample with nearest-neighbor - int i,j; - STBI_NOTUSED(in_far); - for (i=0; i < w; ++i) - for (j=0; j < hs; ++j) - out[i*hs+j] = in_near[i]; - return out; -} - -// this is a reduced-precision calculation of YCbCr-to-RGB introduced -// to make sure the code produces the same results in both SIMD and scalar -#define stbi__float2fixed(x) (((int) ((x) * 4096.0f + 0.5f)) << 8) -static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step) -{ - int i; - for (i=0; i < count; ++i) { - int y_fixed = (y[i] << 20) + (1<<19); // rounding - int r,g,b; - int cr = pcr[i] - 128; - int 
cb = pcb[i] - 128; - r = y_fixed + cr* stbi__float2fixed(1.40200f); - g = y_fixed + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000); - b = y_fixed + cb* stbi__float2fixed(1.77200f); - r >>= 20; - g >>= 20; - b >>= 20; - if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; } - if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; } - if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; } - out[0] = (stbi_uc)r; - out[1] = (stbi_uc)g; - out[2] = (stbi_uc)b; - out[3] = 255; - out += step; - } -} - -#if defined(STBI_SSE2) || defined(STBI_NEON) -static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step) -{ - int i = 0; - -#ifdef STBI_SSE2 - // step == 3 is pretty ugly on the final interleave, and i'm not convinced - // it's useful in practice (you wouldn't use it for textures, for example). - // so just accelerate step == 4 case. - if (step == 4) { - // this is a fairly straightforward implementation and not super-optimized. 
- __m128i signflip = _mm_set1_epi8(-0x80); - __m128i cr_const0 = _mm_set1_epi16( (short) ( 1.40200f*4096.0f+0.5f)); - __m128i cr_const1 = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f)); - __m128i cb_const0 = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f)); - __m128i cb_const1 = _mm_set1_epi16( (short) ( 1.77200f*4096.0f+0.5f)); - __m128i y_bias = _mm_set1_epi8((char) (unsigned char) 128); - __m128i xw = _mm_set1_epi16(255); // alpha channel - - for (; i+7 < count; i += 8) { - // load - __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y+i)); - __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr+i)); - __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb+i)); - __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128 - __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128 - - // unpack to short (and left-shift cr, cb by 8) - __m128i yw = _mm_unpacklo_epi8(y_bias, y_bytes); - __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased); - __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased); - - // color transform - __m128i yws = _mm_srli_epi16(yw, 4); - __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw); - __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw); - __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1); - __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1); - __m128i rws = _mm_add_epi16(cr0, yws); - __m128i gwt = _mm_add_epi16(cb0, yws); - __m128i bws = _mm_add_epi16(yws, cb1); - __m128i gws = _mm_add_epi16(gwt, cr1); - - // descale - __m128i rw = _mm_srai_epi16(rws, 4); - __m128i bw = _mm_srai_epi16(bws, 4); - __m128i gw = _mm_srai_epi16(gws, 4); - - // back to byte, set up for transpose - __m128i brb = _mm_packus_epi16(rw, bw); - __m128i gxb = _mm_packus_epi16(gw, xw); - - // transpose to interleave channels - __m128i t0 = _mm_unpacklo_epi8(brb, gxb); - __m128i t1 = _mm_unpackhi_epi8(brb, gxb); - __m128i o0 = _mm_unpacklo_epi16(t0, t1); - __m128i o1 = _mm_unpackhi_epi16(t0, t1); - - // store - _mm_storeu_si128((__m128i *) (out + 0), o0); - 
_mm_storeu_si128((__m128i *) (out + 16), o1); - out += 32; - } - } -#endif - -#ifdef STBI_NEON - // in this version, step=3 support would be easy to add. but is there demand? - if (step == 4) { - // this is a fairly straightforward implementation and not super-optimized. - uint8x8_t signflip = vdup_n_u8(0x80); - int16x8_t cr_const0 = vdupq_n_s16( (short) ( 1.40200f*4096.0f+0.5f)); - int16x8_t cr_const1 = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f)); - int16x8_t cb_const0 = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f)); - int16x8_t cb_const1 = vdupq_n_s16( (short) ( 1.77200f*4096.0f+0.5f)); - - for (; i+7 < count; i += 8) { - // load - uint8x8_t y_bytes = vld1_u8(y + i); - uint8x8_t cr_bytes = vld1_u8(pcr + i); - uint8x8_t cb_bytes = vld1_u8(pcb + i); - int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip)); - int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip)); - - // expand to s16 - int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4)); - int16x8_t crw = vshll_n_s8(cr_biased, 7); - int16x8_t cbw = vshll_n_s8(cb_biased, 7); - - // color transform - int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0); - int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0); - int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1); - int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1); - int16x8_t rws = vaddq_s16(yws, cr0); - int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1); - int16x8_t bws = vaddq_s16(yws, cb1); - - // undo scaling, round, convert to byte - uint8x8x4_t o; - o.val[0] = vqrshrun_n_s16(rws, 4); - o.val[1] = vqrshrun_n_s16(gws, 4); - o.val[2] = vqrshrun_n_s16(bws, 4); - o.val[3] = vdup_n_u8(255); - - // store, interleaving r/g/b/a - vst4_u8(out, o); - out += 8*4; - } - } -#endif - - for (; i < count; ++i) { - int y_fixed = (y[i] << 20) + (1<<19); // rounding - int r,g,b; - int cr = pcr[i] - 128; - int cb = pcb[i] - 128; - r = y_fixed + cr* stbi__float2fixed(1.40200f); - g = y_fixed + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) & 
0xffff0000); - b = y_fixed + cb* stbi__float2fixed(1.77200f); - r >>= 20; - g >>= 20; - b >>= 20; - if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; } - if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; } - if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; } - out[0] = (stbi_uc)r; - out[1] = (stbi_uc)g; - out[2] = (stbi_uc)b; - out[3] = 255; - out += step; - } -} -#endif - -// set up the kernels -static void stbi__setup_jpeg(stbi__jpeg *j) -{ - j->idct_block_kernel = stbi__idct_block; - j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row; - j->resample_row_hv_2_kernel = stbi__resample_row_hv_2; - -#ifdef STBI_SSE2 - if (stbi__sse2_available()) { - j->idct_block_kernel = stbi__idct_simd; - j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd; - j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd; - } -#endif - -#ifdef STBI_NEON - j->idct_block_kernel = stbi__idct_simd; - j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd; - j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd; -#endif -} - -// clean up the temporary component buffers -static void stbi__cleanup_jpeg(stbi__jpeg *j) -{ - stbi__free_jpeg_components(j, j->s->img_n, 0); -} - -typedef struct -{ - resample_row_func resample; - stbi_uc *line0,*line1; - int hs,vs; // expansion factor in each axis - int w_lores; // horizontal pixels pre-expansion - int ystep; // how far through vertical expansion we are - int ypos; // which pre-expansion row we're on -} stbi__resample; - -// fast 0..255 * 0..255 => 0..255 rounded multiplication -static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y) -{ - unsigned int t = x*y + 128; - return (stbi_uc) ((t + (t >>8)) >> 8); -} - -static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp) -{ - int n, decode_n, is_rgb; - z->s->img_n = 0; // make stbi__cleanup_jpeg safe - - // validate req_comp - if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error"); - - // load a jpeg image from 
whichever source, but leave in YCbCr format - if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; } - - // determine actual number of components to generate - n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1; - - is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif)); - - if (z->s->img_n == 3 && n < 3 && !is_rgb) - decode_n = 1; - else - decode_n = z->s->img_n; - - // resample and color-convert - { - int k; - unsigned int i,j; - stbi_uc *output; - stbi_uc *coutput[4]; - - stbi__resample res_comp[4]; - - for (k=0; k < decode_n; ++k) { - stbi__resample *r = &res_comp[k]; - - // allocate line buffer big enough for upsampling off the edges - // with upsample factor of 4 - z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3); - if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); } - - r->hs = z->img_h_max / z->img_comp[k].h; - r->vs = z->img_v_max / z->img_comp[k].v; - r->ystep = r->vs >> 1; - r->w_lores = (z->s->img_x + r->hs-1) / r->hs; - r->ypos = 0; - r->line0 = r->line1 = z->img_comp[k].data; - - if (r->hs == 1 && r->vs == 1) r->resample = resample_row_1; - else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2; - else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2; - else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel; - else r->resample = stbi__resample_row_generic; - } - - // can't error after this so, this is safe - output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1); - if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); } - - // now go ahead and resample - for (j=0; j < z->s->img_y; ++j) { - stbi_uc *out = output + n * z->s->img_x * j; - for (k=0; k < decode_n; ++k) { - stbi__resample *r = &res_comp[k]; - int y_bot = r->ystep >= (r->vs >> 1); - coutput[k] = r->resample(z->img_comp[k].linebuf, - y_bot ? 
r->line1 : r->line0, - y_bot ? r->line0 : r->line1, - r->w_lores, r->hs); - if (++r->ystep >= r->vs) { - r->ystep = 0; - r->line0 = r->line1; - if (++r->ypos < z->img_comp[k].y) - r->line1 += z->img_comp[k].w2; - } - } - if (n >= 3) { - stbi_uc *y = coutput[0]; - if (z->s->img_n == 3) { - if (is_rgb) { - for (i=0; i < z->s->img_x; ++i) { - out[0] = y[i]; - out[1] = coutput[1][i]; - out[2] = coutput[2][i]; - out[3] = 255; - out += n; - } - } else { - z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); - } - } else if (z->s->img_n == 4) { - if (z->app14_color_transform == 0) { // CMYK - for (i=0; i < z->s->img_x; ++i) { - stbi_uc m = coutput[3][i]; - out[0] = stbi__blinn_8x8(coutput[0][i], m); - out[1] = stbi__blinn_8x8(coutput[1][i], m); - out[2] = stbi__blinn_8x8(coutput[2][i], m); - out[3] = 255; - out += n; - } - } else if (z->app14_color_transform == 2) { // YCCK - z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); - for (i=0; i < z->s->img_x; ++i) { - stbi_uc m = coutput[3][i]; - out[0] = stbi__blinn_8x8(255 - out[0], m); - out[1] = stbi__blinn_8x8(255 - out[1], m); - out[2] = stbi__blinn_8x8(255 - out[2], m); - out += n; - } - } else { // YCbCr + alpha? 
Ignore the fourth channel for now - z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n); - } - } else - for (i=0; i < z->s->img_x; ++i) { - out[0] = out[1] = out[2] = y[i]; - out[3] = 255; // not used if n==3 - out += n; - } - } else { - if (is_rgb) { - if (n == 1) - for (i=0; i < z->s->img_x; ++i) - *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]); - else { - for (i=0; i < z->s->img_x; ++i, out += 2) { - out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]); - out[1] = 255; - } - } - } else if (z->s->img_n == 4 && z->app14_color_transform == 0) { - for (i=0; i < z->s->img_x; ++i) { - stbi_uc m = coutput[3][i]; - stbi_uc r = stbi__blinn_8x8(coutput[0][i], m); - stbi_uc g = stbi__blinn_8x8(coutput[1][i], m); - stbi_uc b = stbi__blinn_8x8(coutput[2][i], m); - out[0] = stbi__compute_y(r, g, b); - out[1] = 255; - out += n; - } - } else if (z->s->img_n == 4 && z->app14_color_transform == 2) { - for (i=0; i < z->s->img_x; ++i) { - out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]); - out[1] = 255; - out += n; - } - } else { - stbi_uc *y = coutput[0]; - if (n == 1) - for (i=0; i < z->s->img_x; ++i) out[i] = y[i]; - else - for (i=0; i < z->s->img_x; ++i) *out++ = y[i], *out++ = 255; - } - } - } - stbi__cleanup_jpeg(z); - *out_x = z->s->img_x; - *out_y = z->s->img_y; - if (comp) *comp = z->s->img_n >= 3 ? 
3 : 1; // report original components, not output - return output; - } -} - -static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) -{ - unsigned char* result; - stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg)); - STBI_NOTUSED(ri); - j->s = s; - stbi__setup_jpeg(j); - result = load_jpeg_image(j, x,y,comp,req_comp); - STBI_FREE(j); - return result; -} - -static int stbi__jpeg_test(stbi__context *s) -{ - int r; - stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg)); - j->s = s; - stbi__setup_jpeg(j); - r = stbi__decode_jpeg_header(j, STBI__SCAN_type); - stbi__rewind(s); - STBI_FREE(j); - return r; -} - -static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp) -{ - if (!stbi__decode_jpeg_header(j, STBI__SCAN_header)) { - stbi__rewind( j->s ); - return 0; - } - if (x) *x = j->s->img_x; - if (y) *y = j->s->img_y; - if (comp) *comp = j->s->img_n >= 3 ? 3 : 1; - return 1; -} - -static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp) -{ - int result; - stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg))); - j->s = s; - result = stbi__jpeg_info_raw(j, x, y, comp); - STBI_FREE(j); - return result; -} -#endif - -// public domain zlib decode v0.2 Sean Barrett 2006-11-18 -// simple implementation -// - all input must be provided in an upfront buffer -// - all output is written to a single output buffer (can malloc/realloc) -// performance -// - fast huffman - -#ifndef STBI_NO_ZLIB - -// fast-way is faster to check than jpeg huffman, but slow way is slower -#define STBI__ZFAST_BITS 9 // accelerate all cases in default tables -#define STBI__ZFAST_MASK ((1 << STBI__ZFAST_BITS) - 1) - -// zlib-style huffman encoding -// (jpegs packs from left, zlib from right, so can't share code) -typedef struct -{ - stbi__uint16 fast[1 << STBI__ZFAST_BITS]; - stbi__uint16 firstcode[16]; - int maxcode[17]; - stbi__uint16 firstsymbol[16]; - stbi_uc size[288]; - stbi__uint16 
value[288]; -} stbi__zhuffman; - -stbi_inline static int stbi__bitreverse16(int n) -{ - n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1); - n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2); - n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4); - n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8); - return n; -} - -stbi_inline static int stbi__bit_reverse(int v, int bits) -{ - STBI_ASSERT(bits <= 16); - // to bit reverse n bits, reverse 16 and shift - // e.g. 11 bits, bit reverse and shift away 5 - return stbi__bitreverse16(v) >> (16-bits); -} - -static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int num) -{ - int i,k=0; - int code, next_code[16], sizes[17]; - - // DEFLATE spec for generating codes - memset(sizes, 0, sizeof(sizes)); - memset(z->fast, 0, sizeof(z->fast)); - for (i=0; i < num; ++i) - ++sizes[sizelist[i]]; - sizes[0] = 0; - for (i=1; i < 16; ++i) - if (sizes[i] > (1 << i)) - return stbi__err("bad sizes", "Corrupt PNG"); - code = 0; - for (i=1; i < 16; ++i) { - next_code[i] = code; - z->firstcode[i] = (stbi__uint16) code; - z->firstsymbol[i] = (stbi__uint16) k; - code = (code + sizes[i]); - if (sizes[i]) - if (code-1 >= (1 << i)) return stbi__err("bad codelengths","Corrupt PNG"); - z->maxcode[i] = code << (16-i); // preshift for inner loop - code <<= 1; - k += sizes[i]; - } - z->maxcode[16] = 0x10000; // sentinel - for (i=0; i < num; ++i) { - int s = sizelist[i]; - if (s) { - int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s]; - stbi__uint16 fastv = (stbi__uint16) ((s << 9) | i); - z->size [c] = (stbi_uc ) s; - z->value[c] = (stbi__uint16) i; - if (s <= STBI__ZFAST_BITS) { - int j = stbi__bit_reverse(next_code[s],s); - while (j < (1 << STBI__ZFAST_BITS)) { - z->fast[j] = fastv; - j += (1 << s); - } - } - ++next_code[s]; - } - } - return 1; -} - -// zlib-from-memory implementation for PNG reading -// because PNG allows splitting the zlib stream arbitrarily, -// and it's annoying structurally to have PNG call ZLIB call PNG, -// we require 
PNG read all the IDATs and combine them into a single -// memory buffer - -typedef struct -{ - stbi_uc *zbuffer, *zbuffer_end; - int num_bits; - stbi__uint32 code_buffer; - - char *zout; - char *zout_start; - char *zout_end; - int z_expandable; - - stbi__zhuffman z_length, z_distance; -} stbi__zbuf; - -stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z) -{ - if (z->zbuffer >= z->zbuffer_end) return 0; - return *z->zbuffer++; -} - -static void stbi__fill_bits(stbi__zbuf *z) -{ - do { - STBI_ASSERT(z->code_buffer < (1U << z->num_bits)); - z->code_buffer |= (unsigned int) stbi__zget8(z) << z->num_bits; - z->num_bits += 8; - } while (z->num_bits <= 24); -} - -stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n) -{ - unsigned int k; - if (z->num_bits < n) stbi__fill_bits(z); - k = z->code_buffer & ((1 << n) - 1); - z->code_buffer >>= n; - z->num_bits -= n; - return k; -} - -static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z) -{ - int b,s,k; - // not resolved by fast table, so compute it the slow way - // use jpeg approach, which requires MSbits at top - k = stbi__bit_reverse(a->code_buffer, 16); - for (s=STBI__ZFAST_BITS+1; ; ++s) - if (k < z->maxcode[s]) - break; - if (s == 16) return -1; // invalid code! 
- // code size is s, so: - b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s]; - STBI_ASSERT(z->size[b] == s); - a->code_buffer >>= s; - a->num_bits -= s; - return z->value[b]; -} - -stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z) -{ - int b,s; - if (a->num_bits < 16) stbi__fill_bits(a); - b = z->fast[a->code_buffer & STBI__ZFAST_MASK]; - if (b) { - s = b >> 9; - a->code_buffer >>= s; - a->num_bits -= s; - return b & 511; - } - return stbi__zhuffman_decode_slowpath(a, z); -} - -static int stbi__zexpand(stbi__zbuf *z, char *zout, int n) // need to make room for n bytes -{ - char *q; - int cur, limit, old_limit; - z->zout = zout; - if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG"); - cur = (int) (z->zout - z->zout_start); - limit = old_limit = (int) (z->zout_end - z->zout_start); - while (cur + n > limit) - limit *= 2; - q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit); - STBI_NOTUSED(old_limit); - if (q == NULL) return stbi__err("outofmem", "Out of memory"); - z->zout_start = q; - z->zout = q + cur; - z->zout_end = q + limit; - return 1; -} - -static int stbi__zlength_base[31] = { - 3,4,5,6,7,8,9,10,11,13, - 15,17,19,23,27,31,35,43,51,59, - 67,83,99,115,131,163,195,227,258,0,0 }; - -static int stbi__zlength_extra[31]= -{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 }; - -static int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193, -257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0}; - -static int stbi__zdist_extra[32] = -{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; - -static int stbi__parse_huffman_block(stbi__zbuf *a) -{ - char *zout = a->zout; - for(;;) { - int z = stbi__zhuffman_decode(a, &a->z_length); - if (z < 256) { - if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes - if (zout >= a->zout_end) { - if (!stbi__zexpand(a, zout, 1)) return 0; - zout = a->zout; - } 
- *zout++ = (char) z; - } else { - stbi_uc *p; - int len,dist; - if (z == 256) { - a->zout = zout; - return 1; - } - z -= 257; - len = stbi__zlength_base[z]; - if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]); - z = stbi__zhuffman_decode(a, &a->z_distance); - if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); - dist = stbi__zdist_base[z]; - if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]); - if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG"); - if (zout + len > a->zout_end) { - if (!stbi__zexpand(a, zout, len)) return 0; - zout = a->zout; - } - p = (stbi_uc *) (zout - dist); - if (dist == 1) { // run of one byte; common in images. - stbi_uc v = *p; - if (len) { do *zout++ = v; while (--len); } - } else { - if (len) { do *zout++ = *p++; while (--len); } - } - } - } -} - -static int stbi__compute_huffman_codes(stbi__zbuf *a) -{ - static stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 }; - stbi__zhuffman z_codelength; - stbi_uc lencodes[286+32+137];//padding for maximum single op - stbi_uc codelength_sizes[19]; - int i,n; - - int hlit = stbi__zreceive(a,5) + 257; - int hdist = stbi__zreceive(a,5) + 1; - int hclen = stbi__zreceive(a,4) + 4; - int ntot = hlit + hdist; - - memset(codelength_sizes, 0, sizeof(codelength_sizes)); - for (i=0; i < hclen; ++i) { - int s = stbi__zreceive(a,3); - codelength_sizes[length_dezigzag[i]] = (stbi_uc) s; - } - if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0; - - n = 0; - while (n < ntot) { - int c = stbi__zhuffman_decode(a, &z_codelength); - if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG"); - if (c < 16) - lencodes[n++] = (stbi_uc) c; - else { - stbi_uc fill = 0; - if (c == 16) { - c = stbi__zreceive(a,2)+3; - if (n == 0) return stbi__err("bad codelengths", "Corrupt PNG"); - fill = lencodes[n-1]; - } else if (c == 17) - c = stbi__zreceive(a,3)+3; - else { - 
STBI_ASSERT(c == 18); - c = stbi__zreceive(a,7)+11; - } - if (ntot - n < c) return stbi__err("bad codelengths", "Corrupt PNG"); - memset(lencodes+n, fill, c); - n += c; - } - } - if (n != ntot) return stbi__err("bad codelengths","Corrupt PNG"); - if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0; - if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0; - return 1; -} - -static int stbi__parse_uncompressed_block(stbi__zbuf *a) -{ - stbi_uc header[4]; - int len,nlen,k; - if (a->num_bits & 7) - stbi__zreceive(a, a->num_bits & 7); // discard - // drain the bit-packed data into header - k = 0; - while (a->num_bits > 0) { - header[k++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check - a->code_buffer >>= 8; - a->num_bits -= 8; - } - STBI_ASSERT(a->num_bits == 0); - // now fill header the normal way - while (k < 4) - header[k++] = stbi__zget8(a); - len = header[1] * 256 + header[0]; - nlen = header[3] * 256 + header[2]; - if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG"); - if (a->zbuffer + len > a->zbuffer_end) return stbi__err("read past buffer","Corrupt PNG"); - if (a->zout + len > a->zout_end) - if (!stbi__zexpand(a, a->zout, len)) return 0; - memcpy(a->zout, a->zbuffer, len); - a->zbuffer += len; - a->zout += len; - return 1; -} - -static int stbi__parse_zlib_header(stbi__zbuf *a) -{ - int cmf = stbi__zget8(a); - int cm = cmf & 15; - /* int cinfo = cmf >> 4; */ - int flg = stbi__zget8(a); - if ((cmf*256+flg) % 31 != 0) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec - if (flg & 32) return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png - if (cm != 8) return stbi__err("bad compression","Corrupt PNG"); // DEFLATE required for png - // window = 1 << (8 + cinfo)... 
but who cares, we fully buffer output - return 1; -} - -static const stbi_uc stbi__zdefault_length[288] = -{ - 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, - 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, - 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, - 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, - 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, - 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, - 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, - 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, - 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8 -}; -static const stbi_uc stbi__zdefault_distance[32] = -{ - 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5 -}; -/* -Init algorithm: -{ - int i; // use <= to match clearly with spec - for (i=0; i <= 143; ++i) stbi__zdefault_length[i] = 8; - for ( ; i <= 255; ++i) stbi__zdefault_length[i] = 9; - for ( ; i <= 279; ++i) stbi__zdefault_length[i] = 7; - for ( ; i <= 287; ++i) stbi__zdefault_length[i] = 8; - - for (i=0; i <= 31; ++i) stbi__zdefault_distance[i] = 5; -} -*/ - -static int stbi__parse_zlib(stbi__zbuf *a, int parse_header) -{ - int final, type; - if (parse_header) - if (!stbi__parse_zlib_header(a)) return 0; - a->num_bits = 0; - a->code_buffer = 0; - do { - final = stbi__zreceive(a,1); - type = stbi__zreceive(a,2); - if (type == 0) { - if (!stbi__parse_uncompressed_block(a)) return 0; - } else if (type == 3) { - return 0; - } else { - if (type == 1) { - // use fixed code lengths - if (!stbi__zbuild_huffman(&a->z_length , stbi__zdefault_length , 288)) return 0; - if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance, 32)) return 0; - } else { - if (!stbi__compute_huffman_codes(a)) return 0; - } - if (!stbi__parse_huffman_block(a)) return 0; - } - } while (!final); - return 1; -} - -static int stbi__do_zlib(stbi__zbuf *a, char 
*obuf, int olen, int exp, int parse_header) -{ - a->zout_start = obuf; - a->zout = obuf; - a->zout_end = obuf + olen; - a->z_expandable = exp; - - return stbi__parse_zlib(a, parse_header); -} - -STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen) -{ - stbi__zbuf a; - char *p = (char *) stbi__malloc(initial_size); - if (p == NULL) return NULL; - a.zbuffer = (stbi_uc *) buffer; - a.zbuffer_end = (stbi_uc *) buffer + len; - if (stbi__do_zlib(&a, p, initial_size, 1, 1)) { - if (outlen) *outlen = (int) (a.zout - a.zout_start); - return a.zout_start; - } else { - STBI_FREE(a.zout_start); - return NULL; - } -} - -STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen) -{ - return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen); -} - -STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header) -{ - stbi__zbuf a; - char *p = (char *) stbi__malloc(initial_size); - if (p == NULL) return NULL; - a.zbuffer = (stbi_uc *) buffer; - a.zbuffer_end = (stbi_uc *) buffer + len; - if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) { - if (outlen) *outlen = (int) (a.zout - a.zout_start); - return a.zout_start; - } else { - STBI_FREE(a.zout_start); - return NULL; - } -} - -STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen) -{ - stbi__zbuf a; - a.zbuffer = (stbi_uc *) ibuffer; - a.zbuffer_end = (stbi_uc *) ibuffer + ilen; - if (stbi__do_zlib(&a, obuffer, olen, 0, 1)) - return (int) (a.zout - a.zout_start); - else - return -1; -} - -STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen) -{ - stbi__zbuf a; - char *p = (char *) stbi__malloc(16384); - if (p == NULL) return NULL; - a.zbuffer = (stbi_uc *) buffer; - a.zbuffer_end = (stbi_uc *) buffer+len; - if (stbi__do_zlib(&a, p, 16384, 1, 0)) { - if (outlen) *outlen = (int) (a.zout - 
a.zout_start); - return a.zout_start; - } else { - STBI_FREE(a.zout_start); - return NULL; - } -} - -STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen) -{ - stbi__zbuf a; - a.zbuffer = (stbi_uc *) ibuffer; - a.zbuffer_end = (stbi_uc *) ibuffer + ilen; - if (stbi__do_zlib(&a, obuffer, olen, 0, 0)) - return (int) (a.zout - a.zout_start); - else - return -1; -} -#endif - -// public domain "baseline" PNG decoder v0.10 Sean Barrett 2006-11-18 -// simple implementation -// - only 8-bit samples -// - no CRC checking -// - allocates lots of intermediate memory -// - avoids problem of streaming data between subsystems -// - avoids explicit window management -// performance -// - uses stb_zlib, a PD zlib implementation with fast huffman decoding - -#ifndef STBI_NO_PNG -typedef struct -{ - stbi__uint32 length; - stbi__uint32 type; -} stbi__pngchunk; - -static stbi__pngchunk stbi__get_chunk_header(stbi__context *s) -{ - stbi__pngchunk c; - c.length = stbi__get32be(s); - c.type = stbi__get32be(s); - return c; -} - -static int stbi__check_png_header(stbi__context *s) -{ - static stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 }; - int i; - for (i=0; i < 8; ++i) - if (stbi__get8(s) != png_sig[i]) return stbi__err("bad png sig","Not a PNG"); - return 1; -} - -typedef struct -{ - stbi__context *s; - stbi_uc *idata, *expanded, *out; - int depth; -} stbi__png; - - -enum { - STBI__F_none=0, - STBI__F_sub=1, - STBI__F_up=2, - STBI__F_avg=3, - STBI__F_paeth=4, - // synthetic filters used for first scanline to avoid needing a dummy row of 0s - STBI__F_avg_first, - STBI__F_paeth_first -}; - -static stbi_uc first_row_filter[5] = -{ - STBI__F_none, - STBI__F_sub, - STBI__F_none, - STBI__F_avg_first, - STBI__F_paeth_first -}; - -static int stbi__paeth(int a, int b, int c) -{ - int p = a + b - c; - int pa = abs(p-a); - int pb = abs(p-b); - int pc = abs(p-c); - if (pa <= pb && pa <= pc) return a; - if (pb <= pc) return b; - return c; -} - 
-static stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 }; - -// create the png data from post-deflated data -static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color) -{ - int bytes = (depth == 16? 2 : 1); - stbi__context *s = a->s; - stbi__uint32 i,j,stride = x*out_n*bytes; - stbi__uint32 img_len, img_width_bytes; - int k; - int img_n = s->img_n; // copy it into a local for later - - int output_bytes = out_n*bytes; - int filter_bytes = img_n*bytes; - int width = x; - - STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1); - a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into - if (!a->out) return stbi__err("outofmem", "Out of memory"); - - img_width_bytes = (((img_n * x * depth) + 7) >> 3); - img_len = (img_width_bytes + 1) * y; - // we used to check for exact match between raw_len and img_len on non-interlaced PNGs, - // but issue #276 reported a PNG in the wild that had extra data at the end (all zeros), - // so just check for raw_len < img_len always. 
- if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG"); - - for (j=0; j < y; ++j) { - stbi_uc *cur = a->out + stride*j; - stbi_uc *prior; - int filter = *raw++; - - if (filter > 4) - return stbi__err("invalid filter","Corrupt PNG"); - - if (depth < 8) { - STBI_ASSERT(img_width_bytes <= x); - cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place - filter_bytes = 1; - width = img_width_bytes; - } - prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above - - // if first row, use special filter that doesn't sample previous row - if (j == 0) filter = first_row_filter[filter]; - - // handle first byte explicitly - for (k=0; k < filter_bytes; ++k) { - switch (filter) { - case STBI__F_none : cur[k] = raw[k]; break; - case STBI__F_sub : cur[k] = raw[k]; break; - case STBI__F_up : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break; - case STBI__F_avg : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break; - case STBI__F_paeth : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break; - case STBI__F_avg_first : cur[k] = raw[k]; break; - case STBI__F_paeth_first: cur[k] = raw[k]; break; - } - } - - if (depth == 8) { - if (img_n != out_n) - cur[img_n] = 255; // first pixel - raw += img_n; - cur += out_n; - prior += out_n; - } else if (depth == 16) { - if (img_n != out_n) { - cur[filter_bytes] = 255; // first pixel top byte - cur[filter_bytes+1] = 255; // first pixel bottom byte - } - raw += filter_bytes; - cur += output_bytes; - prior += output_bytes; - } else { - raw += 1; - cur += 1; - prior += 1; - } - - // this is a little gross, so that we don't switch per-pixel or per-component - if (depth < 8 || img_n == out_n) { - int nk = (width - 1)*filter_bytes; - #define STBI__CASE(f) \ - case f: \ - for (k=0; k < nk; ++k) - switch (filter) { - // "none" filter turns into a memcpy here; make that explicit. 
- case STBI__F_none: memcpy(cur, raw, nk); break; - STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); } break; - STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break; - STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); } break; - STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); } break; - STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); } break; - STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); } break; - } - #undef STBI__CASE - raw += nk; - } else { - STBI_ASSERT(img_n+1 == out_n); - #define STBI__CASE(f) \ - case f: \ - for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \ - for (k=0; k < filter_bytes; ++k) - switch (filter) { - STBI__CASE(STBI__F_none) { cur[k] = raw[k]; } break; - STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); } break; - STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break; - STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); } break; - STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); } break; - STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); } break; - STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); } break; - } - #undef STBI__CASE - - // the loop above sets the high byte of the pixels' alpha, but for - // 16 bit png files we also need the low byte set. we'll do that here. 
- if (depth == 16) { - cur = a->out + stride*j; // start at the beginning of the row again - for (i=0; i < x; ++i,cur+=output_bytes) { - cur[filter_bytes+1] = 255; - } - } - } - } - - // we make a separate pass to expand bits to pixels; for performance, - // this could run two scanlines behind the above code, so it won't - // intefere with filtering but will still be in the cache. - if (depth < 8) { - for (j=0; j < y; ++j) { - stbi_uc *cur = a->out + stride*j; - stbi_uc *in = a->out + stride*j + x*out_n - img_width_bytes; - // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit - // png guarante byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop - stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range - - // note that the final byte might overshoot and write more data than desired. - // we can allocate enough data that this never writes out of memory, but it - // could also overwrite the next scanline. can it overwrite non-empty data - // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel. 
- // so we need to explicitly clamp the final ones - - if (depth == 4) { - for (k=x*img_n; k >= 2; k-=2, ++in) { - *cur++ = scale * ((*in >> 4) ); - *cur++ = scale * ((*in ) & 0x0f); - } - if (k > 0) *cur++ = scale * ((*in >> 4) ); - } else if (depth == 2) { - for (k=x*img_n; k >= 4; k-=4, ++in) { - *cur++ = scale * ((*in >> 6) ); - *cur++ = scale * ((*in >> 4) & 0x03); - *cur++ = scale * ((*in >> 2) & 0x03); - *cur++ = scale * ((*in ) & 0x03); - } - if (k > 0) *cur++ = scale * ((*in >> 6) ); - if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03); - if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03); - } else if (depth == 1) { - for (k=x*img_n; k >= 8; k-=8, ++in) { - *cur++ = scale * ((*in >> 7) ); - *cur++ = scale * ((*in >> 6) & 0x01); - *cur++ = scale * ((*in >> 5) & 0x01); - *cur++ = scale * ((*in >> 4) & 0x01); - *cur++ = scale * ((*in >> 3) & 0x01); - *cur++ = scale * ((*in >> 2) & 0x01); - *cur++ = scale * ((*in >> 1) & 0x01); - *cur++ = scale * ((*in ) & 0x01); - } - if (k > 0) *cur++ = scale * ((*in >> 7) ); - if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01); - if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01); - if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01); - if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01); - if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01); - if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01); - } - if (img_n != out_n) { - int q; - // insert alpha = 255 - cur = a->out + stride*j; - if (img_n == 1) { - for (q=x-1; q >= 0; --q) { - cur[q*2+1] = 255; - cur[q*2+0] = cur[q]; - } - } else { - STBI_ASSERT(img_n == 3); - for (q=x-1; q >= 0; --q) { - cur[q*4+3] = 255; - cur[q*4+2] = cur[q*3+2]; - cur[q*4+1] = cur[q*3+1]; - cur[q*4+0] = cur[q*3+0]; - } - } - } - } - } else if (depth == 16) { - // force the image data from big-endian to platform-native. - // this is done in a separate pass due to the decoding relying - // on the data being untouched, but could probably be done - // per-line during decode if care is taken. 
- stbi_uc *cur = a->out; - stbi__uint16 *cur16 = (stbi__uint16*)cur; - - for(i=0; i < x*y*out_n; ++i,cur16++,cur+=2) { - *cur16 = (cur[0] << 8) | cur[1]; - } - } - - return 1; -} - -static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced) -{ - int bytes = (depth == 16 ? 2 : 1); - int out_bytes = out_n * bytes; - stbi_uc *final; - int p; - if (!interlaced) - return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color); - - // de-interlacing - final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0); - for (p=0; p < 7; ++p) { - int xorig[] = { 0,4,0,2,0,1,0 }; - int yorig[] = { 0,0,4,0,2,0,1 }; - int xspc[] = { 8,8,4,4,2,2,1 }; - int yspc[] = { 8,8,8,4,4,2,2 }; - int i,j,x,y; - // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1 - x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p]; - y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p]; - if (x && y) { - stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y; - if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) { - STBI_FREE(final); - return 0; - } - for (j=0; j < y; ++j) { - for (i=0; i < x; ++i) { - int out_y = j*yspc[p]+yorig[p]; - int out_x = i*xspc[p]+xorig[p]; - memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes, - a->out + (j*x+i)*out_bytes, out_bytes); - } - } - STBI_FREE(a->out); - image_data += img_len; - image_data_len -= img_len; - } - } - a->out = final; - - return 1; -} - -static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n) -{ - stbi__context *s = z->s; - stbi__uint32 i, pixel_count = s->img_x * s->img_y; - stbi_uc *p = z->out; - - // compute color-based transparency, assuming we've - // already got 255 as the alpha value in the output - STBI_ASSERT(out_n == 2 || out_n == 4); - - if (out_n == 2) { - for (i=0; i < pixel_count; ++i) { - p[1] = (p[0] == tc[0] 
? 0 : 255); - p += 2; - } - } else { - for (i=0; i < pixel_count; ++i) { - if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) - p[3] = 0; - p += 4; - } - } - return 1; -} - -static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3], int out_n) -{ - stbi__context *s = z->s; - stbi__uint32 i, pixel_count = s->img_x * s->img_y; - stbi__uint16 *p = (stbi__uint16*) z->out; - - // compute color-based transparency, assuming we've - // already got 65535 as the alpha value in the output - STBI_ASSERT(out_n == 2 || out_n == 4); - - if (out_n == 2) { - for (i = 0; i < pixel_count; ++i) { - p[1] = (p[0] == tc[0] ? 0 : 65535); - p += 2; - } - } else { - for (i = 0; i < pixel_count; ++i) { - if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) - p[3] = 0; - p += 4; - } - } - return 1; -} - -static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n) -{ - stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y; - stbi_uc *p, *temp_out, *orig = a->out; - - p = (stbi_uc *) stbi__malloc_mad2(pixel_count, pal_img_n, 0); - if (p == NULL) return stbi__err("outofmem", "Out of memory"); - - // between here and free(out) below, exitting would leak - temp_out = p; - - if (pal_img_n == 3) { - for (i=0; i < pixel_count; ++i) { - int n = orig[i]*4; - p[0] = palette[n ]; - p[1] = palette[n+1]; - p[2] = palette[n+2]; - p += 3; - } - } else { - for (i=0; i < pixel_count; ++i) { - int n = orig[i]*4; - p[0] = palette[n ]; - p[1] = palette[n+1]; - p[2] = palette[n+2]; - p[3] = palette[n+3]; - p += 4; - } - } - STBI_FREE(a->out); - a->out = temp_out; - - STBI_NOTUSED(len); - - return 1; -} - -static int stbi__unpremultiply_on_load = 0; -static int stbi__de_iphone_flag = 0; - -STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply) -{ - stbi__unpremultiply_on_load = flag_true_if_should_unpremultiply; -} - -STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert) -{ - stbi__de_iphone_flag = 
flag_true_if_should_convert; -} - -static void stbi__de_iphone(stbi__png *z) -{ - stbi__context *s = z->s; - stbi__uint32 i, pixel_count = s->img_x * s->img_y; - stbi_uc *p = z->out; - - if (s->img_out_n == 3) { // convert bgr to rgb - for (i=0; i < pixel_count; ++i) { - stbi_uc t = p[0]; - p[0] = p[2]; - p[2] = t; - p += 3; - } - } else { - STBI_ASSERT(s->img_out_n == 4); - if (stbi__unpremultiply_on_load) { - // convert bgr to rgb and unpremultiply - for (i=0; i < pixel_count; ++i) { - stbi_uc a = p[3]; - stbi_uc t = p[0]; - if (a) { - stbi_uc half = a / 2; - p[0] = (p[2] * 255 + half) / a; - p[1] = (p[1] * 255 + half) / a; - p[2] = ( t * 255 + half) / a; - } else { - p[0] = p[2]; - p[2] = t; - } - p += 4; - } - } else { - // convert bgr to rgb - for (i=0; i < pixel_count; ++i) { - stbi_uc t = p[0]; - p[0] = p[2]; - p[2] = t; - p += 4; - } - } - } -} - -#define STBI__PNG_TYPE(a,b,c,d) (((a) << 24) + ((b) << 16) + ((c) << 8) + (d)) - -static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp) -{ - stbi_uc palette[1024], pal_img_n=0; - stbi_uc has_trans=0, tc[3]; - stbi__uint16 tc16[3]; - stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0; - int first=1,k,interlace=0, color=0, is_iphone=0; - stbi__context *s = z->s; - - z->expanded = NULL; - z->idata = NULL; - z->out = NULL; - - if (!stbi__check_png_header(s)) return 0; - - if (scan == STBI__SCAN_type) return 1; - - for (;;) { - stbi__pngchunk c = stbi__get_chunk_header(s); - switch (c.type) { - case STBI__PNG_TYPE('C','g','B','I'): - is_iphone = 1; - stbi__skip(s, c.length); - break; - case STBI__PNG_TYPE('I','H','D','R'): { - int comp,filter; - if (!first) return stbi__err("multiple IHDR","Corrupt PNG"); - first = 0; - if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG"); - s->img_x = stbi__get32be(s); if (s->img_x > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)"); - s->img_y = stbi__get32be(s); if (s->img_y > (1 << 24)) return stbi__err("too large","Very large image 
(corrupt?)"); - z->depth = stbi__get8(s); if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16) return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only"); - color = stbi__get8(s); if (color > 6) return stbi__err("bad ctype","Corrupt PNG"); - if (color == 3 && z->depth == 16) return stbi__err("bad ctype","Corrupt PNG"); - if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG"); - comp = stbi__get8(s); if (comp) return stbi__err("bad comp method","Corrupt PNG"); - filter= stbi__get8(s); if (filter) return stbi__err("bad filter method","Corrupt PNG"); - interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG"); - if (!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG"); - if (!pal_img_n) { - s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0); - if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode"); - if (scan == STBI__SCAN_header) return 1; - } else { - // if paletted, then pal_n is our final components, and - // img_n is # components to decompress/filter. 
- s->img_n = 1; - if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG"); - // if SCAN_header, have to scan to see if we have a tRNS - } - break; - } - - case STBI__PNG_TYPE('P','L','T','E'): { - if (first) return stbi__err("first not IHDR", "Corrupt PNG"); - if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG"); - pal_len = c.length / 3; - if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG"); - for (i=0; i < pal_len; ++i) { - palette[i*4+0] = stbi__get8(s); - palette[i*4+1] = stbi__get8(s); - palette[i*4+2] = stbi__get8(s); - palette[i*4+3] = 255; - } - break; - } - - case STBI__PNG_TYPE('t','R','N','S'): { - if (first) return stbi__err("first not IHDR", "Corrupt PNG"); - if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG"); - if (pal_img_n) { - if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; } - if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG"); - if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG"); - pal_img_n = 4; - for (i=0; i < c.length; ++i) - palette[i*4+3] = stbi__get8(s); - } else { - if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG"); - if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG"); - has_trans = 1; - if (z->depth == 16) { - for (k = 0; k < s->img_n; ++k) tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is - } else { - for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger - } - } - break; - } - - case STBI__PNG_TYPE('I','D','A','T'): { - if (first) return stbi__err("first not IHDR", "Corrupt PNG"); - if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG"); - if (scan == STBI__SCAN_header) { s->img_n = pal_img_n; return 1; } - if ((int)(ioff + c.length) < (int)ioff) return 0; - if (ioff + c.length > idata_limit) { - stbi__uint32 idata_limit_old = 
idata_limit; - stbi_uc *p; - if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096; - while (ioff + c.length > idata_limit) - idata_limit *= 2; - STBI_NOTUSED(idata_limit_old); - p = (stbi_uc *) STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory"); - z->idata = p; - } - if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG"); - ioff += c.length; - break; - } - - case STBI__PNG_TYPE('I','E','N','D'): { - stbi__uint32 raw_len, bpl; - if (first) return stbi__err("first not IHDR", "Corrupt PNG"); - if (scan != STBI__SCAN_load) return 1; - if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG"); - // initial guess for decoded data size to avoid unnecessary reallocs - bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component - raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */; - z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone); - if (z->expanded == NULL) return 0; // zlib should set error - STBI_FREE(z->idata); z->idata = NULL; - if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans) - s->img_out_n = s->img_n+1; - else - s->img_out_n = s->img_n; - if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0; - if (has_trans) { - if (z->depth == 16) { - if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0; - } else { - if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0; - } - } - if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2) - stbi__de_iphone(z); - if (pal_img_n) { - // pal_img_n == 3 or 4 - s->img_n = pal_img_n; // record the actual colors we had - s->img_out_n = pal_img_n; - if (req_comp >= 3) s->img_out_n = req_comp; - if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n)) - return 0; - } else if 
(has_trans) { - // non-paletted image with tRNS -> source image has (constant) alpha - ++s->img_n; - } - STBI_FREE(z->expanded); z->expanded = NULL; - return 1; - } - - default: - // if critical, fail - if (first) return stbi__err("first not IHDR", "Corrupt PNG"); - if ((c.type & (1 << 29)) == 0) { - #ifndef STBI_NO_FAILURE_STRINGS - // not threadsafe - static char invalid_chunk[] = "XXXX PNG chunk not known"; - invalid_chunk[0] = STBI__BYTECAST(c.type >> 24); - invalid_chunk[1] = STBI__BYTECAST(c.type >> 16); - invalid_chunk[2] = STBI__BYTECAST(c.type >> 8); - invalid_chunk[3] = STBI__BYTECAST(c.type >> 0); - #endif - return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type"); - } - stbi__skip(s, c.length); - break; - } - // end of PNG chunk, read and skip CRC - stbi__get32be(s); - } -} - -static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, stbi__result_info *ri) -{ - void *result=NULL; - if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error"); - if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) { - if (p->depth < 8) - ri->bits_per_channel = 8; - else - ri->bits_per_channel = p->depth; - result = p->out; - p->out = NULL; - if (req_comp && req_comp != p->s->img_out_n) { - if (ri->bits_per_channel == 8) - result = stbi__convert_format((unsigned char *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y); - else - result = stbi__convert_format16((stbi__uint16 *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y); - p->s->img_out_n = req_comp; - if (result == NULL) return result; - } - *x = p->s->img_x; - *y = p->s->img_y; - if (n) *n = p->s->img_n; - } - STBI_FREE(p->out); p->out = NULL; - STBI_FREE(p->expanded); p->expanded = NULL; - STBI_FREE(p->idata); p->idata = NULL; - - return result; -} - -static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) -{ - stbi__png p; - p.s = s; - return stbi__do_png(&p, 
x,y,comp,req_comp, ri); -} - -static int stbi__png_test(stbi__context *s) -{ - int r; - r = stbi__check_png_header(s); - stbi__rewind(s); - return r; -} - -static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp) -{ - if (!stbi__parse_png_file(p, STBI__SCAN_header, 0)) { - stbi__rewind( p->s ); - return 0; - } - if (x) *x = p->s->img_x; - if (y) *y = p->s->img_y; - if (comp) *comp = p->s->img_n; - return 1; -} - -static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp) -{ - stbi__png p; - p.s = s; - return stbi__png_info_raw(&p, x, y, comp); -} -#endif - -// Microsoft/Windows BMP image - -#ifndef STBI_NO_BMP -static int stbi__bmp_test_raw(stbi__context *s) -{ - int r; - int sz; - if (stbi__get8(s) != 'B') return 0; - if (stbi__get8(s) != 'M') return 0; - stbi__get32le(s); // discard filesize - stbi__get16le(s); // discard reserved - stbi__get16le(s); // discard reserved - stbi__get32le(s); // discard data offset - sz = stbi__get32le(s); - r = (sz == 12 || sz == 40 || sz == 56 || sz == 108 || sz == 124); - return r; -} - -static int stbi__bmp_test(stbi__context *s) -{ - int r = stbi__bmp_test_raw(s); - stbi__rewind(s); - return r; -} - - -// returns 0..31 for the highest set bit -static int stbi__high_bit(unsigned int z) -{ - int n=0; - if (z == 0) return -1; - if (z >= 0x10000) n += 16, z >>= 16; - if (z >= 0x00100) n += 8, z >>= 8; - if (z >= 0x00010) n += 4, z >>= 4; - if (z >= 0x00004) n += 2, z >>= 2; - if (z >= 0x00002) n += 1, z >>= 1; - return n; -} - -static int stbi__bitcount(unsigned int a) -{ - a = (a & 0x55555555) + ((a >> 1) & 0x55555555); // max 2 - a = (a & 0x33333333) + ((a >> 2) & 0x33333333); // max 4 - a = (a + (a >> 4)) & 0x0f0f0f0f; // max 8 per 4, now 8 bits - a = (a + (a >> 8)); // max 16 per 8 bits - a = (a + (a >> 16)); // max 32 per 8 bits - return a & 0xff; -} - -static int stbi__shiftsigned(int v, int shift, int bits) -{ - int result; - int z=0; - - if (shift < 0) v <<= -shift; - else v >>= shift; - result = 
v; - - z = bits; - while (z < 8) { - result += v >> z; - z += bits; - } - return result; -} - -typedef struct -{ - int bpp, offset, hsz; - unsigned int mr,mg,mb,ma, all_a; -} stbi__bmp_data; - -static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info) -{ - int hsz; - if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP"); - stbi__get32le(s); // discard filesize - stbi__get16le(s); // discard reserved - stbi__get16le(s); // discard reserved - info->offset = stbi__get32le(s); - info->hsz = hsz = stbi__get32le(s); - info->mr = info->mg = info->mb = info->ma = 0; - - if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown"); - if (hsz == 12) { - s->img_x = stbi__get16le(s); - s->img_y = stbi__get16le(s); - } else { - s->img_x = stbi__get32le(s); - s->img_y = stbi__get32le(s); - } - if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP"); - info->bpp = stbi__get16le(s); - if (info->bpp == 1) return stbi__errpuc("monochrome", "BMP type not supported: 1-bit"); - if (hsz != 12) { - int compress = stbi__get32le(s); - if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE"); - stbi__get32le(s); // discard sizeof - stbi__get32le(s); // discard hres - stbi__get32le(s); // discard vres - stbi__get32le(s); // discard colorsused - stbi__get32le(s); // discard max important - if (hsz == 40 || hsz == 56) { - if (hsz == 56) { - stbi__get32le(s); - stbi__get32le(s); - stbi__get32le(s); - stbi__get32le(s); - } - if (info->bpp == 16 || info->bpp == 32) { - if (compress == 0) { - if (info->bpp == 32) { - info->mr = 0xffu << 16; - info->mg = 0xffu << 8; - info->mb = 0xffu << 0; - info->ma = 0xffu << 24; - info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0 - } else { - info->mr = 31u << 10; - info->mg = 31u << 5; - info->mb = 31u << 0; - } - } else if (compress 
== 3) { - info->mr = stbi__get32le(s); - info->mg = stbi__get32le(s); - info->mb = stbi__get32le(s); - // not documented, but generated by photoshop and handled by mspaint - if (info->mr == info->mg && info->mg == info->mb) { - // ?!?!? - return stbi__errpuc("bad BMP", "bad BMP"); - } - } else - return stbi__errpuc("bad BMP", "bad BMP"); - } - } else { - int i; - if (hsz != 108 && hsz != 124) - return stbi__errpuc("bad BMP", "bad BMP"); - info->mr = stbi__get32le(s); - info->mg = stbi__get32le(s); - info->mb = stbi__get32le(s); - info->ma = stbi__get32le(s); - stbi__get32le(s); // discard color space - for (i=0; i < 12; ++i) - stbi__get32le(s); // discard color space parameters - if (hsz == 124) { - stbi__get32le(s); // discard rendering intent - stbi__get32le(s); // discard offset of profile data - stbi__get32le(s); // discard size of profile data - stbi__get32le(s); // discard reserved - } - } - } - return (void *) 1; -} - - -static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) -{ - stbi_uc *out; - unsigned int mr=0,mg=0,mb=0,ma=0, all_a; - stbi_uc pal[256][4]; - int psize=0,i,j,width; - int flip_vertically, pad, target; - stbi__bmp_data info; - STBI_NOTUSED(ri); - - info.all_a = 255; - if (stbi__bmp_parse_header(s, &info) == NULL) - return NULL; // error code already set - - flip_vertically = ((int) s->img_y) > 0; - s->img_y = abs((int) s->img_y); - - mr = info.mr; - mg = info.mg; - mb = info.mb; - ma = info.ma; - all_a = info.all_a; - - if (info.hsz == 12) { - if (info.bpp < 24) - psize = (info.offset - 14 - 24) / 3; - } else { - if (info.bpp < 16) - psize = (info.offset - 14 - info.hsz) >> 2; - } - - s->img_n = ma ? 
4 : 3; - if (req_comp && req_comp >= 3) // we can directly decode 3 or 4 - target = req_comp; - else - target = s->img_n; // if they want monochrome, we'll post-convert - - // sanity-check size - if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0)) - return stbi__errpuc("too large", "Corrupt BMP"); - - out = (stbi_uc *) stbi__malloc_mad3(target, s->img_x, s->img_y, 0); - if (!out) return stbi__errpuc("outofmem", "Out of memory"); - if (info.bpp < 16) { - int z=0; - if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); } - for (i=0; i < psize; ++i) { - pal[i][2] = stbi__get8(s); - pal[i][1] = stbi__get8(s); - pal[i][0] = stbi__get8(s); - if (info.hsz != 12) stbi__get8(s); - pal[i][3] = 255; - } - stbi__skip(s, info.offset - 14 - info.hsz - psize * (info.hsz == 12 ? 3 : 4)); - if (info.bpp == 4) width = (s->img_x + 1) >> 1; - else if (info.bpp == 8) width = s->img_x; - else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); } - pad = (-width)&3; - for (j=0; j < (int) s->img_y; ++j) { - for (i=0; i < (int) s->img_x; i += 2) { - int v=stbi__get8(s),v2=0; - if (info.bpp == 4) { - v2 = v & 15; - v >>= 4; - } - out[z++] = pal[v][0]; - out[z++] = pal[v][1]; - out[z++] = pal[v][2]; - if (target == 4) out[z++] = 255; - if (i+1 == (int) s->img_x) break; - v = (info.bpp == 8) ? 
stbi__get8(s) : v2; - out[z++] = pal[v][0]; - out[z++] = pal[v][1]; - out[z++] = pal[v][2]; - if (target == 4) out[z++] = 255; - } - stbi__skip(s, pad); - } - } else { - int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0; - int z = 0; - int easy=0; - stbi__skip(s, info.offset - 14 - info.hsz); - if (info.bpp == 24) width = 3 * s->img_x; - else if (info.bpp == 16) width = 2*s->img_x; - else /* bpp = 32 and pad = 0 */ width=0; - pad = (-width) & 3; - if (info.bpp == 24) { - easy = 1; - } else if (info.bpp == 32) { - if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000) - easy = 2; - } - if (!easy) { - if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); } - // right shift amt to put high bit in position #7 - rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr); - gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg); - bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb); - ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma); - } - for (j=0; j < (int) s->img_y; ++j) { - if (easy) { - for (i=0; i < (int) s->img_x; ++i) { - unsigned char a; - out[z+2] = stbi__get8(s); - out[z+1] = stbi__get8(s); - out[z+0] = stbi__get8(s); - z += 3; - a = (easy == 2 ? stbi__get8(s) : 255); - all_a |= a; - if (target == 4) out[z++] = a; - } - } else { - int bpp = info.bpp; - for (i=0; i < (int) s->img_x; ++i) { - stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s)); - int a; - out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount)); - out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount)); - out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount)); - a = (ma ? 
stbi__shiftsigned(v & ma, ashift, acount) : 255); - all_a |= a; - if (target == 4) out[z++] = STBI__BYTECAST(a); - } - } - stbi__skip(s, pad); - } - } - - // if alpha channel is all 0s, replace with all 255s - if (target == 4 && all_a == 0) - for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4) - out[i] = 255; - - if (flip_vertically) { - stbi_uc t; - for (j=0; j < (int) s->img_y>>1; ++j) { - stbi_uc *p1 = out + j *s->img_x*target; - stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target; - for (i=0; i < (int) s->img_x*target; ++i) { - t = p1[i], p1[i] = p2[i], p2[i] = t; - } - } - } - - if (req_comp && req_comp != target) { - out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y); - if (out == NULL) return out; // stbi__convert_format frees input on failure - } - - *x = s->img_x; - *y = s->img_y; - if (comp) *comp = s->img_n; - return out; -} -#endif - -// Targa Truevision - TGA -// by Jonathan Dummer -#ifndef STBI_NO_TGA -// returns STBI_rgb or whatever, 0 on error -static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16) -{ - // only RGB or RGBA (incl. 
16bit) or grey allowed - if(is_rgb16) *is_rgb16 = 0; - switch(bits_per_pixel) { - case 8: return STBI_grey; - case 16: if(is_grey) return STBI_grey_alpha; - // else: fall-through - case 15: if(is_rgb16) *is_rgb16 = 1; - return STBI_rgb; - case 24: // fall-through - case 32: return bits_per_pixel/8; - default: return 0; - } -} - -static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp) -{ - int tga_w, tga_h, tga_comp, tga_image_type, tga_bits_per_pixel, tga_colormap_bpp; - int sz, tga_colormap_type; - stbi__get8(s); // discard Offset - tga_colormap_type = stbi__get8(s); // colormap type - if( tga_colormap_type > 1 ) { - stbi__rewind(s); - return 0; // only RGB or indexed allowed - } - tga_image_type = stbi__get8(s); // image type - if ( tga_colormap_type == 1 ) { // colormapped (paletted) image - if (tga_image_type != 1 && tga_image_type != 9) { - stbi__rewind(s); - return 0; - } - stbi__skip(s,4); // skip index of first colormap entry and number of entries - sz = stbi__get8(s); // check bits per palette color entry - if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) { - stbi__rewind(s); - return 0; - } - stbi__skip(s,4); // skip image x and y origin - tga_colormap_bpp = sz; - } else { // "normal" image w/o colormap - only RGB or grey allowed, +/- RLE - if ( (tga_image_type != 2) && (tga_image_type != 3) && (tga_image_type != 10) && (tga_image_type != 11) ) { - stbi__rewind(s); - return 0; // only RGB or grey allowed, +/- RLE - } - stbi__skip(s,9); // skip colormap specification and image x/y origin - tga_colormap_bpp = 0; - } - tga_w = stbi__get16le(s); - if( tga_w < 1 ) { - stbi__rewind(s); - return 0; // test width - } - tga_h = stbi__get16le(s); - if( tga_h < 1 ) { - stbi__rewind(s); - return 0; // test height - } - tga_bits_per_pixel = stbi__get8(s); // bits per pixel - stbi__get8(s); // ignore alpha bits - if (tga_colormap_bpp != 0) { - if((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16)) { - // when using a 
colormap, tga_bits_per_pixel is the size of the indexes - // I don't think anything but 8 or 16bit indexes makes sense - stbi__rewind(s); - return 0; - } - tga_comp = stbi__tga_get_comp(tga_colormap_bpp, 0, NULL); - } else { - tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3) || (tga_image_type == 11), NULL); - } - if(!tga_comp) { - stbi__rewind(s); - return 0; - } - if (x) *x = tga_w; - if (y) *y = tga_h; - if (comp) *comp = tga_comp; - return 1; // seems to have passed everything -} - -static int stbi__tga_test(stbi__context *s) -{ - int res = 0; - int sz, tga_color_type; - stbi__get8(s); // discard Offset - tga_color_type = stbi__get8(s); // color type - if ( tga_color_type > 1 ) goto errorEnd; // only RGB or indexed allowed - sz = stbi__get8(s); // image type - if ( tga_color_type == 1 ) { // colormapped (paletted) image - if (sz != 1 && sz != 9) goto errorEnd; // colortype 1 demands image type 1 or 9 - stbi__skip(s,4); // skip index of first colormap entry and number of entries - sz = stbi__get8(s); // check bits per palette color entry - if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd; - stbi__skip(s,4); // skip image x and y origin - } else { // "normal" image w/o colormap - if ( (sz != 2) && (sz != 3) && (sz != 10) && (sz != 11) ) goto errorEnd; // only RGB or grey allowed, +/- RLE - stbi__skip(s,9); // skip colormap specification and image x/y origin - } - if ( stbi__get16le(s) < 1 ) goto errorEnd; // test width - if ( stbi__get16le(s) < 1 ) goto errorEnd; // test height - sz = stbi__get8(s); // bits per pixel - if ( (tga_color_type == 1) && (sz != 8) && (sz != 16) ) goto errorEnd; // for colormapped images, bpp is size of an index - if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd; - - res = 1; // if we got this far, everything's good and we can return 1 instead of 0 - -errorEnd: - stbi__rewind(s); - return res; -} - -// read 16bit value and convert to 24bit 
RGB -static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out) -{ - stbi__uint16 px = (stbi__uint16)stbi__get16le(s); - stbi__uint16 fiveBitMask = 31; - // we have 3 channels with 5bits each - int r = (px >> 10) & fiveBitMask; - int g = (px >> 5) & fiveBitMask; - int b = px & fiveBitMask; - // Note that this saves the data in RGB(A) order, so it doesn't need to be swapped later - out[0] = (stbi_uc)((r * 255)/31); - out[1] = (stbi_uc)((g * 255)/31); - out[2] = (stbi_uc)((b * 255)/31); - - // some people claim that the most significant bit might be used for alpha - // (possibly if an alpha-bit is set in the "image descriptor byte") - // but that only made 16bit test images completely translucent.. - // so let's treat all 15 and 16bit TGAs as RGB with no alpha. -} - -static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) -{ - // read in the TGA header stuff - int tga_offset = stbi__get8(s); - int tga_indexed = stbi__get8(s); - int tga_image_type = stbi__get8(s); - int tga_is_RLE = 0; - int tga_palette_start = stbi__get16le(s); - int tga_palette_len = stbi__get16le(s); - int tga_palette_bits = stbi__get8(s); - int tga_x_origin = stbi__get16le(s); - int tga_y_origin = stbi__get16le(s); - int tga_width = stbi__get16le(s); - int tga_height = stbi__get16le(s); - int tga_bits_per_pixel = stbi__get8(s); - int tga_comp, tga_rgb16=0; - int tga_inverted = stbi__get8(s); - // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?) 
- // image data - unsigned char *tga_data; - unsigned char *tga_palette = NULL; - int i, j; - unsigned char raw_data[4] = {0}; - int RLE_count = 0; - int RLE_repeating = 0; - int read_next_pixel = 1; - STBI_NOTUSED(ri); - - // do a tiny bit of precessing - if ( tga_image_type >= 8 ) - { - tga_image_type -= 8; - tga_is_RLE = 1; - } - tga_inverted = 1 - ((tga_inverted >> 5) & 1); - - // If I'm paletted, then I'll use the number of bits from the palette - if ( tga_indexed ) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16); - else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16); - - if(!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency - return stbi__errpuc("bad format", "Can't find out TGA pixelformat"); - - // tga info - *x = tga_width; - *y = tga_height; - if (comp) *comp = tga_comp; - - if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0)) - return stbi__errpuc("too large", "Corrupt TGA"); - - tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0); - if (!tga_data) return stbi__errpuc("outofmem", "Out of memory"); - - // skip to the data's starting position (offset usually = 0) - stbi__skip(s, tga_offset ); - - if ( !tga_indexed && !tga_is_RLE && !tga_rgb16 ) { - for (i=0; i < tga_height; ++i) { - int row = tga_inverted ? tga_height -i - 1 : i; - stbi_uc *tga_row = tga_data + row*tga_width*tga_comp; - stbi__getn(s, tga_row, tga_width * tga_comp); - } - } else { - // do I need to load a palette? - if ( tga_indexed) - { - // any data to skip? 
(offset usually = 0) - stbi__skip(s, tga_palette_start ); - // load the palette - tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0); - if (!tga_palette) { - STBI_FREE(tga_data); - return stbi__errpuc("outofmem", "Out of memory"); - } - if (tga_rgb16) { - stbi_uc *pal_entry = tga_palette; - STBI_ASSERT(tga_comp == STBI_rgb); - for (i=0; i < tga_palette_len; ++i) { - stbi__tga_read_rgb16(s, pal_entry); - pal_entry += tga_comp; - } - } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) { - STBI_FREE(tga_data); - STBI_FREE(tga_palette); - return stbi__errpuc("bad palette", "Corrupt TGA"); - } - } - // load the data - for (i=0; i < tga_width * tga_height; ++i) - { - // if I'm in RLE mode, do I need to get a RLE stbi__pngchunk? - if ( tga_is_RLE ) - { - if ( RLE_count == 0 ) - { - // yep, get the next byte as a RLE command - int RLE_cmd = stbi__get8(s); - RLE_count = 1 + (RLE_cmd & 127); - RLE_repeating = RLE_cmd >> 7; - read_next_pixel = 1; - } else if ( !RLE_repeating ) - { - read_next_pixel = 1; - } - } else - { - read_next_pixel = 1; - } - // OK, if I need to read a pixel, do it now - if ( read_next_pixel ) - { - // load however much data we did have - if ( tga_indexed ) - { - // read in index, then perform the lookup - int pal_idx = (tga_bits_per_pixel == 8) ? 
stbi__get8(s) : stbi__get16le(s); - if ( pal_idx >= tga_palette_len ) { - // invalid index - pal_idx = 0; - } - pal_idx *= tga_comp; - for (j = 0; j < tga_comp; ++j) { - raw_data[j] = tga_palette[pal_idx+j]; - } - } else if(tga_rgb16) { - STBI_ASSERT(tga_comp == STBI_rgb); - stbi__tga_read_rgb16(s, raw_data); - } else { - // read in the data raw - for (j = 0; j < tga_comp; ++j) { - raw_data[j] = stbi__get8(s); - } - } - // clear the reading flag for the next pixel - read_next_pixel = 0; - } // end of reading a pixel - - // copy data - for (j = 0; j < tga_comp; ++j) - tga_data[i*tga_comp+j] = raw_data[j]; - - // in case we're in RLE mode, keep counting down - --RLE_count; - } - // do I need to invert the image? - if ( tga_inverted ) - { - for (j = 0; j*2 < tga_height; ++j) - { - int index1 = j * tga_width * tga_comp; - int index2 = (tga_height - 1 - j) * tga_width * tga_comp; - for (i = tga_width * tga_comp; i > 0; --i) - { - unsigned char temp = tga_data[index1]; - tga_data[index1] = tga_data[index2]; - tga_data[index2] = temp; - ++index1; - ++index2; - } - } - } - // clear my palette, if I had one - if ( tga_palette != NULL ) - { - STBI_FREE( tga_palette ); - } - } - - // swap RGB - if the source data was RGB16, it already is in the right order - if (tga_comp >= 3 && !tga_rgb16) - { - unsigned char* tga_pixel = tga_data; - for (i=0; i < tga_width * tga_height; ++i) - { - unsigned char temp = tga_pixel[0]; - tga_pixel[0] = tga_pixel[2]; - tga_pixel[2] = temp; - tga_pixel += tga_comp; - } - } - - // convert to target component count - if (req_comp && req_comp != tga_comp) - tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height); - - // the things I do to get rid of an error message, and yet keep - // Microsoft's C compilers happy... 
[8^( - tga_palette_start = tga_palette_len = tga_palette_bits = - tga_x_origin = tga_y_origin = 0; - // OK, done - return tga_data; -} -#endif - -// ************************************************************************************************* -// Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB - -#ifndef STBI_NO_PSD -static int stbi__psd_test(stbi__context *s) -{ - int r = (stbi__get32be(s) == 0x38425053); - stbi__rewind(s); - return r; -} - -static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount) -{ - int count, nleft, len; - - count = 0; - while ((nleft = pixelCount - count) > 0) { - len = stbi__get8(s); - if (len == 128) { - // No-op. - } else if (len < 128) { - // Copy next len+1 bytes literally. - len++; - if (len > nleft) return 0; // corrupt data - count += len; - while (len) { - *p = stbi__get8(s); - p += 4; - len--; - } - } else if (len > 128) { - stbi_uc val; - // Next -len+1 bytes in the dest are replicated from next source byte. - // (Interpret len as a negative 8-bit int.) - len = 257 - len; - if (len > nleft) return 0; // corrupt data - val = stbi__get8(s); - count += len; - while (len) { - *p = val; - p += 4; - len--; - } - } - } - - return 1; -} - -static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc) -{ - int pixelCount; - int channelCount, compression; - int channel, i; - int bitdepth; - int w,h; - stbi_uc *out; - STBI_NOTUSED(ri); - - // Check identifier - if (stbi__get32be(s) != 0x38425053) // "8BPS" - return stbi__errpuc("not PSD", "Corrupt PSD image"); - - // Check file type version. - if (stbi__get16be(s) != 1) - return stbi__errpuc("wrong version", "Unsupported version of PSD image"); - - // Skip 6 reserved bytes. - stbi__skip(s, 6 ); - - // Read the number of channels (R, G, B, A, etc). 
- channelCount = stbi__get16be(s); - if (channelCount < 0 || channelCount > 16) - return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image"); - - // Read the rows and columns of the image. - h = stbi__get32be(s); - w = stbi__get32be(s); - - // Make sure the depth is 8 bits. - bitdepth = stbi__get16be(s); - if (bitdepth != 8 && bitdepth != 16) - return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit"); - - // Make sure the color mode is RGB. - // Valid options are: - // 0: Bitmap - // 1: Grayscale - // 2: Indexed color - // 3: RGB color - // 4: CMYK color - // 7: Multichannel - // 8: Duotone - // 9: Lab color - if (stbi__get16be(s) != 3) - return stbi__errpuc("wrong color format", "PSD is not in RGB color format"); - - // Skip the Mode Data. (It's the palette for indexed color; other info for other modes.) - stbi__skip(s,stbi__get32be(s) ); - - // Skip the image resources. (resolution, pen tool paths, etc) - stbi__skip(s, stbi__get32be(s) ); - - // Skip the reserved data. - stbi__skip(s, stbi__get32be(s) ); - - // Find out if the data is compressed. - // Known values: - // 0: no compression - // 1: RLE compressed - compression = stbi__get16be(s); - if (compression > 1) - return stbi__errpuc("bad compression", "PSD has an unknown compression format"); - - // Check size - if (!stbi__mad3sizes_valid(4, w, h, 0)) - return stbi__errpuc("too large", "Corrupt PSD"); - - // Create the destination image. - - if (!compression && bitdepth == 16 && bpc == 16) { - out = (stbi_uc *) stbi__malloc_mad3(8, w, h, 0); - ri->bits_per_channel = 16; - } else - out = (stbi_uc *) stbi__malloc(4 * w*h); - - if (!out) return stbi__errpuc("outofmem", "Out of memory"); - pixelCount = w*h; - - // Initialize the data to zero. - //memset( out, 0, pixelCount * 4 ); - - // Finally, the image data. 
- if (compression) { - // RLE as used by .PSD and .TIFF - // Loop until you get the number of unpacked bytes you are expecting: - // Read the next source byte into n. - // If n is between 0 and 127 inclusive, copy the next n+1 bytes literally. - // Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times. - // Else if n is 128, noop. - // Endloop - - // The RLE-compressed data is preceeded by a 2-byte data count for each row in the data, - // which we're going to just skip. - stbi__skip(s, h * channelCount * 2 ); - - // Read the RLE data by channel. - for (channel = 0; channel < 4; channel++) { - stbi_uc *p; - - p = out+channel; - if (channel >= channelCount) { - // Fill this channel with default data. - for (i = 0; i < pixelCount; i++, p += 4) - *p = (channel == 3 ? 255 : 0); - } else { - // Read the RLE data. - if (!stbi__psd_decode_rle(s, p, pixelCount)) { - STBI_FREE(out); - return stbi__errpuc("corrupt", "bad RLE data"); - } - } - } - - } else { - // We're at the raw image data. It's each channel in order (Red, Green, Blue, Alpha, ...) - // where each channel consists of an 8-bit (or 16-bit) value for each pixel in the image. - - // Read the data by channel. - for (channel = 0; channel < 4; channel++) { - if (channel >= channelCount) { - // Fill this channel with default data. - if (bitdepth == 16 && bpc == 16) { - stbi__uint16 *q = ((stbi__uint16 *) out) + channel; - stbi__uint16 val = channel == 3 ? 65535 : 0; - for (i = 0; i < pixelCount; i++, q += 4) - *q = val; - } else { - stbi_uc *p = out+channel; - stbi_uc val = channel == 3 ? 
255 : 0; - for (i = 0; i < pixelCount; i++, p += 4) - *p = val; - } - } else { - if (ri->bits_per_channel == 16) { // output bpc - stbi__uint16 *q = ((stbi__uint16 *) out) + channel; - for (i = 0; i < pixelCount; i++, q += 4) - *q = (stbi__uint16) stbi__get16be(s); - } else { - stbi_uc *p = out+channel; - if (bitdepth == 16) { // input bpc - for (i = 0; i < pixelCount; i++, p += 4) - *p = (stbi_uc) (stbi__get16be(s) >> 8); - } else { - for (i = 0; i < pixelCount; i++, p += 4) - *p = stbi__get8(s); - } - } - } - } - } - - // remove weird white matte from PSD - if (channelCount >= 4) { - if (ri->bits_per_channel == 16) { - for (i=0; i < w*h; ++i) { - stbi__uint16 *pixel = (stbi__uint16 *) out + 4*i; - if (pixel[3] != 0 && pixel[3] != 65535) { - float a = pixel[3] / 65535.0f; - float ra = 1.0f / a; - float inv_a = 65535.0f * (1 - ra); - pixel[0] = (stbi__uint16) (pixel[0]*ra + inv_a); - pixel[1] = (stbi__uint16) (pixel[1]*ra + inv_a); - pixel[2] = (stbi__uint16) (pixel[2]*ra + inv_a); - } - } - } else { - for (i=0; i < w*h; ++i) { - unsigned char *pixel = out + 4*i; - if (pixel[3] != 0 && pixel[3] != 255) { - float a = pixel[3] / 255.0f; - float ra = 1.0f / a; - float inv_a = 255.0f * (1 - ra); - pixel[0] = (unsigned char) (pixel[0]*ra + inv_a); - pixel[1] = (unsigned char) (pixel[1]*ra + inv_a); - pixel[2] = (unsigned char) (pixel[2]*ra + inv_a); - } - } - } - } - - // convert to desired output format - if (req_comp && req_comp != 4) { - if (ri->bits_per_channel == 16) - out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, 4, req_comp, w, h); - else - out = stbi__convert_format(out, 4, req_comp, w, h); - if (out == NULL) return out; // stbi__convert_format frees input on failure - } - - if (comp) *comp = 4; - *y = h; - *x = w; - - return out; -} -#endif - -// ************************************************************************************************* -// Softimage PIC loader -// by Tom Seddon -// -// See 
http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format -// See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/ - -#ifndef STBI_NO_PIC -static int stbi__pic_is4(stbi__context *s,const char *str) -{ - int i; - for (i=0; i<4; ++i) - if (stbi__get8(s) != (stbi_uc)str[i]) - return 0; - - return 1; -} - -static int stbi__pic_test_core(stbi__context *s) -{ - int i; - - if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) - return 0; - - for(i=0;i<84;++i) - stbi__get8(s); - - if (!stbi__pic_is4(s,"PICT")) - return 0; - - return 1; -} - -typedef struct -{ - stbi_uc size,type,channel; -} stbi__pic_packet; - -static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest) -{ - int mask=0x80, i; - - for (i=0; i<4; ++i, mask>>=1) { - if (channel & mask) { - if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short"); - dest[i]=stbi__get8(s); - } - } - - return dest; -} - -static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src) -{ - int mask=0x80,i; - - for (i=0;i<4; ++i, mask>>=1) - if (channel&mask) - dest[i]=src[i]; -} - -static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *comp, stbi_uc *result) -{ - int act_comp=0,num_packets=0,y,chained; - stbi__pic_packet packets[10]; - - // this will (should...) cater for even some bizarre stuff like having data - // for the same channel in multiple packets. - do { - stbi__pic_packet *packet; - - if (num_packets==sizeof(packets)/sizeof(packets[0])) - return stbi__errpuc("bad format","too many packets"); - - packet = &packets[num_packets++]; - - chained = stbi__get8(s); - packet->size = stbi__get8(s); - packet->type = stbi__get8(s); - packet->channel = stbi__get8(s); - - act_comp |= packet->channel; - - if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (reading packets)"); - if (packet->size != 8) return stbi__errpuc("bad format","packet isn't 8bpp"); - } while (chained); - - *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel? 
- - for(y=0; ytype) { - default: - return stbi__errpuc("bad format","packet has bad compression type"); - - case 0: {//uncompressed - int x; - - for(x=0;xchannel,dest)) - return 0; - break; - } - - case 1://Pure RLE - { - int left=width, i; - - while (left>0) { - stbi_uc count,value[4]; - - count=stbi__get8(s); - if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pure read count)"); - - if (count > left) - count = (stbi_uc) left; - - if (!stbi__readval(s,packet->channel,value)) return 0; - - for(i=0; ichannel,dest,value); - left -= count; - } - } - break; - - case 2: {//Mixed RLE - int left=width; - while (left>0) { - int count = stbi__get8(s), i; - if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (mixed read count)"); - - if (count >= 128) { // Repeated - stbi_uc value[4]; - - if (count==128) - count = stbi__get16be(s); - else - count -= 127; - if (count > left) - return stbi__errpuc("bad file","scanline overrun"); - - if (!stbi__readval(s,packet->channel,value)) - return 0; - - for(i=0;ichannel,dest,value); - } else { // Raw - ++count; - if (count>left) return stbi__errpuc("bad file","scanline overrun"); - - for(i=0;ichannel,dest)) - return 0; - } - left-=count; - } - break; - } - } - } - } - - return result; -} - -static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp, stbi__result_info *ri) -{ - stbi_uc *result; - int i, x,y, internal_comp; - STBI_NOTUSED(ri); - - if (!comp) comp = &internal_comp; - - for (i=0; i<92; ++i) - stbi__get8(s); - - x = stbi__get16be(s); - y = stbi__get16be(s); - if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pic header)"); - if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode"); - - stbi__get32be(s); //skip `ratio' - stbi__get16be(s); //skip `fields' - stbi__get16be(s); //skip `pad' - - // intermediate buffer is RGBA - result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0); - memset(result, 0xff, 
x*y*4); - - if (!stbi__pic_load_core(s,x,y,comp, result)) { - STBI_FREE(result); - result=0; - } - *px = x; - *py = y; - if (req_comp == 0) req_comp = *comp; - result=stbi__convert_format(result,4,req_comp,x,y); - - return result; -} - -static int stbi__pic_test(stbi__context *s) -{ - int r = stbi__pic_test_core(s); - stbi__rewind(s); - return r; -} -#endif - -// ************************************************************************************************* -// GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb - -#ifndef STBI_NO_GIF -typedef struct -{ - stbi__int16 prefix; - stbi_uc first; - stbi_uc suffix; -} stbi__gif_lzw; - -typedef struct -{ - int w,h; - stbi_uc *out, *old_out; // output buffer (always 4 components) - int flags, bgindex, ratio, transparent, eflags, delay; - stbi_uc pal[256][4]; - stbi_uc lpal[256][4]; - stbi__gif_lzw codes[4096]; - stbi_uc *color_table; - int parse, step; - int lflags; - int start_x, start_y; - int max_x, max_y; - int cur_x, cur_y; - int line_size; -} stbi__gif; - -static int stbi__gif_test_raw(stbi__context *s) -{ - int sz; - if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') return 0; - sz = stbi__get8(s); - if (sz != '9' && sz != '7') return 0; - if (stbi__get8(s) != 'a') return 0; - return 1; -} - -static int stbi__gif_test(stbi__context *s) -{ - int r = stbi__gif_test_raw(s); - stbi__rewind(s); - return r; -} - -static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp) -{ - int i; - for (i=0; i < num_entries; ++i) { - pal[i][2] = stbi__get8(s); - pal[i][1] = stbi__get8(s); - pal[i][0] = stbi__get8(s); - pal[i][3] = transp == i ? 
0 : 255; - } -} - -static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info) -{ - stbi_uc version; - if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') - return stbi__err("not GIF", "Corrupt GIF"); - - version = stbi__get8(s); - if (version != '7' && version != '9') return stbi__err("not GIF", "Corrupt GIF"); - if (stbi__get8(s) != 'a') return stbi__err("not GIF", "Corrupt GIF"); - - stbi__g_failure_reason = ""; - g->w = stbi__get16le(s); - g->h = stbi__get16le(s); - g->flags = stbi__get8(s); - g->bgindex = stbi__get8(s); - g->ratio = stbi__get8(s); - g->transparent = -1; - - if (comp != 0) *comp = 4; // can't actually tell whether it's 3 or 4 until we parse the comments - - if (is_info) return 1; - - if (g->flags & 0x80) - stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1); - - return 1; -} - -static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp) -{ - stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif)); - if (!stbi__gif_header(s, g, comp, 1)) { - STBI_FREE(g); - stbi__rewind( s ); - return 0; - } - if (x) *x = g->w; - if (y) *y = g->h; - STBI_FREE(g); - return 1; -} - -static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code) -{ - stbi_uc *p, *c; - - // recurse to decode the prefixes, since the linked-list is backwards, - // and working backwards through an interleaved image would be nasty - if (g->codes[code].prefix >= 0) - stbi__out_gif_code(g, g->codes[code].prefix); - - if (g->cur_y >= g->max_y) return; - - p = &g->out[g->cur_x + g->cur_y]; - c = &g->color_table[g->codes[code].suffix * 4]; - - if (c[3] >= 128) { - p[0] = c[2]; - p[1] = c[1]; - p[2] = c[0]; - p[3] = c[3]; - } - g->cur_x += 4; - - if (g->cur_x >= g->max_x) { - g->cur_x = g->start_x; - g->cur_y += g->step; - - while (g->cur_y >= g->max_y && g->parse > 0) { - g->step = (1 << g->parse) * g->line_size; - g->cur_y = g->start_y + (g->step >> 1); - --g->parse; - } - } -} - -static 
stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g) -{ - stbi_uc lzw_cs; - stbi__int32 len, init_code; - stbi__uint32 first; - stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear; - stbi__gif_lzw *p; - - lzw_cs = stbi__get8(s); - if (lzw_cs > 12) return NULL; - clear = 1 << lzw_cs; - first = 1; - codesize = lzw_cs + 1; - codemask = (1 << codesize) - 1; - bits = 0; - valid_bits = 0; - for (init_code = 0; init_code < clear; init_code++) { - g->codes[init_code].prefix = -1; - g->codes[init_code].first = (stbi_uc) init_code; - g->codes[init_code].suffix = (stbi_uc) init_code; - } - - // support no starting clear code - avail = clear+2; - oldcode = -1; - - len = 0; - for(;;) { - if (valid_bits < codesize) { - if (len == 0) { - len = stbi__get8(s); // start new block - if (len == 0) - return g->out; - } - --len; - bits |= (stbi__int32) stbi__get8(s) << valid_bits; - valid_bits += 8; - } else { - stbi__int32 code = bits & codemask; - bits >>= codesize; - valid_bits -= codesize; - // @OPTIMIZE: is there some way we can accelerate the non-clear path? - if (code == clear) { // clear code - codesize = lzw_cs + 1; - codemask = (1 << codesize) - 1; - avail = clear + 2; - oldcode = -1; - first = 0; - } else if (code == clear + 1) { // end of stream code - stbi__skip(s, len); - while ((len = stbi__get8(s)) > 0) - stbi__skip(s,len); - return g->out; - } else if (code <= avail) { - if (first) return stbi__errpuc("no clear code", "Corrupt GIF"); - - if (oldcode >= 0) { - p = &g->codes[avail++]; - if (avail > 4096) return stbi__errpuc("too many codes", "Corrupt GIF"); - p->prefix = (stbi__int16) oldcode; - p->first = g->codes[oldcode].first; - p->suffix = (code == avail) ? 
p->first : g->codes[code].first; - } else if (code == avail) - return stbi__errpuc("illegal code in raster", "Corrupt GIF"); - - stbi__out_gif_code(g, (stbi__uint16) code); - - if ((avail & codemask) == 0 && avail <= 0x0FFF) { - codesize++; - codemask = (1 << codesize) - 1; - } - - oldcode = code; - } else { - return stbi__errpuc("illegal code in raster", "Corrupt GIF"); - } - } - } -} - -static void stbi__fill_gif_background(stbi__gif *g, int x0, int y0, int x1, int y1) -{ - int x, y; - stbi_uc *c = g->pal[g->bgindex]; - for (y = y0; y < y1; y += 4 * g->w) { - for (x = x0; x < x1; x += 4) { - stbi_uc *p = &g->out[y + x]; - p[0] = c[2]; - p[1] = c[1]; - p[2] = c[0]; - p[3] = 0; - } - } -} - -// this function is designed to support animated gifs, although stb_image doesn't support it -static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp) -{ - int i; - stbi_uc *prev_out = 0; - - if (g->out == 0 && !stbi__gif_header(s, g, comp,0)) - return 0; // stbi__g_failure_reason set by stbi__gif_header - - if (!stbi__mad3sizes_valid(g->w, g->h, 4, 0)) - return stbi__errpuc("too large", "GIF too large"); - - prev_out = g->out; - g->out = (stbi_uc *) stbi__malloc_mad3(4, g->w, g->h, 0); - if (g->out == 0) return stbi__errpuc("outofmem", "Out of memory"); - - switch ((g->eflags & 0x1C) >> 2) { - case 0: // unspecified (also always used on 1st frame) - stbi__fill_gif_background(g, 0, 0, 4 * g->w, 4 * g->w * g->h); - break; - case 1: // do not dispose - if (prev_out) memcpy(g->out, prev_out, 4 * g->w * g->h); - g->old_out = prev_out; - break; - case 2: // dispose to background - if (prev_out) memcpy(g->out, prev_out, 4 * g->w * g->h); - stbi__fill_gif_background(g, g->start_x, g->start_y, g->max_x, g->max_y); - break; - case 3: // dispose to previous - if (g->old_out) { - for (i = g->start_y; i < g->max_y; i += 4 * g->w) - memcpy(&g->out[i + g->start_x], &g->old_out[i + g->start_x], g->max_x - g->start_x); - } - break; - } - - for (;;) { - 
switch (stbi__get8(s)) { - case 0x2C: /* Image Descriptor */ - { - int prev_trans = -1; - stbi__int32 x, y, w, h; - stbi_uc *o; - - x = stbi__get16le(s); - y = stbi__get16le(s); - w = stbi__get16le(s); - h = stbi__get16le(s); - if (((x + w) > (g->w)) || ((y + h) > (g->h))) - return stbi__errpuc("bad Image Descriptor", "Corrupt GIF"); - - g->line_size = g->w * 4; - g->start_x = x * 4; - g->start_y = y * g->line_size; - g->max_x = g->start_x + w * 4; - g->max_y = g->start_y + h * g->line_size; - g->cur_x = g->start_x; - g->cur_y = g->start_y; - - g->lflags = stbi__get8(s); - - if (g->lflags & 0x40) { - g->step = 8 * g->line_size; // first interlaced spacing - g->parse = 3; - } else { - g->step = g->line_size; - g->parse = 0; - } - - if (g->lflags & 0x80) { - stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1); - g->color_table = (stbi_uc *) g->lpal; - } else if (g->flags & 0x80) { - if (g->transparent >= 0 && (g->eflags & 0x01)) { - prev_trans = g->pal[g->transparent][3]; - g->pal[g->transparent][3] = 0; - } - g->color_table = (stbi_uc *) g->pal; - } else - return stbi__errpuc("missing color table", "Corrupt GIF"); - - o = stbi__process_gif_raster(s, g); - if (o == NULL) return NULL; - - if (prev_trans != -1) - g->pal[g->transparent][3] = (stbi_uc) prev_trans; - - return o; - } - - case 0x21: // Comment Extension. - { - int len; - if (stbi__get8(s) == 0xF9) { // Graphic Control Extension. 
- len = stbi__get8(s); - if (len == 4) { - g->eflags = stbi__get8(s); - g->delay = stbi__get16le(s); - g->transparent = stbi__get8(s); - } else { - stbi__skip(s, len); - break; - } - } - while ((len = stbi__get8(s)) != 0) - stbi__skip(s, len); - break; - } - - case 0x3B: // gif stream termination code - return (stbi_uc *) s; // using '1' causes warning on some compilers - - default: - return stbi__errpuc("unknown code", "Corrupt GIF"); - } - } - - STBI_NOTUSED(req_comp); -} - -static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) -{ - stbi_uc *u = 0; - stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif)); - memset(g, 0, sizeof(*g)); - STBI_NOTUSED(ri); - - u = stbi__gif_load_next(s, g, comp, req_comp); - if (u == (stbi_uc *) s) u = 0; // end of animated gif marker - if (u) { - *x = g->w; - *y = g->h; - if (req_comp && req_comp != 4) - u = stbi__convert_format(u, 4, req_comp, g->w, g->h); - } - else if (g->out) - STBI_FREE(g->out); - STBI_FREE(g); - return u; -} - -static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp) -{ - return stbi__gif_info_raw(s,x,y,comp); -} -#endif - -// ************************************************************************************************* -// Radiance RGBE HDR loader -// originally by Nicolas Schulz -#ifndef STBI_NO_HDR -static int stbi__hdr_test_core(stbi__context *s, const char *signature) -{ - int i; - for (i=0; signature[i]; ++i) - if (stbi__get8(s) != signature[i]) - return 0; - stbi__rewind(s); - return 1; -} - -static int stbi__hdr_test(stbi__context* s) -{ - int r = stbi__hdr_test_core(s, "#?RADIANCE\n"); - stbi__rewind(s); - if(!r) { - r = stbi__hdr_test_core(s, "#?RGBE\n"); - stbi__rewind(s); - } - return r; -} - -#define STBI__HDR_BUFLEN 1024 -static char *stbi__hdr_gettoken(stbi__context *z, char *buffer) -{ - int len=0; - char c = '\0'; - - c = (char) stbi__get8(z); - - while (!stbi__at_eof(z) && c != '\n') { - buffer[len++] = c; - if (len 
== STBI__HDR_BUFLEN-1) { - // flush to end of line - while (!stbi__at_eof(z) && stbi__get8(z) != '\n') - ; - break; - } - c = (char) stbi__get8(z); - } - - buffer[len] = 0; - return buffer; -} - -static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp) -{ - if ( input[3] != 0 ) { - float f1; - // Exponent - f1 = (float) ldexp(1.0f, input[3] - (int)(128 + 8)); - if (req_comp <= 2) - output[0] = (input[0] + input[1] + input[2]) * f1 / 3; - else { - output[0] = input[0] * f1; - output[1] = input[1] * f1; - output[2] = input[2] * f1; - } - if (req_comp == 2) output[1] = 1; - if (req_comp == 4) output[3] = 1; - } else { - switch (req_comp) { - case 4: output[3] = 1; /* fallthrough */ - case 3: output[0] = output[1] = output[2] = 0; - break; - case 2: output[1] = 1; /* fallthrough */ - case 1: output[0] = 0; - break; - } - } -} - -static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) -{ - char buffer[STBI__HDR_BUFLEN]; - char *token; - int valid = 0; - int width, height; - stbi_uc *scanline; - float *hdr_data; - int len; - unsigned char count, value; - int i, j, k, c1,c2, z; - const char *headerToken; - STBI_NOTUSED(ri); - - // Check identifier - headerToken = stbi__hdr_gettoken(s,buffer); - if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0) - return stbi__errpf("not HDR", "Corrupt HDR image"); - - // Parse header - for(;;) { - token = stbi__hdr_gettoken(s,buffer); - if (token[0] == 0) break; - if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1; - } - - if (!valid) return stbi__errpf("unsupported format", "Unsupported HDR format"); - - // Parse width and height - // can't use sscanf() if we're not using stdio! 
- token = stbi__hdr_gettoken(s,buffer); - if (strncmp(token, "-Y ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format"); - token += 3; - height = (int) strtol(token, &token, 10); - while (*token == ' ') ++token; - if (strncmp(token, "+X ", 3)) return stbi__errpf("unsupported data layout", "Unsupported HDR format"); - token += 3; - width = (int) strtol(token, NULL, 10); - - *x = width; - *y = height; - - if (comp) *comp = 3; - if (req_comp == 0) req_comp = 3; - - if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0)) - return stbi__errpf("too large", "HDR image is too large"); - - // Read data - hdr_data = (float *) stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0); - if (!hdr_data) - return stbi__errpf("outofmem", "Out of memory"); - - // Load image data - // image data is stored as some number of sca - if ( width < 8 || width >= 32768) { - // Read flat data - for (j=0; j < height; ++j) { - for (i=0; i < width; ++i) { - stbi_uc rgbe[4]; - main_decode_loop: - stbi__getn(s, rgbe, 4); - stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp); - } - } - } else { - // Read RLE-encoded data - scanline = NULL; - - for (j = 0; j < height; ++j) { - c1 = stbi__get8(s); - c2 = stbi__get8(s); - len = stbi__get8(s); - if (c1 != 2 || c2 != 2 || (len & 0x80)) { - // not run-length encoded, so we have to actually use THIS data as a decoded - // pixel (note this can't be a valid pixel--one of RGB must be >= 128) - stbi_uc rgbe[4]; - rgbe[0] = (stbi_uc) c1; - rgbe[1] = (stbi_uc) c2; - rgbe[2] = (stbi_uc) len; - rgbe[3] = (stbi_uc) stbi__get8(s); - stbi__hdr_convert(hdr_data, rgbe, req_comp); - i = 1; - j = 0; - STBI_FREE(scanline); - goto main_decode_loop; // yes, this makes no sense - } - len <<= 8; - len |= stbi__get8(s); - if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); } - if (scanline == NULL) { - scanline = (stbi_uc *) 
stbi__malloc_mad2(width, 4, 0); - if (!scanline) { - STBI_FREE(hdr_data); - return stbi__errpf("outofmem", "Out of memory"); - } - } - - for (k = 0; k < 4; ++k) { - int nleft; - i = 0; - while ((nleft = width - i) > 0) { - count = stbi__get8(s); - if (count > 128) { - // Run - value = stbi__get8(s); - count -= 128; - if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); } - for (z = 0; z < count; ++z) - scanline[i++ * 4 + k] = value; - } else { - // Dump - if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); } - for (z = 0; z < count; ++z) - scanline[i++ * 4 + k] = stbi__get8(s); - } - } - } - for (i=0; i < width; ++i) - stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp); - } - if (scanline) - STBI_FREE(scanline); - } - - return hdr_data; -} - -static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp) -{ - char buffer[STBI__HDR_BUFLEN]; - char *token; - int valid = 0; - int dummy; - - if (!x) x = &dummy; - if (!y) y = &dummy; - if (!comp) comp = &dummy; - - if (stbi__hdr_test(s) == 0) { - stbi__rewind( s ); - return 0; - } - - for(;;) { - token = stbi__hdr_gettoken(s,buffer); - if (token[0] == 0) break; - if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1; - } - - if (!valid) { - stbi__rewind( s ); - return 0; - } - token = stbi__hdr_gettoken(s,buffer); - if (strncmp(token, "-Y ", 3)) { - stbi__rewind( s ); - return 0; - } - token += 3; - *y = (int) strtol(token, &token, 10); - while (*token == ' ') ++token; - if (strncmp(token, "+X ", 3)) { - stbi__rewind( s ); - return 0; - } - token += 3; - *x = (int) strtol(token, NULL, 10); - *comp = 3; - return 1; -} -#endif // STBI_NO_HDR - -#ifndef STBI_NO_BMP -static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp) -{ - void *p; - stbi__bmp_data info; - - info.all_a = 255; - p = stbi__bmp_parse_header(s, &info); - stbi__rewind( s ); - 
if (p == NULL) - return 0; - if (x) *x = s->img_x; - if (y) *y = s->img_y; - if (comp) *comp = info.ma ? 4 : 3; - return 1; -} -#endif - -#ifndef STBI_NO_PSD -static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp) -{ - int channelCount, dummy; - if (!x) x = &dummy; - if (!y) y = &dummy; - if (!comp) comp = &dummy; - if (stbi__get32be(s) != 0x38425053) { - stbi__rewind( s ); - return 0; - } - if (stbi__get16be(s) != 1) { - stbi__rewind( s ); - return 0; - } - stbi__skip(s, 6); - channelCount = stbi__get16be(s); - if (channelCount < 0 || channelCount > 16) { - stbi__rewind( s ); - return 0; - } - *y = stbi__get32be(s); - *x = stbi__get32be(s); - if (stbi__get16be(s) != 8) { - stbi__rewind( s ); - return 0; - } - if (stbi__get16be(s) != 3) { - stbi__rewind( s ); - return 0; - } - *comp = 4; - return 1; -} -#endif - -#ifndef STBI_NO_PIC -static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp) -{ - int act_comp=0,num_packets=0,chained,dummy; - stbi__pic_packet packets[10]; - - if (!x) x = &dummy; - if (!y) y = &dummy; - if (!comp) comp = &dummy; - - if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) { - stbi__rewind(s); - return 0; - } - - stbi__skip(s, 88); - - *x = stbi__get16be(s); - *y = stbi__get16be(s); - if (stbi__at_eof(s)) { - stbi__rewind( s); - return 0; - } - if ( (*x) != 0 && (1 << 28) / (*x) < (*y)) { - stbi__rewind( s ); - return 0; - } - - stbi__skip(s, 8); - - do { - stbi__pic_packet *packet; - - if (num_packets==sizeof(packets)/sizeof(packets[0])) - return 0; - - packet = &packets[num_packets++]; - chained = stbi__get8(s); - packet->size = stbi__get8(s); - packet->type = stbi__get8(s); - packet->channel = stbi__get8(s); - act_comp |= packet->channel; - - if (stbi__at_eof(s)) { - stbi__rewind( s ); - return 0; - } - if (packet->size != 8) { - stbi__rewind( s ); - return 0; - } - } while (chained); - - *comp = (act_comp & 0x10 ? 
4 : 3); - - return 1; -} -#endif - -// ************************************************************************************************* -// Portable Gray Map and Portable Pixel Map loader -// by Ken Miller -// -// PGM: http://netpbm.sourceforge.net/doc/pgm.html -// PPM: http://netpbm.sourceforge.net/doc/ppm.html -// -// Known limitations: -// Does not support comments in the header section -// Does not support ASCII image data (formats P2 and P3) -// Does not support 16-bit-per-channel - -#ifndef STBI_NO_PNM - -static int stbi__pnm_test(stbi__context *s) -{ - char p, t; - p = (char) stbi__get8(s); - t = (char) stbi__get8(s); - if (p != 'P' || (t != '5' && t != '6')) { - stbi__rewind( s ); - return 0; - } - return 1; -} - -static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) -{ - stbi_uc *out; - STBI_NOTUSED(ri); - - if (!stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n)) - return 0; - - *x = s->img_x; - *y = s->img_y; - if (comp) *comp = s->img_n; - - if (!stbi__mad3sizes_valid(s->img_n, s->img_x, s->img_y, 0)) - return stbi__errpuc("too large", "PNM too large"); - - out = (stbi_uc *) stbi__malloc_mad3(s->img_n, s->img_x, s->img_y, 0); - if (!out) return stbi__errpuc("outofmem", "Out of memory"); - stbi__getn(s, out, s->img_n * s->img_x * s->img_y); - - if (req_comp && req_comp != s->img_n) { - out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y); - if (out == NULL) return out; // stbi__convert_format frees input on failure - } - return out; -} - -static int stbi__pnm_isspace(char c) -{ - return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r'; -} - -static void stbi__pnm_skip_whitespace(stbi__context *s, char *c) -{ - for (;;) { - while (!stbi__at_eof(s) && stbi__pnm_isspace(*c)) - *c = (char) stbi__get8(s); - - if (stbi__at_eof(s) || *c != '#') - break; - - while (!stbi__at_eof(s) && *c != '\n' && *c != '\r' ) - *c = (char) stbi__get8(s); - 
} -} - -static int stbi__pnm_isdigit(char c) -{ - return c >= '0' && c <= '9'; -} - -static int stbi__pnm_getinteger(stbi__context *s, char *c) -{ - int value = 0; - - while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) { - value = value*10 + (*c - '0'); - *c = (char) stbi__get8(s); - } - - return value; -} - -static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp) -{ - int maxv, dummy; - char c, p, t; - - if (!x) x = &dummy; - if (!y) y = &dummy; - if (!comp) comp = &dummy; - - stbi__rewind(s); - - // Get identifier - p = (char) stbi__get8(s); - t = (char) stbi__get8(s); - if (p != 'P' || (t != '5' && t != '6')) { - stbi__rewind(s); - return 0; - } - - *comp = (t == '6') ? 3 : 1; // '5' is 1-component .pgm; '6' is 3-component .ppm - - c = (char) stbi__get8(s); - stbi__pnm_skip_whitespace(s, &c); - - *x = stbi__pnm_getinteger(s, &c); // read width - stbi__pnm_skip_whitespace(s, &c); - - *y = stbi__pnm_getinteger(s, &c); // read height - stbi__pnm_skip_whitespace(s, &c); - - maxv = stbi__pnm_getinteger(s, &c); // read max value - - if (maxv > 255) - return stbi__err("max value > 255", "PPM image not 8-bit"); - else - return 1; -} -#endif - -static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp) -{ - #ifndef STBI_NO_JPEG - if (stbi__jpeg_info(s, x, y, comp)) return 1; - #endif - - #ifndef STBI_NO_PNG - if (stbi__png_info(s, x, y, comp)) return 1; - #endif - - #ifndef STBI_NO_GIF - if (stbi__gif_info(s, x, y, comp)) return 1; - #endif - - #ifndef STBI_NO_BMP - if (stbi__bmp_info(s, x, y, comp)) return 1; - #endif - - #ifndef STBI_NO_PSD - if (stbi__psd_info(s, x, y, comp)) return 1; - #endif - - #ifndef STBI_NO_PIC - if (stbi__pic_info(s, x, y, comp)) return 1; - #endif - - #ifndef STBI_NO_PNM - if (stbi__pnm_info(s, x, y, comp)) return 1; - #endif - - #ifndef STBI_NO_HDR - if (stbi__hdr_info(s, x, y, comp)) return 1; - #endif - - // test tga last because it's a crappy test! 
- #ifndef STBI_NO_TGA - if (stbi__tga_info(s, x, y, comp)) - return 1; - #endif - return stbi__err("unknown image type", "Image not of any known type, or corrupt"); -} - -#ifndef STBI_NO_STDIO -STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp) -{ - FILE *f = stbi__fopen(filename, "rb"); - int result; - if (!f) return stbi__err("can't fopen", "Unable to open file"); - result = stbi_info_from_file(f, x, y, comp); - fclose(f); - return result; -} - -STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp) -{ - int r; - stbi__context s; - long pos = ftell(f); - stbi__start_file(&s, f); - r = stbi__info_main(&s,x,y,comp); - fseek(f,pos,SEEK_SET); - return r; -} -#endif // !STBI_NO_STDIO - -STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp) -{ - stbi__context s; - stbi__start_mem(&s,buffer,len); - return stbi__info_main(&s,x,y,comp); -} - -STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y, int *comp) -{ - stbi__context s; - stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user); - return stbi__info_main(&s,x,y,comp); -} - -#endif // STB_IMAGE_IMPLEMENTATION - -/* - revision history: - 2.16 (2017-07-23) all functions have 16-bit variants; - STBI_NO_STDIO works again; - compilation fixes; - fix rounding in unpremultiply; - optimize vertical flip; - disable raw_len validation; - documentation fixes - 2.15 (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode; - warning fixes; disable run-time SSE detection on gcc; - uniform handling of optional "return" values; - thread-safe initialization of zlib tables - 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs - 2.13 (2016-11-29) add 16-bit API, only supported for PNG right now - 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes - 2.11 (2016-04-02) allocate large structures on the stack - remove white matting for transparent PSD - fix reported channel count for PNG & 
BMP - re-enable SSE2 in non-gcc 64-bit - support RGB-formatted JPEG - read 16-bit PNGs (only as 8-bit) - 2.10 (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED - 2.09 (2016-01-16) allow comments in PNM files - 16-bit-per-pixel TGA (not bit-per-component) - info() for TGA could break due to .hdr handling - info() for BMP to shares code instead of sloppy parse - can use STBI_REALLOC_SIZED if allocator doesn't support realloc - code cleanup - 2.08 (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA - 2.07 (2015-09-13) fix compiler warnings - partial animated GIF support - limited 16-bpc PSD support - #ifdef unused functions - bug with < 92 byte PIC,PNM,HDR,TGA - 2.06 (2015-04-19) fix bug where PSD returns wrong '*comp' value - 2.05 (2015-04-19) fix bug in progressive JPEG handling, fix warning - 2.04 (2015-04-15) try to re-enable SIMD on MinGW 64-bit - 2.03 (2015-04-12) extra corruption checking (mmozeiko) - stbi_set_flip_vertically_on_load (nguillemot) - fix NEON support; fix mingw support - 2.02 (2015-01-19) fix incorrect assert, fix warning - 2.01 (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2 - 2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG - 2.00 (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg) - progressive JPEG (stb) - PGM/PPM support (Ken Miller) - STBI_MALLOC,STBI_REALLOC,STBI_FREE - GIF bugfix -- seemingly never worked - STBI_NO_*, STBI_ONLY_* - 1.48 (2014-12-14) fix incorrectly-named assert() - 1.47 (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb) - optimize PNG (ryg) - fix bug in interlaced PNG with user-specified channel count (stb) - 1.46 (2014-08-26) - fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG - 1.45 (2014-08-16) - fix MSVC-ARM internal compiler error by wrapping malloc - 1.44 (2014-08-07) - various warning fixes from Ronny Chevalier - 1.43 (2014-07-15) - fix MSVC-only compiler problem in code changed in 1.42 - 1.42 
(2014-07-09) - don't define _CRT_SECURE_NO_WARNINGS (affects user code) - fixes to stbi__cleanup_jpeg path - added STBI_ASSERT to avoid requiring assert.h - 1.41 (2014-06-25) - fix search&replace from 1.36 that messed up comments/error messages - 1.40 (2014-06-22) - fix gcc struct-initialization warning - 1.39 (2014-06-15) - fix to TGA optimization when req_comp != number of components in TGA; - fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite) - add support for BMP version 5 (more ignored fields) - 1.38 (2014-06-06) - suppress MSVC warnings on integer casts truncating values - fix accidental rename of 'skip' field of I/O - 1.37 (2014-06-04) - remove duplicate typedef - 1.36 (2014-06-03) - convert to header file single-file library - if de-iphone isn't set, load iphone images color-swapped instead of returning NULL - 1.35 (2014-05-27) - various warnings - fix broken STBI_SIMD path - fix bug where stbi_load_from_file no longer left file pointer in correct place - fix broken non-easy path for 32-bit BMP (possibly never used) - TGA optimization by Arseny Kapoulkine - 1.34 (unknown) - use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case - 1.33 (2011-07-14) - make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements - 1.32 (2011-07-13) - support for "info" function for all supported filetypes (SpartanJ) - 1.31 (2011-06-20) - a few more leak fixes, bug in PNG handling (SpartanJ) - 1.30 (2011-06-11) - added ability to load files via callbacks to accomidate custom input streams (Ben Wenger) - removed deprecated format-specific test/load functions - removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway - error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha) - fix inefficiency in decoding 32-bit BMP (David Woo) - 1.29 (2010-08-16) - various warning fixes from Aurelien Pocheville - 1.28 
(2010-08-01) - fix bug in GIF palette transparency (SpartanJ) - 1.27 (2010-08-01) - cast-to-stbi_uc to fix warnings - 1.26 (2010-07-24) - fix bug in file buffering for PNG reported by SpartanJ - 1.25 (2010-07-17) - refix trans_data warning (Won Chun) - 1.24 (2010-07-12) - perf improvements reading from files on platforms with lock-heavy fgetc() - minor perf improvements for jpeg - deprecated type-specific functions so we'll get feedback if they're needed - attempt to fix trans_data warning (Won Chun) - 1.23 fixed bug in iPhone support - 1.22 (2010-07-10) - removed image *writing* support - stbi_info support from Jetro Lauha - GIF support from Jean-Marc Lienher - iPhone PNG-extensions from James Brown - warning-fixes from Nicolas Schulz and Janez Zemva (i.stbi__err. Janez (U+017D)emva) - 1.21 fix use of 'stbi_uc' in header (reported by jon blow) - 1.20 added support for Softimage PIC, by Tom Seddon - 1.19 bug in interlaced PNG corruption check (found by ryg) - 1.18 (2008-08-02) - fix a threading bug (local mutable static) - 1.17 support interlaced PNG - 1.16 major bugfix - stbi__convert_format converted one too many pixels - 1.15 initialize some fields for thread safety - 1.14 fix threadsafe conversion bug - header-file-only version (#define STBI_HEADER_FILE_ONLY before including) - 1.13 threadsafe - 1.12 const qualifiers in the API - 1.11 Support installable IDCT, colorspace conversion routines - 1.10 Fixes for 64-bit (don't use "unsigned long") - optimized upsampling by Fabian "ryg" Giesen - 1.09 Fix format-conversion for PSD code (bad global variables!) 
- 1.08 Thatcher Ulrich's PSD code integrated by Nicolas Schulz - 1.07 attempt to fix C++ warning/errors again - 1.06 attempt to fix C++ warning/errors again - 1.05 fix TGA loading to return correct *comp and use good luminance calc - 1.04 default float alpha is 1, not 255; use 'void *' for stbi_image_free - 1.03 bugfixes to STBI_NO_STDIO, STBI_NO_HDR - 1.02 support for (subset of) HDR files, float interface for preferred access to them - 1.01 fix bug: possible bug in handling right-side up bmps... not sure - fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all - 1.00 interface to zlib that skips zlib header - 0.99 correct handling of alpha in palette - 0.98 TGA loader by lonesock; dynamically add loaders (untested) - 0.97 jpeg errors on too large a file; also catch another malloc failure - 0.96 fix detection of invalid v value - particleman@mollyrocket forum - 0.95 during header scan, seek to markers in case of padding - 0.94 STBI_NO_STDIO to disable stdio usage; rename all #defines the same - 0.93 handle jpegtran output; verbose errors - 0.92 read 4,8,16,24,32-bit BMP files of several formats - 0.91 output 24-bit Windows 3.0 BMP files - 0.90 fix a few more warnings; bump version number to approach 1.0 - 0.61 bugfixes due to Marc LeBlanc, Christopher Lloyd - 0.60 fix compiling as c++ - 0.59 fix warnings: merge Dave Moore's -Wall fixes - 0.58 fix bug: zlib uncompressed mode len/nlen was wrong endian - 0.57 fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available - 0.56 fix bug: zlib uncompressed mode len vs. 
nlen - 0.55 fix bug: restart_interval not initialized to 0 - 0.54 allow NULL for 'int *comp' - 0.53 fix bug in png 3->4; speedup png decoding - 0.52 png handles req_comp=3,4 directly; minor cleanup; jpeg comments - 0.51 obey req_comp requests, 1-component jpegs return as 1-component, - on 'test' only check type, not whether we support this variant - 0.50 (2006-11-19) - first released version -*/ - - -/* ------------------------------------------------------------------------------- -This software is available under 2 licenses -- choose whichever you prefer. ------------------------------------------------------------------------------- -ALTERNATIVE A - MIT License -Copyright (c) 2017 Sean Barrett -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. ------------------------------------------------------------------------------- -ALTERNATIVE B - Public Domain (www.unlicense.org) -This is free and unencumbered software released into the public domain. 
-Anyone is free to copy, modify, publish, use, compile, sell, or distribute this -software, either in source code form or as a compiled binary, for any purpose, -commercial or non-commercial, and by any means. -In jurisdictions that recognize copyright laws, the author or authors of this -software dedicate any and all copyright interest in the software to the public -domain. We make this dedication for the benefit of the public at large and to -the detriment of our heirs and successors. We intend this dedication to be an -overt act of relinquishment in perpetuity of all present and future rights to -this software under copyright law. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ------------------------------------------------------------------------------- -*/ diff --git a/src/Detector/darknet/3rdparty/stb/include/stb_image_write.h b/src/Detector/darknet/3rdparty/stb/include/stb_image_write.h deleted file mode 100644 index 274b1d81c..000000000 --- a/src/Detector/darknet/3rdparty/stb/include/stb_image_write.h +++ /dev/null @@ -1,1458 +0,0 @@ -/* stb_image_write - v1.07 - public domain - http://nothings.org/stb/stb_image_write.h - writes out PNG/BMP/TGA/JPEG/HDR images to C stdio - Sean Barrett 2010-2015 - no warranty implied; use at your own risk - - Before #including, - - #define STB_IMAGE_WRITE_IMPLEMENTATION - - in the file that you want to have the implementation. - - Will probably not work correctly with strict-aliasing optimizations. - -ABOUT: - - This header file is a library for writing images to C stdio. 
It could be - adapted to write to memory or a general streaming interface; let me know. - - The PNG output is not optimal; it is 20-50% larger than the file - written by a decent optimizing implementation. This library is designed - for source code compactness and simplicity, not optimal image file size - or run-time performance. - -BUILDING: - - You can #define STBIW_ASSERT(x) before the #include to avoid using assert.h. - You can #define STBIW_MALLOC(), STBIW_REALLOC(), and STBIW_FREE() to replace - malloc,realloc,free. - You can define STBIW_MEMMOVE() to replace memmove() - -USAGE: - - There are four functions, one for each image file format: - - int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes); - int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data); - int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data); - int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data); - int stbi_write_jpg(char const *filename, int w, int h, int comp, const float *data); - - There are also four equivalent functions that use an arbitrary write function. 
You are - expected to open/close your file-equivalent before and after calling these: - - int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data, int stride_in_bytes); - int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); - int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); - int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data); - int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality); - - where the callback is: - void stbi_write_func(void *context, void *data, int size); - - You can define STBI_WRITE_NO_STDIO to disable the file variant of these - functions, so the library will not use stdio.h at all. However, this will - also disable HDR writing, because it requires stdio for formatted output. - - Each function returns 0 on failure and non-0 on success. - - The functions create an image file defined by the parameters. The image - is a rectangle of pixels stored from left-to-right, top-to-bottom. - Each pixel contains 'comp' channels of data stored interleaved with 8-bits - per channel, in the following order: 1=Y, 2=YA, 3=RGB, 4=RGBA. (Y is - monochrome color.) The rectangle is 'w' pixels wide and 'h' pixels tall. - The *data pointer points to the first byte of the top-left-most pixel. - For PNG, "stride_in_bytes" is the distance in bytes from the first byte of - a row of pixels to the first byte of the next row of pixels. - - PNG creates output files with the same number of components as the input. - The BMP format expands Y to RGB in the file format and does not - output alpha. - - PNG supports writing rectangles of data even when the bytes storing rows of - data are not consecutive in memory (e.g. 
sub-rectangles of a larger image), - by supplying the stride between the beginning of adjacent rows. The other - formats do not. (Thus you cannot write a native-format BMP through the BMP - writer, both because it is in BGR order and because it may have padding - at the end of the line.) - - HDR expects linear float data. Since the format is always 32-bit rgb(e) - data, alpha (if provided) is discarded, and for monochrome data it is - replicated across all three channels. - - TGA supports RLE or non-RLE compressed data. To use non-RLE-compressed - data, set the global variable 'stbi_write_tga_with_rle' to 0. - - JPEG does ignore alpha channels in input data; quality is between 1 and 100. - Higher quality looks better but results in a bigger image. - JPEG baseline (no JPEG progressive). - -CREDITS: - - PNG/BMP/TGA - Sean Barrett - HDR - Baldur Karlsson - TGA monochrome: - Jean-Sebastien Guay - misc enhancements: - Tim Kelsey - TGA RLE - Alan Hickman - initial file IO callback implementation - Emmanuel Julien - JPEG - Jon Olick (original jo_jpeg.cpp code) - Daniel Gibson - bugfixes: - github:Chribba - Guillaume Chereau - github:jry2 - github:romigrou - Sergio Gonzalez - Jonas Karlsson - Filip Wasil - Thatcher Ulrich - github:poppolopoppo - Patrick Boettcher - -LICENSE - - See end of file for license information. 
- -*/ - -#ifndef INCLUDE_STB_IMAGE_WRITE_H -#define INCLUDE_STB_IMAGE_WRITE_H - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef STB_IMAGE_WRITE_STATIC -#define STBIWDEF static -#else -#define STBIWDEF extern -extern int stbi_write_tga_with_rle; -#endif - -#ifndef STBI_WRITE_NO_STDIO -STBIWDEF int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes); -STBIWDEF int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data); -STBIWDEF int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data); -STBIWDEF int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data); -STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality); -#endif - -typedef void stbi_write_func(void *context, void *data, int size); - -STBIWDEF int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data, int stride_in_bytes); -STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); -STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); -STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data); -STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality); - -#ifdef __cplusplus -} -#endif - -#endif//INCLUDE_STB_IMAGE_WRITE_H - -#ifdef STB_IMAGE_WRITE_IMPLEMENTATION - -#ifdef _WIN32 - #ifndef _CRT_SECURE_NO_WARNINGS - #define _CRT_SECURE_NO_WARNINGS - #endif - #ifndef _CRT_NONSTDC_NO_DEPRECATE - #define _CRT_NONSTDC_NO_DEPRECATE - #endif -#endif - -#ifndef STBI_WRITE_NO_STDIO -#include -#endif // STBI_WRITE_NO_STDIO - -#include -#include -#include -#include - -#if defined(STBIW_MALLOC) && defined(STBIW_FREE) && (defined(STBIW_REALLOC) || 
defined(STBIW_REALLOC_SIZED)) -// ok -#elif !defined(STBIW_MALLOC) && !defined(STBIW_FREE) && !defined(STBIW_REALLOC) && !defined(STBIW_REALLOC_SIZED) -// ok -#else -#error "Must define all or none of STBIW_MALLOC, STBIW_FREE, and STBIW_REALLOC (or STBIW_REALLOC_SIZED)." -#endif - -#ifndef STBIW_MALLOC -#define STBIW_MALLOC(sz) malloc(sz) -#define STBIW_REALLOC(p,newsz) realloc(p,newsz) -#define STBIW_FREE(p) free(p) -#endif - -#ifndef STBIW_REALLOC_SIZED -#define STBIW_REALLOC_SIZED(p,oldsz,newsz) STBIW_REALLOC(p,newsz) -#endif - - -#ifndef STBIW_MEMMOVE -#define STBIW_MEMMOVE(a,b,sz) memmove(a,b,sz) -#endif - - -#ifndef STBIW_ASSERT -#include -#define STBIW_ASSERT(x) assert(x) -#endif - -#define STBIW_UCHAR(x) (unsigned char) ((x) & 0xff) - -typedef struct -{ - stbi_write_func *func; - void *context; -} stbi__write_context; - -// initialize a callback-based context -static void stbi__start_write_callbacks(stbi__write_context *s, stbi_write_func *c, void *context) -{ - s->func = c; - s->context = context; -} - -#ifndef STBI_WRITE_NO_STDIO - -static void stbi__stdio_write(void *context, void *data, int size) -{ - fwrite(data,1,size,(FILE*) context); -} - -static int stbi__start_write_file(stbi__write_context *s, const char *filename) -{ - FILE *f = fopen(filename, "wb"); - stbi__start_write_callbacks(s, stbi__stdio_write, (void *) f); - return f != NULL; -} - -static void stbi__end_write_file(stbi__write_context *s) -{ - fclose((FILE *)s->context); -} - -#endif // !STBI_WRITE_NO_STDIO - -typedef unsigned int stbiw_uint32; -typedef int stb_image_write_test[sizeof(stbiw_uint32)==4 ? 
1 : -1]; - -#ifdef STB_IMAGE_WRITE_STATIC -static int stbi_write_tga_with_rle = 1; -#else -int stbi_write_tga_with_rle = 1; -#endif - -static void stbiw__writefv(stbi__write_context *s, const char *fmt, va_list v) -{ - while (*fmt) { - switch (*fmt++) { - case ' ': break; - case '1': { unsigned char x = STBIW_UCHAR(va_arg(v, int)); - s->func(s->context,&x,1); - break; } - case '2': { int x = va_arg(v,int); - unsigned char b[2]; - b[0] = STBIW_UCHAR(x); - b[1] = STBIW_UCHAR(x>>8); - s->func(s->context,b,2); - break; } - case '4': { stbiw_uint32 x = va_arg(v,int); - unsigned char b[4]; - b[0]=STBIW_UCHAR(x); - b[1]=STBIW_UCHAR(x>>8); - b[2]=STBIW_UCHAR(x>>16); - b[3]=STBIW_UCHAR(x>>24); - s->func(s->context,b,4); - break; } - default: - STBIW_ASSERT(0); - return; - } - } -} - -static void stbiw__writef(stbi__write_context *s, const char *fmt, ...) -{ - va_list v; - va_start(v, fmt); - stbiw__writefv(s, fmt, v); - va_end(v); -} - -static void stbiw__putc(stbi__write_context *s, unsigned char c) -{ - s->func(s->context, &c, 1); -} - -static void stbiw__write3(stbi__write_context *s, unsigned char a, unsigned char b, unsigned char c) -{ - unsigned char arr[3]; - arr[0] = a, arr[1] = b, arr[2] = c; - s->func(s->context, arr, 3); -} - -static void stbiw__write_pixel(stbi__write_context *s, int rgb_dir, int comp, int write_alpha, int expand_mono, unsigned char *d) -{ - unsigned char bg[3] = { 255, 0, 255}, px[3]; - int k; - - if (write_alpha < 0) - s->func(s->context, &d[comp - 1], 1); - - switch (comp) { - case 2: // 2 pixels = mono + alpha, alpha is written separately, so same as 1-channel case - case 1: - if (expand_mono) - stbiw__write3(s, d[0], d[0], d[0]); // monochrome bmp - else - s->func(s->context, d, 1); // monochrome TGA - break; - case 4: - if (!write_alpha) { - // composite against pink background - for (k = 0; k < 3; ++k) - px[k] = bg[k] + ((d[k] - bg[k]) * d[3]) / 255; - stbiw__write3(s, px[1 - rgb_dir], px[1], px[1 + rgb_dir]); - break; - } - /* 
FALLTHROUGH */ - case 3: - stbiw__write3(s, d[1 - rgb_dir], d[1], d[1 + rgb_dir]); - break; - } - if (write_alpha > 0) - s->func(s->context, &d[comp - 1], 1); -} - -static void stbiw__write_pixels(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int comp, void *data, int write_alpha, int scanline_pad, int expand_mono) -{ - stbiw_uint32 zero = 0; - int i,j, j_end; - - if (y <= 0) - return; - - if (vdir < 0) - j_end = -1, j = y-1; - else - j_end = y, j = 0; - - for (; j != j_end; j += vdir) { - for (i=0; i < x; ++i) { - unsigned char *d = (unsigned char *) data + (j*x+i)*comp; - stbiw__write_pixel(s, rgb_dir, comp, write_alpha, expand_mono, d); - } - s->func(s->context, &zero, scanline_pad); - } -} - -static int stbiw__outfile(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int comp, int expand_mono, void *data, int alpha, int pad, const char *fmt, ...) -{ - if (y < 0 || x < 0) { - return 0; - } else { - va_list v; - va_start(v, fmt); - stbiw__writefv(s, fmt, v); - va_end(v); - stbiw__write_pixels(s,rgb_dir,vdir,x,y,comp,data,alpha,pad, expand_mono); - return 1; - } -} - -static int stbi_write_bmp_core(stbi__write_context *s, int x, int y, int comp, const void *data) -{ - int pad = (-x*3) & 3; - return stbiw__outfile(s,-1,-1,x,y,comp,1,(void *) data,0,pad, - "11 4 22 4" "4 44 22 444444", - 'B', 'M', 14+40+(x*3+pad)*y, 0,0, 14+40, // file header - 40, x,y, 1,24, 0,0,0,0,0,0); // bitmap header -} - -STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data) -{ - stbi__write_context s; - stbi__start_write_callbacks(&s, func, context); - return stbi_write_bmp_core(&s, x, y, comp, data); -} - -#ifndef STBI_WRITE_NO_STDIO -STBIWDEF int stbi_write_bmp(char const *filename, int x, int y, int comp, const void *data) -{ - stbi__write_context s; - if (stbi__start_write_file(&s,filename)) { - int r = stbi_write_bmp_core(&s, x, y, comp, data); - stbi__end_write_file(&s); - return r; - } else - 
return 0; -} -#endif //!STBI_WRITE_NO_STDIO - -static int stbi_write_tga_core(stbi__write_context *s, int x, int y, int comp, void *data) -{ - int has_alpha = (comp == 2 || comp == 4); - int colorbytes = has_alpha ? comp-1 : comp; - int format = colorbytes < 2 ? 3 : 2; // 3 color channels (RGB/RGBA) = 2, 1 color channel (Y/YA) = 3 - - if (y < 0 || x < 0) - return 0; - - if (!stbi_write_tga_with_rle) { - return stbiw__outfile(s, -1, -1, x, y, comp, 0, (void *) data, has_alpha, 0, - "111 221 2222 11", 0, 0, format, 0, 0, 0, 0, 0, x, y, (colorbytes + has_alpha) * 8, has_alpha * 8); - } else { - int i,j,k; - - stbiw__writef(s, "111 221 2222 11", 0,0,format+8, 0,0,0, 0,0,x,y, (colorbytes + has_alpha) * 8, has_alpha * 8); - - for (j = y - 1; j >= 0; --j) { - unsigned char *row = (unsigned char *) data + j * x * comp; - int len; - - for (i = 0; i < x; i += len) { - unsigned char *begin = row + i * comp; - int diff = 1; - len = 1; - - if (i < x - 1) { - ++len; - diff = memcmp(begin, row + (i + 1) * comp, comp); - if (diff) { - const unsigned char *prev = begin; - for (k = i + 2; k < x && len < 128; ++k) { - if (memcmp(prev, row + k * comp, comp)) { - prev += comp; - ++len; - } else { - --len; - break; - } - } - } else { - for (k = i + 2; k < x && len < 128; ++k) { - if (!memcmp(begin, row + k * comp, comp)) { - ++len; - } else { - break; - } - } - } - } - - if (diff) { - unsigned char header = STBIW_UCHAR(len - 1); - s->func(s->context, &header, 1); - for (k = 0; k < len; ++k) { - stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin + k * comp); - } - } else { - unsigned char header = STBIW_UCHAR(len - 129); - s->func(s->context, &header, 1); - stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin); - } - } - } - } - return 1; -} - -STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data) -{ - stbi__write_context s; - stbi__start_write_callbacks(&s, func, context); - return stbi_write_tga_core(&s, x, y, comp, (void *) 
data); -} - -#ifndef STBI_WRITE_NO_STDIO -STBIWDEF int stbi_write_tga(char const *filename, int x, int y, int comp, const void *data) -{ - stbi__write_context s; - if (stbi__start_write_file(&s,filename)) { - int r = stbi_write_tga_core(&s, x, y, comp, (void *) data); - stbi__end_write_file(&s); - return r; - } else - return 0; -} -#endif - -// ************************************************************************************************* -// Radiance RGBE HDR writer -// by Baldur Karlsson - -#define stbiw__max(a, b) ((a) > (b) ? (a) : (b)) - -void stbiw__linear_to_rgbe(unsigned char *rgbe, float *linear) -{ - int exponent; - float maxcomp = stbiw__max(linear[0], stbiw__max(linear[1], linear[2])); - - if (maxcomp < 1e-32f) { - rgbe[0] = rgbe[1] = rgbe[2] = rgbe[3] = 0; - } else { - float normalize = (float) frexp(maxcomp, &exponent) * 256.0f/maxcomp; - - rgbe[0] = (unsigned char)(linear[0] * normalize); - rgbe[1] = (unsigned char)(linear[1] * normalize); - rgbe[2] = (unsigned char)(linear[2] * normalize); - rgbe[3] = (unsigned char)(exponent + 128); - } -} - -void stbiw__write_run_data(stbi__write_context *s, int length, unsigned char databyte) -{ - unsigned char lengthbyte = STBIW_UCHAR(length+128); - STBIW_ASSERT(length+128 <= 255); - s->func(s->context, &lengthbyte, 1); - s->func(s->context, &databyte, 1); -} - -void stbiw__write_dump_data(stbi__write_context *s, int length, unsigned char *data) -{ - unsigned char lengthbyte = STBIW_UCHAR(length); - STBIW_ASSERT(length <= 128); // inconsistent with spec but consistent with official code - s->func(s->context, &lengthbyte, 1); - s->func(s->context, data, length); -} - -void stbiw__write_hdr_scanline(stbi__write_context *s, int width, int ncomp, unsigned char *scratch, float *scanline) -{ - unsigned char scanlineheader[4] = { 2, 2, 0, 0 }; - unsigned char rgbe[4]; - float linear[3]; - int x; - - scanlineheader[2] = (width&0xff00)>>8; - scanlineheader[3] = (width&0x00ff); - - /* skip RLE for images too small or 
large */ - if (width < 8 || width >= 32768) { - for (x=0; x < width; x++) { - switch (ncomp) { - case 4: /* fallthrough */ - case 3: linear[2] = scanline[x*ncomp + 2]; - linear[1] = scanline[x*ncomp + 1]; - linear[0] = scanline[x*ncomp + 0]; - break; - default: - linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0]; - break; - } - stbiw__linear_to_rgbe(rgbe, linear); - s->func(s->context, rgbe, 4); - } - } else { - int c,r; - /* encode into scratch buffer */ - for (x=0; x < width; x++) { - switch(ncomp) { - case 4: /* fallthrough */ - case 3: linear[2] = scanline[x*ncomp + 2]; - linear[1] = scanline[x*ncomp + 1]; - linear[0] = scanline[x*ncomp + 0]; - break; - default: - linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0]; - break; - } - stbiw__linear_to_rgbe(rgbe, linear); - scratch[x + width*0] = rgbe[0]; - scratch[x + width*1] = rgbe[1]; - scratch[x + width*2] = rgbe[2]; - scratch[x + width*3] = rgbe[3]; - } - - s->func(s->context, scanlineheader, 4); - - /* RLE each component separately */ - for (c=0; c < 4; c++) { - unsigned char *comp = &scratch[width*c]; - - x = 0; - while (x < width) { - // find first run - r = x; - while (r+2 < width) { - if (comp[r] == comp[r+1] && comp[r] == comp[r+2]) - break; - ++r; - } - if (r+2 >= width) - r = width; - // dump up to first run - while (x < r) { - int len = r-x; - if (len > 128) len = 128; - stbiw__write_dump_data(s, len, &comp[x]); - x += len; - } - // if there's a run, output it - if (r+2 < width) { // same test as what we break out of in search loop, so only true if we break'd - // find next byte after run - while (r < width && comp[r] == comp[x]) - ++r; - // output run up to r - while (x < r) { - int len = r-x; - if (len > 127) len = 127; - stbiw__write_run_data(s, len, comp[x]); - x += len; - } - } - } - } - } -} - -static int stbi_write_hdr_core(stbi__write_context *s, int x, int y, int comp, float *data) -{ - if (y <= 0 || x <= 0 || data == NULL) - return 0; - else { - // Each component is stored 
separately. Allocate scratch space for full output scanline. - unsigned char *scratch = (unsigned char *) STBIW_MALLOC(x*4); - int i, len; - char buffer[128]; - char header[] = "#?RADIANCE\n# Written by stb_image_write.h\nFORMAT=32-bit_rle_rgbe\n"; - s->func(s->context, header, sizeof(header)-1); - - len = sprintf(buffer, "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x); - s->func(s->context, buffer, len); - - for(i=0; i < y; i++) - stbiw__write_hdr_scanline(s, x, comp, scratch, data + comp*i*x); - STBIW_FREE(scratch); - return 1; - } -} - -STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const float *data) -{ - stbi__write_context s; - stbi__start_write_callbacks(&s, func, context); - return stbi_write_hdr_core(&s, x, y, comp, (float *) data); -} - -#ifndef STBI_WRITE_NO_STDIO -STBIWDEF int stbi_write_hdr(char const *filename, int x, int y, int comp, const float *data) -{ - stbi__write_context s; - if (stbi__start_write_file(&s,filename)) { - int r = stbi_write_hdr_core(&s, x, y, comp, (float *) data); - stbi__end_write_file(&s); - return r; - } else - return 0; -} -#endif // STBI_WRITE_NO_STDIO - - -////////////////////////////////////////////////////////////////////////////// -// -// PNG writer -// - -// stretchy buffer; stbiw__sbpush() == vector<>::push_back() -- stbiw__sbcount() == vector<>::size() -#define stbiw__sbraw(a) ((int *) (a) - 2) -#define stbiw__sbm(a) stbiw__sbraw(a)[0] -#define stbiw__sbn(a) stbiw__sbraw(a)[1] - -#define stbiw__sbneedgrow(a,n) ((a)==0 || stbiw__sbn(a)+n >= stbiw__sbm(a)) -#define stbiw__sbmaybegrow(a,n) (stbiw__sbneedgrow(a,(n)) ? stbiw__sbgrow(a,n) : 0) -#define stbiw__sbgrow(a,n) stbiw__sbgrowf((void **) &(a), (n), sizeof(*(a))) - -#define stbiw__sbpush(a, v) (stbiw__sbmaybegrow(a,1), (a)[stbiw__sbn(a)++] = (v)) -#define stbiw__sbcount(a) ((a) ? stbiw__sbn(a) : 0) -#define stbiw__sbfree(a) ((a) ? 
STBIW_FREE(stbiw__sbraw(a)),0 : 0) - -static void *stbiw__sbgrowf(void **arr, int increment, int itemsize) -{ - int m = *arr ? 2*stbiw__sbm(*arr)+increment : increment+1; - void *p = STBIW_REALLOC_SIZED(*arr ? stbiw__sbraw(*arr) : 0, *arr ? (stbiw__sbm(*arr)*itemsize + sizeof(int)*2) : 0, itemsize * m + sizeof(int)*2); - STBIW_ASSERT(p); - if (p) { - if (!*arr) ((int *) p)[1] = 0; - *arr = (void *) ((int *) p + 2); - stbiw__sbm(*arr) = m; - } - return *arr; -} - -static unsigned char *stbiw__zlib_flushf(unsigned char *data, unsigned int *bitbuffer, int *bitcount) -{ - while (*bitcount >= 8) { - stbiw__sbpush(data, STBIW_UCHAR(*bitbuffer)); - *bitbuffer >>= 8; - *bitcount -= 8; - } - return data; -} - -static int stbiw__zlib_bitrev(int code, int codebits) -{ - int res=0; - while (codebits--) { - res = (res << 1) | (code & 1); - code >>= 1; - } - return res; -} - -static unsigned int stbiw__zlib_countm(unsigned char *a, unsigned char *b, int limit) -{ - int i; - for (i=0; i < limit && i < 258; ++i) - if (a[i] != b[i]) break; - return i; -} - -static unsigned int stbiw__zhash(unsigned char *data) -{ - stbiw_uint32 hash = data[0] + (data[1] << 8) + (data[2] << 16); - hash ^= hash << 3; - hash += hash >> 5; - hash ^= hash << 4; - hash += hash >> 17; - hash ^= hash << 25; - hash += hash >> 6; - return hash; -} - -#define stbiw__zlib_flush() (out = stbiw__zlib_flushf(out, &bitbuf, &bitcount)) -#define stbiw__zlib_add(code,codebits) \ - (bitbuf |= (code) << bitcount, bitcount += (codebits), stbiw__zlib_flush()) -#define stbiw__zlib_huffa(b,c) stbiw__zlib_add(stbiw__zlib_bitrev(b,c),c) -// default huffman tables -#define stbiw__zlib_huff1(n) stbiw__zlib_huffa(0x30 + (n), 8) -#define stbiw__zlib_huff2(n) stbiw__zlib_huffa(0x190 + (n)-144, 9) -#define stbiw__zlib_huff3(n) stbiw__zlib_huffa(0 + (n)-256,7) -#define stbiw__zlib_huff4(n) stbiw__zlib_huffa(0xc0 + (n)-280,8) -#define stbiw__zlib_huff(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : (n) <= 255 ? 
stbiw__zlib_huff2(n) : (n) <= 279 ? stbiw__zlib_huff3(n) : stbiw__zlib_huff4(n)) -#define stbiw__zlib_huffb(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : stbiw__zlib_huff2(n)) - -#define stbiw__ZHASH 16384 - -unsigned char * stbi_zlib_compress(unsigned char *data, int data_len, int *out_len, int quality) -{ - static unsigned short lengthc[] = { 3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,131,163,195,227,258, 259 }; - static unsigned char lengtheb[]= { 0,0,0,0,0,0,0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0 }; - static unsigned short distc[] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577, 32768 }; - static unsigned char disteb[] = { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13 }; - unsigned int bitbuf=0; - int i,j, bitcount=0; - unsigned char *out = NULL; - unsigned char ***hash_table = (unsigned char***) STBIW_MALLOC(stbiw__ZHASH * sizeof(char**)); - if (quality < 5) quality = 5; - - stbiw__sbpush(out, 0x78); // DEFLATE 32K window - stbiw__sbpush(out, 0x5e); // FLEVEL = 1 - stbiw__zlib_add(1,1); // BFINAL = 1 - stbiw__zlib_add(1,2); // BTYPE = 1 -- fixed huffman - - for (i=0; i < stbiw__ZHASH; ++i) - hash_table[i] = NULL; - - i=0; - while (i < data_len-3) { - // hash next 3 bytes of data to be compressed - int h = stbiw__zhash(data+i)&(stbiw__ZHASH-1), best=3; - unsigned char *bestloc = 0; - unsigned char **hlist = hash_table[h]; - int n = stbiw__sbcount(hlist); - for (j=0; j < n; ++j) { - if (hlist[j]-data > i-32768) { // if entry lies within window - int d = stbiw__zlib_countm(hlist[j], data+i, data_len-i); - if (d >= best) best=d,bestloc=hlist[j]; - } - } - // when hash table entry is too long, delete half the entries - if (hash_table[h] && stbiw__sbn(hash_table[h]) == 2*quality) { - STBIW_MEMMOVE(hash_table[h], hash_table[h]+quality, sizeof(hash_table[h][0])*quality); - stbiw__sbn(hash_table[h]) = quality; - } - 
stbiw__sbpush(hash_table[h],data+i); - - if (bestloc) { - // "lazy matching" - check match at *next* byte, and if it's better, do cur byte as literal - h = stbiw__zhash(data+i+1)&(stbiw__ZHASH-1); - hlist = hash_table[h]; - n = stbiw__sbcount(hlist); - for (j=0; j < n; ++j) { - if (hlist[j]-data > i-32767) { - int e = stbiw__zlib_countm(hlist[j], data+i+1, data_len-i-1); - if (e > best) { // if next match is better, bail on current match - bestloc = NULL; - break; - } - } - } - } - - if (bestloc) { - int d = (int) (data+i - bestloc); // distance back - STBIW_ASSERT(d <= 32767 && best <= 258); - for (j=0; best > lengthc[j+1]-1; ++j); - stbiw__zlib_huff(j+257); - if (lengtheb[j]) stbiw__zlib_add(best - lengthc[j], lengtheb[j]); - for (j=0; d > distc[j+1]-1; ++j); - stbiw__zlib_add(stbiw__zlib_bitrev(j,5),5); - if (disteb[j]) stbiw__zlib_add(d - distc[j], disteb[j]); - i += best; - } else { - stbiw__zlib_huffb(data[i]); - ++i; - } - } - // write out final bytes - for (;i < data_len; ++i) - stbiw__zlib_huffb(data[i]); - stbiw__zlib_huff(256); // end of block - // pad with 0 bits to byte boundary - while (bitcount) - stbiw__zlib_add(0,1); - - for (i=0; i < stbiw__ZHASH; ++i) - (void) stbiw__sbfree(hash_table[i]); - STBIW_FREE(hash_table); - - { - // compute adler32 on input - unsigned int s1=1, s2=0; - int blocklen = (int) (data_len % 5552); - j=0; - while (j < data_len) { - for (i=0; i < blocklen; ++i) s1 += data[j+i], s2 += s1; - s1 %= 65521, s2 %= 65521; - j += blocklen; - blocklen = 5552; - } - stbiw__sbpush(out, STBIW_UCHAR(s2 >> 8)); - stbiw__sbpush(out, STBIW_UCHAR(s2)); - stbiw__sbpush(out, STBIW_UCHAR(s1 >> 8)); - stbiw__sbpush(out, STBIW_UCHAR(s1)); - } - *out_len = stbiw__sbn(out); - // make returned pointer freeable - STBIW_MEMMOVE(stbiw__sbraw(out), out, *out_len); - return (unsigned char *) stbiw__sbraw(out); -} - -static unsigned int stbiw__crc32(unsigned char *buffer, int len) -{ - static unsigned int crc_table[256] = - { - 0x00000000, 0x77073096, 
0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, - 0x0eDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, - 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, - 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, - 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, - 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, - 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, - 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, - 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, - 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, - 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, - 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, - 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, - 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, - 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, - 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, - 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, - 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, - 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, - 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, - 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 
0xA6BC5767, 0x3FB506DD, 0x48B2364B, - 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, - 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, - 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, - 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, - 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, - 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, - 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, - 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, - 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, - 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, - 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D - }; - - unsigned int crc = ~0u; - int i; - for (i=0; i < len; ++i) - crc = (crc >> 8) ^ crc_table[buffer[i] ^ (crc & 0xff)]; - return ~crc; -} - -#define stbiw__wpng4(o,a,b,c,d) ((o)[0]=STBIW_UCHAR(a),(o)[1]=STBIW_UCHAR(b),(o)[2]=STBIW_UCHAR(c),(o)[3]=STBIW_UCHAR(d),(o)+=4) -#define stbiw__wp32(data,v) stbiw__wpng4(data, (v)>>24,(v)>>16,(v)>>8,(v)); -#define stbiw__wptag(data,s) stbiw__wpng4(data, s[0],s[1],s[2],s[3]) - -static void stbiw__wpcrc(unsigned char **data, int len) -{ - unsigned int crc = stbiw__crc32(*data - len - 4, len+4); - stbiw__wp32(*data, crc); -} - -static unsigned char stbiw__paeth(int a, int b, int c) -{ - int p = a + b - c, pa = abs(p-a), pb = abs(p-b), pc = abs(p-c); - if (pa <= pb && pa <= pc) return STBIW_UCHAR(a); - if (pb <= pc) return STBIW_UCHAR(b); - return STBIW_UCHAR(c); -} - -// @OPTIMIZE: provide an option that always forces left-predict 
or paeth predict -unsigned char *stbi_write_png_to_mem(unsigned char *pixels, int stride_bytes, int x, int y, int n, int *out_len) -{ - int ctype[5] = { -1, 0, 4, 2, 6 }; - unsigned char sig[8] = { 137,80,78,71,13,10,26,10 }; - unsigned char *out,*o, *filt, *zlib; - signed char *line_buffer; - int i,j,k,p,zlen; - - if (stride_bytes == 0) - stride_bytes = x * n; - - filt = (unsigned char *) STBIW_MALLOC((x*n+1) * y); if (!filt) return 0; - line_buffer = (signed char *) STBIW_MALLOC(x * n); if (!line_buffer) { STBIW_FREE(filt); return 0; } - for (j=0; j < y; ++j) { - static int mapping[] = { 0,1,2,3,4 }; - static int firstmap[] = { 0,1,0,5,6 }; - int *mymap = (j != 0) ? mapping : firstmap; - int best = 0, bestval = 0x7fffffff; - for (p=0; p < 2; ++p) { - for (k= p?best:0; k < 5; ++k) { // @TODO: clarity: rewrite this to go 0..5, and 'continue' the unwanted ones during 2nd pass - int type = mymap[k],est=0; - unsigned char *z = pixels + stride_bytes*j; - for (i=0; i < n; ++i) - switch (type) { - case 0: line_buffer[i] = z[i]; break; - case 1: line_buffer[i] = z[i]; break; - case 2: line_buffer[i] = z[i] - z[i-stride_bytes]; break; - case 3: line_buffer[i] = z[i] - (z[i-stride_bytes]>>1); break; - case 4: line_buffer[i] = (signed char) (z[i] - stbiw__paeth(0,z[i-stride_bytes],0)); break; - case 5: line_buffer[i] = z[i]; break; - case 6: line_buffer[i] = z[i]; break; - } - for (i=n; i < x*n; ++i) { - switch (type) { - case 0: line_buffer[i] = z[i]; break; - case 1: line_buffer[i] = z[i] - z[i-n]; break; - case 2: line_buffer[i] = z[i] - z[i-stride_bytes]; break; - case 3: line_buffer[i] = z[i] - ((z[i-n] + z[i-stride_bytes])>>1); break; - case 4: line_buffer[i] = z[i] - stbiw__paeth(z[i-n], z[i-stride_bytes], z[i-stride_bytes-n]); break; - case 5: line_buffer[i] = z[i] - (z[i-n]>>1); break; - case 6: line_buffer[i] = z[i] - stbiw__paeth(z[i-n], 0,0); break; - } - } - if (p) break; - for (i=0; i < x*n; ++i) - est += abs((signed char) line_buffer[i]); - if (est < bestval) 
{ bestval = est; best = k; } - } - } - // when we get here, best contains the filter type, and line_buffer contains the data - filt[j*(x*n+1)] = (unsigned char) best; - STBIW_MEMMOVE(filt+j*(x*n+1)+1, line_buffer, x*n); - } - STBIW_FREE(line_buffer); - zlib = stbi_zlib_compress(filt, y*( x*n+1), &zlen, 8); // increase 8 to get smaller but use more memory - STBIW_FREE(filt); - if (!zlib) return 0; - - // each tag requires 12 bytes of overhead - out = (unsigned char *) STBIW_MALLOC(8 + 12+13 + 12+zlen + 12); - if (!out) return 0; - *out_len = 8 + 12+13 + 12+zlen + 12; - - o=out; - STBIW_MEMMOVE(o,sig,8); o+= 8; - stbiw__wp32(o, 13); // header length - stbiw__wptag(o, "IHDR"); - stbiw__wp32(o, x); - stbiw__wp32(o, y); - *o++ = 8; - *o++ = STBIW_UCHAR(ctype[n]); - *o++ = 0; - *o++ = 0; - *o++ = 0; - stbiw__wpcrc(&o,13); - - stbiw__wp32(o, zlen); - stbiw__wptag(o, "IDAT"); - STBIW_MEMMOVE(o, zlib, zlen); - o += zlen; - STBIW_FREE(zlib); - stbiw__wpcrc(&o, zlen); - - stbiw__wp32(o,0); - stbiw__wptag(o, "IEND"); - stbiw__wpcrc(&o,0); - - STBIW_ASSERT(o == out + *out_len); - - return out; -} - -#ifndef STBI_WRITE_NO_STDIO -STBIWDEF int stbi_write_png(char const *filename, int x, int y, int comp, const void *data, int stride_bytes) -{ - FILE *f; - int len; - unsigned char *png = stbi_write_png_to_mem((unsigned char *) data, stride_bytes, x, y, comp, &len); - if (png == NULL) return 0; - f = fopen(filename, "wb"); - if (!f) { STBIW_FREE(png); return 0; } - fwrite(png, 1, len, f); - fclose(f); - STBIW_FREE(png); - return 1; -} -#endif - -STBIWDEF int stbi_write_png_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int stride_bytes) -{ - int len; - unsigned char *png = stbi_write_png_to_mem((unsigned char *) data, stride_bytes, x, y, comp, &len); - if (png == NULL) return 0; - func(context, png, len); - STBIW_FREE(png); - return 1; -} - - -/* *************************************************************************** - * - * JPEG writer - 
* - * This is based on Jon Olick's jo_jpeg.cpp: - * public domain Simple, Minimalistic JPEG writer - http://www.jonolick.com/code.html - */ - -static const unsigned char stbiw__jpg_ZigZag[] = { 0,1,5,6,14,15,27,28,2,4,7,13,16,26,29,42,3,8,12,17,25,30,41,43,9,11,18, - 24,31,40,44,53,10,19,23,32,39,45,52,54,20,22,33,38,46,51,55,60,21,34,37,47,50,56,59,61,35,36,48,49,57,58,62,63 }; - -static void stbiw__jpg_writeBits(stbi__write_context *s, int *bitBufP, int *bitCntP, const unsigned short *bs) { - int bitBuf = *bitBufP, bitCnt = *bitCntP; - bitCnt += bs[1]; - bitBuf |= bs[0] << (24 - bitCnt); - while(bitCnt >= 8) { - unsigned char c = (bitBuf >> 16) & 255; - stbiw__putc(s, c); - if(c == 255) { - stbiw__putc(s, 0); - } - bitBuf <<= 8; - bitCnt -= 8; - } - *bitBufP = bitBuf; - *bitCntP = bitCnt; -} - -static void stbiw__jpg_DCT(float *d0p, float *d1p, float *d2p, float *d3p, float *d4p, float *d5p, float *d6p, float *d7p) { - float d0 = *d0p, d1 = *d1p, d2 = *d2p, d3 = *d3p, d4 = *d4p, d5 = *d5p, d6 = *d6p, d7 = *d7p; - float z1, z2, z3, z4, z5, z11, z13; - - float tmp0 = d0 + d7; - float tmp7 = d0 - d7; - float tmp1 = d1 + d6; - float tmp6 = d1 - d6; - float tmp2 = d2 + d5; - float tmp5 = d2 - d5; - float tmp3 = d3 + d4; - float tmp4 = d3 - d4; - - // Even part - float tmp10 = tmp0 + tmp3; // phase 2 - float tmp13 = tmp0 - tmp3; - float tmp11 = tmp1 + tmp2; - float tmp12 = tmp1 - tmp2; - - d0 = tmp10 + tmp11; // phase 3 - d4 = tmp10 - tmp11; - - z1 = (tmp12 + tmp13) * 0.707106781f; // c4 - d2 = tmp13 + z1; // phase 5 - d6 = tmp13 - z1; - - // Odd part - tmp10 = tmp4 + tmp5; // phase 2 - tmp11 = tmp5 + tmp6; - tmp12 = tmp6 + tmp7; - - // The rotator is modified from fig 4-8 to avoid extra negations. 
- z5 = (tmp10 - tmp12) * 0.382683433f; // c6 - z2 = tmp10 * 0.541196100f + z5; // c2-c6 - z4 = tmp12 * 1.306562965f + z5; // c2+c6 - z3 = tmp11 * 0.707106781f; // c4 - - z11 = tmp7 + z3; // phase 5 - z13 = tmp7 - z3; - - *d5p = z13 + z2; // phase 6 - *d3p = z13 - z2; - *d1p = z11 + z4; - *d7p = z11 - z4; - - *d0p = d0; *d2p = d2; *d4p = d4; *d6p = d6; -} - -static void stbiw__jpg_calcBits(int val, unsigned short bits[2]) { - int tmp1 = val < 0 ? -val : val; - val = val < 0 ? val-1 : val; - bits[1] = 1; - while(tmp1 >>= 1) { - ++bits[1]; - } - bits[0] = val & ((1<0)&&(DU[end0pos]==0); --end0pos) { - } - // end0pos = first element in reverse order !=0 - if(end0pos == 0) { - stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB); - return DU[0]; - } - for(i = 1; i <= end0pos; ++i) { - int startpos = i; - int nrzeroes; - unsigned short bits[2]; - for (; DU[i]==0 && i<=end0pos; ++i) { - } - nrzeroes = i-startpos; - if ( nrzeroes >= 16 ) { - int lng = nrzeroes>>4; - int nrmarker; - for (nrmarker=1; nrmarker <= lng; ++nrmarker) - stbiw__jpg_writeBits(s, bitBuf, bitCnt, M16zeroes); - nrzeroes &= 15; - } - stbiw__jpg_calcBits(DU[i], bits); - stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTAC[(nrzeroes<<4)+bits[1]]); - stbiw__jpg_writeBits(s, bitBuf, bitCnt, bits); - } - if(end0pos != 63) { - stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB); - } - return DU[0]; -} - -static int stbi_write_jpg_core(stbi__write_context *s, int width, int height, int comp, const void* data, int quality) { - // Constants that don't pollute global namespace - static const unsigned char std_dc_luminance_nrcodes[] = {0,0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0}; - static const unsigned char std_dc_luminance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11}; - static const unsigned char std_ac_luminance_nrcodes[] = {0,0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,0x7d}; - static const unsigned char std_ac_luminance_values[] = { - 0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xa1,0x08, - 
0x23,0x42,0xb1,0xc1,0x15,0x52,0xd1,0xf0,0x24,0x33,0x62,0x72,0x82,0x09,0x0a,0x16,0x17,0x18,0x19,0x1a,0x25,0x26,0x27,0x28, - 0x29,0x2a,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59, - 0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x83,0x84,0x85,0x86,0x87,0x88,0x89, - 0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6, - 0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe1,0xe2, - 0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa - }; - static const unsigned char std_dc_chrominance_nrcodes[] = {0,0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0}; - static const unsigned char std_dc_chrominance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11}; - static const unsigned char std_ac_chrominance_nrcodes[] = {0,0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,0x77}; - static const unsigned char std_ac_chrominance_values[] = { - 0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91, - 0xa1,0xb1,0xc1,0x09,0x23,0x33,0x52,0xf0,0x15,0x62,0x72,0xd1,0x0a,0x16,0x24,0x34,0xe1,0x25,0xf1,0x17,0x18,0x19,0x1a,0x26, - 0x27,0x28,0x29,0x2a,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58, - 0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x82,0x83,0x84,0x85,0x86,0x87, - 0x88,0x89,0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4, - 0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda, - 0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa - }; - // Huffman tables - static const unsigned short YDC_HT[256][2] = { 
{0,2},{2,3},{3,3},{4,3},{5,3},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9}}; - static const unsigned short UVDC_HT[256][2] = { {0,2},{1,2},{2,2},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9},{1022,10},{2046,11}}; - static const unsigned short YAC_HT[256][2] = { - {10,4},{0,2},{1,2},{4,3},{11,4},{26,5},{120,7},{248,8},{1014,10},{65410,16},{65411,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {12,4},{27,5},{121,7},{502,9},{2038,11},{65412,16},{65413,16},{65414,16},{65415,16},{65416,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {28,5},{249,8},{1015,10},{4084,12},{65417,16},{65418,16},{65419,16},{65420,16},{65421,16},{65422,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {58,6},{503,9},{4085,12},{65423,16},{65424,16},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {59,6},{1016,10},{65430,16},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {122,7},{2039,11},{65438,16},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {123,7},{4086,12},{65446,16},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {250,8},{4087,12},{65454,16},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {504,9},{32704,15},{65462,16},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {505,9},{65470,16},{65471,16},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {506,9},{65479,16},{65480,16},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {1017,10},{65488,16},{65489,16},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - 
{1018,10},{65497,16},{65498,16},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {2040,11},{65506,16},{65507,16},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {65515,16},{65516,16},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{0,0},{0,0},{0,0},{0,0},{0,0}, - {2041,11},{65525,16},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0} - }; - static const unsigned short UVAC_HT[256][2] = { - {0,2},{1,2},{4,3},{10,4},{24,5},{25,5},{56,6},{120,7},{500,9},{1014,10},{4084,12},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {11,4},{57,6},{246,8},{501,9},{2038,11},{4085,12},{65416,16},{65417,16},{65418,16},{65419,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {26,5},{247,8},{1015,10},{4086,12},{32706,15},{65420,16},{65421,16},{65422,16},{65423,16},{65424,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {27,5},{248,8},{1016,10},{4087,12},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{65430,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {58,6},{502,9},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{65438,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {59,6},{1017,10},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{65446,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {121,7},{2039,11},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{65454,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {122,7},{2040,11},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{65462,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {249,8},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{65470,16},{65471,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - 
{503,9},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{65479,16},{65480,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {504,9},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{65488,16},{65489,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {505,9},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{65497,16},{65498,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {506,9},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{65506,16},{65507,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {2041,11},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{65515,16},{65516,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, - {16352,14},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{65525,16},{0,0},{0,0},{0,0},{0,0},{0,0}, - {1018,10},{32707,15},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0} - }; - static const int YQT[] = {16,11,10,16,24,40,51,61,12,12,14,19,26,58,60,55,14,13,16,24,40,57,69,56,14,17,22,29,51,87,80,62,18,22, - 37,56,68,109,103,77,24,35,55,64,81,104,113,92,49,64,78,87,103,121,120,101,72,92,95,98,112,100,103,99}; - static const int UVQT[] = {17,18,24,47,99,99,99,99,18,21,26,66,99,99,99,99,24,26,56,99,99,99,99,99,47,66,99,99,99,99,99,99, - 99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99}; - static const float aasf[] = { 1.0f * 2.828427125f, 1.387039845f * 2.828427125f, 1.306562965f * 2.828427125f, 1.175875602f * 2.828427125f, - 1.0f * 2.828427125f, 0.785694958f * 2.828427125f, 0.541196100f * 2.828427125f, 0.275899379f * 2.828427125f }; - - int row, col, i, k; - float fdtbl_Y[64], fdtbl_UV[64]; - unsigned char YTable[64], UVTable[64]; - - if(!data || !width || !height || comp > 4 || comp < 1) { - return 0; - } - - quality = quality ? quality : 90; - quality = quality < 1 ? 
1 : quality > 100 ? 100 : quality; - quality = quality < 50 ? 5000 / quality : 200 - quality * 2; - - for(i = 0; i < 64; ++i) { - int uvti, yti = (YQT[i]*quality+50)/100; - YTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (yti < 1 ? 1 : yti > 255 ? 255 : yti); - uvti = (UVQT[i]*quality+50)/100; - UVTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (uvti < 1 ? 1 : uvti > 255 ? 255 : uvti); - } - - for(row = 0, k = 0; row < 8; ++row) { - for(col = 0; col < 8; ++col, ++k) { - fdtbl_Y[k] = 1 / (YTable [stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]); - fdtbl_UV[k] = 1 / (UVTable[stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]); - } - } - - // Write Headers - { - static const unsigned char head0[] = { 0xFF,0xD8,0xFF,0xE0,0,0x10,'J','F','I','F',0,1,1,0,0,1,0,1,0,0,0xFF,0xDB,0,0x84,0 }; - static const unsigned char head2[] = { 0xFF,0xDA,0,0xC,3,1,0,2,0x11,3,0x11,0,0x3F,0 }; - const unsigned char head1[] = { 0xFF,0xC0,0,0x11,8,(unsigned char)(height>>8),STBIW_UCHAR(height),(unsigned char)(width>>8),STBIW_UCHAR(width), - 3,1,0x11,0,2,0x11,1,3,0x11,1,0xFF,0xC4,0x01,0xA2,0 }; - s->func(s->context, (void*)head0, sizeof(head0)); - s->func(s->context, (void*)YTable, sizeof(YTable)); - stbiw__putc(s, 1); - s->func(s->context, UVTable, sizeof(UVTable)); - s->func(s->context, (void*)head1, sizeof(head1)); - s->func(s->context, (void*)(std_dc_luminance_nrcodes+1), sizeof(std_dc_luminance_nrcodes)-1); - s->func(s->context, (void*)std_dc_luminance_values, sizeof(std_dc_luminance_values)); - stbiw__putc(s, 0x10); // HTYACinfo - s->func(s->context, (void*)(std_ac_luminance_nrcodes+1), sizeof(std_ac_luminance_nrcodes)-1); - s->func(s->context, (void*)std_ac_luminance_values, sizeof(std_ac_luminance_values)); - stbiw__putc(s, 1); // HTUDCinfo - s->func(s->context, (void*)(std_dc_chrominance_nrcodes+1), sizeof(std_dc_chrominance_nrcodes)-1); - s->func(s->context, (void*)std_dc_chrominance_values, sizeof(std_dc_chrominance_values)); - stbiw__putc(s, 0x11); // HTUACinfo - s->func(s->context, 
(void*)(std_ac_chrominance_nrcodes+1), sizeof(std_ac_chrominance_nrcodes)-1); - s->func(s->context, (void*)std_ac_chrominance_values, sizeof(std_ac_chrominance_values)); - s->func(s->context, (void*)head2, sizeof(head2)); - } - - // Encode 8x8 macroblocks - { - static const unsigned short fillBits[] = {0x7F, 7}; - const unsigned char *imageData = (const unsigned char *)data; - int DCY=0, DCU=0, DCV=0; - int bitBuf=0, bitCnt=0; - // comp == 2 is grey+alpha (alpha is ignored) - int ofsG = comp > 2 ? 1 : 0, ofsB = comp > 2 ? 2 : 0; - int x, y, pos; - for(y = 0; y < height; y += 8) { - for(x = 0; x < width; x += 8) { - float YDU[64], UDU[64], VDU[64]; - for(row = y, pos = 0; row < y+8; ++row) { - for(col = x; col < x+8; ++col, ++pos) { - int p = row*width*comp + col*comp; - float r, g, b; - if(row >= height) { - p -= width*comp*(row+1 - height); - } - if(col >= width) { - p -= comp*(col+1 - width); - } - - r = imageData[p+0]; - g = imageData[p+ofsG]; - b = imageData[p+ofsB]; - YDU[pos]=+0.29900f*r+0.58700f*g+0.11400f*b-128; - UDU[pos]=-0.16874f*r-0.33126f*g+0.50000f*b; - VDU[pos]=+0.50000f*r-0.41869f*g-0.08131f*b; - } - } - - DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, YDU, fdtbl_Y, DCY, YDC_HT, YAC_HT); - DCU = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, UDU, fdtbl_UV, DCU, UVDC_HT, UVAC_HT); - DCV = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, VDU, fdtbl_UV, DCV, UVDC_HT, UVAC_HT); - } - } - - // Do the bit alignment of the EOI marker - stbiw__jpg_writeBits(s, &bitBuf, &bitCnt, fillBits); - } - - // EOI - stbiw__putc(s, 0xFF); - stbiw__putc(s, 0xD9); - - return 1; -} - -STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality) -{ - stbi__write_context s; - stbi__start_write_callbacks(&s, func, context); - return stbi_write_jpg_core(&s, x, y, comp, (void *) data, quality); -} - - -#ifndef STBI_WRITE_NO_STDIO -STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, 
int quality) -{ - stbi__write_context s; - if (stbi__start_write_file(&s,filename)) { - int r = stbi_write_jpg_core(&s, x, y, comp, data, quality); - stbi__end_write_file(&s); - return r; - } else - return 0; -} -#endif - -#endif // STB_IMAGE_WRITE_IMPLEMENTATION - -/* Revision history - 1.07 (2017-07-24) - doc fix - 1.06 (2017-07-23) - writing JPEG (using Jon Olick's code) - 1.05 ??? - 1.04 (2017-03-03) - monochrome BMP expansion - 1.03 ??? - 1.02 (2016-04-02) - avoid allocating large structures on the stack - 1.01 (2016-01-16) - STBIW_REALLOC_SIZED: support allocators with no realloc support - avoid race-condition in crc initialization - minor compile issues - 1.00 (2015-09-14) - installable file IO function - 0.99 (2015-09-13) - warning fixes; TGA rle support - 0.98 (2015-04-08) - added STBIW_MALLOC, STBIW_ASSERT etc - 0.97 (2015-01-18) - fixed HDR asserts, rewrote HDR rle logic - 0.96 (2015-01-17) - add HDR output - fix monochrome BMP - 0.95 (2014-08-17) - add monochrome TGA output - 0.94 (2014-05-31) - rename private functions to avoid conflicts with stb_image.h - 0.93 (2014-05-27) - warning fixes - 0.92 (2010-08-01) - casts to unsigned char to fix warnings - 0.91 (2010-07-17) - first public release - 0.90 first internal release -*/ - -/* ------------------------------------------------------------------------------- -This software is available under 2 licenses -- choose whichever you prefer. 
------------------------------------------------------------------------------- -ALTERNATIVE A - MIT License -Copyright (c) 2017 Sean Barrett -Permission is hereby granted, free of charge, to any person obtaining a copy of -this software and associated documentation files (the "Software"), to deal in -the Software without restriction, including without limitation the rights to -use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies -of the Software, and to permit persons to whom the Software is furnished to do -so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. ------------------------------------------------------------------------------- -ALTERNATIVE B - Public Domain (www.unlicense.org) -This is free and unencumbered software released into the public domain. -Anyone is free to copy, modify, publish, use, compile, sell, or distribute this -software, either in source code form or as a compiled binary, for any purpose, -commercial or non-commercial, and by any means. -In jurisdictions that recognize copyright laws, the author or authors of this -software dedicate any and all copyright interest in the software to the public -domain. We make this dedication for the benefit of the public at large and to -the detriment of our heirs and successors. 
We intend this dedication to be an -overt act of relinquishment in perpetuity of all present and future rights to -this software under copyright law. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ------------------------------------------------------------------------------- -*/ diff --git a/src/Detector/darknet/CMakeLists.txt b/src/Detector/darknet/CMakeLists.txt deleted file mode 100644 index 636725c76..000000000 --- a/src/Detector/darknet/CMakeLists.txt +++ /dev/null @@ -1,106 +0,0 @@ -cmake_minimum_required (VERSION 3.10) - -set(libname "yolo_lib") -project(${libname}) - -#cuda -find_package(CUDA REQUIRED) - -if (CMAKE_COMPILER_IS_GNUCXX) - set(CUDA_PROPAGATE_HOST_FLAGS OFF) - set(CUDA_HOST_COMPILATION_CPP ON) - set(CUDA_NVCC_FLAGS -std=c++11 -g -Xcompiler -fexceptions -Xcompiler -fPIC) - set(CUDA_SEPARABLE_COMPILATION ON) -elseif(MSVC) -# set(CUDA_PROPAGATE_HOST_FLAGS OFF) - set(CUDA_HOST_COMPILATION_CPP ON) -# set(CUDA_NVCC_FLAGS -std=c++11 -g -Xcompiler -fexceptions -Xcompiler -fPIC) - set(CUDA_SEPARABLE_COMPILATION ON) -else() -# set(CUDA_PROPAGATE_HOST_FLAGS OFF) - set(CUDA_HOST_COMPILATION_CPP ON) - set(CUDA_NVCC_FLAGS -std=c++11 -g -Xcompiler -fexceptions -Xcompiler -fPIC) - set(CUDA_SEPARABLE_COMPILATION ON) -endif() - -set(CUDA_WARNING "cross-execution-space-call") -# new flags introduced in CUDA 9 set(CUDA_WARNING "reorder,cross-execution- -# space-call,deprecated-declarations") -set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Werror ${CUDA_WARNING} -restrict") -set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_61,code=sm_61") -set(CMAKE_CUDA_FLAGS 
"${CMAKE_CUDA_FLAGS} -gencode arch=compute_52,code=sm_52") -set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_50,code=sm_50") -set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_35,code=sm_35") -set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_30,code=sm_30") -set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_75,code=sm_75") -set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_75,code=compute_75") - -SET(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake) -find_package(CUDNN REQUIRED) - -# target_compile_features(${PROJECT_NAME} PUBLIC cxx_std_14) - -# target_compile_definitions(${PROJECT_NAME} PUBLIC DKGPU) - -include_directories(${OpenCV_INCLUDE_DIRS}) -include_directories(${CUDA_INCLUDE_DIRS}) -include_directories(${CUDNN_INCLUDE_DIR}) - -include_directories(${PROJECT_SOURCE_DIR}/include) -include_directories(${PROJECT_SOURCE_DIR}/3rdparty/stb/include) - -if (MSVC) - if("${CMAKE_SIZEOF_VOID_P}" STREQUAL "4") - set(BIT_SYSTEM x32) - else() - set(BIT_SYSTEM x64) - endif() - - include_directories(${PROJECT_SOURCE_DIR}/3rdparty/include) - link_directories(${PROJECT_SOURCE_DIR}/3rdparty/lib/${BIT_SYSTEM}) - - add_definitions(-DHAVE_STRUCT_TIMESPEC) - add_definitions(-DWIN32) - set(LIB_PTHREAD pthreadVC2) -else() - set(LIB_PTHREAD pthread) -endif() - -file(GLOB DARKNET_SOURCE_FILES src/*.c) -set(DARKNET_SOURCE_FILES ${DARKNET_SOURCE_FILES} src/yolo_v2_class.cpp src/http_stream.cpp src/image_opencv.cpp) - -file(GLOB DARKNET_HEADER_FILES src/*.h) -set(DARKNET_HEADER_FILES ${DARKNET_HEADER_FILES} include/darknet.h include/yolo_v2_class.hpp) - -file(GLOB DARKNET_CUDA_FILES src/*.cu) - -add_definitions(-DYOLO_DETECTOR_EXPORTS) -add_definitions(-DLIB_EXPORTS) -add_definitions(-DGPU) -add_definitions(-DCUDNN) -# add_definitions(-DOPENCV) - -#if(NOT CMAKE_DEBUG_POSTFIX) -# set(CMAKE_DEBUG_POSTFIX d) -#endif() - -cuda_add_library(${libname} SHARED - ${DARKNET_CUDA_FILES} - 
${DARKNET_SOURCE_FILES} - ${DARKNET_HEADER_FILES} -) - -set(YOLO_LIBS - ${OpenCV_LIBS} - ${CUDA_LIBRARIES} - ${CUDA_CUDART_LIBRARY} - ${CUDA_CUDA_LIBRARY} - ${CUDA_cublas_LIBRARY} - ${CUDA_curand_LIBRARY} - ${CUDNN_LIBRARY} - ${LIB_PTHREAD} -) - -# message("${YOLO_LIBS}") - -target_link_libraries(${libname} ${YOLO_LIBS}) diff --git a/src/Detector/darknet/include/darknet.h b/src/Detector/darknet/include/darknet.h deleted file mode 100644 index 040d9e359..000000000 --- a/src/Detector/darknet/include/darknet.h +++ /dev/null @@ -1,1111 +0,0 @@ -#ifndef DARKNET_API -#define DARKNET_API - -#if defined(_MSC_VER) && _MSC_VER < 1900 -#define inline __inline -#endif - -#if defined(DEBUG) && !defined(_CRTDBG_MAP_ALLOC) -#define _CRTDBG_MAP_ALLOC -#endif - -#include -#include -#include -#include -#include -#include - -#ifndef LIB_API -#ifdef LIB_EXPORTS -#if defined(_MSC_VER) -#define LIB_API __declspec(dllexport) -#else -#define LIB_API __attribute__((visibility("default"))) -#endif -#else -#if defined(_MSC_VER) -#define LIB_API -#else -#define LIB_API -#endif -#endif -#endif - -#define SECRET_NUM -1234 - -typedef enum { UNUSED_DEF_VAL } UNUSED_ENUM_TYPE; - -#ifdef GPU - -#include -#include -#include - -#ifdef CUDNN -#include -#endif // CUDNN -#endif // GPU - -#ifdef __cplusplus -extern "C" { -#endif - -struct network; -typedef struct network network; - -struct network_state; -typedef struct network_state network_state; - -struct layer; -typedef struct layer layer; - -struct image; -typedef struct image image; - -struct detection; -typedef struct detection detection; - -struct load_args; -typedef struct load_args load_args; - -struct data; -typedef struct data data; - -struct metadata; -typedef struct metadata metadata; - -struct tree; -typedef struct tree tree; - -extern int gpu_index; - -// option_list.h -typedef struct metadata { - int classes; - char **names; -} metadata; - - -// tree.h -typedef struct tree { - int *leaf; - int n; - int *parent; - int *child; - int 
*group; - char **name; - - int groups; - int *group_size; - int *group_offset; -} tree; - - -// activations.h -typedef enum { - LOGISTIC, RELU, RELU6, RELIE, LINEAR, RAMP, TANH, PLSE, REVLEAKY, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN, SELU, GELU, SWISH, MISH, HARD_MISH, NORM_CHAN, NORM_CHAN_SOFTMAX, NORM_CHAN_SOFTMAX_MAXVAL -}ACTIVATION; - -// parser.h -typedef enum { - IOU, GIOU, MSE, DIOU, CIOU -} IOU_LOSS; - -// parser.h -typedef enum { - DEFAULT_NMS, GREEDY_NMS, DIOU_NMS, CORNERS_NMS -} NMS_KIND; - -// parser.h -typedef enum { - YOLO_CENTER = 1 << 0, YOLO_LEFT_TOP = 1 << 1, YOLO_RIGHT_BOTTOM = 1 << 2 -} YOLO_POINT; - -// parser.h -typedef enum { - NO_WEIGHTS, PER_FEATURE, PER_CHANNEL -} WEIGHTS_TYPE_T; - -// parser.h -typedef enum { - NO_NORMALIZATION, RELU_NORMALIZATION, SOFTMAX_NORMALIZATION -} WEIGHTS_NORMALIZATION_T; - -// image.h -typedef enum{ - PNG, BMP, TGA, JPG -} IMTYPE; - -// activations.h -typedef enum{ - MULT, ADD, SUB, DIV -} BINARY_ACTIVATION; - -// blas.h -typedef struct contrastive_params { - float sim; - float exp_sim; - float P; - int i, j; - int time_step_i, time_step_j; -} contrastive_params; - - -// layer.h -typedef enum { - CONVOLUTIONAL, - DECONVOLUTIONAL, - CONNECTED, - MAXPOOL, - LOCAL_AVGPOOL, - SOFTMAX, - DETECTION, - DROPOUT, - CROP, - ROUTE, - COST, - NORMALIZATION, - AVGPOOL, - LOCAL, - SHORTCUT, - SCALE_CHANNELS, - SAM, - ACTIVE, - RNN, - GRU, - LSTM, - CONV_LSTM, - HISTORY, - CRNN, - BATCHNORM, - NETWORK, - XNOR, - REGION, - YOLO, - GAUSSIAN_YOLO, - ISEG, - REORG, - REORG_OLD, - UPSAMPLE, - LOGXENT, - L2NORM, - EMPTY, - BLANK, - CONTRASTIVE -} LAYER_TYPE; - -// layer.h -typedef enum{ - SSE, MASKED, L1, SEG, SMOOTH,WGAN -} COST_TYPE; - -// layer.h -typedef struct update_args { - int batch; - float learning_rate; - float momentum; - float decay; - int adam; - float B1; - float B2; - float eps; - int t; -} update_args; - -// layer.h -struct layer { - LAYER_TYPE type; - ACTIVATION activation; - ACTIVATION lstm_activation; - 
COST_TYPE cost_type; - void(*forward) (struct layer, struct network_state); - void(*backward) (struct layer, struct network_state); - void(*update) (struct layer, int, float, float, float); - void(*forward_gpu) (struct layer, struct network_state); - void(*backward_gpu) (struct layer, struct network_state); - void(*update_gpu) (struct layer, int, float, float, float, float); - layer *share_layer; - int train; - int avgpool; - int batch_normalize; - int shortcut; - int batch; - int dynamic_minibatch; - int forced; - int flipped; - int inputs; - int outputs; - float mean_alpha; - int nweights; - int nbiases; - int extra; - int truths; - int h, w, c; - int out_h, out_w, out_c; - int n; - int max_boxes; - int truth_size; - int groups; - int group_id; - int size; - int side; - int stride; - int stride_x; - int stride_y; - int dilation; - int antialiasing; - int maxpool_depth; - int maxpool_zero_nonmax; - int out_channels; - float reverse; - int coordconv; - int flatten; - int spatial; - int pad; - int sqrt; - int flip; - int index; - int scale_wh; - int binary; - int xnor; - int peephole; - int use_bin_output; - int keep_delta_gpu; - int optimized_memory; - int steps; - int history_size; - int bottleneck; - float time_normalizer; - int state_constrain; - int hidden; - int truth; - float smooth; - float dot; - int deform; - int grad_centr; - int sway; - int rotate; - int stretch; - int stretch_sway; - float angle; - float jitter; - float resize; - float saturation; - float exposure; - float shift; - float ratio; - float learning_rate_scale; - float clip; - int focal_loss; - float *classes_multipliers; - float label_smooth_eps; - int noloss; - int softmax; - int classes; - int detection; - int embedding_layer_id; - float *embedding_output; - int embedding_size; - float sim_thresh; - int track_history_size; - int dets_for_track; - int dets_for_show; - float track_ciou_norm; - int coords; - int background; - int rescore; - int objectness; - int does_cost; - int joint; - int 
noadjust; - int reorg; - int log; - int tanh; - int *mask; - int total; - float bflops; - - int adam; - float B1; - float B2; - float eps; - - int t; - - float alpha; - float beta; - float kappa; - - float coord_scale; - float object_scale; - float noobject_scale; - float mask_scale; - float class_scale; - int bias_match; - float random; - float ignore_thresh; - float truth_thresh; - float iou_thresh; - float thresh; - float focus; - int classfix; - int absolute; - int assisted_excitation; - - int onlyforward; - int stopbackward; - int train_only_bn; - int dont_update; - int burnin_update; - int dontload; - int dontsave; - int dontloadscales; - int numload; - - float temperature; - float probability; - float dropblock_size_rel; - int dropblock_size_abs; - int dropblock; - float scale; - - int receptive_w; - int receptive_h; - int receptive_w_scale; - int receptive_h_scale; - - char * cweights; - int * indexes; - int * input_layers; - int * input_sizes; - float **layers_output; - float **layers_delta; - WEIGHTS_TYPE_T weights_type; - WEIGHTS_NORMALIZATION_T weights_normalization; - int * map; - int * counts; - float ** sums; - float * rand; - float * cost; - int *labels; - int *class_ids; - int contrastive_neg_max; - float *cos_sim; - float *exp_cos_sim; - float *p_constrastive; - contrastive_params *contrast_p_gpu; - float * state; - float * prev_state; - float * forgot_state; - float * forgot_delta; - float * state_delta; - float * combine_cpu; - float * combine_delta_cpu; - - float *concat; - float *concat_delta; - - float *binary_weights; - - float *biases; - float *bias_updates; - - float *scales; - float *scale_updates; - - float *weights_ema; - float *biases_ema; - float *scales_ema; - - float *weights; - float *weight_updates; - - float scale_x_y; - int objectness_smooth; - int new_coords; - int show_details; - float max_delta; - float uc_normalizer; - float iou_normalizer; - float obj_normalizer; - float cls_normalizer; - float delta_normalizer; - IOU_LOSS 
iou_loss; - IOU_LOSS iou_thresh_kind; - NMS_KIND nms_kind; - float beta_nms; - YOLO_POINT yolo_point; - - char *align_bit_weights_gpu; - float *mean_arr_gpu; - float *align_workspace_gpu; - float *transposed_align_workspace_gpu; - int align_workspace_size; - - char *align_bit_weights; - float *mean_arr; - int align_bit_weights_size; - int lda_align; - int new_lda; - int bit_align; - - float *col_image; - float * delta; - float * output; - float * activation_input; - int delta_pinned; - int output_pinned; - float * loss; - float * squared; - float * norms; - - float * spatial_mean; - float * mean; - float * variance; - - float * mean_delta; - float * variance_delta; - - float * rolling_mean; - float * rolling_variance; - - float * x; - float * x_norm; - - float * m; - float * v; - - float * bias_m; - float * bias_v; - float * scale_m; - float * scale_v; - - - float *z_cpu; - float *r_cpu; - float *h_cpu; - float *stored_h_cpu; - float * prev_state_cpu; - - float *temp_cpu; - float *temp2_cpu; - float *temp3_cpu; - - float *dh_cpu; - float *hh_cpu; - float *prev_cell_cpu; - float *cell_cpu; - float *f_cpu; - float *i_cpu; - float *g_cpu; - float *o_cpu; - float *c_cpu; - float *stored_c_cpu; - float *dc_cpu; - - float *binary_input; - uint32_t *bin_re_packed_input; - char *t_bit_input; - - struct layer *input_layer; - struct layer *self_layer; - struct layer *output_layer; - - struct layer *reset_layer; - struct layer *update_layer; - struct layer *state_layer; - - struct layer *input_gate_layer; - struct layer *state_gate_layer; - struct layer *input_save_layer; - struct layer *state_save_layer; - struct layer *input_state_layer; - struct layer *state_state_layer; - - struct layer *input_z_layer; - struct layer *state_z_layer; - - struct layer *input_r_layer; - struct layer *state_r_layer; - - struct layer *input_h_layer; - struct layer *state_h_layer; - - struct layer *wz; - struct layer *uz; - struct layer *wr; - struct layer *ur; - struct layer *wh; - struct 
layer *uh; - struct layer *uo; - struct layer *wo; - struct layer *vo; - struct layer *uf; - struct layer *wf; - struct layer *vf; - struct layer *ui; - struct layer *wi; - struct layer *vi; - struct layer *ug; - struct layer *wg; - - tree *softmax_tree; - - size_t workspace_size; - -//#ifdef GPU - int *indexes_gpu; - - float *z_gpu; - float *r_gpu; - float *h_gpu; - float *stored_h_gpu; - float *bottelneck_hi_gpu; - float *bottelneck_delta_gpu; - - float *temp_gpu; - float *temp2_gpu; - float *temp3_gpu; - - float *dh_gpu; - float *hh_gpu; - float *prev_cell_gpu; - float *prev_state_gpu; - float *last_prev_state_gpu; - float *last_prev_cell_gpu; - float *cell_gpu; - float *f_gpu; - float *i_gpu; - float *g_gpu; - float *o_gpu; - float *c_gpu; - float *stored_c_gpu; - float *dc_gpu; - - // adam - float *m_gpu; - float *v_gpu; - float *bias_m_gpu; - float *scale_m_gpu; - float *bias_v_gpu; - float *scale_v_gpu; - - float * combine_gpu; - float * combine_delta_gpu; - - float * forgot_state_gpu; - float * forgot_delta_gpu; - float * state_gpu; - float * state_delta_gpu; - float * gate_gpu; - float * gate_delta_gpu; - float * save_gpu; - float * save_delta_gpu; - float * concat_gpu; - float * concat_delta_gpu; - - float *binary_input_gpu; - float *binary_weights_gpu; - float *bin_conv_shortcut_in_gpu; - float *bin_conv_shortcut_out_gpu; - - float * mean_gpu; - float * variance_gpu; - float * m_cbn_avg_gpu; - float * v_cbn_avg_gpu; - - float * rolling_mean_gpu; - float * rolling_variance_gpu; - - float * variance_delta_gpu; - float * mean_delta_gpu; - - float * col_image_gpu; - - float * x_gpu; - float * x_norm_gpu; - float * weights_gpu; - float * weight_updates_gpu; - float * weight_deform_gpu; - float * weight_change_gpu; - - float * weights_gpu16; - float * weight_updates_gpu16; - - float * biases_gpu; - float * bias_updates_gpu; - float * bias_change_gpu; - - float * scales_gpu; - float * scale_updates_gpu; - float * scale_change_gpu; - - float * 
input_antialiasing_gpu; - float * output_gpu; - float * output_avg_gpu; - float * activation_input_gpu; - float * loss_gpu; - float * delta_gpu; - float * cos_sim_gpu; - float * rand_gpu; - float * drop_blocks_scale; - float * drop_blocks_scale_gpu; - float * squared_gpu; - float * norms_gpu; - - float *gt_gpu; - float *a_avg_gpu; - - int *input_sizes_gpu; - float **layers_output_gpu; - float **layers_delta_gpu; -#ifdef CUDNN - cudnnTensorDescriptor_t srcTensorDesc, dstTensorDesc; - cudnnTensorDescriptor_t srcTensorDesc16, dstTensorDesc16; - cudnnTensorDescriptor_t dsrcTensorDesc, ddstTensorDesc; - cudnnTensorDescriptor_t dsrcTensorDesc16, ddstTensorDesc16; - cudnnTensorDescriptor_t normTensorDesc, normDstTensorDesc, normDstTensorDescF16; - cudnnFilterDescriptor_t weightDesc, weightDesc16; - cudnnFilterDescriptor_t dweightDesc, dweightDesc16; - cudnnConvolutionDescriptor_t convDesc; - cudnnConvolutionFwdAlgo_t fw_algo, fw_algo16; - cudnnConvolutionBwdDataAlgo_t bd_algo, bd_algo16; - cudnnConvolutionBwdFilterAlgo_t bf_algo, bf_algo16; - cudnnPoolingDescriptor_t poolingDesc; -#else // CUDNN - void* srcTensorDesc, *dstTensorDesc; - void* srcTensorDesc16, *dstTensorDesc16; - void* dsrcTensorDesc, *ddstTensorDesc; - void* dsrcTensorDesc16, *ddstTensorDesc16; - void* normTensorDesc, *normDstTensorDesc, *normDstTensorDescF16; - void* weightDesc, *weightDesc16; - void* dweightDesc, *dweightDesc16; - void* convDesc; - UNUSED_ENUM_TYPE fw_algo, fw_algo16; - UNUSED_ENUM_TYPE bd_algo, bd_algo16; - UNUSED_ENUM_TYPE bf_algo, bf_algo16; - void* poolingDesc; -#endif // CUDNN -//#endif // GPU -}; - - -// network.h -typedef enum { - CONSTANT, STEP, EXP, POLY, STEPS, SIG, RANDOM, SGDR -} learning_rate_policy; - -// network.h -typedef struct network { - int n; - int batch; - uint64_t *seen; - float *badlabels_reject_threshold; - float *delta_rolling_max; - float *delta_rolling_avg; - float *delta_rolling_std; - int weights_reject_freq; - int equidistant_point; - float 
badlabels_rejection_percentage; - float num_sigmas_reject_badlabels; - float ema_alpha; - int *cur_iteration; - float loss_scale; - int *t; - float epoch; - int subdivisions; - layer *layers; - float *output; - learning_rate_policy policy; - int benchmark_layers; - int *total_bbox; - int *rewritten_bbox; - - float learning_rate; - float learning_rate_min; - float learning_rate_max; - int batches_per_cycle; - int batches_cycle_mult; - float momentum; - float decay; - float gamma; - float scale; - float power; - int time_steps; - int step; - int max_batches; - int num_boxes; - int train_images_num; - float *seq_scales; - float *scales; - int *steps; - int num_steps; - int burn_in; - int cudnn_half; - - int adam; - float B1; - float B2; - float eps; - - int inputs; - int outputs; - int truths; - int notruth; - int h, w, c; - int max_crop; - int min_crop; - float max_ratio; - float min_ratio; - int center; - int flip; // horizontal flip 50% probability augmentaiont for classifier training (default = 1) - int gaussian_noise; - int blur; - int mixup; - float label_smooth_eps; - int resize_step; - int attention; - int adversarial; - float adversarial_lr; - float max_chart_loss; - int letter_box; - int mosaic_bound; - int contrastive; - int contrastive_jit_flip; - int contrastive_color; - int unsupervised; - float angle; - float aspect; - float exposure; - float saturation; - float hue; - int random; - int track; - int augment_speed; - int sequential_subdivisions; - int init_sequential_subdivisions; - int current_subdivision; - int try_fix_nan; - - int gpu_index; - tree *hierarchy; - - float *input; - float *truth; - float *delta; - float *workspace; - int train; - int index; - float *cost; - float clip; - -//#ifdef GPU - //float *input_gpu; - //float *truth_gpu; - float *delta_gpu; - float *output_gpu; - - float *input_state_gpu; - float *input_pinned_cpu; - int input_pinned_cpu_flag; - - float **input_gpu; - float **truth_gpu; - float **input16_gpu; - float 
**output16_gpu; - size_t *max_input16_size; - size_t *max_output16_size; - int wait_stream; - - float *global_delta_gpu; - float *state_delta_gpu; - size_t max_delta_gpu_size; -//#endif // GPU - int optimized_memory; - int dynamic_minibatch; - size_t workspace_size_limit; -} network; - -// network.h -typedef struct network_state { - float *truth; - float *input; - float *delta; - float *workspace; - int train; - int index; - network net; -} network_state; - -//typedef struct { -// int w; -// int h; -// float scale; -// float rad; -// float dx; -// float dy; -// float aspect; -//} augment_args; - -// image.h -typedef struct image { - int w; - int h; - int c; - float *data; -} image; - -//typedef struct { -// int w; -// int h; -// int c; -// float *data; -//} image; - -// box.h -typedef struct box { - float x, y, w, h; -} box; - -// box.h -typedef struct boxabs { - float left, right, top, bot; -} boxabs; - -// box.h -typedef struct dxrep { - float dt, db, dl, dr; -} dxrep; - -// box.h -typedef struct ious { - float iou, giou, diou, ciou; - dxrep dx_iou; - dxrep dx_giou; -} ious; - - -// box.h -typedef struct detection{ - box bbox; - int classes; - float *prob; - float *mask; - float objectness; - int sort_class; - float *uc; // Gaussian_YOLOv3 - tx,ty,tw,th uncertainty - int points; // bit-0 - center, bit-1 - top-left-corner, bit-2 - bottom-right-corner - float *embeddings; // embeddings for tracking - int embedding_size; - float sim; - int track_id; -} detection; - -// network.c -batch inference -typedef struct det_num_pair { - int num; - detection *dets; -} det_num_pair, *pdet_num_pair; - -// matrix.h -typedef struct matrix { - int rows, cols; - float **vals; -} matrix; - -// data.h -typedef struct data { - int w, h; - matrix X; - matrix y; - int shallow; - int *num_boxes; - box **boxes; -} data; - -// data.h -typedef enum { - CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, COMPARE_DATA, WRITING_DATA, SWAG_DATA, TAG_DATA, 
OLD_CLASSIFICATION_DATA, STUDY_DATA, DET_DATA, SUPER_DATA, LETTERBOX_DATA, REGRESSION_DATA, SEGMENTATION_DATA, INSTANCE_DATA, ISEG_DATA -} data_type; - -// data.h -typedef struct load_args { - int threads; - char **paths; - char *path; - int n; - int m; - char **labels; - int h; - int w; - int c; // color depth - int out_w; - int out_h; - int nh; - int nw; - int num_boxes; - int truth_size; - int min, max, size; - int classes; - int background; - int scale; - int center; - int coords; - int mini_batch; - int track; - int augment_speed; - int letter_box; - int mosaic_bound; - int show_imgs; - int dontuse_opencv; - int contrastive; - int contrastive_jit_flip; - int contrastive_color; - float jitter; - float resize; - int flip; - int gaussian_noise; - int blur; - int mixup; - float label_smooth_eps; - float angle; - float aspect; - float saturation; - float exposure; - float hue; - data *d; - image *im; - image *resized; - data_type type; - tree *hierarchy; -} load_args; - -// data.h -typedef struct box_label { - int id; - int track_id; - float x, y, w, h; - float left, right, top, bottom; -} box_label; - -// list.h -//typedef struct node { -// void *val; -// struct node *next; -// struct node *prev; -//} node; - -// list.h -//typedef struct list { -// int size; -// node *front; -// node *back; -//} list; -// ----------------------------------------------------- - - -// parser.c -LIB_API network *load_network(char *cfg, char *weights, int clear); -LIB_API network *load_network_custom(char *cfg, char *weights, int clear, int batch); -LIB_API network *load_network(char *cfg, char *weights, int clear); -LIB_API void free_network(network net); -LIB_API void free_network_ptr(network* net); - -// network.c -LIB_API load_args get_base_args(network *net); - -// box.h -LIB_API void do_nms_sort(detection *dets, int total, int classes, float thresh); -LIB_API void do_nms_obj(detection *dets, int total, int classes, float thresh); -LIB_API void diounms_sort(detection *dets, int 
total, int classes, float thresh, NMS_KIND nms_kind, float beta1); - -// network.h -LIB_API float *network_predict(network net, float *input); -LIB_API float *network_predict_ptr(network *net, float *input); -LIB_API detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, int *num, int letter); -LIB_API det_num_pair* network_predict_batch(network *net, image im, int batch_size, int w, int h, float thresh, float hier, int *map, int relative, int letter); -LIB_API void free_detections(detection *dets, int n); -LIB_API void free_batch_detections(det_num_pair *det_num_pairs, int n); -LIB_API void fuse_conv_batchnorm(network net); -LIB_API void calculate_binary_weights(network net); -LIB_API char *detection_to_json(detection *dets, int nboxes, int classes, char **names, long long int frame_id, char *filename); - -LIB_API layer* get_network_layer(network* net, int i); -//LIB_API detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, int *num, int letter); -LIB_API detection *make_network_boxes(network *net, float thresh, int *num); -LIB_API void reset_rnn(network *net); -LIB_API float *network_predict_image(network *net, image im); -LIB_API float *network_predict_image_letterbox(network *net, image im); -LIB_API float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, float thresh_calc_avg_iou, const float iou_thresh, const int map_points, int letter_box, network *existing_net); -LIB_API void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int dont_show, int calc_map, int mjpeg_port, int show_imgs, int benchmark_layers, char* chart_path); -LIB_API void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, - float hier_thresh, int dont_show, int ext_output, int save_labels, char *outfile, int letter_box, int benchmark_layers); -LIB_API int network_width(network *net); 
-LIB_API int network_height(network *net); -LIB_API void optimize_picture(network *net, image orig, int max_layer, float scale, float rate, float thresh, int norm); - -// image.h -LIB_API void make_image_red(image im); -LIB_API image make_attention_image(int img_size, float *original_delta_cpu, float *original_input_cpu, int w, int h, int c); -LIB_API image resize_image(image im, int w, int h); -LIB_API void quantize_image(image im); -LIB_API void copy_image_from_bytes(image im, char *pdata); -LIB_API image letterbox_image(image im, int w, int h); -LIB_API void rgbgr_image(image im); -LIB_API image make_image(int w, int h, int c); -LIB_API image load_image_color(char *filename, int w, int h); -LIB_API void free_image(image m); -LIB_API image crop_image(image im, int dx, int dy, int w, int h); -LIB_API image resize_min(image im, int min); - -// layer.h -LIB_API void free_layer_custom(layer l, int keep_cudnn_desc); -LIB_API void free_layer(layer l); - -// data.c -LIB_API void free_data(data d); -LIB_API pthread_t load_data(load_args args); -LIB_API void free_load_threads(void *ptr); -LIB_API pthread_t load_data_in_thread(load_args args); -LIB_API void *load_thread(void *ptr); - -// dark_cuda.h -LIB_API void cuda_pull_array(float *x_gpu, float *x, size_t n); -LIB_API void cuda_pull_array_async(float *x_gpu, float *x, size_t n); -LIB_API void cuda_set_device(int n); -LIB_API void *cuda_get_context(); - -// utils.h -LIB_API void free_ptrs(void **ptrs, int n); -LIB_API void top_k(float *a, int n, int k, int *index); - -// tree.h -LIB_API tree *read_tree(char *filename); - -// option_list.h -LIB_API metadata get_metadata(char *file); - - -// http_stream.h -LIB_API void delete_json_sender(); -LIB_API void send_json_custom(char const* send_buf, int port, int timeout); -LIB_API double get_time_point(); -void start_timer(); -void stop_timer(); -double get_time(); -void stop_timer_and_show(); -void stop_timer_and_show_name(char *name); -void show_total_time(); - -LIB_API void 
set_track_id(detection *new_dets, int new_dets_num, float thresh, float sim_thresh, float track_ciou_norm, int deque_size, int dets_for_track, int dets_for_show); -LIB_API int fill_remaining_id(detection *new_dets, int new_dets_num, int new_track_id, float thresh); - - -// gemm.h -LIB_API void init_cpu(); - -#ifdef __cplusplus -} -#endif // __cplusplus -#endif // DARKNET_API diff --git a/src/Detector/darknet/include/yolo_v2_class.hpp b/src/Detector/darknet/include/yolo_v2_class.hpp deleted file mode 100644 index 5fe3515ec..000000000 --- a/src/Detector/darknet/include/yolo_v2_class.hpp +++ /dev/null @@ -1,1053 +0,0 @@ -#ifndef YOLO_V2_CLASS_HPP -#define YOLO_V2_CLASS_HPP - -#ifndef LIB_API -#ifdef LIB_EXPORTS -#if defined(_MSC_VER) -#define LIB_API __declspec(dllexport) -#else -#define LIB_API __attribute__((visibility("default"))) -#endif -#else -#if defined(_MSC_VER) -#define LIB_API -#else -#define LIB_API -#endif -#endif -#endif - -#define C_SHARP_MAX_OBJECTS 1000 - -struct bbox_t { - unsigned int x, y, w, h; // (x,y) - top-left corner, (w, h) - width & height of bounded box - float prob; // confidence - probability that the object was found correctly - unsigned int obj_id; // class of object - from range [0, classes-1] - unsigned int track_id; // tracking id for video (0 - untracked, 1 - inf - tracked object) - unsigned int frames_counter; // counter of frames on which the object was detected - float x_3d, y_3d, z_3d; // center of object (in Meters) if ZED 3D Camera is used -}; - -struct image_t { - int h; // height - int w; // width - int c; // number of chanels (3 - for RGB) - float *data; // pointer to the image data -}; - -struct bbox_t_container { - bbox_t candidates[C_SHARP_MAX_OBJECTS]; -}; - -#ifdef __cplusplus -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef OPENCV -#include // C++ -#include // C -#include // C -#endif - -extern "C" LIB_API int init(const char *configurationFilename, const char 
*weightsFilename, int gpu, int batch_size); -extern "C" LIB_API int detect_image(const char *filename, bbox_t_container &container); -extern "C" LIB_API int detect_mat(const uint8_t* data, const size_t data_length, bbox_t_container &container); -extern "C" LIB_API int dispose(); -extern "C" LIB_API int get_device_count(); -extern "C" LIB_API int get_device_name(int gpu, char* deviceName); -extern "C" LIB_API bool built_with_cuda(); -extern "C" LIB_API bool built_with_cudnn(); -extern "C" LIB_API bool built_with_opencv(); -extern "C" LIB_API void send_json_custom(char const* send_buf, int port, int timeout); - -class Detector { - std::shared_ptr detector_gpu_ptr; - std::deque> prev_bbox_vec_deque; - std::string _cfg_filename, _weight_filename; -public: - const int cur_gpu_id = 0; - float nms = .4f; - bool wait_stream; - - LIB_API Detector(std::string cfg_filename, std::string weight_filename, int gpu_id, int batch_size); - LIB_API ~Detector(); - - LIB_API std::vector detect(std::string image_filename, float thresh = 0.2, bool use_mean = false); - LIB_API std::vector detect(image_t img, float thresh = 0.2, bool use_mean = false); - LIB_API std::vector> detectBatch(image_t img, int batch_size, int width, int height, float thresh); - static LIB_API image_t load_image(std::string image_filename); - static LIB_API void free_image(image_t m); - LIB_API int get_net_width() const; - LIB_API int get_net_height() const; - LIB_API int get_net_color_depth() const; - - LIB_API std::vector tracking_id(std::vector cur_bbox_vec, bool const change_history = true, - int const frames_story = 5, int const max_dist = 40); - - LIB_API void *get_cuda_context(); - - //LIB_API bool send_json_http(std::vector cur_bbox_vec, std::vector obj_names, int frame_id, - // std::string filename = std::string(), int timeout = 400000, int port = 8070); - - std::vector detect_resized(image_t img, int init_w, int init_h, float thresh = 0.2, bool use_mean = false) - { - if (img.data == NULL) - throw 
std::runtime_error("Image is empty"); - auto detection_boxes = detect(img, thresh, use_mean); - float wk = (float)init_w / img.w, hk = (float)init_h / img.h; - for (auto &i : detection_boxes) i.x *= wk, i.w *= wk, i.y *= hk, i.h *= hk; - return detection_boxes; - } - -#ifdef OPENCV - std::vector detect(cv::Mat mat, float thresh = 0.2, bool use_mean = false) - { - if(mat.data == NULL) - throw std::runtime_error("Image is empty"); - auto image_ptr = mat_to_image_resize(mat); - return detect_resized(*image_ptr, mat.cols, mat.rows, thresh, use_mean); - } - - std::shared_ptr mat_to_image_resize(cv::Mat mat) const - { - if (mat.data == NULL) return std::shared_ptr(NULL); - - cv::Size network_size = cv::Size(get_net_width(), get_net_height()); - cv::Mat det_mat; - if (mat.size() != network_size) - cv::resize(mat, det_mat, network_size); - else - det_mat = mat; // only reference is copied - - return mat_to_image(det_mat); - } - - static std::shared_ptr mat_to_image(cv::Mat img_src) - { - cv::Mat img; - if (img_src.channels() == 4) cv::cvtColor(img_src, img, cv::COLOR_RGBA2BGR); - else if (img_src.channels() == 3) cv::cvtColor(img_src, img, cv::COLOR_RGB2BGR); - else if (img_src.channels() == 1) cv::cvtColor(img_src, img, cv::COLOR_GRAY2BGR); - else std::cerr << " Warning: img_src.channels() is not 1, 3 or 4. 
It is = " << img_src.channels() << std::endl; - std::shared_ptr image_ptr(new image_t, [](image_t *img) { free_image(*img); delete img; }); - *image_ptr = mat_to_image_custom(img); - return image_ptr; - } - -private: - - static image_t mat_to_image_custom(cv::Mat mat) - { - int w = mat.cols; - int h = mat.rows; - int c = mat.channels(); - image_t im = make_image_custom(w, h, c); - unsigned char *data = (unsigned char *)mat.data; - int step = mat.step; - for (int y = 0; y < h; ++y) { - for (int k = 0; k < c; ++k) { - for (int x = 0; x < w; ++x) { - im.data[k*w*h + y*w + x] = data[y*step + x*c + k] / 255.0f; - } - } - } - return im; - } - - static image_t make_empty_image(int w, int h, int c) - { - image_t out; - out.data = 0; - out.h = h; - out.w = w; - out.c = c; - return out; - } - - static image_t make_image_custom(int w, int h, int c) - { - image_t out = make_empty_image(w, h, c); - out.data = (float *)calloc(h*w*c, sizeof(float)); - return out; - } - -#endif // OPENCV - -public: - - bool send_json_http(std::vector cur_bbox_vec, std::vector obj_names, int frame_id, - std::string filename = std::string(), int timeout = 400000, int port = 8070) - { - std::string send_str; - - char *tmp_buf = (char *)calloc(1024, sizeof(char)); - if (!filename.empty()) { - sprintf(tmp_buf, "{\n \"frame_id\":%d, \n \"filename\":\"%s\", \n \"objects\": [ \n", frame_id, filename.c_str()); - } - else { - sprintf(tmp_buf, "{\n \"frame_id\":%d, \n \"objects\": [ \n", frame_id); - } - send_str = tmp_buf; - free(tmp_buf); - - for (auto & i : cur_bbox_vec) { - char *buf = (char *)calloc(2048, sizeof(char)); - - sprintf(buf, " {\"class_id\":%d, \"name\":\"%s\", \"absolute_coordinates\":{\"center_x\":%d, \"center_y\":%d, \"width\":%d, \"height\":%d}, \"confidence\":%f", - i.obj_id, obj_names[i.obj_id].c_str(), i.x, i.y, i.w, i.h, i.prob); - - //sprintf(buf, " {\"class_id\":%d, \"name\":\"%s\", \"relative_coordinates\":{\"center_x\":%f, \"center_y\":%f, \"width\":%f, \"height\":%f}, 
\"confidence\":%f", - // i.obj_id, obj_names[i.obj_id], i.x, i.y, i.w, i.h, i.prob); - - send_str += buf; - - if (!std::isnan(i.z_3d)) { - sprintf(buf, "\n , \"coordinates_in_meters\":{\"x_3d\":%.2f, \"y_3d\":%.2f, \"z_3d\":%.2f}", - i.x_3d, i.y_3d, i.z_3d); - send_str += buf; - } - - send_str += "}\n"; - - free(buf); - } - - //send_str += "\n ] \n}, \n"; - send_str += "\n ] \n}"; - - send_json_custom(send_str.c_str(), port, timeout); - return true; - } -}; -// -------------------------------------------------------------------------------- - - -#if defined(TRACK_OPTFLOW) && defined(OPENCV) && defined(GPU) - -#include -#include -#include -#include - -class Tracker_optflow { -public: - const int gpu_count; - const int gpu_id; - const int flow_error; - - - Tracker_optflow(int _gpu_id = 0, int win_size = 15, int max_level = 3, int iterations = 8000, int _flow_error = -1) : - gpu_count(cv::cuda::getCudaEnabledDeviceCount()), gpu_id(std::min(_gpu_id, gpu_count-1)), - flow_error((_flow_error > 0)? 
_flow_error:(win_size*4)) - { - int const old_gpu_id = cv::cuda::getDevice(); - cv::cuda::setDevice(gpu_id); - - stream = cv::cuda::Stream(); - - sync_PyrLKOpticalFlow_gpu = cv::cuda::SparsePyrLKOpticalFlow::create(); - sync_PyrLKOpticalFlow_gpu->setWinSize(cv::Size(win_size, win_size)); // 9, 15, 21, 31 - sync_PyrLKOpticalFlow_gpu->setMaxLevel(max_level); // +- 3 pt - sync_PyrLKOpticalFlow_gpu->setNumIters(iterations); // 2000, def: 30 - - cv::cuda::setDevice(old_gpu_id); - } - - // just to avoid extra allocations - cv::cuda::GpuMat src_mat_gpu; - cv::cuda::GpuMat dst_mat_gpu, dst_grey_gpu; - cv::cuda::GpuMat prev_pts_flow_gpu, cur_pts_flow_gpu; - cv::cuda::GpuMat status_gpu, err_gpu; - - cv::cuda::GpuMat src_grey_gpu; // used in both functions - cv::Ptr sync_PyrLKOpticalFlow_gpu; - cv::cuda::Stream stream; - - std::vector cur_bbox_vec; - std::vector good_bbox_vec_flags; - cv::Mat prev_pts_flow_cpu; - - void update_cur_bbox_vec(std::vector _cur_bbox_vec) - { - cur_bbox_vec = _cur_bbox_vec; - good_bbox_vec_flags = std::vector(cur_bbox_vec.size(), true); - cv::Mat prev_pts, cur_pts_flow_cpu; - - for (auto &i : cur_bbox_vec) { - float x_center = (i.x + i.w / 2.0F); - float y_center = (i.y + i.h / 2.0F); - prev_pts.push_back(cv::Point2f(x_center, y_center)); - } - - if (prev_pts.rows == 0) - prev_pts_flow_cpu = cv::Mat(); - else - cv::transpose(prev_pts, prev_pts_flow_cpu); - - if (prev_pts_flow_gpu.cols < prev_pts_flow_cpu.cols) { - prev_pts_flow_gpu = cv::cuda::GpuMat(prev_pts_flow_cpu.size(), prev_pts_flow_cpu.type()); - cur_pts_flow_gpu = cv::cuda::GpuMat(prev_pts_flow_cpu.size(), prev_pts_flow_cpu.type()); - - status_gpu = cv::cuda::GpuMat(prev_pts_flow_cpu.size(), CV_8UC1); - err_gpu = cv::cuda::GpuMat(prev_pts_flow_cpu.size(), CV_32FC1); - } - - prev_pts_flow_gpu.upload(cv::Mat(prev_pts_flow_cpu), stream); - } - - - void update_tracking_flow(cv::Mat src_mat, std::vector _cur_bbox_vec) - { - int const old_gpu_id = cv::cuda::getDevice(); - if (old_gpu_id != 
gpu_id) - cv::cuda::setDevice(gpu_id); - - if (src_mat.channels() == 1 || src_mat.channels() == 3 || src_mat.channels() == 4) { - if (src_mat_gpu.cols == 0) { - src_mat_gpu = cv::cuda::GpuMat(src_mat.size(), src_mat.type()); - src_grey_gpu = cv::cuda::GpuMat(src_mat.size(), CV_8UC1); - } - - if (src_mat.channels() == 1) { - src_mat_gpu.upload(src_mat, stream); - src_mat_gpu.copyTo(src_grey_gpu); - } - else if (src_mat.channels() == 3) { - src_mat_gpu.upload(src_mat, stream); - cv::cuda::cvtColor(src_mat_gpu, src_grey_gpu, CV_BGR2GRAY, 1, stream); - } - else if (src_mat.channels() == 4) { - src_mat_gpu.upload(src_mat, stream); - cv::cuda::cvtColor(src_mat_gpu, src_grey_gpu, CV_BGRA2GRAY, 1, stream); - } - else { - std::cerr << " Warning: src_mat.channels() is not: 1, 3 or 4. It is = " << src_mat.channels() << " \n"; - return; - } - - } - update_cur_bbox_vec(_cur_bbox_vec); - - if (old_gpu_id != gpu_id) - cv::cuda::setDevice(old_gpu_id); - } - - - std::vector tracking_flow(cv::Mat dst_mat, bool check_error = true) - { - if (sync_PyrLKOpticalFlow_gpu.empty()) { - std::cout << "sync_PyrLKOpticalFlow_gpu isn't initialized \n"; - return cur_bbox_vec; - } - - int const old_gpu_id = cv::cuda::getDevice(); - if(old_gpu_id != gpu_id) - cv::cuda::setDevice(gpu_id); - - if (dst_mat_gpu.cols == 0) { - dst_mat_gpu = cv::cuda::GpuMat(dst_mat.size(), dst_mat.type()); - dst_grey_gpu = cv::cuda::GpuMat(dst_mat.size(), CV_8UC1); - } - - //dst_grey_gpu.upload(dst_mat, stream); // use BGR - dst_mat_gpu.upload(dst_mat, stream); - cv::cuda::cvtColor(dst_mat_gpu, dst_grey_gpu, CV_BGR2GRAY, 1, stream); - - if (src_grey_gpu.rows != dst_grey_gpu.rows || src_grey_gpu.cols != dst_grey_gpu.cols) { - stream.waitForCompletion(); - src_grey_gpu = dst_grey_gpu.clone(); - cv::cuda::setDevice(old_gpu_id); - return cur_bbox_vec; - } - - ////sync_PyrLKOpticalFlow_gpu.sparse(src_grey_gpu, dst_grey_gpu, prev_pts_flow_gpu, cur_pts_flow_gpu, status_gpu, &err_gpu); // OpenCV 2.4.x - 
sync_PyrLKOpticalFlow_gpu->calc(src_grey_gpu, dst_grey_gpu, prev_pts_flow_gpu, cur_pts_flow_gpu, status_gpu, err_gpu, stream); // OpenCV 3.x - - cv::Mat cur_pts_flow_cpu; - cur_pts_flow_gpu.download(cur_pts_flow_cpu, stream); - - dst_grey_gpu.copyTo(src_grey_gpu, stream); - - cv::Mat err_cpu, status_cpu; - err_gpu.download(err_cpu, stream); - status_gpu.download(status_cpu, stream); - - stream.waitForCompletion(); - - std::vector result_bbox_vec; - - if (err_cpu.cols == cur_bbox_vec.size() && status_cpu.cols == cur_bbox_vec.size()) - { - for (size_t i = 0; i < cur_bbox_vec.size(); ++i) - { - cv::Point2f cur_key_pt = cur_pts_flow_cpu.at(0, i); - cv::Point2f prev_key_pt = prev_pts_flow_cpu.at(0, i); - - float moved_x = cur_key_pt.x - prev_key_pt.x; - float moved_y = cur_key_pt.y - prev_key_pt.y; - - if (abs(moved_x) < 100 && abs(moved_y) < 100 && good_bbox_vec_flags[i]) - if (err_cpu.at(0, i) < flow_error && status_cpu.at(0, i) != 0 && - ((float)cur_bbox_vec[i].x + moved_x) > 0 && ((float)cur_bbox_vec[i].y + moved_y) > 0) - { - cur_bbox_vec[i].x += moved_x + 0.5; - cur_bbox_vec[i].y += moved_y + 0.5; - result_bbox_vec.push_back(cur_bbox_vec[i]); - } - else good_bbox_vec_flags[i] = false; - else good_bbox_vec_flags[i] = false; - - //if(!check_error && !good_bbox_vec_flags[i]) result_bbox_vec.push_back(cur_bbox_vec[i]); - } - } - - cur_pts_flow_gpu.swap(prev_pts_flow_gpu); - cur_pts_flow_cpu.copyTo(prev_pts_flow_cpu); - - if (old_gpu_id != gpu_id) - cv::cuda::setDevice(old_gpu_id); - - return result_bbox_vec; - } - -}; - -#elif defined(TRACK_OPTFLOW) && defined(OPENCV) - -//#include -#include - -class Tracker_optflow { -public: - const int flow_error; - - - Tracker_optflow(int win_size = 15, int max_level = 3, int iterations = 8000, int _flow_error = -1) : - flow_error((_flow_error > 0)? 
_flow_error:(win_size*4)) - { - sync_PyrLKOpticalFlow = cv::SparsePyrLKOpticalFlow::create(); - sync_PyrLKOpticalFlow->setWinSize(cv::Size(win_size, win_size)); // 9, 15, 21, 31 - sync_PyrLKOpticalFlow->setMaxLevel(max_level); // +- 3 pt - - } - - // just to avoid extra allocations - cv::Mat dst_grey; - cv::Mat prev_pts_flow, cur_pts_flow; - cv::Mat status, err; - - cv::Mat src_grey; // used in both functions - cv::Ptr sync_PyrLKOpticalFlow; - - std::vector cur_bbox_vec; - std::vector good_bbox_vec_flags; - - void update_cur_bbox_vec(std::vector _cur_bbox_vec) - { - cur_bbox_vec = _cur_bbox_vec; - good_bbox_vec_flags = std::vector(cur_bbox_vec.size(), true); - cv::Mat prev_pts, cur_pts_flow; - - for (auto &i : cur_bbox_vec) { - float x_center = (i.x + i.w / 2.0F); - float y_center = (i.y + i.h / 2.0F); - prev_pts.push_back(cv::Point2f(x_center, y_center)); - } - - if (prev_pts.rows == 0) - prev_pts_flow = cv::Mat(); - else - cv::transpose(prev_pts, prev_pts_flow); - } - - - void update_tracking_flow(cv::Mat new_src_mat, std::vector _cur_bbox_vec) - { - if (new_src_mat.channels() == 1) { - src_grey = new_src_mat.clone(); - } - else if (new_src_mat.channels() == 3) { - cv::cvtColor(new_src_mat, src_grey, CV_BGR2GRAY, 1); - } - else if (new_src_mat.channels() == 4) { - cv::cvtColor(new_src_mat, src_grey, CV_BGRA2GRAY, 1); - } - else { - std::cerr << " Warning: new_src_mat.channels() is not: 1, 3 or 4. 
It is = " << new_src_mat.channels() << " \n"; - return; - } - update_cur_bbox_vec(_cur_bbox_vec); - } - - - std::vector tracking_flow(cv::Mat new_dst_mat, bool check_error = true) - { - if (sync_PyrLKOpticalFlow.empty()) { - std::cout << "sync_PyrLKOpticalFlow isn't initialized \n"; - return cur_bbox_vec; - } - - cv::cvtColor(new_dst_mat, dst_grey, CV_BGR2GRAY, 1); - - if (src_grey.rows != dst_grey.rows || src_grey.cols != dst_grey.cols) { - src_grey = dst_grey.clone(); - //std::cerr << " Warning: src_grey.rows != dst_grey.rows || src_grey.cols != dst_grey.cols \n"; - return cur_bbox_vec; - } - - if (prev_pts_flow.cols < 1) { - return cur_bbox_vec; - } - - ////sync_PyrLKOpticalFlow_gpu.sparse(src_grey_gpu, dst_grey_gpu, prev_pts_flow_gpu, cur_pts_flow_gpu, status_gpu, &err_gpu); // OpenCV 2.4.x - sync_PyrLKOpticalFlow->calc(src_grey, dst_grey, prev_pts_flow, cur_pts_flow, status, err); // OpenCV 3.x - - dst_grey.copyTo(src_grey); - - std::vector result_bbox_vec; - - if (err.rows == cur_bbox_vec.size() && status.rows == cur_bbox_vec.size()) - { - for (size_t i = 0; i < cur_bbox_vec.size(); ++i) - { - cv::Point2f cur_key_pt = cur_pts_flow.at(0, i); - cv::Point2f prev_key_pt = prev_pts_flow.at(0, i); - - float moved_x = cur_key_pt.x - prev_key_pt.x; - float moved_y = cur_key_pt.y - prev_key_pt.y; - - if (abs(moved_x) < 100 && abs(moved_y) < 100 && good_bbox_vec_flags[i]) - if (err.at(0, i) < flow_error && status.at(0, i) != 0 && - ((float)cur_bbox_vec[i].x + moved_x) > 0 && ((float)cur_bbox_vec[i].y + moved_y) > 0) - { - cur_bbox_vec[i].x += moved_x + 0.5; - cur_bbox_vec[i].y += moved_y + 0.5; - result_bbox_vec.push_back(cur_bbox_vec[i]); - } - else good_bbox_vec_flags[i] = false; - else good_bbox_vec_flags[i] = false; - - //if(!check_error && !good_bbox_vec_flags[i]) result_bbox_vec.push_back(cur_bbox_vec[i]); - } - } - - prev_pts_flow = cur_pts_flow.clone(); - - return result_bbox_vec; - } - -}; -#else - -class Tracker_optflow {}; - -#endif // defined(TRACK_OPTFLOW) 
&& defined(OPENCV) - - -#ifdef OPENCV - -static cv::Scalar obj_id_to_color(int obj_id) { - int const colors[6][3] = { { 1,0,1 },{ 0,0,1 },{ 0,1,1 },{ 0,1,0 },{ 1,1,0 },{ 1,0,0 } }; - int const offset = obj_id * 123457 % 6; - int const color_scale = 150 + (obj_id * 123457) % 100; - cv::Scalar color(colors[offset][0], colors[offset][1], colors[offset][2]); - color *= color_scale; - return color; -} - -class preview_boxes_t { - enum { frames_history = 30 }; // how long to keep the history saved - - struct preview_box_track_t { - unsigned int track_id, obj_id, last_showed_frames_ago; - bool current_detection; - bbox_t bbox; - cv::Mat mat_obj, mat_resized_obj; - preview_box_track_t() : track_id(0), obj_id(0), last_showed_frames_ago(frames_history), current_detection(false) {} - }; - std::vector preview_box_track_id; - size_t const preview_box_size, bottom_offset; - bool const one_off_detections; -public: - preview_boxes_t(size_t _preview_box_size = 100, size_t _bottom_offset = 100, bool _one_off_detections = false) : - preview_box_size(_preview_box_size), bottom_offset(_bottom_offset), one_off_detections(_one_off_detections) - {} - - void set(cv::Mat src_mat, std::vector result_vec) - { - size_t const count_preview_boxes = src_mat.cols / preview_box_size; - if (preview_box_track_id.size() != count_preview_boxes) preview_box_track_id.resize(count_preview_boxes); - - // increment frames history - for (auto &i : preview_box_track_id) - i.last_showed_frames_ago = std::min((unsigned)frames_history, i.last_showed_frames_ago + 1); - - // occupy empty boxes - for (auto &k : result_vec) { - bool found = false; - // find the same (track_id) - for (auto &i : preview_box_track_id) { - if (i.track_id == k.track_id) { - if (!one_off_detections) i.last_showed_frames_ago = 0; // for tracked objects - found = true; - break; - } - } - if (!found) { - // find empty box - for (auto &i : preview_box_track_id) { - if (i.last_showed_frames_ago == frames_history) { - if (!one_off_detections && 
k.frames_counter == 0) break; // don't show if obj isn't tracked yet - i.track_id = k.track_id; - i.obj_id = k.obj_id; - i.bbox = k; - i.last_showed_frames_ago = 0; - break; - } - } - } - } - - // draw preview box (from old or current frame) - for (size_t i = 0; i < preview_box_track_id.size(); ++i) - { - // get object image - cv::Mat dst = preview_box_track_id[i].mat_resized_obj; - preview_box_track_id[i].current_detection = false; - - for (auto &k : result_vec) { - if (preview_box_track_id[i].track_id == k.track_id) { - if (one_off_detections && preview_box_track_id[i].last_showed_frames_ago > 0) { - preview_box_track_id[i].last_showed_frames_ago = frames_history; break; - } - bbox_t b = k; - cv::Rect r(b.x, b.y, b.w, b.h); - cv::Rect img_rect(cv::Point2i(0, 0), src_mat.size()); - cv::Rect rect_roi = r & img_rect; - if (rect_roi.width > 1 || rect_roi.height > 1) { - cv::Mat roi = src_mat(rect_roi); - cv::resize(roi, dst, cv::Size(preview_box_size, preview_box_size), cv::INTER_NEAREST); - preview_box_track_id[i].mat_obj = roi.clone(); - preview_box_track_id[i].mat_resized_obj = dst.clone(); - preview_box_track_id[i].current_detection = true; - preview_box_track_id[i].bbox = k; - } - break; - } - } - } - } - - - void draw(cv::Mat draw_mat, bool show_small_boxes = false) - { - // draw preview box (from old or current frame) - for (size_t i = 0; i < preview_box_track_id.size(); ++i) - { - auto &prev_box = preview_box_track_id[i]; - - // draw object image - cv::Mat dst = prev_box.mat_resized_obj; - if (prev_box.last_showed_frames_ago < frames_history && - dst.size() == cv::Size(preview_box_size, preview_box_size)) - { - cv::Rect dst_rect_roi(cv::Point2i(i * preview_box_size, draw_mat.rows - bottom_offset), dst.size()); - cv::Mat dst_roi = draw_mat(dst_rect_roi); - dst.copyTo(dst_roi); - - cv::Scalar color = obj_id_to_color(prev_box.obj_id); - int thickness = (prev_box.current_detection) ? 
5 : 1; - cv::rectangle(draw_mat, dst_rect_roi, color, thickness); - - unsigned int const track_id = prev_box.track_id; - std::string track_id_str = (track_id > 0) ? std::to_string(track_id) : ""; - putText(draw_mat, track_id_str, dst_rect_roi.tl() - cv::Point2i(-4, 5), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.9, cv::Scalar(0, 0, 0), 2); - - std::string size_str = std::to_string(prev_box.bbox.w) + "x" + std::to_string(prev_box.bbox.h); - putText(draw_mat, size_str, dst_rect_roi.tl() + cv::Point2i(0, 12), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.8, cv::Scalar(0, 0, 0), 1); - - if (!one_off_detections && prev_box.current_detection) { - cv::line(draw_mat, dst_rect_roi.tl() + cv::Point2i(preview_box_size, 0), - cv::Point2i(prev_box.bbox.x, prev_box.bbox.y + prev_box.bbox.h), - color); - } - - if (one_off_detections && show_small_boxes) { - cv::Rect src_rect_roi(cv::Point2i(prev_box.bbox.x, prev_box.bbox.y), - cv::Size(prev_box.bbox.w, prev_box.bbox.h)); - unsigned int const color_history = (255 * prev_box.last_showed_frames_ago) / frames_history; - color = cv::Scalar(255 - 3 * color_history, 255 - 2 * color_history, 255 - 1 * color_history); - if (prev_box.mat_obj.size() == src_rect_roi.size()) { - prev_box.mat_obj.copyTo(draw_mat(src_rect_roi)); - } - cv::rectangle(draw_mat, src_rect_roi, color, thickness); - putText(draw_mat, track_id_str, src_rect_roi.tl() - cv::Point2i(0, 10), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.8, cv::Scalar(0, 0, 0), 1); - } - } - } - } -}; - - -class track_kalman_t -{ - int track_id_counter; - std::chrono::steady_clock::time_point global_last_time; - float dT; - -public: - int max_objects; // max objects for tracking - int min_frames; // min frames to consider an object as detected - const float max_dist; // max distance (in px) to track with the same ID - cv::Size img_size; // max value of x,y,w,h - - struct tst_t { - int track_id; - int state_id; - std::chrono::steady_clock::time_point last_time; - int detection_count; - tst_t() : track_id(-1), state_id(-1) {} 
- }; - std::vector track_id_state_id_time; - std::vector result_vec_pred; - - struct one_kalman_t; - std::vector kalman_vec; - - struct one_kalman_t - { - cv::KalmanFilter kf; - cv::Mat state; - cv::Mat meas; - int measSize, stateSize, contrSize; - - void set_delta_time(float dT) { - kf.transitionMatrix.at(2) = dT; - kf.transitionMatrix.at(9) = dT; - } - - void set(bbox_t box) - { - initialize_kalman(); - - kf.errorCovPre.at(0) = 1; // px - kf.errorCovPre.at(7) = 1; // px - kf.errorCovPre.at(14) = 1; - kf.errorCovPre.at(21) = 1; - kf.errorCovPre.at(28) = 1; // px - kf.errorCovPre.at(35) = 1; // px - - state.at(0) = box.x; - state.at(1) = box.y; - state.at(2) = 0; - state.at(3) = 0; - state.at(4) = box.w; - state.at(5) = box.h; - // <<<< Initialization - - kf.statePost = state; - } - - // Kalman.correct() calculates: statePost = statePre + gain * (z(k)-measurementMatrix*statePre); - // corrected state (x(k)): x(k)=x'(k)+K(k)*(z(k)-H*x'(k)) - void correct(bbox_t box) { - meas.at(0) = box.x; - meas.at(1) = box.y; - meas.at(2) = box.w; - meas.at(3) = box.h; - - kf.correct(meas); - - bbox_t new_box = predict(); - if (new_box.w == 0 || new_box.h == 0) { - set(box); - //std::cerr << " force set(): track_id = " << box.track_id << - // ", x = " << box.x << ", y = " << box.y << ", w = " << box.w << ", h = " << box.h << std::endl; - } - } - - // Kalman.predict() calculates: statePre = TransitionMatrix * statePost; - // predicted state (x'(k)): x(k)=A*x(k-1)+B*u(k) - bbox_t predict() { - bbox_t box; - state = kf.predict(); - - box.x = state.at(0); - box.y = state.at(1); - box.w = state.at(4); - box.h = state.at(5); - return box; - } - - void initialize_kalman() - { - kf = cv::KalmanFilter(stateSize, measSize, contrSize, CV_32F); - - // Transition State Matrix A - // Note: set dT at each processing step! 
- // [ 1 0 dT 0 0 0 ] - // [ 0 1 0 dT 0 0 ] - // [ 0 0 1 0 0 0 ] - // [ 0 0 0 1 0 0 ] - // [ 0 0 0 0 1 0 ] - // [ 0 0 0 0 0 1 ] - cv::setIdentity(kf.transitionMatrix); - - // Measure Matrix H - // [ 1 0 0 0 0 0 ] - // [ 0 1 0 0 0 0 ] - // [ 0 0 0 0 1 0 ] - // [ 0 0 0 0 0 1 ] - kf.measurementMatrix = cv::Mat::zeros(measSize, stateSize, CV_32F); - kf.measurementMatrix.at(0) = 1.0f; - kf.measurementMatrix.at(7) = 1.0f; - kf.measurementMatrix.at(16) = 1.0f; - kf.measurementMatrix.at(23) = 1.0f; - - // Process Noise Covariance Matrix Q - result smoother with lower values (1e-2) - // [ Ex 0 0 0 0 0 ] - // [ 0 Ey 0 0 0 0 ] - // [ 0 0 Ev_x 0 0 0 ] - // [ 0 0 0 Ev_y 0 0 ] - // [ 0 0 0 0 Ew 0 ] - // [ 0 0 0 0 0 Eh ] - //cv::setIdentity(kf.processNoiseCov, cv::Scalar(1e-3)); - kf.processNoiseCov.at(0) = 1e-2; - kf.processNoiseCov.at(7) = 1e-2; - kf.processNoiseCov.at(14) = 1e-2;// 5.0f; - kf.processNoiseCov.at(21) = 1e-2;// 5.0f; - kf.processNoiseCov.at(28) = 5e-3; - kf.processNoiseCov.at(35) = 5e-3; - - // Measures Noise Covariance Matrix R - result smoother with higher values (1e-1) - cv::setIdentity(kf.measurementNoiseCov, cv::Scalar(1e-1)); - - //cv::setIdentity(kf.errorCovPost, cv::Scalar::all(1e-2)); - // <<<< Kalman Filter - - set_delta_time(0); - } - - - one_kalman_t(int _stateSize = 6, int _measSize = 4, int _contrSize = 0) : - kf(_stateSize, _measSize, _contrSize, CV_32F), measSize(_measSize), stateSize(_stateSize), contrSize(_contrSize) - { - state = cv::Mat(stateSize, 1, CV_32F); // [x,y,v_x,v_y,w,h] - meas = cv::Mat(measSize, 1, CV_32F); // [z_x,z_y,z_w,z_h] - //cv::Mat procNoise(stateSize, 1, type) - // [E_x,E_y,E_v_x,E_v_y,E_w,E_h] - - initialize_kalman(); - } - }; - // ------------------------------------------ - - - - track_kalman_t(int _max_objects = 1000, int _min_frames = 3, float _max_dist = 40, cv::Size _img_size = cv::Size(10000, 10000)) : - track_id_counter(0), max_objects(_max_objects), min_frames(_min_frames), max_dist(_max_dist), img_size(_img_size) 
- { - kalman_vec.resize(max_objects); - track_id_state_id_time.resize(max_objects); - result_vec_pred.resize(max_objects); - } - - float calc_dt() { - dT = std::chrono::duration(std::chrono::steady_clock::now() - global_last_time).count(); - return dT; - } - - static float get_distance(float src_x, float src_y, float dst_x, float dst_y) { - return sqrtf((src_x - dst_x)*(src_x - dst_x) + (src_y - dst_y)*(src_y - dst_y)); - } - - void clear_old_states() { - // clear old bboxes - for (size_t state_id = 0; state_id < track_id_state_id_time.size(); ++state_id) - { - float time_sec = std::chrono::duration(std::chrono::steady_clock::now() - track_id_state_id_time[state_id].last_time).count(); - float time_wait = 0.5; // 0.5 second - if (track_id_state_id_time[state_id].track_id > -1) - { - if ((result_vec_pred[state_id].x > img_size.width) || - (result_vec_pred[state_id].y > img_size.height)) - { - track_id_state_id_time[state_id].track_id = -1; - } - - if (time_sec >= time_wait || track_id_state_id_time[state_id].detection_count < 0) { - //std::cerr << " remove track_id = " << track_id_state_id_time[state_id].track_id << ", state_id = " << state_id << std::endl; - track_id_state_id_time[state_id].track_id = -1; // remove bbox - } - } - } - } - - tst_t get_state_id(bbox_t find_box, std::vector &busy_vec) - { - tst_t tst; - tst.state_id = -1; - - float min_dist = std::numeric_limits::max(); - - for (size_t i = 0; i < max_objects; ++i) - { - if (track_id_state_id_time[i].track_id > -1 && result_vec_pred[i].obj_id == find_box.obj_id && busy_vec[i] == false) - { - bbox_t pred_box = result_vec_pred[i]; - - float dist = get_distance(pred_box.x, pred_box.y, find_box.x, find_box.y); - - float movement_dist = std::max(max_dist, static_cast(std::max(pred_box.w, pred_box.h)) ); - - if ((dist < movement_dist) && (dist < min_dist)) { - min_dist = dist; - tst.state_id = i; - } - } - } - - if (tst.state_id > -1) { - track_id_state_id_time[tst.state_id].last_time = 
std::chrono::steady_clock::now(); - track_id_state_id_time[tst.state_id].detection_count = std::max(track_id_state_id_time[tst.state_id].detection_count + 2, 10); - tst = track_id_state_id_time[tst.state_id]; - busy_vec[tst.state_id] = true; - } - else { - //std::cerr << " Didn't find: obj_id = " << find_box.obj_id << ", x = " << find_box.x << ", y = " << find_box.y << - // ", track_id_counter = " << track_id_counter << std::endl; - } - - return tst; - } - - tst_t new_state_id(std::vector &busy_vec) - { - tst_t tst; - // find empty cell to add new track_id - auto it = std::find_if(track_id_state_id_time.begin(), track_id_state_id_time.end(), [&](tst_t &v) { return v.track_id == -1; }); - if (it != track_id_state_id_time.end()) { - it->state_id = it - track_id_state_id_time.begin(); - //it->track_id = track_id_counter++; - it->track_id = 0; - it->last_time = std::chrono::steady_clock::now(); - it->detection_count = 1; - tst = *it; - busy_vec[it->state_id] = true; - } - - return tst; - } - - std::vector find_state_ids(std::vector result_vec) - { - std::vector tst_vec(result_vec.size()); - - std::vector busy_vec(max_objects, false); - - for (size_t i = 0; i < result_vec.size(); ++i) - { - tst_t tst = get_state_id(result_vec[i], busy_vec); - int state_id = tst.state_id; - int track_id = tst.track_id; - - // if new state_id - if (state_id < 0) { - tst = new_state_id(busy_vec); - state_id = tst.state_id; - track_id = tst.track_id; - if (state_id > -1) { - kalman_vec[state_id].set(result_vec[i]); - //std::cerr << " post: "; - } - } - - //std::cerr << " track_id = " << track_id << ", state_id = " << state_id << - // ", x = " << result_vec[i].x << ", det_count = " << tst.detection_count << std::endl; - - if (state_id > -1) { - tst_vec[i] = tst; - result_vec_pred[state_id] = result_vec[i]; - result_vec_pred[state_id].track_id = track_id; - } - } - - return tst_vec; - } - - std::vector predict() - { - clear_old_states(); - std::vector result_vec; - - for (size_t i = 0; i < 
max_objects; ++i) - { - tst_t tst = track_id_state_id_time[i]; - if (tst.track_id > -1) { - bbox_t box = kalman_vec[i].predict(); - - result_vec_pred[i].x = box.x; - result_vec_pred[i].y = box.y; - result_vec_pred[i].w = box.w; - result_vec_pred[i].h = box.h; - - if (tst.detection_count >= min_frames) - { - if (track_id_state_id_time[i].track_id == 0) { - track_id_state_id_time[i].track_id = ++track_id_counter; - result_vec_pred[i].track_id = track_id_counter; - } - - result_vec.push_back(result_vec_pred[i]); - } - } - } - //std::cerr << " result_vec.size() = " << result_vec.size() << std::endl; - - //global_last_time = std::chrono::steady_clock::now(); - - return result_vec; - } - - - std::vector correct(std::vector result_vec) - { - calc_dt(); - clear_old_states(); - - for (size_t i = 0; i < max_objects; ++i) - track_id_state_id_time[i].detection_count--; - - std::vector tst_vec = find_state_ids(result_vec); - - for (size_t i = 0; i < tst_vec.size(); ++i) { - tst_t tst = tst_vec[i]; - int state_id = tst.state_id; - if (state_id > -1) - { - kalman_vec[state_id].set_delta_time(dT); - kalman_vec[state_id].correct(result_vec_pred[state_id]); - } - } - - result_vec = predict(); - - global_last_time = std::chrono::steady_clock::now(); - - return result_vec; - } - -}; -// ---------------------------------------------- -#endif // OPENCV - -#endif // __cplusplus - -#endif // YOLO_V2_CLASS_HPP diff --git a/src/Detector/darknet/src/.editorconfig b/src/Detector/darknet/src/.editorconfig deleted file mode 100644 index 2eb162b28..000000000 --- a/src/Detector/darknet/src/.editorconfig +++ /dev/null @@ -1,8 +0,0 @@ -root=true - -[*] -trim_trailing_whitespace = true -indent_style = space -indent_size = 4 - - diff --git a/src/Detector/darknet/src/activation_kernels.cu b/src/Detector/darknet/src/activation_kernels.cu deleted file mode 100644 index 3e9e1391f..000000000 --- a/src/Detector/darknet/src/activation_kernels.cu +++ /dev/null @@ -1,746 +0,0 @@ -#include "darknet.h" 
-#include -#include -#include -#include - -#include "activations.h" -#include "dark_cuda.h" - -__device__ float lhtan_activate_kernel(float x) -{ - if(x < 0) return .001*x; - if(x > 1) return .001*(x-1) + 1; - return x; -} -__device__ float lhtan_gradient_kernel(float x) -{ - if(x > 0 && x < 1) return 1; - return .001; -} - -__device__ float hardtan_activate_kernel(float x) -{ - if (x < -1) return -1; - if (x > 1) return 1; - return x; -} -__device__ float linear_activate_kernel(float x){return x;} -__device__ float logistic_activate_kernel(float x){return 1.f/(1.f + expf(-x));} -__device__ float loggy_activate_kernel(float x){return 2.f/(1.f + expf(-x)) - 1;} -__device__ float relu_activate_kernel(float x){return x*(x>0);} -__device__ float relu6_activate_kernel(float x) { return min_val_cmp(max_val_cmp(x, 0), 6); } -__device__ float elu_activate_kernel(float x){return (x >= 0)*x + (x < 0)*(expf(x)-1);} -__device__ float selu_activate_kernel(float x) { return (x >= 0)*1.0507f*x + (x < 0)*1.0507f*1.6732f*(expf(x) - 1); } -__device__ float relie_activate_kernel(float x){return (x>0) ? x : .01f*x;} -__device__ float ramp_activate_kernel(float x){return x*(x>0)+.1f*x;} -__device__ float leaky_activate_kernel(float x){return (x>0) ? 
x : .1f*x;} -__device__ float tanh_activate_kernel(float x){return (2/(1 + expf(-2*x)) - 1);} -__device__ float gelu_activate_kernel(float x){return (0.5*x*(1 + tanhf(0.797885*x + 0.035677*powf(x, 3))));} -__device__ float softplus_kernel(float x, float threshold = 20) { - if (x > threshold) return x; // too large - else if (x < -threshold) return expf(x); // too small - return log1pf(expf(x)); - //return logf(expf(x) + 1); -} -__device__ float plse_activate_kernel(float x) -{ - if(x < -4) return .01f * (x + 4); - if(x > 4) return .01f * (x - 4) + 1; - return .125f*x + .5f; -} -__device__ float stair_activate_kernel(float x) -{ - int n = floorf(x); - if (n%2 == 0) return floorf(x/2.f); - else return (x - n) + floorf(x/2.f); -} - - -__device__ float hardtan_gradient_kernel(float x) -{ - if (x > -1 && x < 1) return 1; - return 0; -} -__device__ float linear_gradient_kernel(float x){return 1;} -__device__ float logistic_gradient_kernel(float x){return (1-x)*x;} -__device__ float loggy_gradient_kernel(float x) -{ - float y = (x+1.F)/2.F; - return 2*(1-y)*y; -} -__device__ float relu_gradient_kernel(float x){return (x>0);} -__device__ float relu6_gradient_kernel(float x) { return (x > 0 && x < 6); } -__device__ float elu_gradient_kernel(float x){return (x >= 0) + (x < 0)*(x + 1);} -__device__ float selu_gradient_kernel(float x) { return (x >= 0)*1.0507f + (x < 0)*(x + 1.0507f*1.6732f); } -__device__ float relie_gradient_kernel(float x){return (x>0) ? 1 : .01f;} -__device__ float ramp_gradient_kernel(float x){return (x>0)+.1f;} -__device__ float leaky_gradient_kernel(float x){return (x>0) ? 
1 : .1f;} -__device__ float tanh_gradient_kernel(float x){return 1-x*x;} -__device__ float sech_gpu(float x) { return 2 / (expf(x) + expf(-x)); } -__device__ float gelu_gradient_kernel(float x) { - const float x3 = powf(x, 3); - return 0.5*tanhf(0.0356774*x3 + 0.797885*x) + (0.0535161*x3 + 0.398942*x) * powf(sech_gpu(0.0356774*x3 + 0.797885*x), 2) + 0.5; -} -__device__ float plse_gradient_kernel(float x){return (x < 0 || x > 1) ? .01f : .125f;} -__device__ float stair_gradient_kernel(float x) -{ - if (floor(x) == x) return 0; - return 1; -} - -__device__ float activate_kernel(float x, ACTIVATION a) -{ - switch(a){ - case LINEAR: - return linear_activate_kernel(x); - case LOGISTIC: - return logistic_activate_kernel(x); - case LOGGY: - return loggy_activate_kernel(x); - case RELU: - return relu_activate_kernel(x); - case RELU6: - return relu6_activate_kernel(x); - case ELU: - return elu_activate_kernel(x); - case SELU: - return selu_activate_kernel(x); - case GELU: - return gelu_activate_kernel(x); - case RELIE: - return relie_activate_kernel(x); - case RAMP: - return ramp_activate_kernel(x); - case LEAKY: - return leaky_activate_kernel(x); - case TANH: - return tanh_activate_kernel(x); - case PLSE: - return plse_activate_kernel(x); - case STAIR: - return stair_activate_kernel(x); - case HARDTAN: - return hardtan_activate_kernel(x); - case LHTAN: - return lhtan_activate_kernel(x); - } - return 0; -} - -__device__ float gradient_kernel(float x, ACTIVATION a) -{ - switch (a) { - case LINEAR: - return linear_gradient_kernel(x); - case LOGISTIC: - return logistic_gradient_kernel(x); - case LOGGY: - return loggy_gradient_kernel(x); - case RELU: - return relu_gradient_kernel(x); - case RELU6: - return relu6_gradient_kernel(x); - case NORM_CHAN: - return relu_gradient_kernel(x); - case ELU: - return elu_gradient_kernel(x); - case SELU: - return selu_gradient_kernel(x); - case GELU: - return gelu_gradient_kernel(x); - case RELIE: - return relie_gradient_kernel(x); - case 
RAMP: - return ramp_gradient_kernel(x); - case LEAKY: - return leaky_gradient_kernel(x); - case TANH: - return tanh_gradient_kernel(x); - case PLSE: - return plse_gradient_kernel(x); - case STAIR: - return stair_gradient_kernel(x); - case HARDTAN: - return hardtan_gradient_kernel(x); - case LHTAN: - return lhtan_gradient_kernel(x); - } - return 0; -} - -__global__ void binary_gradient_array_kernel(float *x, float *dy, int n, int s, BINARY_ACTIVATION a, float *dx) -{ - int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - int i = id % s; - int b = id / s; - float x1 = x[b*s + i]; - float x2 = x[b*s + s / 2 + i]; - if (id < n) { - float de = dy[id]; - dx[b*s + i] = x2*de; - dx[b*s + s / 2 + i] = x1*de; - } -} - -extern "C" void binary_gradient_array_gpu(float *x, float *dx, int n, int size, BINARY_ACTIVATION a, float *y) -{ - binary_gradient_array_kernel << > >(x, dx, n / 2, size, a, y); - CHECK_CUDA(cudaPeekAtLastError()); -} -__global__ void binary_activate_array_kernel(float *x, int n, int s, BINARY_ACTIVATION a, float *y) -{ - int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - int i = id % s; - int b = id / s; - float x1 = x[b*s + i]; - float x2 = x[b*s + s / 2 + i]; - if (id < n) y[id] = x1*x2; -} - -extern "C" void binary_activate_array_gpu(float *x, int n, int size, BINARY_ACTIVATION a, float *y) -{ - binary_activate_array_kernel << > >(x, n / 2, size, a, y); - CHECK_CUDA(cudaPeekAtLastError()); -} - -__global__ void activate_array_kernel(float *x, int n, ACTIVATION a) -{ - int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if(i < n) x[i] = activate_kernel(x[i], a); -} - - - -__global__ void activate_array_swish_kernel(float *x, int n, float *output_sigmoid_gpu, float *output_gpu) -{ - int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (i < n) { - float x_val = x[i]; - float sigmoid = logistic_activate_kernel(x_val); - if (output_sigmoid_gpu) output_sigmoid_gpu[i] = 
sigmoid; - output_gpu[i] = x_val * sigmoid; - } -} - -__device__ float mish_njuffa(float x) -{ - float r; - float e = expf(x); - r = 1.0f / fmaf(fmaf(-0.5f, e, -1.0f), e, -1.0f); - r = fmaf(r, x, x); - return r; -} - -__device__ float mish_yashas(float x) -{ - float e = __expf(x); - if (x <= -18.0f) - return x * e; - - float n = e * e + 2 * e; - if (x <= -5.0f) - return x * __fdividef(n, n + 2); - - return x - 2 * __fdividef(x, n + 2); -} - -__device__ float mish_yashas2(float x) -{ - float e = __expf(x); - float n = e * e + 2 * e; - if (x <= -0.6f) - return x * __fdividef(n, n + 2); - - return x - 2 * __fdividef(x, n + 2); -} - -// https://github.com/digantamisra98/Mish -__global__ void activate_array_mish_kernel(float *x, int n, float *activation_input, float *output_gpu) -{ - int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (i < n) { - const float MISH_THRESHOLD = 20; - float x_val = x[i]; - if (activation_input) activation_input[i] = x_val; // store value before activation - //output_gpu[i] = x_val * tanh_activate_kernel(logf(1 + expf(x_val))); - - // Pytorch: https://github.com/thomasbrandon/mish-cuda/blob/master/csrc/mish.h#L17-L20 - // TF: https://github.com/tensorflow/addons/blob/093cdfa85d334cbe19a37624c33198f3140109ed/tensorflow_addons/custom_ops/activations/cc/kernels/mish_op.h#L40-L49 - // log1p(x) == log(x + 1) - //output_gpu[i] = x_val * tanh_activate_kernel( softplus_kernel(x_val, MISH_THRESHOLD) ); - output_gpu[i] = mish_yashas2(x_val); - //output_gpu[i] = mish_njuffa(x_val); - } -} - -__device__ float hard_mish_yashas(float x) -{ - if (x > 0) - return x; - if (x > -2) - return x * x / 2 + x; - return 0; -} - -__global__ void activate_array_hard_mish_kernel(float *x, int n, float *activation_input, float *output_gpu) -{ - int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (i < n) { - - float x_val = x[i]; - if (activation_input) activation_input[i] = x_val; // store value before activation - 
output_gpu[i] = hard_mish_yashas(x_val); - } -} -__global__ void activate_array_leaky_kernel(float *x, int n) -{ - int index = blockIdx.x*blockDim.x + threadIdx.x; - if (index < n) { - x[index] = leaky_activate_kernel(x[index]); - } -} - -__global__ void activate_array_selu_kernel(float *x, int n) -{ - int index = blockIdx.x*blockDim.x + threadIdx.x; - if (index < n) { - x[index] = selu_activate_kernel(x[index]); - } -} - -__global__ void activate_array_gelu_kernel(float *x, int n) -{ - int index = blockIdx.x*blockDim.x + threadIdx.x; - if (index < n) { - x[index] = gelu_activate_kernel(x[index]); - } -} - -__global__ void activate_array_logistic_kernel(float *x, int n) -{ - int index = blockIdx.x*blockDim.x + threadIdx.x; - if (index < n) { - x[index] = logistic_activate_kernel(x[index]); - } -} - -__global__ void activate_array_tanh_kernel(float *x, int n) -{ - int index = blockIdx.x*blockDim.x + threadIdx.x; - if (index < n) { - x[index] = tanh_activate_kernel(x[index]); - } -} - -__global__ void activate_array_hardtan_kernel(float *x, int n) -{ - int index = blockIdx.x*blockDim.x + threadIdx.x; - if (index < n) { - x[index] = hardtan_activate_kernel(x[index]); - } -} - -__global__ void activate_array_relu_kernel(float *x, int n) -{ - int index = blockIdx.x*blockDim.x + threadIdx.x; - if (index < n) { - x[index] = relu_activate_kernel(x[index]); - } -} - -__global__ void activate_array_relu6_kernel(float *x, int n) -{ - int index = blockIdx.x*blockDim.x + threadIdx.x; - if (index < n) { - x[index] = relu6_activate_kernel(x[index]); - } -} - -__global__ void gradient_array_kernel(float *x, int n, ACTIVATION a, float *delta) -{ - int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if(i < n) delta[i] *= gradient_kernel(x[i], a); -} - -// https://github.com/BVLC/caffe/blob/04ab089db018a292ae48d51732dd6c66766b36b6/src/caffe/layers/swish_layer.cu#L28-L30 -__global__ void gradient_array_swish_kernel(float *x, int n, float *sigmoid_gpu, float 
*delta) -{ - int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (i < n) { - float swish = x[i]; - delta[i] *= swish + sigmoid_gpu[i] * (1 - swish); // gradient_kernel(x[i], a); - } -} - -// https://github.com/digantamisra98/Mish -__global__ void gradient_array_mish_kernel(int n, float *activation_input_gpu, float *delta) -{ - int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (i < n) { - const float MISH_THRESHOLD = 20.0f; - - // implementation from TensorFlow: https://github.com/tensorflow/addons/blob/093cdfa85d334cbe19a37624c33198f3140109ed/tensorflow_addons/custom_ops/activations/cc/kernels/mish_op.h#L66-L80 - // implementation from Pytorch: https://github.com/thomasbrandon/mish-cuda/blob/master/csrc/mish.h#L26-L31 - // log1p(x) == log(x + 1) - const float inp = activation_input_gpu[i]; - const float sp = softplus_kernel(inp, MISH_THRESHOLD); - const float grad_sp = -expm1f(-sp); - //const float grad_sp = 1 - expf(-sp); - const float tsp = tanh(sp); - const float grad_tsp = (1 - tsp*tsp) * grad_sp; - const float grad = inp * grad_tsp + tsp; - delta[i] *= grad; - - //float x = activation_input[i]; - //float d = 2 * expf(x) + expf(2 * x) + 2; - //float w = 4 * (x + 1) + 4 * expf(2 * x) + expf(3 * x) + expf(x)*(4 * x + 6); - //float derivative = expf(x) * w / (d * d); - //delta[i] *= derivative; - } -} - -__device__ float hard_mish_yashas_grad(float x) -{ - if (x > 0) - return 1; - if (x > -2) - return x + 1; - return 0; -} - -__global__ void gradient_array_hard_mish_kernel(int n, float *activation_input_gpu, float *delta) -{ - int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (i < n) { - - const float x = activation_input_gpu[i]; - delta[i] *= hard_mish_yashas_grad(x); - } -} - -__global__ void gradient_array_leaky_kernel(float *x, int n, float *delta) -{ - int index = blockIdx.x*blockDim.x + threadIdx.x; - if (index < n) { - delta[index] *= leaky_gradient_kernel(x[index]); - } -} - 
-__global__ void gradient_array_revleaky_kernel(float *x, int n, float *delta) -{ - int index = blockIdx.x*blockDim.x + threadIdx.x; - if (index < n) { - delta[index] /= leaky_gradient_kernel(x[index]); - } -} - -__global__ void gradient_array_selu_kernel(float *x, int n, float *delta) -{ - int index = blockIdx.x*blockDim.x + threadIdx.x; - if (index < n) { - delta[index] *= selu_gradient_kernel(x[index]); - } -} - -__global__ void gradient_array_gelu_kernel(float *x, int n, float *delta) -{ - int index = blockIdx.x*blockDim.x + threadIdx.x; - if (index < n) { - delta[index] *= gelu_gradient_kernel(x[index]); - } -} - -__global__ void gradient_array_logistic_kernel(float *x, int n, float *delta) -{ - int index = blockIdx.x*blockDim.x + threadIdx.x; - if (index < n) { - delta[index] *= logistic_gradient_kernel(x[index]); - } -} - -__global__ void gradient_array_tanh_kernel(float *x, int n, float *delta) -{ - int index = blockIdx.x*blockDim.x + threadIdx.x; - if (index < n) { - delta[index] *= tanh_gradient_kernel(x[index]); - } -} - -__global__ void gradient_array_hardtan_kernel(float *x, int n, float *delta) -{ - int index = blockIdx.x*blockDim.x + threadIdx.x; - if (index < n) { - delta[index] *= hardtan_gradient_kernel(x[index]); - } -} - -__global__ void gradient_array_relu_kernel(float *x, int n, float *delta) -{ - int index = blockIdx.x*blockDim.x + threadIdx.x; - if (index < n) { - delta[index] *= relu_gradient_kernel(x[index]); - } -} - -__global__ void gradient_array_relu6_kernel(float *x, int n, float *delta) -{ - int index = blockIdx.x*blockDim.x + threadIdx.x; - if (index < n) { - delta[index] *= relu6_gradient_kernel(x[index]); - } -} - -extern "C" void activate_array_ongpu(float *x, int n, ACTIVATION a) -{ - const int num_blocks = get_number_of_blocks(n, BLOCK); - if (a == LINEAR) return; - else if (a == LEAKY || a == REVLEAKY) activate_array_leaky_kernel << > >(x, n); - else if (a == LOGISTIC) activate_array_logistic_kernel << > >(x, n); - else if (a 
== TANH) activate_array_tanh_kernel << > >(x, n); - else if (a == HARDTAN) activate_array_hardtan_kernel << > >(x, n); - else if (a == RELU) activate_array_relu_kernel << > >(x, n); - else if (a == RELU6) activate_array_relu6_kernel << > >(x, n); - else if (a == SELU) activate_array_selu_kernel << > >(x, n); - else if (a == GELU) activate_array_gelu_kernel << > >(x, n); - else - activate_array_kernel<<>>(x, n, a); - CHECK_CUDA(cudaPeekAtLastError()); -} - -extern "C" void activate_array_swish_ongpu(float *x, int n, float *output_sigmoid_gpu, float *output_gpu) -{ - const int num_blocks = get_number_of_blocks(n, BLOCK); - activate_array_swish_kernel << > >(x, n, output_sigmoid_gpu, output_gpu); - CHECK_CUDA(cudaPeekAtLastError()); -} - -extern "C" void activate_array_mish_ongpu(float *x, int n, float *activation_input_gpu, float *output_gpu) -{ - const int num_blocks = get_number_of_blocks(n, BLOCK); - activate_array_mish_kernel << > >(x, n, activation_input_gpu, output_gpu); - CHECK_CUDA(cudaPeekAtLastError()); -} - -extern "C" void activate_array_hard_mish_ongpu(float *x, int n, float *activation_input_gpu, float *output_gpu) -{ - const int num_blocks = get_number_of_blocks(n, BLOCK); - activate_array_hard_mish_kernel << > >(x, n, activation_input_gpu, output_gpu); - CHECK_CUDA(cudaPeekAtLastError()); -} - -extern "C" void gradient_array_ongpu(float *x, int n, ACTIVATION a, float *delta) -{ - const int num_blocks = get_number_of_blocks(n, BLOCK); - if (a == LINEAR) return; - else if (a == LEAKY) gradient_array_leaky_kernel << > >(x, n, delta); - else if (a == REVLEAKY) gradient_array_revleaky_kernel << > >(x, n, delta); - else if (a == LOGISTIC) gradient_array_logistic_kernel << > >(x, n, delta); - else if (a == TANH) gradient_array_tanh_kernel << > >(x, n, delta); - else if (a == HARDTAN) gradient_array_hardtan_kernel << > >(x, n, delta); - else if (a == RELU) gradient_array_relu_kernel << > >(x, n, delta); - else if (a == RELU6) gradient_array_relu6_kernel << > 
>(x, n, delta); - //else if (a == NORM_CHAN) gradient_array_relu_kernel << > >(x, n, delta); - else if (a == NORM_CHAN_SOFTMAX || a == NORM_CHAN) { - printf(" Error: should be used custom NORM_CHAN_SOFTMAX-function for gradient \n"); - exit(0); - } - else if (a == SELU) gradient_array_selu_kernel << > >(x, n, delta); - else if (a == GELU) gradient_array_gelu_kernel << > >(x, n, delta); - else - gradient_array_kernel << > > (x, n, a, delta); - CHECK_CUDA(cudaPeekAtLastError()); -} - - -extern "C" void gradient_array_swish_ongpu(float *x, int n, float *sigmoid_gpu, float *delta) -{ - const int num_blocks = get_number_of_blocks(n, BLOCK); - gradient_array_swish_kernel << > > (x, n, sigmoid_gpu, delta); - CHECK_CUDA(cudaPeekAtLastError()); -} - -extern "C" void gradient_array_mish_ongpu(int n, float *activation_input_gpu, float *delta) -{ - const int num_blocks = get_number_of_blocks(n, BLOCK); - gradient_array_mish_kernel << > > (n, activation_input_gpu, delta); - CHECK_CUDA(cudaPeekAtLastError()); -} - -extern "C" void gradient_array_hard_mish_ongpu(int n, float *activation_input_gpu, float *delta) -{ - const int num_blocks = get_number_of_blocks(n, BLOCK); - gradient_array_hard_mish_kernel << > > (n, activation_input_gpu, delta); - CHECK_CUDA(cudaPeekAtLastError()); -} - - -__global__ void activate_array_normalize_channels_kernel(float *x, int size, int batch, int channels, int wh_step, float *output_gpu) -{ - int i = blockIdx.x * blockDim.x + threadIdx.x; - - int wh_i = i % wh_step; - int b = i / wh_step; - - const float eps = 0.0001; - if (i < size) { - float sum = eps; - int k; - for (k = 0; k < channels; ++k) { - float val = x[wh_i + k * wh_step + b*wh_step*channels]; - if (val > 0) sum += val; - } - for (k = 0; k < channels; ++k) { - float val = x[wh_i + k * wh_step + b*wh_step*channels]; - if (val > 0) val = val / sum; - else val = 0; - output_gpu[wh_i + k * wh_step + b*wh_step*channels] = val; - } - } -} - -extern "C" void 
activate_array_normalize_channels_ongpu(float *x, int n, int batch, int channels, int wh_step, float *output_gpu) -{ - // n = w*h*c*batch - // size = w*h*batch - int size = n / channels; - - const int num_blocks = get_number_of_blocks(size, BLOCK); - - activate_array_normalize_channels_kernel << > > (x, size, batch, channels, wh_step, output_gpu); - CHECK_CUDA(cudaPeekAtLastError()); -} - - - -__global__ void activate_array_normalize_channels_softmax_kernel(float *x, int size, int batch, int channels, int wh_step, float *output_gpu, int use_max_val) -{ - int i = blockIdx.x * blockDim.x + threadIdx.x; - - int wh_i = i % wh_step; - int b = i / wh_step; - - const float eps = 0.0001; - if (i < size) { - float sum = eps; - float max_val = -FLT_MAX; - int k; - if (use_max_val) { - for (k = 0; k < channels; ++k) { - float val = x[wh_i + k * wh_step + b*wh_step*channels]; - if (val > max_val || k == 0) max_val = val; - } - } - else - max_val = 0; - - for (k = 0; k < channels; ++k) { - float val = x[wh_i + k * wh_step + b*wh_step*channels]; - sum += expf(val - max_val); - } - for (k = 0; k < channels; ++k) { - float val = x[wh_i + k * wh_step + b*wh_step*channels]; - val = expf(val - max_val) / sum; - if (isnan(val) || isinf(val)) val = 0; - output_gpu[wh_i + k * wh_step + b*wh_step*channels] = val; - } - } -} - -extern "C" void activate_array_normalize_channels_softmax_ongpu(float *x, int n, int batch, int channels, int wh_step, float *output_gpu, int use_max_val) -{ - // n = w*h*c*batch - // size = w*h*batch - int size = n / channels; - - const int num_blocks = get_number_of_blocks(size, BLOCK); - - activate_array_normalize_channels_softmax_kernel << > > (x, size, batch, channels, wh_step, output_gpu, use_max_val); - CHECK_CUDA(cudaPeekAtLastError()); -} - - - -__global__ void gradient_array_normalize_channels_softmax_kernel(float *x, int size, int batch, int channels, int wh_step, float *delta_gpu) -{ - int i = blockIdx.x * blockDim.x + threadIdx.x; - - int wh_i = i % 
wh_step; - int b = i / wh_step; - - if (i < size) { - int k; - /* - float grad = 0; - for (k = 0; k < channels; ++k) { - const int index = wh_i + k * wh_step + b*wh_step*channels; - float out = x[index]; - float delta = delta_gpu[index]; - grad += out*fabs(delta); - } - */ - for (k = 0; k < channels; ++k) { - const int index = wh_i + k * wh_step + b*wh_step*channels; - float delta = delta_gpu[index]; - float grad = x[index] * (1 - x[index]); - delta = delta * grad; - if (isnan(delta) || isinf(delta)) delta = 0; - delta_gpu[index] = delta; - } - } -} - -extern "C" void gradient_array_normalize_channels_softmax_ongpu(float *output_gpu, int n, int batch, int channels, int wh_step, float *delta_gpu) -{ - // n = w*h*c*batch - // size = w*h*batch - int size = n / channels; - - const int num_blocks = get_number_of_blocks(size, BLOCK); - - gradient_array_normalize_channels_softmax_kernel << > > (output_gpu, size, batch, channels, wh_step, delta_gpu); - CHECK_CUDA(cudaPeekAtLastError()); -} - - -__global__ void gradient_array_normalize_channels_kernel(float *x, int size, int batch, int channels, int wh_step, float *delta_gpu) -{ - int i = blockIdx.x * blockDim.x + threadIdx.x; - - int wh_i = i % wh_step; - int b = i / wh_step; - - if (i < size) { - int k; - /* - float grad = 0; - for (k = 0; k < channels; ++k) { - const int index = wh_i + k * wh_step + b*wh_step*channels; - float out = x[index]; - float delta = delta_gpu[index]; - grad += out*fabs(delta); - } - */ - for (k = 0; k < channels; ++k) { - const int index = wh_i + k * wh_step + b*wh_step*channels; - if (x[index] > 0) { - float delta = delta_gpu[index]; - float grad = x[index]; - delta = delta * grad; - delta_gpu[index] = delta; - } - } - } -} - -extern "C" void gradient_array_normalize_channels_ongpu(float *output_gpu, int n, int batch, int channels, int wh_step, float *delta_gpu) -{ - // n = w*h*c*batch - // size = w*h*batch - int size = n / channels; - - const int num_blocks = get_number_of_blocks(size, BLOCK); 
- - gradient_array_normalize_channels_kernel << > > (output_gpu, size, batch, channels, wh_step, delta_gpu); - CHECK_CUDA(cudaPeekAtLastError()); -} diff --git a/src/Detector/darknet/src/activation_layer.c b/src/Detector/darknet/src/activation_layer.c deleted file mode 100644 index 4383d7e1b..000000000 --- a/src/Detector/darknet/src/activation_layer.c +++ /dev/null @@ -1,63 +0,0 @@ -#include "activation_layer.h" -#include "utils.h" -#include "dark_cuda.h" -#include "blas.h" -#include "gemm.h" - -#include -#include -#include -#include - -layer make_activation_layer(int batch, int inputs, ACTIVATION activation) -{ - layer l = { (LAYER_TYPE)0 }; - l.type = ACTIVE; - - l.inputs = inputs; - l.outputs = inputs; - l.batch=batch; - - l.output = (float*)xcalloc(batch * inputs, sizeof(float)); - l.delta = (float*)xcalloc(batch * inputs, sizeof(float)); - - l.forward = forward_activation_layer; - l.backward = backward_activation_layer; -#ifdef GPU - l.forward_gpu = forward_activation_layer_gpu; - l.backward_gpu = backward_activation_layer_gpu; - - l.output_gpu = cuda_make_array(l.output, inputs*batch); - l.delta_gpu = cuda_make_array(l.delta, inputs*batch); -#endif - l.activation = activation; - fprintf(stderr, "Activation Layer: %d inputs\n", inputs); - return l; -} - -void forward_activation_layer(layer l, network_state state) -{ - copy_cpu(l.outputs*l.batch, state.input, 1, l.output, 1); - activate_array(l.output, l.outputs*l.batch, l.activation); -} - -void backward_activation_layer(layer l, network_state state) -{ - gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); - copy_cpu(l.outputs*l.batch, l.delta, 1, state.delta, 1); -} - -#ifdef GPU - -void forward_activation_layer_gpu(layer l, network_state state) -{ - copy_ongpu(l.outputs*l.batch, state.input, 1, l.output_gpu, 1); - activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation); -} - -void backward_activation_layer_gpu(layer l, network_state state) -{ - gradient_array_ongpu(l.output_gpu, 
l.outputs*l.batch, l.activation, l.delta_gpu); - copy_ongpu(l.outputs*l.batch, l.delta_gpu, 1, state.delta, 1); -} -#endif diff --git a/src/Detector/darknet/src/activation_layer.h b/src/Detector/darknet/src/activation_layer.h deleted file mode 100644 index c766c6af0..000000000 --- a/src/Detector/darknet/src/activation_layer.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef ACTIVATION_LAYER_H -#define ACTIVATION_LAYER_H - -#include "activations.h" -#include "layer.h" -#include "network.h" - -#ifdef __cplusplus -extern "C" { -#endif -layer make_activation_layer(int batch, int inputs, ACTIVATION activation); - -void forward_activation_layer(layer l, network_state state); -void backward_activation_layer(layer l, network_state state); - -#ifdef GPU -void forward_activation_layer_gpu(layer l, network_state state); -void backward_activation_layer_gpu(layer l, network_state state); -#endif - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/Detector/darknet/src/activations.c b/src/Detector/darknet/src/activations.c deleted file mode 100644 index 87ba1d9e3..000000000 --- a/src/Detector/darknet/src/activations.c +++ /dev/null @@ -1,420 +0,0 @@ -#include "activations.h" - -#include -#include -#include -#include -#include - -char *get_activation_string(ACTIVATION a) -{ - switch(a){ - case LOGISTIC: - return "logistic"; - case LOGGY: - return "loggy"; - case RELU: - return "relu"; - case ELU: - return "elu"; - case SELU: - return "selu"; - case GELU: - return "gelu"; - case RELIE: - return "relie"; - case RAMP: - return "ramp"; - case LINEAR: - return "linear"; - case TANH: - return "tanh"; - case PLSE: - return "plse"; - case LEAKY: - return "leaky"; - case STAIR: - return "stair"; - case HARDTAN: - return "hardtan"; - case LHTAN: - return "lhtan"; - default: - break; - } - return "relu"; -} - -ACTIVATION get_activation(char *s) -{ - if (strcmp(s, "logistic")==0) return LOGISTIC; - if (strcmp(s, "swish") == 0) return SWISH; - if (strcmp(s, "mish") == 0) return MISH; - if 
(strcmp(s, "hard_mish") == 0) return HARD_MISH; - if (strcmp(s, "normalize_channels") == 0) return NORM_CHAN; - if (strcmp(s, "normalize_channels_softmax") == 0) return NORM_CHAN_SOFTMAX; - if (strcmp(s, "normalize_channels_softmax_maxval") == 0) return NORM_CHAN_SOFTMAX_MAXVAL; - if (strcmp(s, "loggy")==0) return LOGGY; - if (strcmp(s, "relu")==0) return RELU; - if (strcmp(s, "relu6") == 0) return RELU6; - if (strcmp(s, "elu")==0) return ELU; - if (strcmp(s, "selu") == 0) return SELU; - if (strcmp(s, "gelu") == 0) return GELU; - if (strcmp(s, "relie")==0) return RELIE; - if (strcmp(s, "plse")==0) return PLSE; - if (strcmp(s, "hardtan")==0) return HARDTAN; - if (strcmp(s, "lhtan")==0) return LHTAN; - if (strcmp(s, "linear")==0) return LINEAR; - if (strcmp(s, "ramp")==0) return RAMP; - if (strcmp(s, "revleaky") == 0) return REVLEAKY; - if (strcmp(s, "leaky")==0) return LEAKY; - if (strcmp(s, "tanh")==0) return TANH; - if (strcmp(s, "stair")==0) return STAIR; - fprintf(stderr, "Couldn't find activation function %s, going with ReLU\n", s); - return RELU; -} - -float activate(float x, ACTIVATION a) -{ - switch(a){ - case LINEAR: - return linear_activate(x); - case LOGISTIC: - return logistic_activate(x); - case LOGGY: - return loggy_activate(x); - case RELU: - return relu_activate(x); - case ELU: - return elu_activate(x); - case SELU: - return selu_activate(x); - case GELU: - return gelu_activate(x); - case RELIE: - return relie_activate(x); - case RAMP: - return ramp_activate(x); - case REVLEAKY: - case LEAKY: - return leaky_activate(x); - case TANH: - return tanh_activate(x); - case PLSE: - return plse_activate(x); - case STAIR: - return stair_activate(x); - case HARDTAN: - return hardtan_activate(x); - case LHTAN: - return lhtan_activate(x); - } - return 0; -} - -void activate_array(float *x, const int n, const ACTIVATION a) -{ - int i; - if (a == LINEAR) {} - else if (a == LEAKY) { - #pragma omp parallel for - for (i = 0; i < n; ++i) { - x[i] = 
leaky_activate(x[i]); - } - } - else if (a == LOGISTIC) { - #pragma omp parallel for - for (i = 0; i < n; ++i) { - x[i] = logistic_activate(x[i]); - } - } - else { - for (i = 0; i < n; ++i) { - x[i] = activate(x[i], a); - } - } -} - -void activate_array_swish(float *x, const int n, float * output_sigmoid, float * output) -{ - int i; - #pragma omp parallel for - for (i = 0; i < n; ++i) { - float x_val = x[i]; - float sigmoid = logistic_activate(x_val); - output_sigmoid[i] = sigmoid; - output[i] = x_val * sigmoid; - } -} - -// https://github.com/digantamisra98/Mish -void activate_array_mish(float *x, const int n, float * activation_input, float * output) -{ - const float MISH_THRESHOLD = 20; - int i; - #pragma omp parallel for - for (i = 0; i < n; ++i) { - float x_val = x[i]; - activation_input[i] = x_val; // store value before activation - output[i] = x_val * tanh_activate( softplus_activate(x_val, MISH_THRESHOLD) ); - } -} - -static float hard_mish_yashas(float x) -{ - if (x > 0) - return x; - if (x > -2) - return x * x / 2 + x; - return 0; -} - -void activate_array_hard_mish(float *x, const int n, float * activation_input, float * output) -{ - int i; - #pragma omp parallel for - for (i = 0; i < n; ++i) { - float x_val = x[i]; - activation_input[i] = x_val; // store value before activation - output[i] = hard_mish_yashas(x_val); - } -} - -void activate_array_normalize_channels(float *x, const int n, int batch, int channels, int wh_step, float *output) -{ - int size = n / channels; - - int i; - #pragma omp parallel for - for (i = 0; i < size; ++i) { - int wh_i = i % wh_step; - int b = i / wh_step; - - const float eps = 0.0001; - if (i < size) { - float sum = eps; - int k; - for (k = 0; k < channels; ++k) { - float val = x[wh_i + k * wh_step + b*wh_step*channels]; - if (val > 0) sum += val; - } - for (k = 0; k < channels; ++k) { - float val = x[wh_i + k * wh_step + b*wh_step*channels]; - if (val > 0) val = val / sum; - else val = 0; - output[wh_i + k * wh_step + 
b*wh_step*channels] = val; - } - } - } -} - -void activate_array_normalize_channels_softmax(float *x, const int n, int batch, int channels, int wh_step, float *output, int use_max_val) -{ - int size = n / channels; - - int i; - #pragma omp parallel for - for (i = 0; i < size; ++i) { - int wh_i = i % wh_step; - int b = i / wh_step; - - const float eps = 0.0001; - if (i < size) { - float sum = eps; - float max_val = -FLT_MAX; - int k; - if (use_max_val) { - for (k = 0; k < channels; ++k) { - float val = x[wh_i + k * wh_step + b*wh_step*channels]; - if (val > max_val || k == 0) max_val = val; - } - } - else - max_val = 0; - - for (k = 0; k < channels; ++k) { - float val = x[wh_i + k * wh_step + b*wh_step*channels]; - sum += expf(val - max_val); - } - for (k = 0; k < channels; ++k) { - float val = x[wh_i + k * wh_step + b*wh_step*channels]; - val = expf(val - max_val) / sum; - output[wh_i + k * wh_step + b*wh_step*channels] = val; - } - } - } -} - -void gradient_array_normalize_channels_softmax(float *x, const int n, int batch, int channels, int wh_step, float *delta) -{ - int size = n / channels; - - int i; - #pragma omp parallel for - for (i = 0; i < size; ++i) { - int wh_i = i % wh_step; - int b = i / wh_step; - - if (i < size) { - float grad = 0; - int k; - for (k = 0; k < channels; ++k) { - const int index = wh_i + k * wh_step + b*wh_step*channels; - float out = x[index]; - float d = delta[index]; - grad += out*d; - } - for (k = 0; k < channels; ++k) { - const int index = wh_i + k * wh_step + b*wh_step*channels; - float d = delta[index]; - d = d * grad; - delta[index] = d; - } - } - } -} - -void gradient_array_normalize_channels(float *x, const int n, int batch, int channels, int wh_step, float *delta) -{ - int size = n / channels; - - int i; - #pragma omp parallel for - for (i = 0; i < size; ++i) { - int wh_i = i % wh_step; - int b = i / wh_step; - - if (i < size) { - float grad = 0; - int k; - for (k = 0; k < channels; ++k) { - const int index = wh_i + k * 
wh_step + b*wh_step*channels; - float out = x[index]; - float d = delta[index]; - grad += out*d; - } - for (k = 0; k < channels; ++k) { - const int index = wh_i + k * wh_step + b*wh_step*channels; - if (x[index] > 0) { - float d = delta[index]; - d = d * grad; - delta[index] = d; - } - } - } - } -} - -float gradient(float x, ACTIVATION a) -{ - switch(a){ - case LINEAR: - return linear_gradient(x); - case LOGISTIC: - return logistic_gradient(x); - case LOGGY: - return loggy_gradient(x); - case RELU: - return relu_gradient(x); - case RELU6: - return relu6_gradient(x); - case NORM_CHAN: - //return relu_gradient(x); - case NORM_CHAN_SOFTMAX_MAXVAL: - //... - case NORM_CHAN_SOFTMAX: - printf(" Error: should be used custom NORM_CHAN or NORM_CHAN_SOFTMAX-function for gradient \n"); - exit(0); - return 0; - case ELU: - return elu_gradient(x); - case SELU: - return selu_gradient(x); - case GELU: - return gelu_gradient(x); - case RELIE: - return relie_gradient(x); - case RAMP: - return ramp_gradient(x); - case REVLEAKY: - case LEAKY: - return leaky_gradient(x); - case TANH: - return tanh_gradient(x); - case PLSE: - return plse_gradient(x); - case STAIR: - return stair_gradient(x); - case HARDTAN: - return hardtan_gradient(x); - case LHTAN: - return lhtan_gradient(x); - } - return 0; -} - -void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta) -{ - int i; - #pragma omp parallel for - for(i = 0; i < n; ++i){ - delta[i] *= gradient(x[i], a); - } -} - -// https://github.com/BVLC/caffe/blob/04ab089db018a292ae48d51732dd6c66766b36b6/src/caffe/layers/swish_layer.cpp#L54-L56 -void gradient_array_swish(const float *x, const int n, const float * sigmoid, float * delta) -{ - int i; - #pragma omp parallel for - for (i = 0; i < n; ++i) { - float swish = x[i]; - delta[i] *= swish + sigmoid[i]*(1 - swish); - } -} - -// https://github.com/digantamisra98/Mish -void gradient_array_mish(const int n, const float * activation_input, float * delta) -{ - int i; - #pragma 
omp parallel for - for (i = 0; i < n; ++i) { - const float MISH_THRESHOLD = 20.0f; - - // implementation from TensorFlow: https://github.com/tensorflow/addons/commit/093cdfa85d334cbe19a37624c33198f3140109ed - // implementation from Pytorch: https://github.com/thomasbrandon/mish-cuda/blob/master/csrc/mish.h#L26-L31 - float inp = activation_input[i]; - const float sp = softplus_activate(inp, MISH_THRESHOLD); - const float grad_sp = 1 - exp(-sp); - const float tsp = tanh(sp); - const float grad_tsp = (1 - tsp*tsp) * grad_sp; - const float grad = inp * grad_tsp + tsp; - delta[i] *= grad; - - - //float x = activation_input[i]; - //float d = 2 * expf(x) + expf(2 * x) + 2; - //float w = 4 * (x + 1) + 4 * expf(2 * x) + expf(3 * x) + expf(x)*(4 * x + 6); - //float derivative = expf(x) * w / (d * d); - //delta[i] *= derivative; - } -} - -static float hard_mish_yashas_grad(float x) -{ - if (x > 0) - return 1; - if (x > -2) - return x + 1; - return 0; -} - -void gradient_array_hard_mish(const int n, const float * activation_input, float * delta) -{ - int i; - #pragma omp parallel for - for (i = 0; i < n; ++i) { - float inp = activation_input[i]; - delta[i] *= hard_mish_yashas_grad(inp); - } -} diff --git a/src/Detector/darknet/src/activations.h b/src/Detector/darknet/src/activations.h deleted file mode 100644 index 95c2c2c13..000000000 --- a/src/Detector/darknet/src/activations.h +++ /dev/null @@ -1,134 +0,0 @@ -#ifndef ACTIVATIONS_H -#define ACTIVATIONS_H -#include "darknet.h" -#include "dark_cuda.h" -#include "math.h" -#include "utils.h" - -//typedef enum{ -// LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN, SELU, SWISH, MISH -//}ACTIVATION; - -#ifdef __cplusplus -extern "C" { -#endif -ACTIVATION get_activation(char *s); - -char *get_activation_string(ACTIVATION a); -float activate(float x, ACTIVATION a); -float gradient(float x, ACTIVATION a); -void gradient_array(const float *x, const int n, const ACTIVATION a, float *delta); -void 
gradient_array_swish(const float *x, const int n, const float * sigmoid, float * delta); -void gradient_array_mish(const int n, const float * activation_input, float * delta); -void gradient_array_hard_mish(const int n, const float * activation_input, float * delta); -void activate_array(float *x, const int n, const ACTIVATION a); -void activate_array_swish(float *x, const int n, float * output_sigmoid, float * output); -void activate_array_mish(float *x, const int n, float * activation_input, float * output); -void activate_array_hard_mish(float *x, const int n, float * activation_input, float * output); -void activate_array_normalize_channels(float *x, const int n, int batch, int channels, int wh_step, float *output); -void gradient_array_normalize_channels(float *x, const int n, int batch, int channels, int wh_step, float *delta); -void activate_array_normalize_channels_softmax(float *x, const int n, int batch, int channels, int wh_step, float *output, int use_max_val); -void gradient_array_normalize_channels_softmax(float *x, const int n, int batch, int channels, int wh_step, float *delta); -#ifdef GPU -void activate_array_ongpu(float *x, int n, ACTIVATION a); -void activate_array_swish_ongpu(float *x, int n, float *output_sigmoid_gpu, float *output_gpu); -void activate_array_mish_ongpu(float *x, int n, float *activation_input_gpu, float *output_gpu); -void activate_array_hard_mish_ongpu(float *x, int n, float *activation_input_gpu, float *output_gpu); -void gradient_array_ongpu(float *x, int n, ACTIVATION a, float *delta); -void gradient_array_swish_ongpu(float *x, int n, float *sigmoid_gpu, float *delta); -void gradient_array_mish_ongpu(int n, float *activation_input_gpu, float *delta); -void gradient_array_hard_mish_ongpu(int n, float *activation_input_gpu, float *delta); -void activate_array_normalize_channels_ongpu(float *x, int n, int batch, int channels, int wh_step, float *output_gpu); -void gradient_array_normalize_channels_ongpu(float *output_gpu, int 
n, int batch, int channels, int wh_step, float *delta_gpu); -void activate_array_normalize_channels_softmax_ongpu(float *x, int n, int batch, int channels, int wh_step, float *output_gpu, int use_max_val); -void gradient_array_normalize_channels_softmax_ongpu(float *output_gpu, int n, int batch, int channels, int wh_step, float *delta_gpu); - -#endif - -static inline float stair_activate(float x) -{ - int n = floorf(x); - if (n%2 == 0) return floorf(x/2.f); - else return (x - n) + floorf(x/2.f); -} -static inline float hardtan_activate(float x) -{ - if (x < -1) return -1; - if (x > 1) return 1; - return x; -} -static inline float linear_activate(float x){return x;} -static inline float logistic_activate(float x){return 1.f/(1.f + expf(-x));} -static inline float loggy_activate(float x){return 2.f/(1.f + expf(-x)) - 1;} -static inline float relu_activate(float x){return x*(x>0);} -static inline float relu6_activate(float x) { return min_val_cmp(max_val_cmp(x, 0), 6); } -static inline float elu_activate(float x){return (x >= 0)*x + (x < 0)*(expf(x)-1);} -static inline float selu_activate(float x) { return (x >= 0)*1.0507f*x + (x < 0)*1.0507f*1.6732f*(expf(x) - 1); } -static inline float relie_activate(float x){return (x>0) ? x : .01f*x;} -static inline float ramp_activate(float x){return x*(x>0)+.1f*x;} -static inline float leaky_activate(float x){return (x>0) ? 
x : .1f*x;} -//static inline float tanh_activate(float x){return (expf(2*x)-1)/(expf(2*x)+1);} -static inline float tanh_activate(float x) { return (2 / (1 + expf(-2 * x)) - 1); } -static inline float gelu_activate(float x) { return (0.5*x*(1 + tanhf(0.797885*x + 0.035677*powf(x, 3)))); } -static inline float softplus_activate(float x, float threshold) { - if (x > threshold) return x; // too large - else if (x < -threshold) return expf(x); // too small - return logf(expf(x) + 1); -} -static inline float plse_activate(float x) -{ - if(x < -4) return .01f * (x + 4); - if(x > 4) return .01f * (x - 4) + 1; - return .125f*x + .5f; -} - -static inline float lhtan_activate(float x) -{ - if(x < 0) return .001f*x; - if(x > 1) return .001f*(x-1) + 1; - return x; -} -static inline float lhtan_gradient(float x) -{ - if(x > 0 && x < 1) return 1; - return .001f; -} - -static inline float hardtan_gradient(float x) -{ - if (x > -1 && x < 1) return 1; - return 0; -} -static inline float linear_gradient(float x){return 1;} -static inline float logistic_gradient(float x){return (1-x)*x;} -static inline float loggy_gradient(float x) -{ - float y = (x+1.f)/2.f; - return 2*(1-y)*y; -} -static inline float stair_gradient(float x) -{ - if (floor(x) == x) return 0; - return 1.0f; -} -static inline float relu_gradient(float x){return (x>0);} -static inline float relu6_gradient(float x) { return (x > 0 && x < 6); } -static inline float elu_gradient(float x){return (x >= 0) + (x < 0)*(x + 1);} -static inline float selu_gradient(float x) { return (x >= 0)*1.0507f + (x < 0)*(x + 1.0507f*1.6732f); } -static inline float relie_gradient(float x){return (x>0) ? 1 : .01f;} -static inline float ramp_gradient(float x){return (x>0)+.1f;} -static inline float leaky_gradient(float x){return (x>0) ? 
1 : .1f;} -static inline float tanh_gradient(float x){return 1-x*x;} - -static inline float sech(float x) { return 2 / (expf(x) + expf(-x)); } -static inline float gelu_gradient(float x) { - const float x3 = powf(x, 3); - return 0.5*tanhf(0.0356774*x3 + 0.797885*x) + (0.0535161*x3 + 0.398942*x) * powf(sech(0.0356774*x3 + 0.797885*x), 2) + 0.5; -} -static inline float plse_gradient(float x){return (x < 0 || x > 1) ? .01f : .125f;} - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/Detector/darknet/src/art.c b/src/Detector/darknet/src/art.c deleted file mode 100644 index 22e8ab86f..000000000 --- a/src/Detector/darknet/src/art.c +++ /dev/null @@ -1,75 +0,0 @@ -#include "network.h" -#include "utils.h" -#include "parser.h" -#include "option_list.h" -#include "blas.h" -#include "classifier.h" -#ifdef WIN32 -#include -#include "gettimeofday.h" -#else -#include -#endif - - -void demo_art(char *cfgfile, char *weightfile, int cam_index) -{ -#ifdef OPENCV - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - set_batch_network(&net, 1); - - srand(2222222); - cap_cv * cap; - - cap = get_capture_webcam(cam_index); - - char *window = "ArtJudgementBot9000!!!"; - if(!cap) error("Couldn't connect to webcam.\n"); - create_window_cv(window, 0, 512, 512); - int i; - int idx[] = {37, 401, 434}; - int n = sizeof(idx)/sizeof(idx[0]); - - while(1){ - image in = get_image_from_stream_cpp(cap); - image in_s = resize_image(in, net.w, net.h); - show_image(in, window); - - float *p = network_predict(net, in_s.data); - - printf("\033[2J"); - printf("\033[1;1H"); - - float score = 0; - for(i = 0; i < n; ++i){ - float s = p[idx[i]]; - if (s > score) score = s; - } - score = score; - printf("I APPRECIATE THIS ARTWORK: %10.7f%%\n", score*100); - printf("["); - int upper = 30; - for(i = 0; i < upper; ++i){ - printf("%c", ((i+.5) < score*upper) ? 
219 : ' '); - } - printf("]\n"); - - free_image(in_s); - free_image(in); - - wait_key_cv(1); - } -#endif -} - - -void run_art(int argc, char **argv) -{ - int cam_index = find_int_arg(argc, argv, "-c", 0); - char *cfg = argv[2]; - char *weights = argv[3]; - demo_art(cfg, weights, cam_index); -} diff --git a/src/Detector/darknet/src/avgpool_layer.c b/src/Detector/darknet/src/avgpool_layer.c deleted file mode 100644 index 2b595aacf..000000000 --- a/src/Detector/darknet/src/avgpool_layer.c +++ /dev/null @@ -1,71 +0,0 @@ -#include "avgpool_layer.h" -#include "dark_cuda.h" -#include "utils.h" -#include - -avgpool_layer make_avgpool_layer(int batch, int w, int h, int c) -{ - fprintf(stderr, "avg %4d x%4d x%4d -> %4d\n", w, h, c, c); - avgpool_layer l = { (LAYER_TYPE)0 }; - l.type = AVGPOOL; - l.batch = batch; - l.h = h; - l.w = w; - l.c = c; - l.out_w = 1; - l.out_h = 1; - l.out_c = c; - l.outputs = l.out_c; - l.inputs = h*w*c; - int output_size = l.outputs * batch; - l.output = (float*)xcalloc(output_size, sizeof(float)); - l.delta = (float*)xcalloc(output_size, sizeof(float)); - l.forward = forward_avgpool_layer; - l.backward = backward_avgpool_layer; - #ifdef GPU - l.forward_gpu = forward_avgpool_layer_gpu; - l.backward_gpu = backward_avgpool_layer_gpu; - l.output_gpu = cuda_make_array(l.output, output_size); - l.delta_gpu = cuda_make_array(l.delta, output_size); - #endif - return l; -} - -void resize_avgpool_layer(avgpool_layer *l, int w, int h) -{ - l->w = w; - l->h = h; - l->inputs = h*w*l->c; -} - -void forward_avgpool_layer(const avgpool_layer l, network_state state) -{ - int b,i,k; - - for(b = 0; b < l.batch; ++b){ - for(k = 0; k < l.c; ++k){ - int out_index = k + b*l.c; - l.output[out_index] = 0; - for(i = 0; i < l.h*l.w; ++i){ - int in_index = i + l.h*l.w*(k + b*l.c); - l.output[out_index] += state.input[in_index]; - } - l.output[out_index] /= l.h*l.w; - } - } -} - -void backward_avgpool_layer(const avgpool_layer l, network_state state) -{ - int b,i,k; - - 
for(b = 0; b < l.batch; ++b){ - for(k = 0; k < l.c; ++k){ - int out_index = k + b*l.c; - for(i = 0; i < l.h*l.w; ++i){ - int in_index = i + l.h*l.w*(k + b*l.c); - state.delta[in_index] += l.delta[out_index] / (l.h*l.w); - } - } - } -} diff --git a/src/Detector/darknet/src/avgpool_layer.h b/src/Detector/darknet/src/avgpool_layer.h deleted file mode 100644 index 2277ec6d0..000000000 --- a/src/Detector/darknet/src/avgpool_layer.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef AVGPOOL_LAYER_H -#define AVGPOOL_LAYER_H - -#include "image.h" -#include "dark_cuda.h" -#include "layer.h" -#include "network.h" - -typedef layer avgpool_layer; - -#ifdef __cplusplus -extern "C" { -#endif -image get_avgpool_image(avgpool_layer l); -avgpool_layer make_avgpool_layer(int batch, int w, int h, int c); -void resize_avgpool_layer(avgpool_layer *l, int w, int h); -void forward_avgpool_layer(const avgpool_layer l, network_state state); -void backward_avgpool_layer(const avgpool_layer l, network_state state); - -#ifdef GPU -void forward_avgpool_layer_gpu(avgpool_layer l, network_state state); -void backward_avgpool_layer_gpu(avgpool_layer l, network_state state); -#endif - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/Detector/darknet/src/avgpool_layer_kernels.cu b/src/Detector/darknet/src/avgpool_layer_kernels.cu deleted file mode 100644 index b8cdd603d..000000000 --- a/src/Detector/darknet/src/avgpool_layer_kernels.cu +++ /dev/null @@ -1,58 +0,0 @@ -#include -#include -#include - -#include "avgpool_layer.h" -#include "dark_cuda.h" - -__global__ void forward_avgpool_layer_kernel(int n, int w, int h, int c, float *input, float *output) -{ - int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if(id >= n) return; - - int k = id % c; - id /= c; - int b = id; - - int i; - int out_index = (k + c*b); - output[out_index] = 0; - for(i = 0; i < w*h; ++i){ - int in_index = i + h*w*(k + b*c); - output[out_index] += input[in_index]; - } - output[out_index] /= w*h; -} - 
-__global__ void backward_avgpool_layer_kernel(int n, int w, int h, int c, float *in_delta, float *out_delta) -{ - int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if(id >= n) return; - - int k = id % c; - id /= c; - int b = id; - - int i; - int out_index = (k + c*b); - for(i = 0; i < w*h; ++i){ - int in_index = i + h*w*(k + b*c); - in_delta[in_index] += out_delta[out_index] / (w*h); - } -} - -extern "C" void forward_avgpool_layer_gpu(avgpool_layer layer, network_state state) -{ - size_t n = layer.c*layer.batch; - - forward_avgpool_layer_kernel<<>>(n, layer.w, layer.h, layer.c, state.input, layer.output_gpu); - CHECK_CUDA(cudaPeekAtLastError()); -} - -extern "C" void backward_avgpool_layer_gpu(avgpool_layer layer, network_state state) -{ - size_t n = layer.c*layer.batch; - - backward_avgpool_layer_kernel<<>>(n, layer.w, layer.h, layer.c, state.delta, layer.delta_gpu); - CHECK_CUDA(cudaPeekAtLastError()); -} diff --git a/src/Detector/darknet/src/batchnorm_layer.c b/src/Detector/darknet/src/batchnorm_layer.c deleted file mode 100644 index eeba5cc57..000000000 --- a/src/Detector/darknet/src/batchnorm_layer.c +++ /dev/null @@ -1,425 +0,0 @@ -#include "batchnorm_layer.h" -#include "blas.h" -#include "utils.h" -#include - -layer make_batchnorm_layer(int batch, int w, int h, int c, int train) -{ - fprintf(stderr, "Batch Normalization Layer: %d x %d x %d image\n", w,h,c); - layer layer = { (LAYER_TYPE)0 }; - layer.type = BATCHNORM; - layer.batch = batch; - layer.train = train; - layer.h = layer.out_h = h; - layer.w = layer.out_w = w; - layer.c = layer.out_c = c; - - layer.n = layer.c; - layer.output = (float*)xcalloc(h * w * c * batch, sizeof(float)); - layer.delta = (float*)xcalloc(h * w * c * batch, sizeof(float)); - layer.inputs = w*h*c; - layer.outputs = layer.inputs; - - layer.biases = (float*)xcalloc(c, sizeof(float)); - layer.bias_updates = (float*)xcalloc(c, sizeof(float)); - - layer.scales = (float*)xcalloc(c, sizeof(float)); - 
layer.scale_updates = (float*)xcalloc(c, sizeof(float)); - int i; - for(i = 0; i < c; ++i){ - layer.scales[i] = 1; - } - - layer.mean = (float*)xcalloc(c, sizeof(float)); - layer.variance = (float*)xcalloc(c, sizeof(float)); - - layer.rolling_mean = (float*)xcalloc(c, sizeof(float)); - layer.rolling_variance = (float*)xcalloc(c, sizeof(float)); - - layer.forward = forward_batchnorm_layer; - layer.backward = backward_batchnorm_layer; - layer.update = update_batchnorm_layer; -#ifdef GPU - layer.forward_gpu = forward_batchnorm_layer_gpu; - layer.backward_gpu = backward_batchnorm_layer_gpu; - layer.update_gpu = update_batchnorm_layer_gpu; - - layer.output_gpu = cuda_make_array(layer.output, h * w * c * batch); - - layer.biases_gpu = cuda_make_array(layer.biases, c); - layer.scales_gpu = cuda_make_array(layer.scales, c); - - if (train) { - layer.delta_gpu = cuda_make_array(layer.delta, h * w * c * batch); - - layer.bias_updates_gpu = cuda_make_array(layer.bias_updates, c); - layer.scale_updates_gpu = cuda_make_array(layer.scale_updates, c); - - layer.mean_delta_gpu = cuda_make_array(layer.mean, c); - layer.variance_delta_gpu = cuda_make_array(layer.variance, c); - } - - layer.mean_gpu = cuda_make_array(layer.mean, c); - layer.variance_gpu = cuda_make_array(layer.variance, c); - - layer.rolling_mean_gpu = cuda_make_array(layer.mean, c); - layer.rolling_variance_gpu = cuda_make_array(layer.variance, c); - - if (train) { - layer.x_gpu = cuda_make_array(layer.output, layer.batch*layer.outputs); -#ifndef CUDNN - layer.x_norm_gpu = cuda_make_array(layer.output, layer.batch*layer.outputs); -#endif // not CUDNN - } - -#ifdef CUDNN - CHECK_CUDNN(cudnnCreateTensorDescriptor(&layer.normTensorDesc)); - CHECK_CUDNN(cudnnCreateTensorDescriptor(&layer.normDstTensorDesc)); - CHECK_CUDNN(cudnnSetTensor4dDescriptor(layer.normDstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, layer.batch, layer.out_c, layer.out_h, layer.out_w)); - 
CHECK_CUDNN(cudnnSetTensor4dDescriptor(layer.normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, layer.out_c, 1, 1)); -#endif -#endif - return layer; -} - -void backward_scale_cpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates) -{ - int i,b,f; - for(f = 0; f < n; ++f){ - float sum = 0; - for(b = 0; b < batch; ++b){ - for(i = 0; i < size; ++i){ - int index = i + size*(f + n*b); - sum += delta[index] * x_norm[index]; - } - } - scale_updates[f] += sum; - } -} - -void mean_delta_cpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) -{ - - int i,j,k; - for(i = 0; i < filters; ++i){ - mean_delta[i] = 0; - for (j = 0; j < batch; ++j) { - for (k = 0; k < spatial; ++k) { - int index = j*filters*spatial + i*spatial + k; - mean_delta[i] += delta[index]; - } - } - mean_delta[i] *= (-1./sqrt(variance[i] + .00001f)); - } -} -void variance_delta_cpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta) -{ - - int i,j,k; - for(i = 0; i < filters; ++i){ - variance_delta[i] = 0; - for(j = 0; j < batch; ++j){ - for(k = 0; k < spatial; ++k){ - int index = j*filters*spatial + i*spatial + k; - variance_delta[i] += delta[index]*(x[index] - mean[i]); - } - } - variance_delta[i] *= -.5 * pow(variance[i] + .00001f, (float)(-3./2.)); - } -} -void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta) -{ - int f, j, k; - for(j = 0; j < batch; ++j){ - for(f = 0; f < filters; ++f){ - for(k = 0; k < spatial; ++k){ - int index = j*filters*spatial + f*spatial + k; - delta[index] = delta[index] * 1./(sqrt(variance[f]) + .00001f) + variance_delta[f] * 2. 
* (x[index] - mean[f]) / (spatial * batch) + mean_delta[f]/(spatial*batch); - } - } - } -} - -void resize_batchnorm_layer(layer *l, int w, int h) -{ - l->out_h = l->h = h; - l->out_w = l->w = w; - l->outputs = l->inputs = h*w*l->c; - - const int output_size = l->outputs * l->batch; - - l->output = (float*)realloc(l->output, output_size * sizeof(float)); - l->delta = (float*)realloc(l->delta, output_size * sizeof(float)); - -#ifdef GPU - cuda_free(l->output_gpu); - l->output_gpu = cuda_make_array(l->output, output_size); - - if (l->train) { - cuda_free(l->delta_gpu); - l->delta_gpu = cuda_make_array(l->delta, output_size); - - cuda_free(l->x_gpu); - l->x_gpu = cuda_make_array(l->output, output_size); -#ifndef CUDNN - cuda_free(l->x_norm_gpu); - l->x_norm_gpu = cuda_make_array(l->output, output_size); -#endif // not CUDNN - } - - -#ifdef CUDNN - CHECK_CUDNN(cudnnDestroyTensorDescriptor(l->normDstTensorDesc)); - CHECK_CUDNN(cudnnCreateTensorDescriptor(&l->normDstTensorDesc)); - CHECK_CUDNN(cudnnSetTensor4dDescriptor(l->normDstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w)); -#endif // CUDNN -#endif // GPU -} - -void forward_batchnorm_layer(layer l, network_state state) -{ - if(l.type == BATCHNORM) copy_cpu(l.outputs*l.batch, state.input, 1, l.output, 1); - if(l.type == CONNECTED){ - l.out_c = l.outputs; - l.out_h = l.out_w = 1; - } - if(state.train){ - mean_cpu(l.output, l.batch, l.out_c, l.out_h*l.out_w, l.mean); - variance_cpu(l.output, l.mean, l.batch, l.out_c, l.out_h*l.out_w, l.variance); - - scal_cpu(l.out_c, .9, l.rolling_mean, 1); - axpy_cpu(l.out_c, .1, l.mean, 1, l.rolling_mean, 1); - scal_cpu(l.out_c, .9, l.rolling_variance, 1); - axpy_cpu(l.out_c, .1, l.variance, 1, l.rolling_variance, 1); - - copy_cpu(l.outputs*l.batch, l.output, 1, l.x, 1); - normalize_cpu(l.output, l.mean, l.variance, l.batch, l.out_c, l.out_h*l.out_w); - copy_cpu(l.outputs*l.batch, l.output, 1, l.x_norm, 1); - } else { - normalize_cpu(l.output, 
l.rolling_mean, l.rolling_variance, l.batch, l.out_c, l.out_h*l.out_w); - } - scale_bias(l.output, l.scales, l.batch, l.out_c, l.out_h*l.out_w); - add_bias(l.output, l.biases, l.batch, l.out_c, l.out_w*l.out_h); -} - -void backward_batchnorm_layer(const layer l, network_state state) -{ - backward_scale_cpu(l.x_norm, l.delta, l.batch, l.out_c, l.out_w*l.out_h, l.scale_updates); - - scale_bias(l.delta, l.scales, l.batch, l.out_c, l.out_h*l.out_w); - - mean_delta_cpu(l.delta, l.variance, l.batch, l.out_c, l.out_w*l.out_h, l.mean_delta); - variance_delta_cpu(l.x, l.delta, l.mean, l.variance, l.batch, l.out_c, l.out_w*l.out_h, l.variance_delta); - normalize_delta_cpu(l.x, l.mean, l.variance, l.mean_delta, l.variance_delta, l.batch, l.out_c, l.out_w*l.out_h, l.delta); - if(l.type == BATCHNORM) copy_cpu(l.outputs*l.batch, l.delta, 1, state.delta, 1); -} - -void update_batchnorm_layer(layer l, int batch, float learning_rate, float momentum, float decay) -{ - //int size = l.nweights; - axpy_cpu(l.c, learning_rate / batch, l.bias_updates, 1, l.biases, 1); - scal_cpu(l.c, momentum, l.bias_updates, 1); - - axpy_cpu(l.c, learning_rate / batch, l.scale_updates, 1, l.scales, 1); - scal_cpu(l.c, momentum, l.scale_updates, 1); -} - - - - -#ifdef GPU - -void pull_batchnorm_layer(layer l) -{ - cuda_pull_array(l.biases_gpu, l.biases, l.out_c); - cuda_pull_array(l.scales_gpu, l.scales, l.out_c); - cuda_pull_array(l.rolling_mean_gpu, l.rolling_mean, l.out_c); - cuda_pull_array(l.rolling_variance_gpu, l.rolling_variance, l.out_c); -} -void push_batchnorm_layer(layer l) -{ - cuda_push_array(l.biases_gpu, l.biases, l.out_c); - cuda_push_array(l.scales_gpu, l.scales, l.out_c); - cuda_push_array(l.rolling_mean_gpu, l.rolling_mean, l.out_c); - cuda_push_array(l.rolling_variance_gpu, l.rolling_variance, l.out_c); -} - -void forward_batchnorm_layer_gpu(layer l, network_state state) -{ - if (l.type == BATCHNORM) simple_copy_ongpu(l.outputs*l.batch, state.input, l.output_gpu); - 
//copy_ongpu(l.outputs*l.batch, state.input, 1, l.output_gpu, 1); - - if (state.net.adversarial) { - normalize_gpu(l.output_gpu, l.rolling_mean_gpu, l.rolling_variance_gpu, l.batch, l.out_c, l.out_h*l.out_w); - scale_bias_gpu(l.output_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w); - add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.out_c, l.out_w*l.out_h); - return; - } - - if (state.train) { - simple_copy_ongpu(l.outputs*l.batch, l.output_gpu, l.x_gpu); - - // cbn - if (l.batch_normalize == 2) { - - fast_mean_gpu(l.output_gpu, l.batch, l.out_c, l.out_h*l.out_w, l.mean_gpu); - - //fast_v_gpu(l.output_gpu, l.mean_gpu, l.batch, l.out_c, l.out_h*l.out_w, l.v_cbn_gpu); - const int minibatch_index = state.net.current_subdivision + 1; - const int max_minibatch_index = state.net.subdivisions; - //printf("\n minibatch_index = %d, max_minibatch_index = %d \n", minibatch_index, max_minibatch_index); - const float alpha = 0.01; - - int inverse_variance = 0; -#ifdef CUDNN - inverse_variance = 1; -#endif // CUDNN - - fast_v_cbn_gpu(l.output_gpu, l.mean_gpu, l.batch, l.out_c, l.out_h*l.out_w, minibatch_index, max_minibatch_index, l.m_cbn_avg_gpu, l.v_cbn_avg_gpu, l.variance_gpu, - alpha, l.rolling_mean_gpu, l.rolling_variance_gpu, inverse_variance, .00001); - - normalize_scale_bias_gpu(l.output_gpu, l.mean_gpu, l.variance_gpu, l.scales_gpu, l.biases_gpu, l.batch, l.out_c, l.out_h*l.out_w, inverse_variance, .00001f); - -#ifndef CUDNN - simple_copy_ongpu(l.outputs*l.batch, l.output_gpu, l.x_norm_gpu); -#endif // CUDNN - - //printf("\n CBN, minibatch_index = %d \n", minibatch_index); - } - else { -#ifdef CUDNN - float one = 1; - float zero = 0; - cudnnBatchNormalizationForwardTraining(cudnn_handle(), - CUDNN_BATCHNORM_SPATIAL, - &one, - &zero, - l.normDstTensorDesc, - l.x_gpu, // input - l.normDstTensorDesc, - l.output_gpu, // output - l.normTensorDesc, - l.scales_gpu, - l.biases_gpu, - .01, - l.rolling_mean_gpu, // output (should be FP32) - l.rolling_variance_gpu, // 
output (should be FP32) - .00001, - l.mean_gpu, // output (should be FP32) - l.variance_gpu); // output (should be FP32) - - if (state.net.try_fix_nan) { - fix_nan_and_inf(l.scales_gpu, l.n); - fix_nan_and_inf(l.biases_gpu, l.n); - fix_nan_and_inf(l.mean_gpu, l.n); - fix_nan_and_inf(l.variance_gpu, l.n); - fix_nan_and_inf(l.rolling_mean_gpu, l.n); - fix_nan_and_inf(l.rolling_variance_gpu, l.n); - } - - //simple_copy_ongpu(l.outputs*l.batch, l.output_gpu, l.x_norm_gpu); -#else // CUDNN - fast_mean_gpu(l.output_gpu, l.batch, l.out_c, l.out_h*l.out_w, l.mean_gpu); - fast_variance_gpu(l.output_gpu, l.mean_gpu, l.batch, l.out_c, l.out_h*l.out_w, l.variance_gpu); - - scal_ongpu(l.out_c, .99, l.rolling_mean_gpu, 1); - axpy_ongpu(l.out_c, .01, l.mean_gpu, 1, l.rolling_mean_gpu, 1); - scal_ongpu(l.out_c, .99, l.rolling_variance_gpu, 1); - axpy_ongpu(l.out_c, .01, l.variance_gpu, 1, l.rolling_variance_gpu, 1); - - copy_ongpu(l.outputs*l.batch, l.output_gpu, 1, l.x_gpu, 1); - normalize_gpu(l.output_gpu, l.mean_gpu, l.variance_gpu, l.batch, l.out_c, l.out_h*l.out_w); - copy_ongpu(l.outputs*l.batch, l.output_gpu, 1, l.x_norm_gpu, 1); - - scale_bias_gpu(l.output_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w); - add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.out_c, l.out_w*l.out_h); -#endif // CUDNN - } - } - else { - normalize_gpu(l.output_gpu, l.rolling_mean_gpu, l.rolling_variance_gpu, l.batch, l.out_c, l.out_h*l.out_w); - scale_bias_gpu(l.output_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w); - add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.out_c, l.out_w*l.out_h); - } - -} - -void backward_batchnorm_layer_gpu(layer l, network_state state) -{ - if (state.net.adversarial) { - inverse_variance_ongpu(l.out_c, l.rolling_variance_gpu, l.variance_gpu, 0.00001); - - scale_bias_gpu(l.delta_gpu, l.variance_gpu, l.batch, l.out_c, l.out_h*l.out_w); - scale_bias_gpu(l.delta_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w); - return; - } - - if (!state.train) 
{ - //l.mean_gpu = l.rolling_mean_gpu; - //l.variance_gpu = l.rolling_variance_gpu; - simple_copy_ongpu(l.out_c, l.rolling_mean_gpu, l.mean_gpu); -#ifdef CUDNN - inverse_variance_ongpu(l.out_c, l.rolling_variance_gpu, l.variance_gpu, 0.00001); -#else - simple_copy_ongpu(l.out_c, l.rolling_variance_gpu, l.variance_gpu); -#endif - } - -#ifdef CUDNN - float one = 1; - float zero = 0; - cudnnBatchNormalizationBackward(cudnn_handle(), - CUDNN_BATCHNORM_SPATIAL, - &one, - &zero, - &one, - &one, - l.normDstTensorDesc, - l.x_gpu, // input - l.normDstTensorDesc, - l.delta_gpu, // input - l.normDstTensorDesc, - l.output_gpu, //l.x_norm_gpu, // output - l.normTensorDesc, - l.scales_gpu, // input (should be FP32) - l.scale_updates_gpu, // output (should be FP32) - l.bias_updates_gpu, // output (should be FP32) - .00001, - l.mean_gpu, // input (should be FP32) - l.variance_gpu); // input (should be FP32) - simple_copy_ongpu(l.outputs*l.batch, l.output_gpu, l.delta_gpu); - //simple_copy_ongpu(l.outputs*l.batch, l.x_norm_gpu, l.delta_gpu); -#else // CUDNN - backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.out_c, l.out_w*l.out_h); - backward_scale_gpu(l.x_norm_gpu, l.delta_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.scale_updates_gpu); - - scale_bias_gpu(l.delta_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w); - - fast_mean_delta_gpu(l.delta_gpu, l.variance_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.mean_delta_gpu); - fast_variance_delta_gpu(l.x_gpu, l.delta_gpu, l.mean_gpu, l.variance_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.variance_delta_gpu); - normalize_delta_gpu(l.x_gpu, l.mean_gpu, l.variance_gpu, l.mean_delta_gpu, l.variance_delta_gpu, l.batch, l.out_c, l.out_w*l.out_h, l.delta_gpu); -#endif // CUDNN - if (l.type == BATCHNORM) simple_copy_ongpu(l.outputs*l.batch, l.delta_gpu, state.delta); - //copy_ongpu(l.outputs*l.batch, l.delta_gpu, 1, state.delta, 1); - - if (state.net.try_fix_nan) { - fix_nan_and_inf(l.scale_updates_gpu, l.n); - 
fix_nan_and_inf(l.bias_updates_gpu, l.n); - } -} - -void update_batchnorm_layer_gpu(layer l, int batch, float learning_rate_init, float momentum, float decay, float loss_scale) -{ - float learning_rate = learning_rate_init * l.learning_rate_scale / loss_scale; - //float momentum = a.momentum; - //float decay = a.decay; - //int batch = a.batch; - - axpy_ongpu(l.c, learning_rate / batch, l.bias_updates_gpu, 1, l.biases_gpu, 1); - scal_ongpu(l.c, momentum, l.bias_updates_gpu, 1); - - axpy_ongpu(l.c, learning_rate / batch, l.scale_updates_gpu, 1, l.scales_gpu, 1); - scal_ongpu(l.c, momentum, l.scale_updates_gpu, 1); -} - -#endif // GPU diff --git a/src/Detector/darknet/src/batchnorm_layer.h b/src/Detector/darknet/src/batchnorm_layer.h deleted file mode 100644 index afdc54b7d..000000000 --- a/src/Detector/darknet/src/batchnorm_layer.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef BATCHNORM_LAYER_H -#define BATCHNORM_LAYER_H - -#include "image.h" -#include "layer.h" -#include "network.h" - -#ifdef __cplusplus -extern "C" { -#endif -layer make_batchnorm_layer(int batch, int w, int h, int c, int train); -void forward_batchnorm_layer(layer l, network_state state); -void backward_batchnorm_layer(layer l, network_state state); -void update_batchnorm_layer(layer l, int batch, float learning_rate, float momentum, float decay); - -void resize_batchnorm_layer(layer *l, int w, int h); - -#ifdef GPU -void forward_batchnorm_layer_gpu(layer l, network_state state); -void backward_batchnorm_layer_gpu(layer l, network_state state); -void update_batchnorm_layer_gpu(layer l, int batch, float learning_rate_init, float momentum, float decay, float loss_scale); -void pull_batchnorm_layer(layer l); -void push_batchnorm_layer(layer l); -#endif - -#ifdef __cplusplus -} -#endif -#endif diff --git a/src/Detector/darknet/src/blas.c b/src/Detector/darknet/src/blas.c deleted file mode 100644 index 6cd1deb06..000000000 --- a/src/Detector/darknet/src/blas.c +++ /dev/null @@ -1,891 +0,0 @@ -#include 
"blas.h" -#include "utils.h" - -#include -#include -#include -#include -#include -#include -void reorg_cpu(float *x, int out_w, int out_h, int out_c, int batch, int stride, int forward, float *out) -{ - int b,i,j,k; - int in_c = out_c/(stride*stride); - - //printf("\n out_c = %d, out_w = %d, out_h = %d, stride = %d, forward = %d \n", out_c, out_w, out_h, stride, forward); - //printf(" in_c = %d, in_w = %d, in_h = %d \n", in_c, out_w*stride, out_h*stride); - - for(b = 0; b < batch; ++b){ - for(k = 0; k < out_c; ++k){ - for(j = 0; j < out_h; ++j){ - for(i = 0; i < out_w; ++i){ - int in_index = i + out_w*(j + out_h*(k + out_c*b)); - int c2 = k % in_c; - int offset = k / in_c; - int w2 = i*stride + offset % stride; - int h2 = j*stride + offset / stride; - int out_index = w2 + out_w*stride*(h2 + out_h*stride*(c2 + in_c*b)); - if(forward) out[out_index] = x[in_index]; // used by default for forward (i.e. forward = 0) - else out[in_index] = x[out_index]; - } - } - } - } -} - -void flatten(float *x, int size, int layers, int batch, int forward) -{ - float* swap = (float*)xcalloc(size * layers * batch, sizeof(float)); - int i,c,b; - for(b = 0; b < batch; ++b){ - for(c = 0; c < layers; ++c){ - for(i = 0; i < size; ++i){ - int i1 = b*layers*size + c*size + i; - int i2 = b*layers*size + i*layers + c; - if (forward) swap[i2] = x[i1]; - else swap[i1] = x[i2]; - } - } - } - memcpy(x, swap, size*layers*batch*sizeof(float)); - free(swap); -} - -void weighted_sum_cpu(float *a, float *b, float *s, int n, float *c) -{ - int i; - for(i = 0; i < n; ++i){ - c[i] = s[i]*a[i] + (1-s[i])*(b ? 
b[i] : 0); - } -} - -void weighted_delta_cpu(float *a, float *b, float *s, float *da, float *db, float *ds, int n, float *dc) -{ - int i; - for(i = 0; i < n; ++i){ - if(da) da[i] += dc[i] * s[i]; - if(db) db[i] += dc[i] * (1-s[i]); - ds[i] += dc[i] * (a[i] - b[i]); - } -} - -static float relu(float src) { - if (src > 0) return src; - return 0; -} - -void shortcut_multilayer_cpu(int size, int src_outputs, int batch, int n, int *outputs_of_layers, float **layers_output, float *out, float *in, float *weights, int nweights, WEIGHTS_NORMALIZATION_T weights_normalization) -{ - // nweights - l.n or l.n*l.c or (l.n*l.c*l.h*l.w) - const int layer_step = nweights / (n + 1); // 1 or l.c or (l.c * l.h * l.w) - int step = 0; - if (nweights > 0) step = src_outputs / layer_step; // (l.c * l.h * l.w) or (l.w*l.h) or 1 - - int id; - #pragma omp parallel for - for (id = 0; id < size; ++id) { - - int src_id = id; - const int src_i = src_id % src_outputs; - src_id /= src_outputs; - int src_b = src_id; - - float sum = 1, max_val = -FLT_MAX; - int i; - if (weights && weights_normalization) { - if (weights_normalization == SOFTMAX_NORMALIZATION) { - for (i = 0; i < (n + 1); ++i) { - const int weights_index = src_i / step + i*layer_step; // [0 or c or (c, h ,w)] - float w = weights[weights_index]; - if (max_val < w) max_val = w; - } - } - const float eps = 0.0001; - sum = eps; - for (i = 0; i < (n + 1); ++i) { - const int weights_index = src_i / step + i*layer_step; // [0 or c or (c, h ,w)] - const float w = weights[weights_index]; - if (weights_normalization == RELU_NORMALIZATION) sum += relu(w); - else if (weights_normalization == SOFTMAX_NORMALIZATION) sum += expf(w - max_val); - } - } - - if (weights) { - float w = weights[src_i / step]; - if (weights_normalization == RELU_NORMALIZATION) w = relu(w) / sum; - else if (weights_normalization == SOFTMAX_NORMALIZATION) w = expf(w - max_val) / sum; - - out[id] = in[id] * w; // [0 or c or (c, h ,w)] - } - else out[id] = in[id]; - - // layers 
- for (i = 0; i < n; ++i) { - int add_outputs = outputs_of_layers[i]; - if (src_i < add_outputs) { - int add_index = add_outputs*src_b + src_i; - int out_index = id; - - float *add = layers_output[i]; - - if (weights) { - const int weights_index = src_i / step + (i + 1)*layer_step; // [0 or c or (c, h ,w)] - float w = weights[weights_index]; - if (weights_normalization == RELU_NORMALIZATION) w = relu(w) / sum; - else if (weights_normalization == SOFTMAX_NORMALIZATION) w = expf(w - max_val) / sum; - - out[out_index] += add[add_index] * w; // [0 or c or (c, h ,w)] - } - else out[out_index] += add[add_index]; - } - } - } -} - -void backward_shortcut_multilayer_cpu(int size, int src_outputs, int batch, int n, int *outputs_of_layers, - float **layers_delta, float *delta_out, float *delta_in, float *weights, float *weight_updates, int nweights, float *in, float **layers_output, WEIGHTS_NORMALIZATION_T weights_normalization) -{ - // nweights - l.n or l.n*l.c or (l.n*l.c*l.h*l.w) - const int layer_step = nweights / (n + 1); // 1 or l.c or (l.c * l.h * l.w) - int step = 0; - if (nweights > 0) step = src_outputs / layer_step; // (l.c * l.h * l.w) or (l.w*l.h) or 1 - - int id; - #pragma omp parallel for - for (id = 0; id < size; ++id) { - int src_id = id; - int src_i = src_id % src_outputs; - src_id /= src_outputs; - int src_b = src_id; - - float grad = 1, sum = 1, max_val = -FLT_MAX;; - int i; - if (weights && weights_normalization) { - if (weights_normalization == SOFTMAX_NORMALIZATION) { - for (i = 0; i < (n + 1); ++i) { - const int weights_index = src_i / step + i*layer_step; // [0 or c or (c, h ,w)] - float w = weights[weights_index]; - if (max_val < w) max_val = w; - } - } - const float eps = 0.0001; - sum = eps; - for (i = 0; i < (n + 1); ++i) { - const int weights_index = src_i / step + i*layer_step; // [0 or c or (c, h ,w)] - const float w = weights[weights_index]; - if (weights_normalization == RELU_NORMALIZATION) sum += relu(w); - else if (weights_normalization == 
SOFTMAX_NORMALIZATION) sum += expf(w - max_val); - } - - /* - grad = 0; - for (i = 0; i < (n + 1); ++i) { - const int weights_index = src_i / step + i*layer_step; // [0 or c or (c, h ,w)] - const float delta_w = delta_in[id] * in[id]; - const float w = weights[weights_index]; - if (weights_normalization == RELU_NORMALIZATION) grad += delta_w * relu(w) / sum; - else if (weights_normalization == SOFTMAX_NORMALIZATION) grad += delta_w * expf(w - max_val) / sum; - } - */ - } - - if (weights) { - float w = weights[src_i / step]; - if (weights_normalization == RELU_NORMALIZATION) w = relu(w) / sum; - else if (weights_normalization == SOFTMAX_NORMALIZATION) w = expf(w - max_val) / sum; - - delta_out[id] += delta_in[id] * w; // [0 or c or (c, h ,w)] - weight_updates[src_i / step] += delta_in[id] * in[id] * grad; - } - else delta_out[id] += delta_in[id]; - - // layers - for (i = 0; i < n; ++i) { - int add_outputs = outputs_of_layers[i]; - if (src_i < add_outputs) { - int add_index = add_outputs*src_b + src_i; - int out_index = id; - - float *layer_delta = layers_delta[i]; - if (weights) { - float *add = layers_output[i]; - - const int weights_index = src_i / step + (i + 1)*layer_step; // [0 or c or (c, h ,w)] - float w = weights[weights_index]; - if (weights_normalization == RELU_NORMALIZATION) w = relu(w) / sum; - else if (weights_normalization == SOFTMAX_NORMALIZATION) w = expf(w - max_val) / sum; - - layer_delta[add_index] += delta_in[id] * w; // [0 or c or (c, h ,w)] - weight_updates[weights_index] += delta_in[id] * add[add_index] * grad; - } - else layer_delta[add_index] += delta_in[id]; - } - } - } -} - -void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out) -{ - int stride = w1/w2; - int sample = w2/w1; - assert(stride == h1/h2); - assert(sample == h2/h1); - if(stride < 1) stride = 1; - if(sample < 1) sample = 1; - int minw = (w1 < w2) ? w1 : w2; - int minh = (h1 < h2) ? h1 : h2; - int minc = (c1 < c2) ? 
c1 : c2; - - int i,j,k,b; - for(b = 0; b < batch; ++b){ - for(k = 0; k < minc; ++k){ - for(j = 0; j < minh; ++j){ - for(i = 0; i < minw; ++i){ - int out_index = i*sample + w2*(j*sample + h2*(k + c2*b)); - int add_index = i*stride + w1*(j*stride + h1*(k + c1*b)); - out[out_index] += add[add_index]; - } - } - } - } -} - -void mean_cpu(float *x, int batch, int filters, int spatial, float *mean) -{ - float scale = 1./(batch * spatial); - int i,j,k; - for(i = 0; i < filters; ++i){ - mean[i] = 0; - for(j = 0; j < batch; ++j){ - for(k = 0; k < spatial; ++k){ - int index = j*filters*spatial + i*spatial + k; - mean[i] += x[index]; - } - } - mean[i] *= scale; - } -} - -void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance) -{ - float scale = 1./(batch * spatial - 1); - int i,j,k; - for(i = 0; i < filters; ++i){ - variance[i] = 0; - for(j = 0; j < batch; ++j){ - for(k = 0; k < spatial; ++k){ - int index = j*filters*spatial + i*spatial + k; - variance[i] += pow((x[index] - mean[i]), 2); - } - } - variance[i] *= scale; - } -} - -void normalize_cpu(float *x, float *mean, float *variance, int batch, int filters, int spatial) -{ - int b, f, i; - for(b = 0; b < batch; ++b){ - for(f = 0; f < filters; ++f){ - for(i = 0; i < spatial; ++i){ - int index = b*filters*spatial + f*spatial + i; - x[index] = (x[index] - mean[f])/(sqrt(variance[f] + .00001f)); - } - } - } -} - -void const_cpu(int N, float ALPHA, float *X, int INCX) -{ - int i; - for(i = 0; i < N; ++i) X[i*INCX] = ALPHA; -} - -void mul_cpu(int N, float *X, int INCX, float *Y, int INCY) -{ - int i; - for(i = 0; i < N; ++i) Y[i*INCY] *= X[i*INCX]; -} - -void pow_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY) -{ - int i; - for(i = 0; i < N; ++i) Y[i*INCY] = pow(X[i*INCX], ALPHA); -} - -void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY) -{ - int i; - for(i = 0; i < N; ++i) Y[i*INCY] += ALPHA*X[i*INCX]; -} - -void scal_cpu(int N, float ALPHA, float *X, 
int INCX) -{ - int i; - for(i = 0; i < N; ++i) X[i*INCX] *= ALPHA; -} - -void scal_add_cpu(int N, float ALPHA, float BETA, float *X, int INCX) -{ - int i; - for (i = 0; i < N; ++i) X[i*INCX] = X[i*INCX] * ALPHA + BETA; -} - -void fill_cpu(int N, float ALPHA, float *X, int INCX) -{ - int i; - if (INCX == 1 && ALPHA == 0) { - memset(X, 0, N * sizeof(float)); - } - else { - for (i = 0; i < N; ++i) X[i*INCX] = ALPHA; - } -} - -void deinter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT) -{ - int i, j; - int index = 0; - for(j = 0; j < B; ++j) { - for(i = 0; i < NX; ++i){ - if(X) X[j*NX + i] += OUT[index]; - ++index; - } - for(i = 0; i < NY; ++i){ - if(Y) Y[j*NY + i] += OUT[index]; - ++index; - } - } -} - -void inter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT) -{ - int i, j; - int index = 0; - for(j = 0; j < B; ++j) { - for(i = 0; i < NX; ++i){ - OUT[index++] = X[j*NX + i]; - } - for(i = 0; i < NY; ++i){ - OUT[index++] = Y[j*NY + i]; - } - } -} - -void copy_cpu(int N, float *X, int INCX, float *Y, int INCY) -{ - int i; - for(i = 0; i < N; ++i) Y[i*INCY] = X[i*INCX]; -} - -void mult_add_into_cpu(int N, float *X, float *Y, float *Z) -{ - int i; - for(i = 0; i < N; ++i) Z[i] += X[i]*Y[i]; -} - -void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error) -{ - int i; - for(i = 0; i < n; ++i){ - float diff = truth[i] - pred[i]; - float abs_val = fabs(diff); - if(abs_val < 1) { - error[i] = diff * diff; - delta[i] = diff; - } - else { - error[i] = 2*abs_val - 1; - delta[i] = (diff > 0) ? 1 : -1; - } - } -} - -void l1_cpu(int n, float *pred, float *truth, float *delta, float *error) -{ - int i; - for(i = 0; i < n; ++i){ - float diff = truth[i] - pred[i]; - error[i] = fabs(diff); - delta[i] = diff > 0 ? 1 : -1; - } -} - -void softmax_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error) -{ - int i; - for(i = 0; i < n; ++i){ - float t = truth[i]; - float p = pred[i]; - error[i] = (t) ? 
-log(p) : 0; - delta[i] = t-p; - } -} - -void logistic_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error) -{ - int i; - for(i = 0; i < n; ++i){ - float t = truth[i]; - float p = pred[i]; - error[i] = -t*log(p) - (1-t)*log(1-p); - delta[i] = t-p; - } -} - -void l2_cpu(int n, float *pred, float *truth, float *delta, float *error) -{ - int i; - for(i = 0; i < n; ++i){ - float diff = truth[i] - pred[i]; - error[i] = diff * diff; - delta[i] = diff; - } -} - -float dot_cpu(int N, float *X, int INCX, float *Y, int INCY) -{ - int i; - float dot = 0; - for(i = 0; i < N; ++i) dot += X[i*INCX] * Y[i*INCY]; - return dot; -} - -void softmax(float *input, int n, float temp, float *output, int stride) -{ - int i; - float sum = 0; - float largest = -FLT_MAX; - for(i = 0; i < n; ++i){ - if(input[i*stride] > largest) largest = input[i*stride]; - } - for(i = 0; i < n; ++i){ - float e = exp(input[i*stride]/temp - largest/temp); - sum += e; - output[i*stride] = e; - } - for(i = 0; i < n; ++i){ - output[i*stride] /= sum; - } -} - - -void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output) -{ - int g, b; - for(b = 0; b < batch; ++b){ - for(g = 0; g < groups; ++g){ - softmax(input + b*batch_offset + g*group_offset, n, temp, output + b*batch_offset + g*group_offset, stride); - } - } -} - -void upsample_cpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out) -{ - int i, j, k, b; - for (b = 0; b < batch; ++b) { - for (k = 0; k < c; ++k) { - for (j = 0; j < h*stride; ++j) { - for (i = 0; i < w*stride; ++i) { - int in_index = b*w*h*c + k*w*h + (j / stride)*w + i / stride; - int out_index = b*w*h*c*stride*stride + k*w*h*stride*stride + j*w*stride + i; - if (forward) out[out_index] = scale*in[in_index]; - else in[in_index] += scale*out[out_index]; - } - } - } - } -} - - -void constrain_cpu(int size, float ALPHA, float *X) -{ - int i; - for (i = 0; i < size; 
++i) { - X[i] = fminf(ALPHA, fmaxf(-ALPHA, X[i])); - } -} - -void fix_nan_and_inf_cpu(float *input, size_t size) -{ - int i; - for (i = 0; i < size; ++i) { - float val = input[i]; - if (isnan(val) || isinf(val)) - input[i] = 1.0f / i; // pseudo random value - } -} - -void get_embedding(float *src, int src_w, int src_h, int src_c, int embedding_size, int cur_w, int cur_h, int cur_n, int cur_b, float *dst) -{ - int i; - for (i = 0; i < embedding_size; ++i) { - const int src_index = cur_b*(src_c*src_h*src_w) + cur_n*(embedding_size*src_h*src_w) + i*src_h*src_w + cur_h*(src_w) + cur_w; - - const float val = src[src_index]; - dst[i] = val; - //printf(" val = %f, ", val); - } -} - - -// Euclidean_norm -float math_vector_length(float *A, unsigned int feature_size) -{ - float sum = 0; - int i; - for (i = 0; i < feature_size; ++i) - { - sum += A[i] * A[i]; - } - float vector_length = sqrtf(sum); - return vector_length; -} - -float cosine_similarity(float *A, float *B, unsigned int feature_size) -{ - float mul = 0.0, d_a = 0.0, d_b = 0.0; - - int i; - for(i = 0; i < feature_size; ++i) - { - mul += A[i] * B[i]; - d_a += A[i] * A[i]; - d_b += B[i] * B[i]; - } - float similarity; - float divider = sqrtf(d_a) * sqrtf(d_b); - if (divider > 0) similarity = mul / divider; - else similarity = 0; - - return similarity; -} - -int get_sim_P_index(size_t i, size_t j, contrastive_params *contrast_p, int contrast_p_size) -{ - size_t z; - for (z = 0; z < contrast_p_size; ++z) { - if (contrast_p[z].i == i && contrast_p[z].j == j) break; - } - if (z == contrast_p_size) { - return -1; // not found - } - - return z; // found -} - -int check_sim(size_t i, size_t j, contrastive_params *contrast_p, int contrast_p_size) -{ - size_t z; - for (z = 0; z < contrast_p_size; ++z) { - if (contrast_p[z].i == i && contrast_p[z].j == j) break; - } - if (z == contrast_p_size) { - return 0; // not found - } - - return 1; // found -} - -float find_sim(size_t i, size_t j, contrastive_params *contrast_p, int 
contrast_p_size) -{ - size_t z; - for (z = 0; z < contrast_p_size; ++z) { - if (contrast_p[z].i == i && contrast_p[z].j == j) break; - } - if (z == contrast_p_size) { - printf(" Error: find_sim(): sim isn't found: i = %d, j = %d, z = %d \n", i, j, z); - getchar(); - } - - return contrast_p[z].sim; -} - -float find_P_constrastive(size_t i, size_t j, contrastive_params *contrast_p, int contrast_p_size) -{ - size_t z; - for (z = 0; z < contrast_p_size; ++z) { - if (contrast_p[z].i == i && contrast_p[z].j == j) break; - } - if (z == contrast_p_size) { - printf(" Error: find_P_constrastive(): P isn't found: i = %d, j = %d, z = %d \n", i, j, z); - getchar(); - } - - return contrast_p[z].P; -} - -// num_of_samples = 2 * loaded_images = mini_batch_size -float P_constrastive_f_det(size_t il, int *labels, float **z, unsigned int feature_size, float temperature, contrastive_params *contrast_p, int contrast_p_size) -{ - const float sim = contrast_p[il].sim; - const size_t i = contrast_p[il].i; - const size_t j = contrast_p[il].j; - - const float numerator = expf(sim / temperature); - - float denominator = 0; - int k; - for (k = 0; k < contrast_p_size; ++k) { - contrastive_params cp = contrast_p[k]; - //if (k != i && labels[k] != labels[i]) { - //if (k != i) { - if (cp.i != i && cp.j == j) { - //const float sim_den = cp.sim; - ////const float sim_den = find_sim(k, l, contrast_p, contrast_p_size); // cosine_similarity(z[k], z[l], feature_size); - //denominator += expf(sim_den / temperature); - denominator += cp.exp_sim; - } - } - - float result = 0.9999; - if (denominator != 0) result = numerator / denominator; - if (result > 1) result = 0.9999; - return result; -} - -// num_of_samples = 2 * loaded_images = mini_batch_size -float P_constrastive_f(size_t i, size_t l, int *labels, float **z, unsigned int feature_size, float temperature, contrastive_params *contrast_p, int contrast_p_size) -{ - if (i == l) { - fprintf(stderr, " Error: in P_constrastive must be i != l, while i = %d, 
l = %d \n", i, l); - getchar(); - } - - const float sim = find_sim(i, l, contrast_p, contrast_p_size); // cosine_similarity(z[i], z[l], feature_size); - const float numerator = expf(sim / temperature); - - float denominator = 0; - int k; - for (k = 0; k < contrast_p_size; ++k) { - contrastive_params cp = contrast_p[k]; - //if (k != i && labels[k] != labels[i]) { - //if (k != i) { - if (cp.i != i && cp.j == l) { - //const float sim_den = cp.sim; - ////const float sim_den = find_sim(k, l, contrast_p, contrast_p_size); // cosine_similarity(z[k], z[l], feature_size); - //denominator += expf(sim_den / temperature); - denominator += cp.exp_sim; - } - } - - float result = 0.9999; - if (denominator != 0) result = numerator / denominator; - if (result > 1) result = 0.9999; - return result; -} - -void grad_contrastive_loss_positive_f(size_t i, int *class_ids, int *labels, size_t num_of_samples, float **z, unsigned int feature_size, float temperature, float *delta, int wh, contrastive_params *contrast_p, int contrast_p_size) -{ - const float vec_len = math_vector_length(z[i], feature_size); - size_t j; - float N = 0; - for (j = 0; j < num_of_samples; ++j) { - if (labels[i] == labels[j] && labels[i] >= 0) N++; - } - if (N == 0 || temperature == 0 || vec_len == 0) { - fprintf(stderr, " Error: N == 0 || temperature == 0 || vec_len == 0. 
N=%f, temperature=%f, vec_len=%f, labels[i] = %d \n", - N, temperature, vec_len, labels[i]); - getchar(); - return; - } - const float mult = 1 / ((N - 1) * temperature * vec_len); - - for (j = 0; j < num_of_samples; ++j) { - //if (i != j && (i/2) == (j/2)) { - if (i != j && labels[i] == labels[j] && labels[i] >= 0) { - //printf(" i = %d, j = %d, num_of_samples = %d, labels[i] = %d, labels[j] = %d \n", - // i, j, num_of_samples, labels[i], labels[j]); - const int sim_P_i = get_sim_P_index(i, j, contrast_p, contrast_p_size); - if (sim_P_i < 0) continue; - const float sim = contrast_p[sim_P_i].sim; - const float P = contrast_p[sim_P_i].P; - //if (!check_sim(i, j, contrast_p, contrast_p_size)) continue; - //const float sim = find_sim(i, j, contrast_p, contrast_p_size); //cos_sim[i*num_of_samples + j]; // cosine_similarity(z[i], z[j], feature_size); - //const float P = find_P_constrastive(i, j, contrast_p, contrast_p_size); //p_constrastive[i*num_of_samples + j]; // P_constrastive(i, j, labels, num_of_samples, z, feature_size, temperature, cos_sim); - //const float custom_pos_mult = 1 - sim; - - - int m; - //const float d = mult*(sim * z[i][m] - z[j][m]) * (1 - P); // 1 - for (m = 0; m < feature_size; ++m) { - //const float d = mult*(sim * z[j][m] - z[j][m]) * (1 - P); // my - //const float d = mult*(sim * z[i][m] + sim * z[j][m] - z[j][m]) *(1 - P); // 1+2 - const float d = mult*(sim * z[i][m] - z[j][m]) *(1 - P); // 1 (70%) - //const float d = mult*(sim * z[j][m] - z[j][m]) * (1 - P); // 2 - // printf(" pos: z[j][m] = %f, z[i][m] = %f, d = %f, sim = %f \n", z[j][m], z[i][m], d, sim); - const int out_i = m * wh; - delta[out_i] -= d; - } - } - } -} - -void grad_contrastive_loss_negative_f(size_t i, int *class_ids, int *labels, size_t num_of_samples, float **z, unsigned int feature_size, float temperature, float *delta, int wh, contrastive_params *contrast_p, int contrast_p_size, int neg_max) -{ - const float vec_len = math_vector_length(z[i], feature_size); - size_t j; 
- float N = 0; - for (j = 0; j < num_of_samples; ++j) { - if (labels[i] == labels[j] && labels[i] >= 0) N++; - } - if (N == 0 || temperature == 0 || vec_len == 0) { - fprintf(stderr, " Error: N == 0 || temperature == 0 || vec_len == 0. N=%f, temperature=%f, vec_len=%f, labels[i] = %d \n", - N, temperature, vec_len, labels[i]); - getchar(); - return; - } - const float mult = 1 / ((N - 1) * temperature * vec_len); - - int neg_counter = 0; - - for (j = 0; j < num_of_samples; ++j) { - //if (i != j && (i/2) == (j/2)) { - if (labels[i] >= 0 && labels[i] == labels[j] && i != j) { - - size_t k; - for (k = 0; k < num_of_samples; ++k) { - //if (k != i && k != j && labels[k] != labels[i]) { - if (k != i && k != j && labels[k] != labels[i] && class_ids[j] == class_ids[k]) { - neg_counter++; - const int sim_P_i = get_sim_P_index(i, k, contrast_p, contrast_p_size); - if (sim_P_i < 0) continue; - const float sim = contrast_p[sim_P_i].sim; - const float P = contrast_p[sim_P_i].P; - //if (!check_sim(i, k, contrast_p, contrast_p_size)) continue; - //const float sim = find_sim(i, k, contrast_p, contrast_p_size); //cos_sim[i*num_of_samples + k]; // cosine_similarity(z[i], z[k], feature_size); - //const float P = find_P_constrastive(i, k, contrast_p, contrast_p_size); //p_constrastive[i*num_of_samples + k]; // P_constrastive(i, k, labels, num_of_samples, z, feature_size, temperature, cos_sim); - //const float custom_pos_mult = 1 + sim; - - int m; - //const float d = mult*(z[k][m] + sim * z[i][m]) * P; // my1 - for (m = 0; m < feature_size; ++m) { - //const float d = mult*(z[k][m] + sim * z[i][m]) * P; // 1 (70%) - //const float d = mult*(z[k][m] - sim * z[k][m] - sim * z[i][m]) * P; // 1+2 - const float d = mult*(z[k][m] - sim * z[i][m]) * P; // 1 (70%) - //const float d = mult*(z[k][m] - sim * z[k][m]) * P; // 2 - //printf(" neg: z[k][m] = %f, z[i][m] = %f, d = %f, sim = %f \n", z[k][m], z[i][m], d, sim); - const int out_i = m * wh; - delta[out_i] -= d; - } - - if (neg_counter >= 
neg_max) return; - } - } - } - } -} - - - -// num_of_samples = 2 * loaded_images = mini_batch_size -float P_constrastive(size_t i, size_t l, int *labels, size_t num_of_samples, float **z, unsigned int feature_size, float temperature, float *cos_sim, float *exp_cos_sim) -{ - if (i == l) { - fprintf(stderr, " Error: in P_constrastive must be i != l, while i = %d, l = %d \n", i, l); - getchar(); - } - - //const float sim = cos_sim[i*num_of_samples + l]; // cosine_similarity(z[i], z[l], feature_size); - //const float numerator = expf(sim / temperature); - const float numerator = exp_cos_sim[i*num_of_samples + l]; - - float denominator = 0; - int k; - for (k = 0; k < num_of_samples; ++k) { - //if (k != i && labels[k] != labels[i]) { - if (k != i) { - //const float sim_den = cos_sim[k*num_of_samples + l]; // cosine_similarity(z[k], z[l], feature_size); - //denominator += expf(sim_den / temperature); - denominator += exp_cos_sim[k*num_of_samples + l]; - } - } - - float result = numerator / denominator; - return result; -} - -// i - id of the current sample in mini_batch -// labels[num_of_samples] - array with class_id for each sample in the current mini_batch -// z[feature_size][num_of_samples] - array of arrays with contrastive features (output of conv-layer, f.e. 128 floats for each sample) -// delta[feature_size] - array with deltas for backpropagation -// temperature - scalar temperature param (temperature > 0), f.e. temperature = 0.07: Supervised Contrastive Learning -void grad_contrastive_loss_positive(size_t i, int *labels, size_t num_of_samples, float **z, unsigned int feature_size, float temperature, float *cos_sim, float *p_constrastive, float *delta, int wh) -{ - const float vec_len = math_vector_length(z[i], feature_size); - size_t j; - float N = 0; - for (j = 0; j < num_of_samples; ++j) { - if (labels[i] == labels[j]) N++; - } - if (N == 0 || temperature == 0 || vec_len == 0) { - fprintf(stderr, " Error: N == 0 || temperature == 0 || vec_len == 0. 
N=%f, temperature=%f, vec_len=%f \n", N, temperature, vec_len); - getchar(); - } - const float mult = 1 / ((N - 1) * temperature * vec_len); - - for (j = 0; j < num_of_samples; ++j) { - //if (i != j && (i/2) == (j/2)) { - if (i != j && labels[i] == labels[j]) { - //printf(" i = %d, j = %d, num_of_samples = %d, labels[i] = %d, labels[j] = %d \n", - // i, j, num_of_samples, labels[i], labels[j]); - const float sim = cos_sim[i*num_of_samples + j]; // cosine_similarity(z[i], z[j], feature_size); - const float P = p_constrastive[i*num_of_samples + j]; // P_constrastive(i, j, labels, num_of_samples, z, feature_size, temperature, cos_sim); - //const float custom_pos_mult = 1 - sim; - - int m; - for (m = 0; m < feature_size; ++m) { - const float d = mult*(sim * z[i][m] - z[j][m]) * (1 - P); // good - //const float d = mult*(sim * z[j][m] - z[j][m]) * (1 - P); // bad - // printf(" pos: z[j][m] = %f, z[i][m] = %f, d = %f, sim = %f \n", z[j][m], z[i][m], d, sim); - const int out_i = m * wh; - delta[out_i] -= d; - } - } - } -} - -// i - id of the current sample in mini_batch -// labels[num_of_samples] - array with class_id for each sample in the current mini_batch -// z[feature_size][num_of_samples] - array of arrays with contrastive features (output of conv-layer, f.e. 128 floats for each sample) -// delta[feature_size] - array with deltas for backpropagation -// temperature - scalar temperature param (temperature > 0), f.e. temperature = 0.07: Supervised Contrastive Learning -void grad_contrastive_loss_negative(size_t i, int *labels, size_t num_of_samples, float **z, unsigned int feature_size, float temperature, float *cos_sim, float *p_constrastive, float *delta, int wh) -{ - const float vec_len = math_vector_length(z[i], feature_size); - size_t j; - float N = 0; - for (j = 0; j < num_of_samples; ++j) { - if (labels[i] == labels[j]) N++; - } - if (N == 0 || temperature == 0 || vec_len == 0) { - fprintf(stderr, " Error: N == 0 || temperature == 0 || vec_len == 0. 
N=%f, temperature=%f, vec_len=%f \n", N, temperature, vec_len); - getchar(); - } - const float mult = 1 / ((N - 1) * temperature * vec_len); - - for (j = 0; j < num_of_samples; ++j) { - //if (i != j && (i/2) == (j/2)) { - if (i != j && labels[i] == labels[j]) { - - size_t k; - for (k = 0; k < num_of_samples; ++k) { - //if (k != i && k != j && labels[k] != labels[i]) { - if (k != i && k != j && labels[k] >= 0) { - const float sim = cos_sim[i*num_of_samples + k]; // cosine_similarity(z[i], z[k], feature_size); - const float P = p_constrastive[i*num_of_samples + k]; // P_constrastive(i, k, labels, num_of_samples, z, feature_size, temperature, cos_sim); - //const float custom_pos_mult = 1 + sim; - - int m; - for (m = 0; m < feature_size; ++m) { - const float d = mult*(z[k][m] - sim * z[i][m]) * P; // good - //const float d = mult*(z[k][m] - sim * z[k][m]) * P; // bad - //printf(" neg: z[k][m] = %f, z[i][m] = %f, d = %f, sim = %f \n", z[k][m], z[i][m], d, sim); - const int out_i = m * wh; - delta[out_i] -= d; - } - } - } - } - } -} diff --git a/src/Detector/darknet/src/blas.h b/src/Detector/darknet/src/blas.h deleted file mode 100644 index eddca47b4..000000000 --- a/src/Detector/darknet/src/blas.h +++ /dev/null @@ -1,181 +0,0 @@ -#ifndef BLAS_H -#define BLAS_H -#include -#include "darknet.h" - -#ifdef GPU -#include "dark_cuda.h" -#include "tree.h" -#endif - -#ifdef __cplusplus -extern "C" { -#endif -void flatten(float *x, int size, int layers, int batch, int forward); -void pm(int M, int N, float *A); -float *random_matrix(int rows, int cols); -void time_random_matrix(int TA, int TB, int m, int k, int n); -void reorg_cpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out); - -void test_blas(); - -void const_cpu(int N, float ALPHA, float *X, int INCX); -void constrain_ongpu(int N, float ALPHA, float * X, int INCX); -void constrain_min_max_ongpu(int N, float MIN, float MAX, float * X, int INCX); -void pow_cpu(int N, float ALPHA, float *X, int 
INCX, float *Y, int INCY); -void mul_cpu(int N, float *X, int INCX, float *Y, int INCY); - -void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); -void copy_cpu(int N, float *X, int INCX, float *Y, int INCY); -void scal_cpu(int N, float ALPHA, float *X, int INCX); -void scal_add_cpu(int N, float ALPHA, float BETA, float *X, int INCX); -void fill_cpu(int N, float ALPHA, float * X, int INCX); -float dot_cpu(int N, float *X, int INCX, float *Y, int INCY); -void test_gpu_blas(); -void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out); -void shortcut_multilayer_cpu(int size, int src_outputs, int batch, int n, int *outputs_of_layers, float **layers_output, float *out, float *in, float *weights, int nweights, WEIGHTS_NORMALIZATION_T weights_normalization); -void backward_shortcut_multilayer_cpu(int size, int src_outputs, int batch, int n, int *outputs_of_layers, - float **layers_delta, float *delta_out, float *delta_in, float *weights, float *weight_updates, int nweights, float *in, float **layers_output, WEIGHTS_NORMALIZATION_T weights_normalization); - -void mean_cpu(float *x, int batch, int filters, int spatial, float *mean); -void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); -void normalize_cpu(float *x, float *mean, float *variance, int batch, int filters, int spatial); - -void add_bias(float *output, float *biases, int batch, int n, int size); -void scale_bias(float *output, float *scales, int batch, int n, int size); -void backward_scale_cpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates); -void mean_delta_cpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta); -void variance_delta_cpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta); -void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_delta, float 
*variance_delta, int batch, int filters, int spatial, float *delta); - -void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error); -void l2_cpu(int n, float *pred, float *truth, float *delta, float *error); -void weighted_sum_cpu(float *a, float *b, float *s, int num, float *c); - -void softmax(float *input, int n, float temp, float *output, int stride); -void upsample_cpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out); -void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output); -void softmax_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error); -void constrain_cpu(int size, float ALPHA, float *X); -void fix_nan_and_inf_cpu(float *input, size_t size); - - -int check_sim(size_t i, size_t j, contrastive_params *contrast_p, int contrast_p_size); -float find_sim(size_t i, size_t j, contrastive_params *contrast_p, int contrast_p_size); -float find_P_constrastive(size_t i, size_t j, contrastive_params *contrast_p, int contrast_p_size); -float P_constrastive_f_det(size_t il, int *labels, float **z, unsigned int feature_size, float temperature, contrastive_params *contrast_p, int contrast_p_size); -float P_constrastive_f(size_t i, size_t l, int *labels, float **z, unsigned int feature_size, float temperature, contrastive_params *contrast_p, int contrast_p_size); -void grad_contrastive_loss_positive_f(size_t i, int *class_ids, int *labels, size_t num_of_samples, float **z, unsigned int feature_size, float temperature, float *delta, int wh, contrastive_params *contrast_p, int contrast_p_size); -void grad_contrastive_loss_negative_f(size_t i, int *class_ids, int *labels, size_t num_of_samples, float **z, unsigned int feature_size, float temperature, float *delta, int wh, contrastive_params *contrast_p, int contrast_p_size, int neg_max); - -void get_embedding(float *src, int src_w, int src_h, int src_c, int 
embedding_size, int cur_w, int cur_h, int cur_n, int cur_b, float *dst); -float math_vector_length(float *A, unsigned int feature_size); -float cosine_similarity(float *A, float *B, unsigned int feature_size); -float P_constrastive(size_t i, size_t l, int *labels, size_t num_of_samples, float **z, unsigned int feature_size, float temperature, float *cos_sim, float *exp_cos_sim); -void grad_contrastive_loss_positive(size_t i, int *labels, size_t num_of_samples, float **z, unsigned int feature_size, float temperature, float *cos_sim, float *p_constrastive, float *delta, int wh); -void grad_contrastive_loss_negative(size_t i, int *labels, size_t num_of_samples, float **z, unsigned int feature_size, float temperature, float *cos_sim, float *p_constrastive, float *delta, int wh); - - -#ifdef GPU - -void constrain_weight_updates_ongpu(int N, float coef, float *weights_gpu, float *weight_updates_gpu); -void axpy_ongpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY); -void axpy_ongpu_offset(int N, float ALPHA, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY); -void simple_copy_ongpu(int size, float *src, float *dst); -void memcpy_ongpu(void *dst, void *src, int size_bytes); -void copy_ongpu(int N, float * X, int INCX, float * Y, int INCY); -void copy_ongpu_offset(int N, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY); -void scal_ongpu(int N, float ALPHA, float * X, int INCX); -void scal_add_ongpu(int N, float ALPHA, float BETA, float * X, int INCX); -void supp_ongpu(int N, float ALPHA, float * X, int INCX); -void mask_gpu_new_api(int N, float * X, float mask_num, float * mask, float val); -void mask_ongpu(int N, float * X, float mask_num, float * mask); -void const_ongpu(int N, float ALPHA, float *X, int INCX); -void pow_ongpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); -void mul_ongpu(int N, float *X, int INCX, float *Y, int INCY); -void fill_ongpu(int N, float ALPHA, float * X, int INCX); -void 
gradient_centralization_gpu(int w, int h, int c, int f, float *in); - -void mean_gpu(float *x, int batch, int filters, int spatial, float *mean); -void variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); -void normalize_gpu(float *x, float *mean, float *variance, int batch, int filters, int spatial); - -void normalize_delta_gpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta); - -void fast_mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta); -void fast_variance_delta_gpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta); - -void fast_mean_gpu(float *x, int batch, int filters, int spatial, float *mean); -void fast_variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); -void fast_v_cbn_gpu(const float *x, float *mean, int batch, int filters, int spatial, int minibatch_index, int max_minibatch_index, float *m_avg, float *v_avg, float *variance, - const float alpha, float *rolling_mean_gpu, float *rolling_variance_gpu, int inverse_variance, float epsilon); -void inverse_variance_ongpu(int size, float *src, float *dst, float epsilon); -void normalize_scale_bias_gpu(float *x, float *mean, float *variance, float *scales, float *biases, int batch, int filters, int spatial, int inverse_variance, float epsilon); -void compare_2_arrays_gpu(float *one, float *two, int size); -void shortcut_gpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out); -void shortcut_multilayer_gpu(int src_outputs, int batch, int n, int *outputs_of_layers_gpu, float **layers_output_gpu, float *out, float *in, float *weights_gpu, int nweights, WEIGHTS_NORMALIZATION_T weights_normalization); -void backward_shortcut_multilayer_gpu(int src_outputs, int batch, int n, int *outputs_of_layers_gpu, float 
**layers_delta_gpu, float *delta_out, float *delta_in, - float *weights, float *weight_updates, int nweights, float *in, float **layers_output, WEIGHTS_NORMALIZATION_T weights_normalization); -void input_shortcut_gpu(float *in, int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out); -void backward_scale_gpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates); -void mean_array_gpu(float *src, int size, float alpha, float *avg); -void scale_bias_gpu(float *output, float *biases, int batch, int n, int size); -void add_bias_gpu(float *output, float *biases, int batch, int n, int size); -void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size); - -void softmax_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error); -void smooth_l1_gpu(int n, float *pred, float *truth, float *delta, float *error); -void l2_gpu(int n, float *pred, float *truth, float *delta, float *error); -void weighted_delta_gpu(float *a, float *b, float *s, float *da, float *db, float *ds, int num, float *dc); -void weighted_sum_gpu(float *a, float *b, float *s, int num, float *c); -void mult_add_into_gpu(int num, float *a, float *b, float *c); - -void reorg_ongpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out); - -void softmax_gpu_new_api(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output); -void softmax_gpu(float *input, int n, int offset, int groups, float temp, float *output); -void adam_gpu(int n, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t); -void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch, int t); - -void flatten_ongpu(float *x, int spatial, int layers, int batch, int forward, float *out); - -void upsample_gpu(float *in, int w, int h, int c, int batch, int stride, int forward, 
float scale, float *out); - -void softmax_tree_gpu(float *input, int spatial, int batch, int stride, float temp, float *output, tree hier); - -void fix_nan_and_inf(float *input, size_t size); -void reset_nan_and_inf(float *input, size_t size); -int is_nan_or_inf(float *input, size_t size); - -void add_3_arrays_activate(float *a1, float *a2, float *a3, size_t size, ACTIVATION a, float *dst); -void sum_of_mults(float *a1, float *a2, float *b1, float *b2, size_t size, float *dst); -void activate_and_mult(float *a1, float *a2, size_t size, ACTIVATION a, float *dst); - -void scale_channels_gpu(float *in_w_h_c, int size, int channel_size, int batch_size, int scale_wh, float *scales_c, float *out); -void backward_scale_channels_gpu(float *in_w_h_c_delta, int size, int channel_size, int batch_size, int scale_wh, - float *in_scales_c, float *out_from_delta, - float *in_from_output, float *out_state_delta); - - -void backward_sam_gpu(float *in_w_h_c_delta, int size, int channel_size, - float *in_scales_c, float *out_from_delta, - float *in_from_output, float *out_state_delta); - -void sam_gpu(float *in_w_h_c, int size, int channel_size, float *scales_c, float *out); - -void smooth_rotate_weights_gpu(const float *src_weight_gpu, float *weight_deform_gpu, int nweights, int n, int size, int angle, int reverse); -void stretch_weights_gpu(const float *src_weight_gpu, float *weight_deform_gpu, int nweights, int n, int size, float scale, int reverse); -void sway_and_flip_weights_gpu(const float *src_weight_gpu, float *weight_deform_gpu, int nweights, int n, int size, int angle, int reverse); -void stretch_sway_flip_weights_gpu(const float *src_weight_gpu, float *weight_deform_gpu, int nweights, int n, int size, int angle, int reverse); -void rotate_weights_gpu(const float *src_weight_gpu, float *weight_deform_gpu, int nweights, int n, int size, int reverse); -void reduce_and_expand_array_gpu(const float *src_gpu, float *dst_gpu, int size, int groups); -void expand_array_gpu(const 
float *src_gpu, float *dst_gpu, int size, int groups); -void mult_inverse_array_gpu(const float *src_gpu, float *dst_gpu, int size, float eps); -void P_constrastive_f_det_gpu(int *labels, unsigned int feature_size, float temperature, contrastive_params *contrast_p, const int contrast_p_size); -void coord_conv_gpu(float *dst, int size, int w, int h, int chan, int b, int type); - -#endif // GPU -#ifdef __cplusplus -} -#endif -#endif diff --git a/src/Detector/darknet/src/blas_kernels.cu b/src/Detector/darknet/src/blas_kernels.cu deleted file mode 100644 index b8f7aed55..000000000 --- a/src/Detector/darknet/src/blas_kernels.cu +++ /dev/null @@ -1,2432 +0,0 @@ -#include -#include -#include -#include -#include - -#include "blas.h" -#include "dark_cuda.h" -#include "utils.h" -#include "tree.h" - -__inline__ __device__ -float warpAllReduceSum(float val) { - for (int mask = WARP_SIZE / 2; mask > 0; mask /= 2) -#if CUDART_VERSION >= 9000 - val += __shfl_xor_sync(0xffffffff, val, mask); -#else - val += __shfl_xor(val, mask); -#endif - return val; -} - -__global__ void compare_2_arrays_kernel(float *one, float *two, int size) -{ - const int index = blockIdx.x*blockDim.x + threadIdx.x; - if (index >= size) return; - - const float diff = 100 * fabs(one[index] - two[index]) / fabs(one[index]); - - if (diff > 10) printf(" i: %d - one = %f, two = %f, diff = %f %% \n", index, one[index], two[index], diff); -} - -void compare_2_arrays_gpu(float *one, float *two, int size) -{ - const int num_blocks = get_number_of_blocks(size, BLOCK); - - compare_2_arrays_kernel << > >(one, two, size); - CHECK_CUDA(cudaPeekAtLastError()); - CHECK_CUDA(cudaDeviceSynchronize()); -} - -__global__ void mean_array_kernel(float *src, int size, float alpha, float *avg) -{ - const int i = blockIdx.x*blockDim.x + threadIdx.x; - if (i >= size) return; - - avg[i] = avg[i] * (1 - alpha) + src[i] * alpha; - src[i] = avg[i]; -} - - -void mean_array_gpu(float *src, int size, float alpha, float *avg) -{ - const int 
num_blocks = get_number_of_blocks(size, BLOCK); - - mean_array_kernel << > >(src, size, alpha, avg); - CHECK_CUDA(cudaPeekAtLastError()); -} - - -__global__ void scale_bias_kernel(float *output, float *scale, int batch, int filters, int spatial, int current_size) -{ - const int index = blockIdx.x*blockDim.x + threadIdx.x; - if (index >= current_size) return; - - int f = (index / spatial) % filters; - output[index] *= scale[f]; -} - -void scale_bias_gpu(float *output, float *scale, int batch, int filters, int spatial) -{ - const int current_size = batch * filters * spatial; - const int num_blocks = get_number_of_blocks(current_size, BLOCK); - - scale_bias_kernel << > >(output, scale, batch, filters, spatial, current_size); - CHECK_CUDA(cudaPeekAtLastError()); -} - - -__global__ void backward_scale_kernel(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates) -{ - __shared__ float part[BLOCK]; - int i,b; - int filter = blockIdx.x; - int p = threadIdx.x; - float sum = 0; - for(b = 0; b < batch; ++b){ - for(i = 0; i < size; i += BLOCK){ - int index = p + i + size*(filter + n*b); - sum += (p+i < size) ? 
delta[index]*x_norm[index] : 0; - } - } - part[p] = sum; - __syncthreads(); - if (p == 0) { - for(i = 0; i < BLOCK; ++i) scale_updates[filter] += part[i]; - } -} - -void backward_scale_gpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates) -{ - backward_scale_kernel<<>>(x_norm, delta, batch, n, size, scale_updates); - CHECK_CUDA(cudaPeekAtLastError()); -} - -__global__ void add_bias_kernel(float *output, float *biases, int batch, int filters, int spatial, int current_size) -{ - const int index = blockIdx.x*blockDim.x + threadIdx.x; - if (index >= current_size) return; - - int f = (index / spatial) % filters; - output[index] += biases[f]; -} - -void add_bias_gpu(float *output, float *biases, int batch, int filters, int spatial) -{ - const int current_size = batch * filters * spatial; - const int num_blocks = get_number_of_blocks(current_size, BLOCK); - - add_bias_kernel << > >(output, biases, batch, filters, spatial, current_size); - CHECK_CUDA(cudaPeekAtLastError()); -} - -__global__ void backward_bias_kernel(float *bias_updates, float *delta, int batch, int n, int size) -{ - __shared__ float part[BLOCK]; - int i,b; - int filter = blockIdx.x; - int p = threadIdx.x; - float sum = 0; - for(b = 0; b < batch; ++b){ - for(i = 0; i < size; i += BLOCK){ - int index = p + i + size*(filter + n*b); - sum += (p+i < size) ? 
delta[index] : 0; - } - } - part[p] = sum; - __syncthreads(); - if (p == 0) { - for(i = 0; i < BLOCK; ++i) bias_updates[filter] += part[i]; - } -} - -/* -__global__ void dot_kernel(float *output, float scale, int batch, int n, int size, float *delta) -{ - int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - int f1 = index / n; - int f2 = index % n; - if (f2 <= f1) return; - - float sum = 0; - float norm1 = 0; - float norm2 = 0; - int b, i; - for(b = 0; b < batch; ++b){ - for(i = 0; i < size; ++i){ - int i1 = b * size * n + f1 * size + i; - int i2 = b * size * n + f2 * size + i; - sum += output[i1] * output[i2]; - norm1 += output[i1] * output[i1]; - norm2 += output[i2] * output[i2]; - } - } - norm1 = sqrt(norm1); - norm2 = sqrt(norm2); - float norm = norm1 * norm2; - sum = sum / norm; - for(b = 0; b < batch; ++b){ - for(i = 0; i < size; ++i){ - int i1 = b * size * n + f1 * size + i; - int i2 = b * size * n + f2 * size + i; - delta[i1] += - scale * sum * output[i2] / norm; - delta[i2] += - scale * sum * output[i1] / norm; - } - } -} - -void dot_error_gpu(layer l) -{ - dot_kernel<<>>(l.output_gpu, l.dot, l.batch, l.n, l.out_w * l.out_h, l.delta_gpu); - CHECK_CUDA(cudaPeekAtLastError()); -} -*/ - -void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size) -{ - backward_bias_kernel<<>>(bias_updates, delta, batch, n, size); - CHECK_CUDA(cudaPeekAtLastError()); -} - -__global__ void adam_kernel(int N, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t) -{ - int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (index >= N) return; - - float mhat = m[index] / (1.f - powf(B1, t)); - float vhat = v[index] / (1.f - powf(B2, t)); - - x[index] = x[index] + rate * mhat / (sqrtf(vhat) + eps); -} - -extern "C" void adam_gpu(int n, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t) -{ - adam_kernel << > >(n, x, m, v, B1, B2, rate, eps, t); - 
CHECK_CUDA(cudaPeekAtLastError()); -} - -extern "C" void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch, int t) -{ - scal_ongpu(n, B1, m, 1); - scal_ongpu(n, B2, v, 1); - axpy_ongpu(n, -decay*batch, w, 1, d, 1); - - axpy_ongpu(n, (1 - B1), d, 1, m, 1); - mul_ongpu(n, d, 1, d, 1); - axpy_ongpu(n, (1 - B2), d, 1, v, 1); - - adam_gpu(n, w, m, v, B1, B2, rate, eps, t); - fill_ongpu(n, 0, d, 1); - CHECK_CUDA(cudaPeekAtLastError()); -} - -__global__ void normalize_kernel(int N, float *x, float *mean, float *variance, int batch, int filters, int spatial) -{ - const int index = blockIdx.x*blockDim.x + threadIdx.x; - if (index >= N) return; - int f = (index / spatial) % filters; - - x[index] = (x[index] - mean[f]) / (sqrtf(variance[f] + .00001f)); -} - -extern "C" void normalize_gpu(float *x, float *mean, float *variance, int batch, int filters, int spatial) -{ - const int current_size = batch * filters * spatial; - const int num_blocks = get_number_of_blocks(current_size, BLOCK); - - normalize_kernel << > >(current_size, x, mean, variance, batch, filters, spatial); - CHECK_CUDA(cudaPeekAtLastError()); -} - -__global__ void normalize_delta_kernel(int N, float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta) -{ - int index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (index >= N) return; - int f = (index/spatial)%filters; - - delta[index] = delta[index] * 1.F/(sqrtf(variance[f]) + .000001f) + variance_delta[f] * 2. 
* (x[index] - mean[f]) / (spatial * batch) + mean_delta[f]/(spatial*batch); -} - -extern "C" void normalize_delta_gpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta) -{ - size_t N = batch*filters*spatial; - normalize_delta_kernel<<>>(N, x, mean, variance, mean_delta, variance_delta, batch, filters, spatial, delta); - CHECK_CUDA(cudaPeekAtLastError()); -} - -__global__ void variance_delta_kernel(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta) -{ - int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (i >= filters) return; - int j,k; - variance_delta[i] = 0; - for(j = 0; j < batch; ++j){ - for(k = 0; k < spatial; ++k){ - int index = j*filters*spatial + i*spatial + k; - variance_delta[i] += delta[index]*(x[index] - mean[i]); - } - } - variance_delta[i] *= -.5 * powf(variance[i] + .000001f, (float)(-3./2.)); -} - -__global__ void accumulate_kernel(float *x, int n, int groups, float *sum) -{ - int k; - int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (i >= groups) return; - sum[i] = 0; - for(k = 0; k < n; ++k){ - sum[i] += x[k*groups + i]; - } -} - -__global__ void fast_mean_delta_kernel(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) -{ - const int threads = BLOCK; - __shared__ float local[threads]; - - int id = threadIdx.x; - local[id] = 0; - - int filter = blockIdx.x; - - int i, j; - for(j = 0; j < batch; ++j){ - for(i = 0; i < spatial; i += threads){ - int index = j*spatial*filters + filter*spatial + i + id; - local[id] += (i+id < spatial) ? 
delta[index] : 0; - } - } - __syncthreads(); - - if(id == 0){ - mean_delta[filter] = 0; - for(i = 0; i < threads; ++i){ - mean_delta[filter] += local[i]; - } - mean_delta[filter] *= (-1.F/sqrtf(variance[filter] + .000001f)); - } -} - -__global__ void fast_variance_delta_kernel(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta) -{ - const int threads = BLOCK; - __shared__ float local[threads]; - - int id = threadIdx.x; - local[id] = 0; - - int filter = blockIdx.x; - - int i, j; - for(j = 0; j < batch; ++j){ - for(i = 0; i < spatial; i += threads){ - int index = j*spatial*filters + filter*spatial + i + id; - - local[id] += (i+id < spatial) ? delta[index]*(x[index] - mean[filter]) : 0; - } - } - __syncthreads(); - - if(id == 0){ - variance_delta[filter] = 0; - for(i = 0; i < threads; ++i){ - variance_delta[filter] += local[i]; - } - variance_delta[filter] *= -.5 * powf(variance[filter] + .000001f, (float)(-3./2.)); - } -} - - -__global__ void mean_delta_kernel(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) -{ - int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (i >= filters) return; - int j,k; - mean_delta[i] = 0; - for (j = 0; j < batch; ++j) { - for (k = 0; k < spatial; ++k) { - int index = j*filters*spatial + i*spatial + k; - mean_delta[i] += delta[index]; - } - } - mean_delta[i] *= (-1.F/sqrtf(variance[i] + .000001f)); -} - -extern "C" void mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) -{ - mean_delta_kernel<<>>(delta, variance, batch, filters, spatial, mean_delta); - CHECK_CUDA(cudaPeekAtLastError()); -} - -extern "C" void fast_mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta) -{ - fast_mean_delta_kernel<<>>(delta, variance, batch, filters, spatial, mean_delta); - CHECK_CUDA(cudaPeekAtLastError()); -} - -extern "C" void 
fast_variance_delta_gpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta) -{ - fast_variance_delta_kernel<<>>(x, delta, mean, variance, batch, filters, spatial, variance_delta); - CHECK_CUDA(cudaPeekAtLastError()); -} - -__global__ void mean_kernel(float *x, int batch, int filters, int spatial, float *mean) -{ - float scale = 1.F/(batch * spatial); - int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (i >= filters) return; - int j,k; - mean[i] = 0; - for(j = 0; j < batch; ++j){ - for(k = 0; k < spatial; ++k){ - int index = j*filters*spatial + i*spatial + k; - mean[i] += x[index]; - } - } - mean[i] *= scale; -} - -__global__ void variance_kernel(float *x, float *mean, int batch, int filters, int spatial, float *variance) -{ - float scale = 1.F/(batch * spatial - 1); - int j,k; - int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (i >= filters) return; - variance[i] = 0; - for(j = 0; j < batch; ++j){ - for(k = 0; k < spatial; ++k){ - int index = j*filters*spatial + i*spatial + k; - variance[i] += powf((x[index] - mean[i]), 2); - } - } - variance[i] *= scale; -} - -__global__ void reorg_kernel(int N, float *x, int w, int h, int c, int batch, int stride, int forward, float *out) -{ - int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if(i >= N) return; - int in_index = i; - int in_w = i%w; - i = i/w; - int in_h = i%h; - i = i/h; - int in_c = i%c; - i = i/c; - int b = i%batch; - - int out_c = c/(stride*stride); - - int c2 = in_c % out_c; - int offset = in_c / out_c; - int w2 = in_w*stride + offset % stride; - int h2 = in_h*stride + offset / stride; - //printf("%d\n", offset); - int out_index = w2 + w*stride*(h2 + h*stride*(c2 + out_c*b)); - - // printf("%d %d %d\n", w2, h2, c2); - //printf("%d %d\n", in_index, out_index); - //if(out_index >= N || out_index < 0) printf("bad bad bad \n"); - - if(forward) out[out_index] = x[in_index]; - 
else out[in_index] = x[out_index]; - //if(forward) out[1] = x[1]; - //else out[0] = x[0]; -} - -__global__ void constrain_weight_updates_kernel(int N, float coef, float *weights_gpu, float *weight_updates_gpu) -{ - int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (i < N) { - const float w = weights_gpu[i]; - const float wu = weight_updates_gpu[i]; - const float wu_sign = (wu == 0) ? 0 : (fabs(wu) / wu); - const float abs_limit = fabs(w * coef); - if (fabs(wu) > abs_limit) weight_updates_gpu[i] = abs_limit * wu_sign; - } -} - -extern "C" void constrain_weight_updates_ongpu(int N, float coef, float *weights_gpu, float *weight_updates_gpu) -{ - constrain_weight_updates_kernel << > >(N, coef, weights_gpu, weight_updates_gpu); - CHECK_CUDA(cudaPeekAtLastError()); -} - -__global__ void axpy_kernel(int N, float ALPHA, float *X, int OFFX, int INCX, float *Y, int OFFY, int INCY) -{ - int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if(i < N) Y[OFFY+i*INCY] += ALPHA*X[OFFX+i*INCX]; -} - -__global__ void pow_kernel(int N, float ALPHA, float *X, int INCX, float *Y, int INCY) -{ - int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if(i < N) Y[i*INCY] = powf(X[i*INCX], ALPHA); -} - -__global__ void const_kernel(int N, float ALPHA, float *X, int INCX) -{ - int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if(i < N) X[i*INCX] = ALPHA; -} - -__global__ void constrain_kernel(int N, float ALPHA, float *X, int INCX) -{ - int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if(i < N) X[i*INCX] = fminf(ALPHA, fmaxf(-ALPHA, X[i*INCX])); -} -__global__ void constrain_min_max_kernel(int N, float MIN, float MAX, float *X, int INCX) -{ - int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (i < N) X[i*INCX] = fminf(MAX, fmaxf(MIN, X[i*INCX])); -} - -__global__ void supp_kernel(int N, float ALPHA, float *X, int INCX) -{ - int i = (blockIdx.x + 
blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if(i < N) { - if((X[i*INCX] * X[i*INCX]) < (ALPHA * ALPHA)) X[i*INCX] = 0; - } -} - -__global__ void scal_kernel(int N, float ALPHA, float *X, int INCX) -{ - int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if(i < N) X[i*INCX] *= ALPHA; -} - -__global__ void scal_add_kernel(int N, float ALPHA, float BETA, float *X, int INCX) -{ - int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (i < N) X[i*INCX] = X[i*INCX] * ALPHA + BETA; -} - -__global__ void fill_kernel(int N, float ALPHA, float *X, int INCX) -{ - const int index = blockIdx.x*blockDim.x + threadIdx.x; - if (index >= N) return; - X[index*INCX] = ALPHA; -} - -__global__ void mask_kernel_new_api(int n, float *x, float mask_num, float *mask, float val) -{ - int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (i < n && mask[i] == mask_num) x[i] = val; -} - -__global__ void mask_kernel(int n, float *x, float mask_num, float *mask) -{ - int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if(i < n && mask[i] == mask_num) x[i] = mask_num; -} - -__global__ void copy_kernel(int N, float *X, int OFFX, int INCX, float *Y, int OFFY, int INCY) -{ - int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if(i < N) Y[i*INCY + OFFY] = X[i*INCX + OFFX]; -} - -__global__ void simple_copy_kernel(int size, float *src, float *dst) -{ - int index = blockIdx.x*blockDim.x + threadIdx.x; - if (index < size) - dst[index] = src[index]; -} - -__global__ void mul_kernel(int N, float *X, int INCX, float *Y, int INCY) -{ - int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if(i < N) Y[i*INCY] *= X[i*INCX]; -} - - -__global__ void fast_mean_kernel(float *x, int batch, int filters, int spatial, float *mean) -{ - const int threads = BLOCK; - __shared__ float local[threads]; - - int id = threadIdx.x; - local[id] = 0; - - int filter = blockIdx.x; - - int i, j; 
- for(j = 0; j < batch; ++j){ - for(i = 0; i < spatial; i += threads){ - int index = j*spatial*filters + filter*spatial + i + id; - local[id] += (i+id < spatial) ? x[index] : 0; - } - } - __syncthreads(); - - if(id == 0){ - float mean_tmp = 0; - for(i = 0; i < threads; ++i){ - mean_tmp += local[i]; - } - mean_tmp /= spatial * batch; - mean[filter] = mean_tmp; - } -} - -extern "C" void fast_mean_gpu(float *x, int batch, int filters, int spatial, float *mean) -{ - fast_mean_kernel << > >(x, batch, filters, spatial, mean); - CHECK_CUDA(cudaPeekAtLastError()); -} - -__global__ void fast_variance_kernel(float *x, float *mean, int batch, int filters, int spatial, float *variance) -{ - const int threads = BLOCK; - __shared__ float local[threads]; - - int id = threadIdx.x; - local[id] = 0; - - int filter = blockIdx.x; - - int i, j; - for(j = 0; j < batch; ++j){ - for(i = 0; i < spatial; i += threads){ - int index = j*spatial*filters + filter*spatial + i + id; - - local[id] += (i+id < spatial) ? 
powf((x[index] - mean[filter]), 2) : 0; - } - } - __syncthreads(); - - if(id == 0){ - float variance_tmp = 0; - for(i = 0; i < threads; ++i){ - variance_tmp += local[i]; - } - variance_tmp /= (spatial * batch);// -1); - variance[filter] = variance_tmp; - } -} - -extern "C" void fast_variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance) -{ - fast_variance_kernel<<>>(x, mean, batch, filters, spatial, variance); - CHECK_CUDA(cudaPeekAtLastError()); -} - - -__global__ void fast_v_cbn_kernel(const float *x, float *mean, int batch, int filters, int spatial, int minibatch_index, int max_minibatch_index, float *m_avg, float *v_avg, float *variance, - const float alpha, float *rolling_mean_gpu, float *rolling_variance_gpu, int inverse_variance, float epsilon) -{ - const int threads = BLOCK; - __shared__ float local[threads]; - - int id = threadIdx.x; - local[id] = 0; - - int filter = blockIdx.x; - - int i, j; - for (j = 0; j < batch; ++j) { - for (i = 0; i < spatial; i += threads) { - int index = j*spatial*filters + filter*spatial + i + id; - - local[id] += (i + id < spatial) ? 
powf(x[index], 2) : 0; - } - } - __syncthreads(); - - if (id == 0) { - float v_tmp = 0; - v_tmp = 0; - for (i = 0; i < threads; ++i) { - v_tmp += local[i]; - } - v_tmp /= (spatial * batch - 1); - - v_tmp = fmax(v_tmp, powf(mean[filter], 2)); - - - const float alpha_cbn = 1.0f / minibatch_index; - - m_avg[filter] = alpha_cbn * mean[filter] + (1 - alpha_cbn) * m_avg[filter]; - mean[filter] = m_avg[filter]; - - v_avg[filter] = alpha_cbn * v_tmp + (1 - alpha_cbn) * v_avg[filter]; - - float variance_tmp = fmax(0.0f, v_avg[filter] - powf(m_avg[filter], 2)); - if (inverse_variance) variance[filter] = 1.0f / sqrtf(variance_tmp + epsilon); - else variance[filter] = variance_tmp; - - //if (max_minibatch_index == minibatch_index) - { - if(rolling_mean_gpu) rolling_mean_gpu[filter] = alpha * mean[filter] + (1 - alpha) * rolling_mean_gpu[filter]; - - if(rolling_variance_gpu) rolling_variance_gpu[filter] = alpha * variance_tmp + (1 - alpha) * rolling_variance_gpu[filter]; - } - } -} - -extern "C" void fast_v_cbn_gpu(const float *x, float *mean, int batch, int filters, int spatial, int minibatch_index, int max_minibatch_index, float *m_avg, float *v_avg, float *variance, - const float alpha, float *rolling_mean_gpu, float *rolling_variance_gpu, int inverse_variance, float epsilon) -{ - fast_v_cbn_kernel << > >(x, mean, batch, filters, spatial, minibatch_index, max_minibatch_index, m_avg, v_avg, variance, alpha, rolling_mean_gpu, rolling_variance_gpu, inverse_variance, epsilon); - CHECK_CUDA(cudaPeekAtLastError()); -} - -__global__ void inverse_variance_kernel(int size, float *src, float *dst, float epsilon) -{ - int index = blockIdx.x*blockDim.x + threadIdx.x; - if (index < size) - dst[index] = 1.0f / sqrtf(src[index] + epsilon); -} - -extern "C" void inverse_variance_ongpu(int size, float *src, float *dst, float epsilon) -{ - const int num_blocks = size / BLOCK + 1; - inverse_variance_kernel << > >(size, src, dst, epsilon); - CHECK_CUDA(cudaPeekAtLastError()); -} - -__global__ 
void normalize_scale_bias_kernel(int N, float *x, float *mean, float *variance, float *scales, float *biases, int batch, int filters, int spatial, int inverse_variance, float epsilon) -{ - const int index = blockIdx.x*blockDim.x + threadIdx.x; - if (index >= N) return; - int f = (index / spatial) % filters; - - float val = 0; - if(inverse_variance) val = (x[index] - mean[f]) * variance[f]; - else val = (x[index] - mean[f]) / (sqrtf(variance[f] + epsilon)); - val *= scales[f]; - val += biases[f]; - - if (!isnan(val) && !isinf(val)) - x[index] = val; -} - -extern "C" void normalize_scale_bias_gpu(float *x, float *mean, float *variance, float *scales, float *biases, int batch, int filters, int spatial, int inverse_variance, float epsilon) -{ - const int current_size = batch * filters * spatial; - const int num_blocks = get_number_of_blocks(current_size, BLOCK); - - normalize_scale_bias_kernel << > >(current_size, x, mean, variance, scales, biases, batch, filters, spatial, inverse_variance, epsilon); - CHECK_CUDA(cudaPeekAtLastError()); -} - -extern "C" void mean_gpu(float *x, int batch, int filters, int spatial, float *mean) -{ - mean_kernel<<>>(x, batch, filters, spatial, mean); - CHECK_CUDA(cudaPeekAtLastError()); -} - -extern "C" void variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance) -{ - variance_kernel<<>>(x, mean, batch, filters, spatial, variance); - CHECK_CUDA(cudaPeekAtLastError()); -} - -extern "C" void axpy_ongpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY) -{ - axpy_ongpu_offset(N, ALPHA, X, 0, INCX, Y, 0, INCY); -} - -extern "C" void pow_ongpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY) -{ - pow_kernel<<>>(N, ALPHA, X, INCX, Y, INCY); - CHECK_CUDA(cudaPeekAtLastError()); -} - -extern "C" void axpy_ongpu_offset(int N, float ALPHA, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY) -{ - axpy_kernel<<>>(N, ALPHA, X, OFFX, INCX, Y, OFFY, INCY); - 
CHECK_CUDA(cudaPeekAtLastError()); -} - -extern "C" void copy_ongpu(int N, float * X, int INCX, float * Y, int INCY) -{ - copy_ongpu_offset(N, X, 0, INCX, Y, 0, INCY); -} - -extern "C" void simple_copy_ongpu(int size, float *src, float *dst) -{ - const int num_blocks = size / BLOCK + 1; - simple_copy_kernel << > >(size, src, dst); - CHECK_CUDA(cudaPeekAtLastError()); -} - -extern "C" void memcpy_ongpu(void *dst, void *src, int size_bytes) -{ - CHECK_CUDA(cudaMemcpyAsync(dst, src, size_bytes, cudaMemcpyDefault, get_cuda_stream())); - CHECK_CUDA(cudaPeekAtLastError()); -} - -extern "C" void mul_ongpu(int N, float * X, int INCX, float * Y, int INCY) -{ - mul_kernel<<>>(N, X, INCX, Y, INCY); - CHECK_CUDA(cudaPeekAtLastError()); -} - -extern "C" void copy_ongpu_offset(int N, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY) -{ - copy_kernel<<>>(N, X, OFFX, INCX, Y, OFFY, INCY); - CHECK_CUDA(cudaPeekAtLastError()); -} - -__global__ void flatten_kernel(int N, float *x, int spatial, int layers, int batch, int forward, float *out) -{ - int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if(i >= N) return; - int in_s = i%spatial; - i = i/spatial; - int in_c = i%layers; - i = i/layers; - int b = i; - - int i1 = b*layers*spatial + in_c*spatial + in_s; - int i2 = b*layers*spatial + in_s*layers + in_c; - - if (forward) out[i2] = x[i1]; - else out[i1] = x[i2]; -} - -extern "C" void flatten_ongpu(float *x, int spatial, int layers, int batch, int forward, float *out) -{ - int size = spatial*batch*layers; - flatten_kernel<<>>(size, x, spatial, layers, batch, forward, out); - CHECK_CUDA(cudaPeekAtLastError()); -} - -extern "C" void reorg_ongpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out) -{ - int size = w*h*c*batch; - reorg_kernel<<>>(size, x, w, h, c, batch, stride, forward, out); - CHECK_CUDA(cudaPeekAtLastError()); -} - -extern "C" void mask_gpu_new_api(int N, float * X, float mask_num, float * mask, float val) -{ 
- mask_kernel_new_api <<>>(N, X, mask_num, mask, val); - CHECK_CUDA(cudaPeekAtLastError()); -} - -extern "C" void mask_ongpu(int N, float * X, float mask_num, float * mask) -{ - mask_kernel<<>>(N, X, mask_num, mask); - CHECK_CUDA(cudaPeekAtLastError()); -} - -extern "C" void const_ongpu(int N, float ALPHA, float * X, int INCX) -{ - const_kernel<<>>(N, ALPHA, X, INCX); - CHECK_CUDA(cudaPeekAtLastError()); -} - -extern "C" void constrain_ongpu(int N, float ALPHA, float * X, int INCX) -{ - constrain_kernel<<>>(N, ALPHA, X, INCX); - CHECK_CUDA(cudaPeekAtLastError()); -} - -extern "C" void constrain_min_max_ongpu(int N, float MIN, float MAX, float * X, int INCX) -{ - constrain_min_max_kernel << > >(N, MIN, MAX, X, INCX); - CHECK_CUDA(cudaPeekAtLastError()); -} - - -extern "C" void scal_ongpu(int N, float ALPHA, float * X, int INCX) -{ - scal_kernel<<>>(N, ALPHA, X, INCX); - CHECK_CUDA(cudaPeekAtLastError()); -} - -extern "C" void scal_add_ongpu(int N, float ALPHA, float BETA, float * X, int INCX) -{ - scal_add_kernel << > >(N, ALPHA, BETA, X, INCX); - CHECK_CUDA(cudaPeekAtLastError()); -} - -extern "C" void supp_ongpu(int N, float ALPHA, float * X, int INCX) -{ - supp_kernel<<>>(N, ALPHA, X, INCX); - CHECK_CUDA(cudaPeekAtLastError()); -} - -extern "C" void fill_ongpu(int N, float ALPHA, float * X, int INCX) -{ - //fill_kernel<<>>(N, ALPHA, X, INCX); - //CHECK_CUDA(cudaPeekAtLastError()); - fill_kernel << > >(N, ALPHA, X, INCX); - CHECK_CUDA(cudaPeekAtLastError()); -} - -__global__ void gradient_centralization_kernel(int filters, int f_size, float *in) -{ - const int index = blockIdx.x*blockDim.x + threadIdx.x; - const int tid = index % WARP_SIZE; - const int f = index / WARP_SIZE; - - if (f >= filters) return; - - float mean = 0; - for (int i = 0; i < f_size; i += WARP_SIZE) { - mean += warpAllReduceSum(in[f*f_size + i + tid]); - } - mean = mean / f_size; - for (int i = 0; i < f_size; i += WARP_SIZE) { - in[f*f_size + i + tid] -= mean; - } - -} - -extern "C" void 
gradient_centralization_gpu(int w, int h, int c, int f, float *in) -{ - const int size = f * WARP_SIZE; - const int f_size = c * h * w; - if (f_size % WARP_SIZE == 0) { - - gradient_centralization_kernel << > > (f, f_size, in); - CHECK_CUDA(cudaPeekAtLastError()); - } -} - -__device__ float relu(float src) { - if (src > 0) return src; - return 0; -} - -__device__ float lrelu(float src) { - const float eps = 0.001; - if (src > eps) return src; - return eps; -} - -__device__ float grad_relu(float src) { - return (src > 0); -} - -__device__ float grad_lrelu(float src) { - const float eps = 0.001; - return (src > eps); -} - -__global__ void shortcut_singlelayer_simple_kernel(int size, int src_outputs, int batch, int n, int *outputs_of_layers_gpu, float **layers_output_gpu, float *out, float *in, float *weights_gpu, int nweights, WEIGHTS_NORMALIZATION_T weights_normalization) -{ - const int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (id >= size) return; - - int src_id = id; - const int src_i = src_id % src_outputs; - src_id /= src_outputs; - int src_b = src_id; - - float out_val = in[id]; - - int add_outputs = outputs_of_layers_gpu[0]; - if (src_i < add_outputs) { - int add_index = add_outputs*src_b + src_i; - - float *add = layers_output_gpu[0]; - out_val += add[add_index]; - } - out[id] = out_val; -} - -__global__ void shortcut_multilayer_kernel(int size, int src_outputs, int batch, int n, int *outputs_of_layers_gpu, float **layers_output_gpu, float *out, float *in, float *weights_gpu, int nweights, WEIGHTS_NORMALIZATION_T weights_normalization) -{ - const int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (id >= size) return; - - // nweights - l.n or l.n*l.c or (l.n*l.c*l.h*l.w) - const int layer_step = nweights / (n + 1); // 1 or l.c or (l.c * l.h * l.w) - int step = 0; - if (nweights > 0) step = src_outputs / layer_step; // (l.c * l.h * l.w) or (l.w*l.h) or 1 - - int src_id = id; - const int src_i = src_id % 
src_outputs; - src_id /= src_outputs; - int src_b = src_id; - - float sum = 1, max_val = -FLT_MAX; - if (weights_gpu && weights_normalization) { - if (weights_normalization == SOFTMAX_NORMALIZATION) { - for (int i = 0; i < (n + 1); ++i) { - const int weights_index = src_i / step + i*layer_step; // [0 or c or (c, h ,w)] - const float w = weights_gpu[weights_index]; - if (max_val < w) max_val = w; - } - } - const float eps = 0.0001; - sum = eps; - for (int i = 0; i < (n + 1); ++i) { - const int weights_index = src_i / step + i*layer_step; // [0 or c or (c, h ,w)] - const float w = weights_gpu[weights_index]; - if (weights_normalization == RELU_NORMALIZATION) sum += lrelu(w); - else if (weights_normalization == SOFTMAX_NORMALIZATION) sum += expf(w - max_val); - } - } - - float out_val = 0; - - if (weights_gpu) { - float w = weights_gpu[src_i / step]; - if (weights_normalization == RELU_NORMALIZATION) w = lrelu(w) / sum; - else if (weights_normalization == SOFTMAX_NORMALIZATION) w = expf(w - max_val) / sum; - - out_val = in[id] * w; // [0 or c or (c, h ,w)] - } - else out_val = in[id]; - - // layers - for (int i = 0; i < n; ++i) { - int add_outputs = outputs_of_layers_gpu[i]; - if (src_i < add_outputs) { - int add_index = add_outputs*src_b + src_i; - - float *add = layers_output_gpu[i]; - - if (weights_gpu) { - const int weights_index = src_i / step + (i + 1)*layer_step; // [0 or c or (c, h ,w)] - float w = weights_gpu[weights_index]; - if (weights_normalization == RELU_NORMALIZATION) w = lrelu(w) / sum; - else if (weights_normalization == SOFTMAX_NORMALIZATION) w = expf(w - max_val) / sum; - - out_val += add[add_index] * w; // [0 or c or (c, h ,w)] - } - else out_val += add[add_index]; - } - } - out[id] = out_val; -} - -extern "C" void shortcut_multilayer_gpu(int src_outputs, int batch, int n, int *outputs_of_layers_gpu, float **layers_output_gpu, float *out, float *in, float *weights_gpu, int nweights, WEIGHTS_NORMALIZATION_T weights_normalization) -{ - //printf(" 
src_outputs = %d, batch = %d, n = %d \n", src_outputs, batch, n); - int size = batch * src_outputs; - if (nweights == 0 && n == 1) { - shortcut_singlelayer_simple_kernel << > > (size, src_outputs, batch, n, outputs_of_layers_gpu, layers_output_gpu, out, in, weights_gpu, nweights, weights_normalization); - } - else { - shortcut_multilayer_kernel << > > (size, src_outputs, batch, n, outputs_of_layers_gpu, layers_output_gpu, out, in, weights_gpu, nweights, weights_normalization); - } - CHECK_CUDA(cudaPeekAtLastError()); -} - - -__global__ void backward_shortcut_multilayer_kernel(int size, int src_outputs, int batch, int n, int *outputs_of_layers_gpu, - float **layers_delta_gpu, float *delta_out, float *delta_in, float *weights_gpu, float *weight_updates_gpu, int nweights, float *in, float **layers_output_gpu, WEIGHTS_NORMALIZATION_T weights_normalization) -{ - const int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (id >= size) return; - - // nweights - l.n or l.n*l.c or (l.n*l.c*l.h*l.w) - const int layer_step = nweights / (n + 1); // 1 or l.c or (l.c * l.h * l.w) - int step = 0; - if (nweights > 0) step = src_outputs / layer_step; // (l.c * l.h * l.w) or (l.w*l.h) or 1 - - int src_id = id; - const int src_i = src_id % src_outputs; - src_id /= src_outputs; - int src_b = src_id; - - float grad = 1, sum = 1, max_val = -FLT_MAX; - int i; - if (weights_gpu && weights_normalization) { - if (weights_normalization == SOFTMAX_NORMALIZATION) { - for (int i = 0; i < (n + 1); ++i) { - const int weights_index = src_i / step + i*layer_step; // [0 or c or (c, h ,w)] - float w = weights_gpu[weights_index]; - if (max_val < w) max_val = w; - } - } - const float eps = 0.0001; - sum = eps; - for (i = 0; i < (n + 1); ++i) { - const int weights_index = src_i / step + i*layer_step; // [0 or c or (c, h ,w)] - const float w = weights_gpu[weights_index]; - if (weights_normalization == RELU_NORMALIZATION) sum += lrelu(w); - else if (weights_normalization == 
SOFTMAX_NORMALIZATION) sum += expf(w - max_val); - } - - } - - if (weights_gpu) { - float w = weights_gpu[src_i / step]; - if (weights_normalization == RELU_NORMALIZATION) w = lrelu(w) / sum; - else if (weights_normalization == SOFTMAX_NORMALIZATION) w = expf(w - max_val) / sum; - - if (weights_normalization == RELU_NORMALIZATION) grad = w; - else if (weights_normalization == SOFTMAX_NORMALIZATION) grad = w*(1-w); - - delta_out[id] += delta_in[id] * w; // [0 or c or (c, h ,w)] - float weights_update_tmp = delta_in[id] * in[id] * grad;// / step; - - if (layer_step == 1 && (size/32) > (id/32 + 1)) { - if (isnan(weights_update_tmp) || isinf(weights_update_tmp)) { - weights_update_tmp = 0; - } - float wu = warpAllReduceSum(weights_update_tmp); - if (threadIdx.x % 32 == 0) { - if (!isnan(wu) && !isinf(wu)) - atomicAdd(&weight_updates_gpu[src_i / step], wu); - } - } - else { - if (!isnan(weights_update_tmp) && !isinf(weights_update_tmp)) - atomicAdd(&weight_updates_gpu[src_i / step], weights_update_tmp); - //weight_updates_gpu[src_i / step] += weights_update_tmp; - } - } - else delta_out[id] += delta_in[id]; - - // layers - for (int i = 0; i < n; ++i) { - int add_outputs = outputs_of_layers_gpu[i]; - if (src_i < add_outputs) { - int add_index = add_outputs*src_b + src_i; - int out_index = id; - - float *layer_delta = layers_delta_gpu[i]; - if (weights_gpu) { - float *add = layers_output_gpu[i]; - - const int weights_index = src_i / step + (i + 1)*layer_step; // [0 or c or (c, h ,w)] - float w = weights_gpu[weights_index]; - if (weights_normalization == RELU_NORMALIZATION) w = lrelu(w) / sum; - else if (weights_normalization == SOFTMAX_NORMALIZATION) w = expf(w - max_val) / sum; - - if (weights_normalization == RELU_NORMALIZATION) grad = w; - else if (weights_normalization == SOFTMAX_NORMALIZATION) grad = w*(1 - w); - - layer_delta[add_index] += delta_in[id] * w; - float weights_update_tmp = delta_in[id] * add[add_index] * grad;// / step; - - if (layer_step == 1 && (size 
/ 32) > (id / 32 + 1)) { - if (isnan(weights_update_tmp) || isinf(weights_update_tmp)) { - weights_update_tmp = 0; - } - float wu = warpAllReduceSum(weights_update_tmp); - if (threadIdx.x % 32 == 0) { - if (!isnan(wu) && !isinf(wu)) - atomicAdd(&weight_updates_gpu[weights_index], wu); - //if(weights_gpu[weights_index] != 1) printf(" wu = %f, weights_update_tmp = %f, w = %f, weights_gpu[weights_index] = %f, grad = %f, weights_normalization = %d ", - // wu, weights_update_tmp, w, weights_gpu[weights_index], grad, weights_normalization); - } - } - else { - if (!isnan(weights_update_tmp) && !isinf(weights_update_tmp)) - atomicAdd(&weight_updates_gpu[weights_index], weights_update_tmp); - //weight_updates_gpu[weights_index] += weights_update_tmp; - } - } - else layer_delta[add_index] += delta_in[id]; - } - } -} - -extern "C" void backward_shortcut_multilayer_gpu(int src_outputs, int batch, int n, int *outputs_of_layers_gpu, - float **layers_delta_gpu, float *delta_out, float *delta_in, float *weights_gpu, float *weight_updates_gpu, int nweights, float *in, float **layers_output_gpu, WEIGHTS_NORMALIZATION_T weights_normalization) -{ - const int layer_step = nweights / (n + 1); // 1 or l.c or (l.c * l.h * l.w) - int step = 0; - if (nweights > 0) step = src_outputs / layer_step; // (l.c * l.h * l.w) or (l.w*l.h) or 1 - //printf(" nweights = %d, n = %d, layer_step = %d, step = %d \n", nweights, n, layer_step, step); - - //printf(" src_outputs = %d, batch = %d, n = %d \n", src_outputs, batch, n); - int size = batch * src_outputs; - backward_shortcut_multilayer_kernel << > > (size, src_outputs, batch, n, outputs_of_layers_gpu, - layers_delta_gpu, delta_out, delta_in, weights_gpu, weight_updates_gpu, nweights, in, layers_output_gpu, weights_normalization); - CHECK_CUDA(cudaPeekAtLastError()); -} - -__global__ void shortcut_kernel(int size, int minw, int minh, int minc, int stride, int sample, int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out) -{ 
- int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (id >= size) return; - int i = id % minw; - id /= minw; - int j = id % minh; - id /= minh; - int k = id % minc; - id /= minc; - int b = id % batch; - - int out_index = i*sample + w2*(j*sample + h2*(k + c2*b)); - int add_index = i*stride + w1*(j*stride + h1*(k + c1*b)); - out[out_index] += add[add_index]; -} - -extern "C" void shortcut_gpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out) -{ - int minw = (w1 < w2) ? w1 : w2; - int minh = (h1 < h2) ? h1 : h2; - int minc = (c1 < c2) ? c1 : c2; - - int stride = w1/w2; - int sample = w2/w1; - assert(stride == h1/h2); - assert(sample == h2/h1); - if(stride < 1) stride = 1; - if(sample < 1) sample = 1; - - int size = batch * minw * minh * minc; - shortcut_kernel<<>>(size, minw, minh, minc, stride, sample, batch, w1, h1, c1, add, w2, h2, c2, out); - CHECK_CUDA(cudaPeekAtLastError()); -} - -__global__ void simple_input_shortcut_kernel(float *in, int size, float *add, float *out) -{ - int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (id >= size) return; - - out[id] = in[id] + add[id]; -} - -__global__ void input_shortcut_kernel(float *in, int size, int minw, int minh, int minc, int stride, int sample, int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out) -{ - int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (id >= size) return; - int i = id % minw; - id /= minw; - int j = id % minh; - id /= minh; - int k = id % minc; - id /= minc; - int b = id % batch; - - int out_index = i*sample + w2*(j*sample + h2*(k + c2*b)); - int add_index = i*stride + w1*(j*stride + h1*(k + c1*b)); - out[out_index] = in[out_index] + add[add_index]; -} - -extern "C" void input_shortcut_gpu(float *in, int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out) -{ - if (w1 == w2 && h1 == h2 && c1 == c2) { - int size = batch * w1 * h1 * 
c1; - simple_input_shortcut_kernel << > >(in, size, add, out); - CHECK_CUDA(cudaPeekAtLastError()); - return; - } - - int minw = (w1 < w2) ? w1 : w2; - int minh = (h1 < h2) ? h1 : h2; - int minc = (c1 < c2) ? c1 : c2; - - int stride = w1 / w2; - int sample = w2 / w1; - assert(stride == h1 / h2); - assert(sample == h2 / h1); - if (stride < 1) stride = 1; - if (sample < 1) sample = 1; - - int size = batch * minw * minh * minc; - //input_shortcut_kernel << > >(in, size, minw, minh, minc, stride, sample, batch, w1, h1, c1, add, w2, h2, c2, out); - simple_copy_ongpu(w2 * h2 * c2 * batch, in, out); - shortcut_kernel << > >(size, minw, minh, minc, stride, sample, batch, w1, h1, c1, add, w2, h2, c2, out); - CHECK_CUDA(cudaPeekAtLastError()); -} - -__global__ void smooth_l1_kernel(int n, float *pred, float *truth, float *delta, float *error) -{ - int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if(i < n){ - float diff = truth[i] - pred[i]; - float abs_val = abs(diff); - if(abs_val < 1) { - error[i] = diff * diff; - delta[i] = diff; - } - else { - error[i] = 2*abs_val - 1; - delta[i] = (diff < 0) ? -1 : 1; - } - } -} - -extern "C" void smooth_l1_gpu(int n, float *pred, float *truth, float *delta, float *error) -{ - smooth_l1_kernel<<>>(n, pred, truth, delta, error); - CHECK_CUDA(cudaPeekAtLastError()); -} - -__global__ void softmax_x_ent_kernel(int n, float *pred, float *truth, float *delta, float *error) -{ - int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (i < n) { - float t = truth[i]; - float p = pred[i]; - error[i] = (t) ? 
-log(p) : 0; - delta[i] = t - p; - } -} - -extern "C" void softmax_x_ent_gpu(int n, float *pred, float *truth, float *delta, float *error) -{ - softmax_x_ent_kernel << > >(n, pred, truth, delta, error); - CHECK_CUDA(cudaPeekAtLastError()); -} - -__global__ void l2_kernel(int n, float *pred, float *truth, float *delta, float *error) -{ - int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if(i < n){ - float diff = truth[i] - pred[i]; - error[i] = diff * diff; //I know this is technically wrong, deal with it. - delta[i] = diff; - } -} - -extern "C" void l2_gpu(int n, float *pred, float *truth, float *delta, float *error) -{ - l2_kernel<<>>(n, pred, truth, delta, error); - CHECK_CUDA(cudaPeekAtLastError()); -} - - - -__global__ void weighted_sum_kernel(int n, float *a, float *b, float *s, float *c) -{ - int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if(i < n){ - c[i] = s[i]*a[i] + (1-s[i])*(b ? b[i] : 0); - } -} - -extern "C" void weighted_sum_gpu(float *a, float *b, float *s, int num, float *c) -{ - weighted_sum_kernel<<>>(num, a, b, s, c); - CHECK_CUDA(cudaPeekAtLastError()); -} - -__global__ void weighted_delta_kernel(int n, float *a, float *b, float *s, float *da, float *db, float *ds, float *dc) -{ - int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if(i < n){ - if(da) da[i] += dc[i] * s[i]; - db[i] += dc[i] * (1-s[i]); - ds[i] += dc[i] * a[i] + dc[i] * -b[i]; - } -} - -extern "C" void weighted_delta_gpu(float *a, float *b, float *s, float *da, float *db, float *ds, int num, float *dc) -{ - weighted_delta_kernel<<>>(num, a, b, s, da, db, ds, dc); - CHECK_CUDA(cudaPeekAtLastError()); -} - -__global__ void mult_add_into_kernel(int n, float *a, float *b, float *c) -{ - int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if(i < n){ - c[i] += a[i]*b[i]; - } -} - -extern "C" void mult_add_into_gpu(int num, float *a, float *b, float *c) -{ - mult_add_into_kernel<<>>(num, a, b, 
c); - CHECK_CUDA(cudaPeekAtLastError()); -} - - -__device__ void softmax_device(int n, float *input, float temp, float *output) -{ - int i; - float sum = 0; - float largest = -INFINITY; - for(i = 0; i < n; ++i){ - int val = input[i]; - largest = (val>largest) ? val : largest; - } - for(i = 0; i < n; ++i){ - float e = exp(input[i]/temp - largest/temp); - sum += e; - output[i] = e; - } - for(i = 0; i < n; ++i){ - output[i] /= sum; - } -} - -__global__ void softmax_kernel(int n, int offset, int batch, float *input, float temp, float *output) -{ - int b = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if(b >= batch) return; - softmax_device(n, input + b*offset, temp, output + b*offset); -} - -extern "C" void softmax_gpu(float *input, int n, int offset, int groups, float temp, float *output) -{ - int inputs = n; - int batch = groups; - softmax_kernel<<>>(inputs, offset, batch, input, temp, output); - CHECK_CUDA(cudaPeekAtLastError()); -} - -__device__ void softmax_device_new_api(float *input, int n, float temp, int stride, float *output) -{ - int i; - float sum = 0; - float largest = -INFINITY; - for (i = 0; i < n; ++i) { - int val = input[i*stride]; - largest = (val>largest) ? 
val : largest; - } - for (i = 0; i < n; ++i) { - float e = expf(input[i*stride] / temp - largest / temp); - sum += e; - output[i*stride] = e; - } - for (i = 0; i < n; ++i) { - output[i*stride] /= sum; - } -} - -__global__ void softmax_kernel_new_api(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output) -{ - int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (id >= batch*groups) return; - int b = id / groups; - int g = id % groups; - softmax_device_new_api(input + b*batch_offset + g*group_offset, n, temp, stride, output + b*batch_offset + g*group_offset); -} - -extern "C" void softmax_gpu_new_api(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output) -{ - softmax_kernel_new_api << > >(input, n, batch, batch_offset, groups, group_offset, stride, temp, output); - CHECK_CUDA(cudaPeekAtLastError()); -} - - -__global__ void upsample_kernel(size_t N, float *x, int w, int h, int c, int batch, int stride, int forward, float scale, float *out) -{ - size_t i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (i >= N) return; - int out_index = i; - int out_w = i % (w*stride); - i = i / (w*stride); - int out_h = i % (h*stride); - i = i / (h*stride); - int out_c = i%c; - i = i / c; - int b = i%batch; - - int in_w = out_w / stride; - int in_h = out_h / stride; - int in_c = out_c; - - int in_index = b*w*h*c + in_c*w*h + in_h*w + in_w; - - - if (forward) out[out_index] += scale * x[in_index]; - else atomicAdd(x + in_index, scale * out[out_index]); -} - -extern "C" void upsample_gpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out) -{ - size_t size = w*h*c*batch*stride*stride; - upsample_kernel << > >(size, in, w, h, c, batch, stride, forward, scale, out); - CHECK_CUDA(cudaPeekAtLastError()); -} - -__global__ void softmax_tree_kernel(float *input, int spatial, int 
batch, int stride, float temp, float *output, int groups, int *group_size, int *group_offset) -{ - int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (id >= spatial*batch*groups) return; - int s = id % spatial; - id = id / spatial; - int g = id % groups; - int b = id / groups; - int goff = group_offset[g] * spatial; - int boff = b*stride; - softmax_device_new_api(input + goff + boff + s, group_size[g], temp, spatial, output + goff + boff + s); -} - -extern "C" void softmax_tree_gpu(float *input, int spatial, int batch, int stride, float temp, float *output, tree hier) -{ - int *tree_groups_size = cuda_make_int_array_new_api(hier.group_size, hier.groups); - int *tree_groups_offset = cuda_make_int_array_new_api(hier.group_offset, hier.groups); - /* - static int *tree_groups_size = 0; - static int *tree_groups_offset = 0; - if(!tree_groups_size){ - tree_groups_size = cuda_make_int_array(hier.group_size, hier.groups); - tree_groups_offset = cuda_make_int_array(hier.group_offset, hier.groups); - } - */ - int num = spatial*batch*hier.groups; - softmax_tree_kernel <<>>(input, spatial, batch, stride, temp, output, hier.groups, tree_groups_size, tree_groups_offset); - CHECK_CUDA(cudaPeekAtLastError()); - cuda_free((float *)tree_groups_size); - cuda_free((float *)tree_groups_offset); -} - - -__global__ void fix_nan_and_inf_kernel(float *input, size_t size) -{ - const int index = blockIdx.x*blockDim.x + threadIdx.x; - if (index < size) { - float val = input[index]; - if (isnan(val) || isinf(val)) { - input[index] = 1.0f / (fabs((float)index) + 1); // pseudo random value - } - } -} - -extern "C" void fix_nan_and_inf(float *input, size_t size) -{ - const int block_size = BLOCK; - const int num_blocks = get_number_of_blocks(size, block_size); - fix_nan_and_inf_kernel << > >(input, size); - CHECK_CUDA(cudaPeekAtLastError()); - //CHECK_CUDA(cudaDeviceSynchronize()); -} - - -__global__ void reset_nan_and_inf_kernel(float *input, size_t size) -{ - const 
int index = blockIdx.x*blockDim.x + threadIdx.x; - if (index < size) { - float val = input[index]; - if (isnan(val) || isinf(val)) { - input[index] = 0; - } - } -} - -extern "C" void reset_nan_and_inf(float *input, size_t size) -{ - const int block_size = BLOCK; - const int num_blocks = get_number_of_blocks(size, block_size); - reset_nan_and_inf_kernel << > >(input, size); - CHECK_CUDA(cudaPeekAtLastError()); - //CHECK_CUDA(cudaDeviceSynchronize()); -} - - - -__global__ void is_nan_or_inf_kernel(float *input, size_t size, int *pinned_return) -{ - const int index = blockIdx.x*blockDim.x + threadIdx.x; - if (index < size) { - float val = input[index]; - if (isnan(val) || isinf(val)) - *pinned_return = 1; - } -} - -extern "C" int is_nan_or_inf(float *input, size_t size) -{ - int *pinned_return; - CHECK_CUDA(cudaHostAlloc(&pinned_return, sizeof(int), cudaHostRegisterMapped)); - *pinned_return = 0; - - const int block_size = BLOCK; - const int num_blocks = get_number_of_blocks(size, block_size); - is_nan_or_inf_kernel << > >(input, size, pinned_return); - CHECK_CUDA(cudaDeviceSynchronize()); - int ret_val = *pinned_return; - - CHECK_CUDA(cudaFreeHost(pinned_return)); - return ret_val; -} - -__global__ void add_3_arrays_activate_kernel(float *a1, float *a2, float *a3, size_t size, ACTIVATION a, float *dst) -{ - const int index = blockIdx.x*blockDim.x + threadIdx.x; - if (index < size) { - float val = 0; - if (a1) val += a1[index]; - if (a2) val += a2[index]; - if (a3) val += a3[index]; - if (a == LOGISTIC) val = 1.f / (1.f + expf(-val)); - else if (a == TANH) val = (2 / (1 + expf(-2 * val)) - 1); - else if (a == LEAKY) val = (val < 0) ? 
val*0.1 : val; - dst[index] = val; - } -} - -extern "C" void add_3_arrays_activate(float *a1, float *a2, float *a3, size_t size, ACTIVATION a, float *dst) -{ - const int block_size = BLOCK; - const int num_blocks = get_number_of_blocks(size, block_size); - if (!(a == LOGISTIC || a == TANH || a == LEAKY || a == LINEAR)) { - printf(" add_3_arrays_activate() doesn't support activation %d, it supports only LOGISTIC and TANH \n", a); - exit(EXIT_FAILURE); - } - add_3_arrays_activate_kernel << > >(a1, a2, a3, size, a, dst); -} - - -__global__ void sum_of_mults_kernel(float *a1, float *a2, float *b1, float *b2, size_t size, float *dst) -{ - const int index = blockIdx.x*blockDim.x + threadIdx.x; - if (index < size) { - dst[index] = a1[index] * a2[index] + b1[index] * b2[index]; - } -} - -extern "C" void sum_of_mults(float *a1, float *a2, float *b1, float *b2, size_t size, float *dst) -{ - const int block_size = BLOCK; - const int num_blocks = get_number_of_blocks(size, block_size); - sum_of_mults_kernel << > >(a1, a2, b1, b2, size, dst); -} - - -__global__ void activate_and_mult_kernel(float *a1, float *a2, size_t size, ACTIVATION a, float *dst) -{ - const int index = blockIdx.x*blockDim.x + threadIdx.x; - if (index < size) { - float val = a1[index]; - if (a == TANH) val = (2 / (1 + expf(-2 * val)) - 1); - else if (a == LEAKY) val = (val < 0) ? 
val*0.1 : val; - dst[index] = val * a2[index]; - } -} - -extern "C" void activate_and_mult(float *a1, float *a2, size_t size, ACTIVATION a, float *dst) -{ - const int block_size = BLOCK; - const int num_blocks = get_number_of_blocks(size, block_size); - if (!(a == TANH || a == LEAKY || a == LINEAR)) { - printf(" activat_and_mult() doesn't support activation %d, it supports only TANH \n", a); - exit(EXIT_FAILURE); - } - activate_and_mult_kernel << > >(a1, a2, size, a, dst); -} - - - -__global__ void scale_channels_kernel(float *in_w_h_c, int size, int channel_size, int batch_size, int scale_wh, float *scales_c, float *out) -{ - const int index = blockIdx.x*blockDim.x + threadIdx.x; - if (index < size) { - if (scale_wh) { - int osd_index = index % channel_size + (index / batch_size)*channel_size; - - out[index] = in_w_h_c[index] * scales_c[osd_index]; - } - else { - out[index] = in_w_h_c[index] * scales_c[index / channel_size]; - } - } -} - -extern "C" void scale_channels_gpu(float *in_w_h_c, int size, int channel_size, int batch_size, int scale_wh, float *scales_c, float *out) -{ - const int block_size = BLOCK; - const int num_blocks = get_number_of_blocks(size, block_size); - scale_channels_kernel << > >(in_w_h_c, size, channel_size, batch_size, scale_wh, scales_c, out); - CHECK_CUDA(cudaPeekAtLastError()); -} - - - - -__global__ void backward_scale_channels_kernel(float *in_w_h_c_delta, int size, int channel_size, int batch_size, int scale_wh, - float *in_scales_c, float *out_from_delta, - float *in_from_output, float *out_state_delta) -{ - const int index = blockIdx.x*blockDim.x + threadIdx.x; - - if (index < size) { - - if (scale_wh) - { - int osd_index = index % channel_size + (index / batch_size)*channel_size; - - //out_state_delta[osd_index] += in_w_h_c_delta[index] * in_from_output[index]; // l.delta * from (should be divided by channel_size?) 
- atomicAdd(&out_state_delta[osd_index], in_w_h_c_delta[index] * in_from_output[index] / channel_size); // l.delta * from - - out_from_delta[index] += in_scales_c[osd_index] * in_w_h_c_delta[index]; // input * l.delta // atomic isn't required here - - } - else { - int osd_index = index / channel_size; - //out_state_delta[osd_index] += in_w_h_c_delta[index] * in_from_output[index]; // l.delta * from (should be divided by channel_size?) - - int warp_id = index / 32; - int index_warp_start = warp_id * 32; - int osd_index_warp_start = index_warp_start / channel_size; - int osd_index_warp_end = (index_warp_start + 31) / channel_size; - - if (osd_index_warp_start == osd_index_warp_end) // all thread in warp process the same channel - { - float sum = warpAllReduceSum(in_w_h_c_delta[index] * in_from_output[index]); // l.delta * from - if (threadIdx.x % 32 == 0) { - atomicAdd(&out_state_delta[osd_index], sum); - //out_state_delta[osd_index] += sum; - } - } - else { - atomicAdd(&out_state_delta[osd_index], in_w_h_c_delta[index] * in_from_output[index]); // l.delta * from - } - - out_from_delta[index] += in_scales_c[osd_index] * in_w_h_c_delta[index]; // input * l.delta // atomic isn't required here - } - } -} - -extern "C" void backward_scale_channels_gpu(float *in_w_h_c_delta, int size, int channel_size, int batch_size, int scale_wh, - float *in_scales_c, float *out_from_delta, - float *in_from_output, float *out_state_delta) -{ - const int block_size = BLOCK; - const int num_blocks = get_number_of_blocks(size, block_size); - backward_scale_channels_kernel << > > (in_w_h_c_delta, size, channel_size, batch_size, scale_wh, - in_scales_c, out_from_delta, - in_from_output, out_state_delta); - - CHECK_CUDA(cudaPeekAtLastError()); -} - - -__global__ void sam_kernel(float *in_w_h_c, int size, int channel_size, float *scales_c, float *out) -{ - const int index = blockIdx.x*blockDim.x + threadIdx.x; - if (index < size) { - out[index] = in_w_h_c[index] * scales_c[index]; - } -} - 
-extern "C" void sam_gpu(float *in_w_h_c, int size, int channel_size, float *scales_c, float *out) -{ - const int block_size = BLOCK; - const int num_blocks = get_number_of_blocks(size, block_size); - sam_kernel << > >(in_w_h_c, size, channel_size, scales_c, out); - CHECK_CUDA(cudaPeekAtLastError()); -} - - -__global__ void backward_sam_kernel(float *in_w_h_c_delta, int size, int channel_size, - float *in_scales_c, float *out_from_delta, - float *in_from_output, float *out_state_delta) -{ - const int index = blockIdx.x*blockDim.x + threadIdx.x; - if (index < size) { - out_state_delta[index] += in_w_h_c_delta[index] * in_from_output[index]; // l.delta * from (should be divided by channel_size?) - out_from_delta[index] += in_scales_c[index] * in_w_h_c_delta[index]; // input * l.delta - - //out_state_delta[index] += in_w_h_c_delta[index]; - //out_from_delta[index] = in_w_h_c_delta[index]; - } -} - -extern "C" void backward_sam_gpu(float *in_w_h_c_delta, int size, int channel_size, - float *in_scales_c, float *out_from_delta, - float *in_from_output, float *out_state_delta) -{ - const int block_size = BLOCK; - const int num_blocks = get_number_of_blocks(size, block_size); - backward_sam_kernel << > > (in_w_h_c_delta, size, channel_size, - in_scales_c, out_from_delta, - in_from_output, out_state_delta); - - CHECK_CUDA(cudaPeekAtLastError()); -} - - -__global__ void smooth_rotate_weights_kernel(const float *src_weight_gpu, float *weight_deform_gpu, int nweights, int n, int kernel_size, int angle, int reverse) -{ - const int index = blockIdx.x*blockDim.x + threadIdx.x; - const int kernel_area = kernel_size * kernel_size; - const int i = index * kernel_area; - - const int stage_step = (nweights / kernel_area) / 4; // 4 stages - const int stage_id = index / stage_step; - - // nweights = (c / groups) * n * size * size; - // kernel_area = size*size - - if (i < nweights) - { - // rotate left or right - if (reverse) angle = -angle; - - const float cos_a = cosf(angle * 
3.14159265 / 180); - const float sin_a = sinf(angle * 3.14159265 / 180); - const int x_c = kernel_size / 2; - const int y_c = kernel_size / 2; - - float dropout_sum = 0; - - for (int y = 0; y < kernel_size; ++y) { - for (int x = 0; x < kernel_size; ++x) { - // Xsource = x*cos(alpha) + y*sin(alpha) - // Ysource = -x*sin(alpha) + y*cos(alpha) - - float x_s = x_c + (x - x_c)*cos_a + (y - y_c)*sin_a; - float y_s = y_c - (x - x_c)*sin_a + (y - y_c)*cos_a; - - int x_0 = floor(x_s); // round down - int x_1 = ceil(x_s); // round up - if (x_0 == x_1) x_1 = x_0 + 1; - int y_0 = floor(y_s); - int y_1 = ceil(y_s); - if (y_0 == y_1) y_1 = y_0 + 1; - - float c_x_0 = x_1 - x_s; - float c_x_1 = x_s - x_0; - float c_y_0 = y_1 - y_s; - float c_y_1 = y_s - y_0; - - - float val = 0; - if (x_0 >= 0 && x_0 < kernel_size && y_0 >= 0 && y_0 < kernel_size) val += src_weight_gpu[x_0 + y_0*kernel_size + i] * c_x_0 * c_y_0; - else dropout_sum += c_x_0 * c_y_0; - - if (x_1 >= 0 && x_1 < kernel_size && y_0 >= 0 && y_0 < kernel_size) val += src_weight_gpu[x_1 + y_0*kernel_size + i] * c_x_1 * c_y_0; - else dropout_sum += c_x_1 * c_y_0; - - if (x_0 >= 0 && x_0 < kernel_size && y_1 >= 0 && y_1 < kernel_size) val += src_weight_gpu[x_0 + y_1*kernel_size + i] * c_x_0 * c_y_1; - else dropout_sum += c_x_0 * c_y_1; - - if (x_1 >= 0 && x_1 < kernel_size && y_1 >= 0 && y_1 < kernel_size) val += src_weight_gpu[x_1 + y_1*kernel_size + i] * c_x_1 * c_y_1; - else dropout_sum += c_x_1 * c_y_1; - - weight_deform_gpu[x + y*kernel_size + i] = val; - } - } - - // compensate for dropped items - const float coef = (kernel_size*kernel_size) / (kernel_size*kernel_size - dropout_sum); - for (int y = 0; y < kernel_size; ++y) { - for (int x = 0; x < kernel_size; ++x) { - weight_deform_gpu[x + y*kernel_size + i] *= coef; - } - } - } -} - - -extern "C" void smooth_rotate_weights_gpu(const float *src_weight_gpu, float *weight_deform_gpu, int nweights, int n, int size, int angle, int reverse) -{ - const int kernel_area = 
size*size; - const int block_size = BLOCK; - const int num_blocks = get_number_of_blocks(nweights / kernel_area, block_size); - smooth_rotate_weights_kernel << > > (src_weight_gpu, weight_deform_gpu, nweights, n, size, angle, reverse); - - CHECK_CUDA(cudaPeekAtLastError()); -} - - - -__global__ void stretch_weights_kernel(const float *src_weight_gpu, float *weight_deform_gpu, int nweights, int n, int kernel_size, float scale, int reverse) -{ - const int index = blockIdx.x*blockDim.x + threadIdx.x; - const int kernel_area = kernel_size * kernel_size; - const int i = index * kernel_area; - - const int stage_step = (nweights / kernel_area) / 4; // 4 stages - const int stage_id = index / stage_step; - - // nweights = (c / groups) * n * size * size; - // kernel_area = size*size - - if (i < nweights) - { - - if (stage_id == 0) { - // simple copy - for (int x = 0; x < kernel_size; ++x) { - for (int y = 0; y < kernel_size; ++y) { - weight_deform_gpu[x + y*kernel_size + i] = src_weight_gpu[x + y*kernel_size + i]; - } - } - } - else if (stage_id > 0) - { - if (stage_id == 1) scale = 0.65; - else if (stage_id == 2) scale = 0.8; - else if (stage_id == 3) scale = 1.3; - - if (reverse) scale = 1 / scale; - - const int x_c = kernel_size / 2; - const int y_c = kernel_size / 2; - - float dropout_sum = 0; - - for (int y = 0; y < kernel_size; ++y) { - for (int x = 0; x < kernel_size; ++x) { - // Xsource = x_c + (x_d - x_c) / scale - // Ysource = y_c + (y_d - y_c) / scale - - float x_s = x_c + (x - x_c) / scale; - float y_s = y_c + (y - y_c) / scale; - - int x_0 = floor(x_s); // round down - int x_1 = ceil(x_s); // round up - if (x_0 == x_1) x_1 = x_0 + 1; - int y_0 = floor(y_s); - int y_1 = ceil(y_s); - if (y_0 == y_1) y_1 = y_0 + 1; - - float c_x_0 = x_1 - x_s; - float c_x_1 = x_s - x_0; - float c_y_0 = y_1 - y_s; - float c_y_1 = y_s - y_0; - - float val = 0; - if (x_0 >= 0 && x_0 < kernel_size && y_0 >= 0 && y_0 < kernel_size) val += src_weight_gpu[x_0 + y_0*kernel_size + i] * 
c_x_0 * c_y_0; - else dropout_sum += c_x_0 * c_y_0; - - if (x_1 >= 0 && x_1 < kernel_size && y_0 >= 0 && y_0 < kernel_size) val += src_weight_gpu[x_1 + y_0*kernel_size + i] * c_x_1 * c_y_0; - else dropout_sum += c_x_1 * c_y_0; - - if (x_0 >= 0 && x_0 < kernel_size && y_1 >= 0 && y_1 < kernel_size) val += src_weight_gpu[x_0 + y_1*kernel_size + i] * c_x_0 * c_y_1; - else dropout_sum += c_x_0 * c_y_1; - - if (x_1 >= 0 && x_1 < kernel_size && y_1 >= 0 && y_1 < kernel_size) val += src_weight_gpu[x_1 + y_1*kernel_size + i] * c_x_1 * c_y_1; - else dropout_sum += c_x_1 * c_y_1; - - weight_deform_gpu[x + y*kernel_size + i] = val; - } - } - - // compensate for dropped items - //const float coef = (kernel_size*kernel_size) / (kernel_size*kernel_size - dropout_sum); - for (int y = 0; y < kernel_size; ++y) { - for (int x = 0; x < kernel_size; ++x) { - //if (scale < 1) weight_deform_gpu[x + y*kernel_size + i] /= scale;// *= coef; - weight_deform_gpu[x + y*kernel_size + i] /= scale;// *= coef; - } - } - } - } -} - - -extern "C" void stretch_weights_gpu(const float *src_weight_gpu, float *weight_deform_gpu, int nweights, int n, int size, float scale, int reverse) -{ - const int kernel_area = size*size; - const int block_size = BLOCK; - const int num_blocks = get_number_of_blocks(nweights / kernel_area, block_size); - stretch_weights_kernel << > > (src_weight_gpu, weight_deform_gpu, nweights, n, size, scale, reverse); - - CHECK_CUDA(cudaPeekAtLastError()); -} - - - -__global__ void sway_and_flip_weights_kernel(const float *src_weight_gpu, float *weight_deform_gpu, int nweights, int n, int kernel_size, int angle, int reverse) -{ - const int index = blockIdx.x*blockDim.x + threadIdx.x; - const int kernel_area = kernel_size * kernel_size; - const int i = index * kernel_area; - - const int stage_step = (nweights / kernel_area) / 4; // 4 stages - const int stage_id = index / stage_step; - - // nweights = (c / groups) * n * size * size; - // kernel_area = size*size - - if (i < nweights) 
- { - - if (stage_id == 0) { - // simple copy - for (int x = 0; x < kernel_size; ++x) { - for (int y = 0; y < kernel_size; ++y) { - weight_deform_gpu[x + y*kernel_size + i] = src_weight_gpu[x + y*kernel_size + i]; - } - } - } - else if (stage_id == 1 || stage_id == 2) - { - // rotate left or right - if (stage_id == 2) angle = -angle; - if (reverse) angle = -angle; - - const float cos_a = cosf(angle * 3.14159265 / 180); - const float sin_a = sinf(angle * 3.14159265 / 180); - const int x_c = kernel_size / 2; - const int y_c = kernel_size / 2; - - float dropout_sum = 0; - - for (int y = 0; y < kernel_size; ++y) { - for (int x = 0; x < kernel_size; ++x) { - // Xsource = x*cos(alpha) + y*sin(alpha) - // Ysource = -x*sin(alpha) + y*cos(alpha) - - float x_s = x_c + (x - x_c)*cos_a + (y - y_c)*sin_a; - float y_s = y_c - (x - x_c)*sin_a + (y - y_c)*cos_a; - - int x_0 = floor(x_s); // round down - int x_1 = ceil(x_s); // round up - if (x_0 == x_1) x_1 = x_0 + 1; - int y_0 = floor(y_s); - int y_1 = ceil(y_s); - if (y_0 == y_1) y_1 = y_0 + 1; - - float c_x_0 = x_1 - x_s; - float c_x_1 = x_s - x_0; - float c_y_0 = y_1 - y_s; - float c_y_1 = y_s - y_0; - - float val = 0; - if (x_0 >= 0 && x_0 < kernel_size && y_0 >= 0 && y_0 < kernel_size) val += src_weight_gpu[x_0 + y_0*kernel_size + i] * c_x_0 * c_y_0; - else dropout_sum += c_x_0 * c_y_0; - - if (x_1 >= 0 && x_1 < kernel_size && y_0 >= 0 && y_0 < kernel_size) val += src_weight_gpu[x_1 + y_0*kernel_size + i] * c_x_1 * c_y_0; - else dropout_sum += c_x_1 * c_y_0; - - if (x_0 >= 0 && x_0 < kernel_size && y_1 >= 0 && y_1 < kernel_size) val += src_weight_gpu[x_0 + y_1*kernel_size + i] * c_x_0 * c_y_1; - else dropout_sum += c_x_0 * c_y_1; - - if (x_1 >= 0 && x_1 < kernel_size && y_1 >= 0 && y_1 < kernel_size) val += src_weight_gpu[x_1 + y_1*kernel_size + i] * c_x_1 * c_y_1; - else dropout_sum += c_x_1 * c_y_1; - - weight_deform_gpu[x + y*kernel_size + i] = val; - } - } - - // compensate for dropped items - const float coef = 
(kernel_size*kernel_size) / (kernel_size*kernel_size - dropout_sum); - for (int y = 0; y < kernel_size; ++y) { - for (int x = 0; x < kernel_size; ++x) { - weight_deform_gpu[x + y*kernel_size + i] *= coef; - } - } - } - else if (stage_id == 3) - { - // flip - for (int y = 0; y < kernel_size; ++y) { - for (int x = 0; x < kernel_size; ++x) { - weight_deform_gpu[(kernel_size - x - 1) + y*kernel_size + i] = src_weight_gpu[x + y*kernel_size + i]; - } - } - } - } -} - - -extern "C" void sway_and_flip_weights_gpu(const float *src_weight_gpu, float *weight_deform_gpu, int nweights, int n, int size, int angle, int reverse) -{ - const int kernel_area = size*size; - const int block_size = BLOCK; - const int num_blocks = get_number_of_blocks(nweights / kernel_area, block_size); - sway_and_flip_weights_kernel << > > (src_weight_gpu, weight_deform_gpu, nweights, n, size, angle, reverse); - - CHECK_CUDA(cudaPeekAtLastError()); -} - - - - - - - -__global__ void rotate_weights_kernel(const float *src_weight_gpu, float *weight_deform_gpu, int nweights, int n, int kernel_size, int reverse) -{ - const int index = blockIdx.x*blockDim.x + threadIdx.x; - const int kernel_area = kernel_size * kernel_size; - const int i = index * kernel_area; - - const int stage_step = (nweights / kernel_area) / 4; // 4 stages - const int stage_id = index / stage_step; - - // nweights = (c / groups) * n * size * size; - // kernel_area = size*size - - if (i < nweights) - { - // if(reverse) - - if (stage_id == 0) { - // simple copy - for (int y = 0; y < kernel_size; ++y) { - for (int x = 0; x < kernel_size; ++x) { - const int src_i = x + y*kernel_size + i; - const int dst_i = x + y*kernel_size + i; - if (reverse) weight_deform_gpu[src_i] = src_weight_gpu[dst_i]; - else weight_deform_gpu[dst_i] = src_weight_gpu[src_i]; - } - } - } - else if (stage_id == 1) - { - // 90 degree clockwise rotation - 1 - for (int y = 0; y < kernel_size; ++y) { - for (int x = 0; x < kernel_size; ++x) { - const int src_i = x + 
y*kernel_size + i; - const int dst_i = (kernel_size - 1 - y) + x*kernel_size + i; - if (reverse) weight_deform_gpu[src_i] = src_weight_gpu[dst_i]; - else weight_deform_gpu[dst_i] = src_weight_gpu[src_i]; - } - } - } - else if (stage_id == 2) - { - // 180 degree clockwise rotation - 2 - for (int y = 0; y < kernel_size; ++y) { - for (int x = 0; x < kernel_size; ++x) { - const int src_i = x + y*kernel_size + i; - const int dst_i = (kernel_size - 1 - x) + (kernel_size - 1 - y)*kernel_size + i; - if (reverse) weight_deform_gpu[src_i] = src_weight_gpu[dst_i]; - else weight_deform_gpu[dst_i] = src_weight_gpu[src_i]; - } - } - } - else if (stage_id == 3) - { - // 270 degree clockwise rotation - 3 - for (int y = 0; y < kernel_size; ++y) { - for (int x = 0; x < kernel_size; ++x) { - const int src_i = x + y*kernel_size + i; - const int dst_i = y + (kernel_size - 1 - x)*kernel_size + i; - if (reverse) weight_deform_gpu[src_i] = src_weight_gpu[dst_i]; - else weight_deform_gpu[dst_i] = src_weight_gpu[src_i]; - } - } - } - } -} - - -extern "C" void rotate_weights_gpu(const float *src_weight_gpu, float *weight_deform_gpu, int nweights, int n, int size, int reverse) -{ - const int kernel_area = size*size; - const int block_size = BLOCK; - const int num_blocks = get_number_of_blocks(nweights / kernel_area, block_size); - rotate_weights_kernel << > > (src_weight_gpu, weight_deform_gpu, nweights, n, size, reverse); - - CHECK_CUDA(cudaPeekAtLastError()); -} - - - -__global__ void stretch_sway_flip_weights_kernel(const float *src_weight_gpu, float *weight_deform_gpu, int nweights, int n, int kernel_size, float angle, int reverse) -{ - const int index = blockIdx.x*blockDim.x + threadIdx.x; - const int kernel_area = kernel_size * kernel_size; - const int i = index * kernel_area; - - const int stage_step = (nweights / kernel_area) / 8; // 8 stages - const int stage_id = index / stage_step; - - // nweights = (c / groups) * n * size * size; - // kernel_area = size*size - - if (i < nweights) 
- { - - if (stage_id == 0) { - // simple copy - for (int x = 0; x < kernel_size; ++x) { - for (int y = 0; y < kernel_size; ++y) { - weight_deform_gpu[x + y*kernel_size + i] = src_weight_gpu[x + y*kernel_size + i]; - } - } - } - else if (stage_id == 1 || stage_id == 2 || stage_id == 3 || stage_id == 4) - { - float scale = 0.5; - if (stage_id == 1) scale = 0.65; - else if (stage_id == 2) scale = 0.8; - else if (stage_id == 3) scale = 1.2; - else if (stage_id == 4) scale = 1.4; - - if (reverse) scale = 1 / scale; - - const int x_c = kernel_size / 2; - const int y_c = kernel_size / 2; - - float dropout_sum = 0; - - for (int y = 0; y < kernel_size; ++y) { - for (int x = 0; x < kernel_size; ++x) { - // Xsource = x_c + (x_d - x_c) / scale - // Ysource = y_c + (y_d - y_c) / scale - - float x_s = x_c + (x - x_c) / scale; - float y_s = y_c + (y - y_c) / scale; - - int x_0 = floor(x_s); // round down - int x_1 = ceil(x_s); // round up - if (x_0 == x_1) x_1 = x_0 + 1; - int y_0 = floor(y_s); - int y_1 = ceil(y_s); - if (y_0 == y_1) y_1 = y_0 + 1; - - float c_x_0 = x_1 - x_s; - float c_x_1 = x_s - x_0; - float c_y_0 = y_1 - y_s; - float c_y_1 = y_s - y_0; - - float val = 0; - if (x_0 >= 0 && x_0 < kernel_size && y_0 >= 0 && y_0 < kernel_size) val += src_weight_gpu[x_0 + y_0*kernel_size + i] * c_x_0 * c_y_0; - else dropout_sum += c_x_0 * c_y_0; - - if (x_1 >= 0 && x_1 < kernel_size && y_0 >= 0 && y_0 < kernel_size) val += src_weight_gpu[x_1 + y_0*kernel_size + i] * c_x_1 * c_y_0; - else dropout_sum += c_x_1 * c_y_0; - - if (x_0 >= 0 && x_0 < kernel_size && y_1 >= 0 && y_1 < kernel_size) val += src_weight_gpu[x_0 + y_1*kernel_size + i] * c_x_0 * c_y_1; - else dropout_sum += c_x_0 * c_y_1; - - if (x_1 >= 0 && x_1 < kernel_size && y_1 >= 0 && y_1 < kernel_size) val += src_weight_gpu[x_1 + y_1*kernel_size + i] * c_x_1 * c_y_1; - else dropout_sum += c_x_1 * c_y_1; - - weight_deform_gpu[x + y*kernel_size + i] = val; - } - } - - // compensate for dropped items - //const float coef = 
(kernel_size*kernel_size) / (kernel_size*kernel_size - dropout_sum); - for (int y = 0; y < kernel_size; ++y) { - for (int x = 0; x < kernel_size; ++x) { - if(scale > 1) - weight_deform_gpu[x + y*kernel_size + i] /= scale;// *= coef; - } - } - } - else if (stage_id == 5 || stage_id == 6) - { - // rotate left or right - if (stage_id == 6) angle = -angle; - if (reverse) angle = -angle; - - const float cos_a = cosf(angle * 3.14159265 / 180); - const float sin_a = sinf(angle * 3.14159265 / 180); - const int x_c = kernel_size / 2; - const int y_c = kernel_size / 2; - - float dropout_sum = 0; - - for (int y = 0; y < kernel_size; ++y) { - for (int x = 0; x < kernel_size; ++x) { - // Xsource = x*cos(alpha) + y*sin(alpha) - // Ysource = -x*sin(alpha) + y*cos(alpha) - - float x_s = x_c + (x - x_c)*cos_a + (y - y_c)*sin_a; - float y_s = y_c - (x - x_c)*sin_a + (y - y_c)*cos_a; - - int x_0 = floor(x_s); // round down - int x_1 = ceil(x_s); // round up - if (x_0 == x_1) x_1 = x_0 + 1; - int y_0 = floor(y_s); - int y_1 = ceil(y_s); - if (y_0 == y_1) y_1 = y_0 + 1; - - float c_x_0 = x_1 - x_s; - float c_x_1 = x_s - x_0; - float c_y_0 = y_1 - y_s; - float c_y_1 = y_s - y_0; - - float val = 0; - if (x_0 >= 0 && x_0 < kernel_size && y_0 >= 0 && y_0 < kernel_size) val += src_weight_gpu[x_0 + y_0*kernel_size + i] * c_x_0 * c_y_0; - else dropout_sum += c_x_0 * c_y_0; - - if (x_1 >= 0 && x_1 < kernel_size && y_0 >= 0 && y_0 < kernel_size) val += src_weight_gpu[x_1 + y_0*kernel_size + i] * c_x_1 * c_y_0; - else dropout_sum += c_x_1 * c_y_0; - - if (x_0 >= 0 && x_0 < kernel_size && y_1 >= 0 && y_1 < kernel_size) val += src_weight_gpu[x_0 + y_1*kernel_size + i] * c_x_0 * c_y_1; - else dropout_sum += c_x_0 * c_y_1; - - if (x_1 >= 0 && x_1 < kernel_size && y_1 >= 0 && y_1 < kernel_size) val += src_weight_gpu[x_1 + y_1*kernel_size + i] * c_x_1 * c_y_1; - else dropout_sum += c_x_1 * c_y_1; - - weight_deform_gpu[x + y*kernel_size + i] = val; - } - } - - // compensate for dropped items - const 
float coef = (kernel_size*kernel_size) / (kernel_size*kernel_size - dropout_sum); - for (int y = 0; y < kernel_size; ++y) { - for (int x = 0; x < kernel_size; ++x) { - weight_deform_gpu[x + y*kernel_size + i] *= coef; - } - } - } - else if (stage_id == 7) - { - // flip - for (int y = 0; y < kernel_size; ++y) { - for (int x = 0; x < kernel_size; ++x) { - weight_deform_gpu[(kernel_size - x - 1) + y*kernel_size + i] = src_weight_gpu[x + y*kernel_size + i]; - } - } - } - } -} - - -extern "C" void stretch_sway_flip_weights_gpu(const float *src_weight_gpu, float *weight_deform_gpu, int nweights, int n, int size, int angle, int reverse) -{ - const int kernel_area = size*size; - const int block_size = BLOCK; - const int num_blocks = get_number_of_blocks(nweights / kernel_area, block_size); - stretch_sway_flip_weights_kernel << > > (src_weight_gpu, weight_deform_gpu, nweights, n, size, angle, reverse); - - CHECK_CUDA(cudaPeekAtLastError()); -} - - - -__global__ void reduce_and_expand_array_kernel(const float *src_gpu, float *dst_gpu, int current_size, int groups) -{ - const int index = blockIdx.x*blockDim.x + threadIdx.x; - - if (index < current_size) { - float val = 0; - for (int i = 0; i < groups; ++i) { - val += src_gpu[index + i*current_size]; - } - for (int i = 0; i < groups; ++i) { - dst_gpu[index + i*current_size] = val / groups; - } - } -} - -extern "C" void reduce_and_expand_array_gpu(const float *src_gpu, float *dst_gpu, int size, int groups) -{ - const int current_size = size / groups; - const int block_size = BLOCK; - const int num_blocks = get_number_of_blocks(current_size, block_size); - reduce_and_expand_array_kernel << > > (src_gpu, dst_gpu, current_size, groups); - - CHECK_CUDA(cudaPeekAtLastError()); -} - - - -__global__ void expand_array_kernel(const float *src_gpu, float *dst_gpu, int current_size, int groups) -{ - const int index = blockIdx.x*blockDim.x + threadIdx.x; - - if (index < current_size) { - for (int i = 0; i < groups; ++i) { - dst_gpu[index + 
i*current_size] = src_gpu[index]; - } - } -} - -extern "C" void expand_array_gpu(const float *src_gpu, float *dst_gpu, int size, int groups) -{ - const int current_size = size / groups; - const int block_size = BLOCK; - const int num_blocks = get_number_of_blocks(current_size, block_size); - expand_array_kernel << > > (src_gpu, dst_gpu, current_size, groups); - - CHECK_CUDA(cudaPeekAtLastError()); -} - - - -__global__ void mult_inverse_array_kernel(const float *src_gpu, float *dst_gpu, int size, const float eps) -{ - const int index = blockIdx.x*blockDim.x + threadIdx.x; - - if (index < size) { - float val = src_gpu[index]; - float sign = (val < 0) ? -1 : 1; - // eps = 1 by default - // eps = 2 - lower delta - // eps = 0 - higher delta (linear) - // eps = -1 - high delta (inverse number) - dst_gpu[index] = powf(fabs(val), eps) * sign; - } -} - -extern "C" void mult_inverse_array_gpu(const float *src_gpu, float *dst_gpu, int size, float eps) -{ - const int block_size = BLOCK; - const int num_blocks = get_number_of_blocks(size, block_size); - mult_inverse_array_kernel << > > (src_gpu, dst_gpu, size, eps); - - CHECK_CUDA(cudaPeekAtLastError()); -} - - - -__global__ void P_constrastive_f_det_kernel(int *labels, unsigned int feature_size, float temperature, contrastive_params *contrast_p, const int contrast_p_size) -{ - const int il = blockIdx.x*blockDim.x + threadIdx.x; - - if (il < contrast_p_size) { - const float sim = contrast_p[il].sim; - const size_t i = contrast_p[il].i; - const size_t j = contrast_p[il].j; - - const float numerator = expf(sim / temperature); - - float denominator = 0; - int k; - for (k = 0; k < contrast_p_size; ++k) { - contrastive_params cp = contrast_p[k]; - //if (k != i && labels[k] != labels[i]) { - //if (k != i) { - if (cp.i != i && cp.j == j) { - //const float sim_den = cp.sim; - ////const float sim_den = find_sim(k, l, contrast_p, contrast_p_size); // cosine_similarity(z[k], z[l], feature_size); - //denominator += expf(sim_den / 
temperature); - denominator += cp.exp_sim; - } - } - - float result = 0.9999; - if (denominator != 0) result = numerator / denominator; - if (result > 1) result = 0.9999; - - contrast_p[il].P = result; - } -} - - -extern "C" void P_constrastive_f_det_gpu(int *labels, unsigned int feature_size, float temperature, contrastive_params *contrast_p, const int contrast_p_size) -{ - const int block_size = BLOCK; - const int num_blocks = get_number_of_blocks(contrast_p_size, block_size); - P_constrastive_f_det_kernel << > > (labels, feature_size, temperature, contrast_p, contrast_p_size); - - CHECK_CUDA(cudaPeekAtLastError()); -} - - - - -__global__ void coord_conv_kernel(float *dst, int w, int h, int chan, int batch, int type) -{ - int i = blockIdx.x*blockDim.x + threadIdx.x; - - const int x = i % w; - i = i / w; - const int y = i % h; - i = i / h; - const int c = i % chan; - //i = i / chan; - //const int b = i % batch; - - if (type == 0) { - if (c == 0) { - const float x_val = (2.0f * x) / w - 1.0f; // [-1; 1) - dst[i] = x_val; // x - coord - } - else if (c == 1) { - const float y_val = (2.0f * y) / h - 1.0f; // [-1; 1) - dst[i] = y_val; // y - coord - } - else if (c == 2) { - const float x_val = (2.0f * x) / w - 1.0f; // [-1; 1) - const float y_val = (2.0f * y) / h - 1.0f; // [-1; 1) - const float rad_val = sqrtf(x_val*x_val + y_val*y_val); // [0; 1.414) - dst[i] = rad_val; // rad - coord - } - } - else if (type == 1) { - if (c >= 0 && c <= 2) { - dst[i] = 0; - } - } -} - -extern "C" void coord_conv_gpu(float *dst, int size, int w, int h, int chan, int b, int type) -{ - const int block_size = BLOCK; - const int num_blocks = get_number_of_blocks(size, block_size); - coord_conv_kernel << > > (dst, w, h, chan, b, type); - - CHECK_CUDA(cudaPeekAtLastError()); -} \ No newline at end of file diff --git a/src/Detector/darknet/src/box.c b/src/Detector/darknet/src/box.c deleted file mode 100644 index 201ab1334..000000000 --- a/src/Detector/darknet/src/box.c +++ /dev/null @@ 
-1,950 +0,0 @@ -#include "box.h" -#include "utils.h" -#include -#include -#include - -#ifndef M_PI -#define M_PI 3.141592 -#endif - -box float_to_box(float *f) -{ - box b; - b.x = f[0]; - b.y = f[1]; - b.w = f[2]; - b.h = f[3]; - return b; -} - -box float_to_box_stride(float *f, int stride) -{ - box b = { 0 }; - b.x = f[0]; - b.y = f[1 * stride]; - b.w = f[2 * stride]; - b.h = f[3 * stride]; - return b; -} - - -dbox derivative(box a, box b) -{ - dbox d; - d.dx = 0; - d.dw = 0; - d.dy = 0; - d.dh = 0; - d.dx = a.x < b.x ? 1.0 : -1.0; - d.dy = a.y < b.y ? 1.0 : -1.0; - d.dw = a.w < b.w ? 1.0 : -1.0; - d.dh = a.h < b.h ? 1.0 : -1.0; - return d; -} - - -/* -dbox derivative(box a, box b) -{ - dbox d; - d.dx = 0; - d.dw = 0; - float l1 = a.x - a.w/2; - float l2 = b.x - b.w/2; - if (l1 > l2){ - d.dx -= 1; - d.dw += .5; - } - float r1 = a.x + a.w/2; - float r2 = b.x + b.w/2; - if(r1 < r2){ - d.dx += 1; - d.dw += .5; - } - if (l1 > r2) { - d.dx = -1; - d.dw = 0; - } - if (r1 < l2){ - d.dx = 1; - d.dw = 0; - } - - d.dy = 0; - d.dh = 0; - float t1 = a.y - a.h/2; - float t2 = b.y - b.h/2; - if (t1 > t2){ - d.dy -= 1; - d.dh += .5; - } - float b1 = a.y + a.h/2; - float b2 = b.y + b.h/2; - if(b1 < b2){ - d.dy += 1; - d.dh += .5; - } - if (t1 > b2) { - d.dy = -1; - d.dh = 0; - } - if (b1 < t2){ - d.dy = 1; - d.dh = 0; - } - return d; -} -*/ - -// where c is the smallest box that fully encompases a and b -boxabs box_c(box a, box b) { - boxabs ba = { 0 }; - ba.top = fmin(a.y - a.h / 2, b.y - b.h / 2); - ba.bot = fmax(a.y + a.h / 2, b.y + b.h / 2); - ba.left = fmin(a.x - a.w / 2, b.x - b.w / 2); - ba.right = fmax(a.x + a.w / 2, b.x + b.w / 2); - return ba; -} - -// representation from x, y, w, h to top, left, bottom, right -boxabs to_tblr(box a) { - boxabs tblr = { 0 }; - float t = a.y - (a.h / 2); - float b = a.y + (a.h / 2); - float l = a.x - (a.w / 2); - float r = a.x + (a.w / 2); - tblr.top = t; - tblr.bot = b; - tblr.left = l; - tblr.right = r; - return tblr; -} - -float 
overlap(float x1, float w1, float x2, float w2) -{ - float l1 = x1 - w1/2; - float l2 = x2 - w2/2; - float left = l1 > l2 ? l1 : l2; - float r1 = x1 + w1/2; - float r2 = x2 + w2/2; - float right = r1 < r2 ? r1 : r2; - return right - left; -} - -float box_intersection(box a, box b) -{ - float w = overlap(a.x, a.w, b.x, b.w); - float h = overlap(a.y, a.h, b.y, b.h); - if(w < 0 || h < 0) return 0; - float area = w*h; - return area; -} - -float box_union(box a, box b) -{ - float i = box_intersection(a, b); - float u = a.w*a.h + b.w*b.h - i; - return u; -} - -float box_iou_kind(box a, box b, IOU_LOSS iou_kind) -{ - //IOU, GIOU, MSE, DIOU, CIOU - switch(iou_kind) { - case IOU: return box_iou(a, b); - case GIOU: return box_giou(a, b); - case DIOU: return box_diou(a, b); - case CIOU: return box_ciou(a, b); - } - return box_iou(a, b); -} - -float box_iou(box a, box b) -{ - //return box_intersection(a, b)/box_union(a, b); - - float I = box_intersection(a, b); - float U = box_union(a, b); - if (I == 0 || U == 0) { - return 0; - } - return I / U; -} - -float box_giou(box a, box b) -{ - boxabs ba = box_c(a, b); - float w = ba.right - ba.left; - float h = ba.bot - ba.top; - float c = w*h; - float iou = box_iou(a, b); - if (c == 0) { - return iou; - } - float u = box_union(a, b); - float giou_term = (c - u) / c; -#ifdef DEBUG_PRINTS - printf(" c: %f, u: %f, giou_term: %f\n", c, u, giou_term); -#endif - return iou - giou_term; -} - -// https://github.com/Zzh-tju/DIoU-darknet -// https://arxiv.org/abs/1911.08287 -float box_diou(box a, box b) -{ - boxabs ba = box_c(a, b); - float w = ba.right - ba.left; - float h = ba.bot - ba.top; - float c = w * w + h * h; - float iou = box_iou(a, b); - if (c == 0) { - return iou; - } - float d = (a.x - b.x) * (a.x - b.x) + (a.y - b.y) * (a.y - b.y); - float u = pow(d / c, 0.6); - float diou_term = u; -#ifdef DEBUG_PRINTS - printf(" c: %f, u: %f, riou_term: %f\n", c, u, diou_term); -#endif - return iou - diou_term; -} - -float box_diounms(box a, 
box b, float beta1) -{ - boxabs ba = box_c(a, b); - float w = ba.right - ba.left; - float h = ba.bot - ba.top; - float c = w * w + h * h; - float iou = box_iou(a, b); - if (c == 0) { - return iou; - } - float d = (a.x - b.x) * (a.x - b.x) + (a.y - b.y) * (a.y - b.y); - float u = pow(d / c, beta1); - float diou_term = u; -#ifdef DEBUG_PRINTS - printf(" c: %f, u: %f, riou_term: %f\n", c, u, diou_term); -#endif - return iou - diou_term; -} - -// https://github.com/Zzh-tju/DIoU-darknet -// https://arxiv.org/abs/1911.08287 -float box_ciou(box a, box b) -{ - boxabs ba = box_c(a, b); - float w = ba.right - ba.left; - float h = ba.bot - ba.top; - float c = w * w + h * h; - float iou = box_iou(a, b); - if (c == 0) { - return iou; - } - float u = (a.x - b.x) * (a.x - b.x) + (a.y - b.y) * (a.y - b.y); - float d = u / c; - float ar_gt = b.w / b.h; - float ar_pred = a.w / a.h; - float ar_loss = 4 / (M_PI * M_PI) * (atan(ar_gt) - atan(ar_pred)) * (atan(ar_gt) - atan(ar_pred)); - float alpha = ar_loss / (1 - iou + ar_loss + 0.000001); - float ciou_term = d + alpha * ar_loss; //ciou -#ifdef DEBUG_PRINTS - printf(" c: %f, u: %f, riou_term: %f\n", c, u, ciou_term); -#endif - return iou - ciou_term; -} - -dxrep dx_box_iou(box pred, box truth, IOU_LOSS iou_loss) { - boxabs pred_tblr = to_tblr(pred); - float pred_t = fmin(pred_tblr.top, pred_tblr.bot); - float pred_b = fmax(pred_tblr.top, pred_tblr.bot); - float pred_l = fmin(pred_tblr.left, pred_tblr.right); - float pred_r = fmax(pred_tblr.left, pred_tblr.right); - //dbox dover = derivative(pred,truth); - //dbox diouu = diou(pred, truth); - boxabs truth_tblr = to_tblr(truth); -#ifdef DEBUG_PRINTS - printf("\niou: %f, giou: %f\n", box_iou(pred, truth), box_giou(pred, truth)); - printf("pred: x,y,w,h: (%f, %f, %f, %f) -> t,b,l,r: (%f, %f, %f, %f)\n", pred.x, pred.y, pred.w, pred.h, pred_tblr.top, pred_tblr.bot, pred_tblr.left, pred_tblr.right); - printf("truth: x,y,w,h: (%f, %f, %f, %f) -> t,b,l,r: (%f, %f, %f, %f)\n", truth.x, truth.y, 
truth.w, truth.h, truth_tblr.top, truth_tblr.bot, truth_tblr.left, truth_tblr.right); -#endif - //printf("pred (t,b,l,r): (%f, %f, %f, %f)\n", pred_t, pred_b, pred_l, pred_r); - //printf("trut (t,b,l,r): (%f, %f, %f, %f)\n", truth_tblr.top, truth_tblr.bot, truth_tblr.left, truth_tblr.right); - dxrep ddx = {0}; - float X = (pred_b - pred_t) * (pred_r - pred_l); - float Xhat = (truth_tblr.bot - truth_tblr.top) * (truth_tblr.right - truth_tblr.left); - float Ih = fmin(pred_b, truth_tblr.bot) - fmax(pred_t, truth_tblr.top); - float Iw = fmin(pred_r, truth_tblr.right) - fmax(pred_l, truth_tblr.left); - float I = Iw * Ih; - float U = X + Xhat - I; - float S = (pred.x-truth.x)*(pred.x-truth.x)+(pred.y-truth.y)*(pred.y-truth.y); - float giou_Cw = fmax(pred_r, truth_tblr.right) - fmin(pred_l, truth_tblr.left); - float giou_Ch = fmax(pred_b, truth_tblr.bot) - fmin(pred_t, truth_tblr.top); - float giou_C = giou_Cw * giou_Ch; - //float IoU = I / U; -//#ifdef DEBUG_PRINTS - //printf("X: %f", X); - //printf(", Xhat: %f", Xhat); - //printf(", Ih: %f", Ih); - //printf(", Iw: %f", Iw); - //printf(", I: %f", I); - //printf(", U: %f", U); - //printf(", IoU: %f\n", I / U); -//#endif - - //Partial Derivatives, derivatives - float dX_wrt_t = -1 * (pred_r - pred_l); - float dX_wrt_b = pred_r - pred_l; - float dX_wrt_l = -1 * (pred_b - pred_t); - float dX_wrt_r = pred_b - pred_t; - // UNUSED - //// Ground truth - //float dXhat_wrt_t = -1 * (truth_tblr.right - truth_tblr.left); - //float dXhat_wrt_b = truth_tblr.right - truth_tblr.left; - //float dXhat_wrt_l = -1 * (truth_tblr.bot - truth_tblr.top); - //float dXhat_wrt_r = truth_tblr.bot - truth_tblr.top; - - // gradient of I min/max in IoU calc (prediction) - float dI_wrt_t = pred_t > truth_tblr.top ? (-1 * Iw) : 0; - float dI_wrt_b = pred_b < truth_tblr.bot ? Iw : 0; - float dI_wrt_l = pred_l > truth_tblr.left ? (-1 * Ih) : 0; - float dI_wrt_r = pred_r < truth_tblr.right ? 
Ih : 0; - // derivative of U with regard to x - float dU_wrt_t = dX_wrt_t - dI_wrt_t; - float dU_wrt_b = dX_wrt_b - dI_wrt_b; - float dU_wrt_l = dX_wrt_l - dI_wrt_l; - float dU_wrt_r = dX_wrt_r - dI_wrt_r; - // gradient of C min/max in IoU calc (prediction) - float dC_wrt_t = pred_t < truth_tblr.top ? (-1 * giou_Cw) : 0; - float dC_wrt_b = pred_b > truth_tblr.bot ? giou_Cw : 0; - float dC_wrt_l = pred_l < truth_tblr.left ? (-1 * giou_Ch) : 0; - float dC_wrt_r = pred_r > truth_tblr.right ? giou_Ch : 0; - - float p_dt = 0; - float p_db = 0; - float p_dl = 0; - float p_dr = 0; - if (U > 0 ) { - p_dt = ((U * dI_wrt_t) - (I * dU_wrt_t)) / (U * U); - p_db = ((U * dI_wrt_b) - (I * dU_wrt_b)) / (U * U); - p_dl = ((U * dI_wrt_l) - (I * dU_wrt_l)) / (U * U); - p_dr = ((U * dI_wrt_r) - (I * dU_wrt_r)) / (U * U); - } - // apply grad from prediction min/max for correct corner selection - p_dt = pred_tblr.top < pred_tblr.bot ? p_dt : p_db; - p_db = pred_tblr.top < pred_tblr.bot ? p_db : p_dt; - p_dl = pred_tblr.left < pred_tblr.right ? p_dl : p_dr; - p_dr = pred_tblr.left < pred_tblr.right ? 
p_dr : p_dl; - - if (iou_loss == GIOU) { - if (giou_C > 0) { - // apply "C" term from gIOU - p_dt += ((giou_C * dU_wrt_t) - (U * dC_wrt_t)) / (giou_C * giou_C); - p_db += ((giou_C * dU_wrt_b) - (U * dC_wrt_b)) / (giou_C * giou_C); - p_dl += ((giou_C * dU_wrt_l) - (U * dC_wrt_l)) / (giou_C * giou_C); - p_dr += ((giou_C * dU_wrt_r) - (U * dC_wrt_r)) / (giou_C * giou_C); - } - if (Iw<=0||Ih<=0) { - p_dt = ((giou_C * dU_wrt_t) - (U * dC_wrt_t)) / (giou_C * giou_C); - p_db = ((giou_C * dU_wrt_b) - (U * dC_wrt_b)) / (giou_C * giou_C); - p_dl = ((giou_C * dU_wrt_l) - (U * dC_wrt_l)) / (giou_C * giou_C); - p_dr = ((giou_C * dU_wrt_r) - (U * dC_wrt_r)) / (giou_C * giou_C); - } - } - - float Ct = fmin(pred.y - pred.h / 2,truth.y - truth.h / 2); - float Cb = fmax(pred.y + pred.h / 2,truth.y + truth.h / 2); - float Cl = fmin(pred.x - pred.w / 2,truth.x - truth.w / 2); - float Cr = fmax(pred.x + pred.w / 2,truth.x + truth.w / 2); - float Cw = Cr - Cl; - float Ch = Cb - Ct; - float C = Cw * Cw + Ch * Ch; - - float dCt_dx = 0; - float dCt_dy = pred_t < truth_tblr.top ? 1 : 0; - float dCt_dw = 0; - float dCt_dh = pred_t < truth_tblr.top ? -0.5 : 0; - - float dCb_dx = 0; - float dCb_dy = pred_b > truth_tblr.bot ? 1 : 0; - float dCb_dw = 0; - float dCb_dh = pred_b > truth_tblr.bot ? 0.5: 0; - - float dCl_dx = pred_l < truth_tblr.left ? 1 : 0; - float dCl_dy = 0; - float dCl_dw = pred_l < truth_tblr.left ? -0.5 : 0; - float dCl_dh = 0; - - float dCr_dx = pred_r > truth_tblr.right ? 1 : 0; - float dCr_dy = 0; - float dCr_dw = pred_r > truth_tblr.right ? 0.5 : 0; - float dCr_dh = 0; - - float dCw_dx = dCr_dx - dCl_dx; - float dCw_dy = dCr_dy - dCl_dy; - float dCw_dw = dCr_dw - dCl_dw; - float dCw_dh = dCr_dh - dCl_dh; - - float dCh_dx = dCb_dx - dCt_dx; - float dCh_dy = dCb_dy - dCt_dy; - float dCh_dw = dCb_dw - dCt_dw; - float dCh_dh = dCb_dh - dCt_dh; - - // UNUSED - //// ground truth - //float dI_wrt_xhat_t = pred_t < truth_tblr.top ? 
(-1 * Iw) : 0; - //float dI_wrt_xhat_b = pred_b > truth_tblr.bot ? Iw : 0; - //float dI_wrt_xhat_l = pred_l < truth_tblr.left ? (-1 * Ih) : 0; - //float dI_wrt_xhat_r = pred_r > truth_tblr.right ? Ih : 0; - - // Final IOU loss (prediction) (negative of IOU gradient, we want the negative loss) - float p_dx = 0; - float p_dy = 0; - float p_dw = 0; - float p_dh = 0; - - p_dx = p_dl + p_dr; //p_dx, p_dy, p_dw and p_dh are the gradient of IoU or GIoU. - p_dy = p_dt + p_db; - p_dw = (p_dr - p_dl); //For dw and dh, we do not divided by 2. - p_dh = (p_db - p_dt); - - // https://github.com/Zzh-tju/DIoU-darknet - // https://arxiv.org/abs/1911.08287 - if (iou_loss == DIOU) { - if (C > 0) { - p_dx += (2*(truth.x-pred.x)*C-(2*Cw*dCw_dx+2*Ch*dCh_dx)*S) / (C * C); - p_dy += (2*(truth.y-pred.y)*C-(2*Cw*dCw_dy+2*Ch*dCh_dy)*S) / (C * C); - p_dw += (2*Cw*dCw_dw+2*Ch*dCh_dw)*S / (C * C); - p_dh += (2*Cw*dCw_dh+2*Ch*dCh_dh)*S / (C * C); - } - if (Iw<=0||Ih<=0){ - p_dx = (2*(truth.x-pred.x)*C-(2*Cw*dCw_dx+2*Ch*dCh_dx)*S) / (C * C); - p_dy = (2*(truth.y-pred.y)*C-(2*Cw*dCw_dy+2*Ch*dCh_dy)*S) / (C * C); - p_dw = (2*Cw*dCw_dw+2*Ch*dCh_dw)*S / (C * C); - p_dh = (2*Cw*dCw_dh+2*Ch*dCh_dh)*S / (C * C); - } - } - //The following codes are calculating the gradient of ciou. 
- - if (iou_loss == CIOU) { - float ar_gt = truth.w / truth.h; - float ar_pred = pred.w / pred.h; - float ar_loss = 4 / (M_PI * M_PI) * (atan(ar_gt) - atan(ar_pred)) * (atan(ar_gt) - atan(ar_pred)); - float alpha = ar_loss / (1 - I/U + ar_loss + 0.000001); - float ar_dw=8/(M_PI*M_PI)*(atan(ar_gt)-atan(ar_pred))*pred.h; - float ar_dh=-8/(M_PI*M_PI)*(atan(ar_gt)-atan(ar_pred))*pred.w; - if (C > 0) { - // dar* - p_dx += (2*(truth.x-pred.x)*C-(2*Cw*dCw_dx+2*Ch*dCh_dx)*S) / (C * C); - p_dy += (2*(truth.y-pred.y)*C-(2*Cw*dCw_dy+2*Ch*dCh_dy)*S) / (C * C); - p_dw += (2*Cw*dCw_dw+2*Ch*dCh_dw)*S / (C * C) + alpha * ar_dw; - p_dh += (2*Cw*dCw_dh+2*Ch*dCh_dh)*S / (C * C) + alpha * ar_dh; - } - if (Iw<=0||Ih<=0){ - p_dx = (2*(truth.x-pred.x)*C-(2*Cw*dCw_dx+2*Ch*dCh_dx)*S) / (C * C); - p_dy = (2*(truth.y-pred.y)*C-(2*Cw*dCw_dy+2*Ch*dCh_dy)*S) / (C * C); - p_dw = (2*Cw*dCw_dw+2*Ch*dCh_dw)*S / (C * C) + alpha * ar_dw; - p_dh = (2*Cw*dCw_dh+2*Ch*dCh_dh)*S / (C * C) + alpha * ar_dh; - } - } - - ddx.dt = p_dx; //We follow the original code released from GDarknet. So in yolo_layer.c, dt, db, dl, dr are already dx, dy, dw, dh. - ddx.db = p_dy; - ddx.dl = p_dw; - ddx.dr = p_dh; - - // UNUSED - //// ground truth - //float gt_dt = ((U * dI_wrt_xhat_t) - (I * (dXhat_wrt_t - dI_wrt_xhat_t))) / (U * U); - //float gt_db = ((U * dI_wrt_xhat_b) - (I * (dXhat_wrt_b - dI_wrt_xhat_b))) / (U * U); - //float gt_dl = ((U * dI_wrt_xhat_l) - (I * (dXhat_wrt_l - dI_wrt_xhat_l))) / (U * U); - //float gt_dr = ((U * dI_wrt_xhat_r) - (I * (dXhat_wrt_r - dI_wrt_xhat_r))) / (U * U); - - // no min/max grad applied - //dx.dt = dt; - //dx.db = db; - //dx.dl = dl; - //dx.dr = dr; - - //// sum in gt -- THIS DOESNT WORK - //dx.dt += gt_dt; - //dx.db += gt_db; - //dx.dl += gt_dl; - //dx.dr += gt_dr; - - //// instead, look at the change between pred and gt, and weight t/b/l/r appropriately... - //// need the real derivative here (I think?) 
- //float delta_t = fmax(truth_tblr.top, pred_t) - fmin(truth_tblr.top, pred_t); - //float delta_b = fmax(truth_tblr.bot, pred_b) - fmin(truth_tblr.bot, pred_b); - //float delta_l = fmax(truth_tblr.left, pred_l) - fmin(truth_tblr.left, pred_l); - //float delta_r = fmax(truth_tblr.right, pred_r) - fmin(truth_tblr.right, pred_r); - - //dx.dt *= delta_t / (delta_t + delta_b); - //dx.db *= delta_b / (delta_t + delta_b); - //dx.dl *= delta_l / (delta_l + delta_r); - //dx.dr *= delta_r / (delta_l + delta_r); - - // UNUSED - //// ground truth - //float gt_dt = ((U * dI_wrt_xhat_t) - (I * (dXhat_wrt_t - dI_wrt_xhat_t))) / (U * U); - //float gt_db = ((U * dI_wrt_xhat_b) - (I * (dXhat_wrt_b - dI_wrt_xhat_b))) / (U * U); - //float gt_dl = ((U * dI_wrt_xhat_l) - (I * (dXhat_wrt_l - dI_wrt_xhat_l))) / (U * U); - //float gt_dr = ((U * dI_wrt_xhat_r) - (I * (dXhat_wrt_r - dI_wrt_xhat_r))) / (U * U); - - // no min/max grad applied - //dx.dt = dt; - //dx.db = db; - //dx.dl = dl; - //dx.dr = dr; - - // apply grad from prediction min/max for correct corner selection - //dx.dt = pred_tblr.top < pred_tblr.bot ? p_dt : p_db; - //dx.db = pred_tblr.top < pred_tblr.bot ? p_db : p_dt; - //dx.dl = pred_tblr.left < pred_tblr.right ? p_dl : p_dr; - //dx.dr = pred_tblr.left < pred_tblr.right ? p_dr : p_dl; - - //// sum in gt -- THIS DOESNT WORK - //dx.dt += gt_dt; - //dx.db += gt_db; - //dx.dl += gt_dl; - //dx.dr += gt_dr; - - //// instead, look at the change between pred and gt, and weight t/b/l/r appropriately... - //// need the real derivative here (I think?) 
- //float delta_t = fmax(truth_tblr.top, pred_t) - fmin(truth_tblr.top, pred_t); - //float delta_b = fmax(truth_tblr.bot, pred_b) - fmin(truth_tblr.bot, pred_b); - //float delta_l = fmax(truth_tblr.left, pred_l) - fmin(truth_tblr.left, pred_l); - //float delta_r = fmax(truth_tblr.right, pred_r) - fmin(truth_tblr.right, pred_r); - - //dx.dt *= delta_t / (delta_t + delta_b); - //dx.db *= delta_b / (delta_t + delta_b); - //dx.dl *= delta_l / (delta_l + delta_r); - //dx.dr *= delta_r / (delta_l + delta_r); - -//#ifdef DEBUG_PRINTS - /*printf(" directions dt: "); - if ((pred_tblr.top < truth_tblr.top && dx.dt > 0) || (pred_tblr.top > truth_tblr.top && dx.dt < 0)) { - printf("✓"); - } else { - printf("𝒙"); - } - printf(", "); - if ((pred_tblr.bot < truth_tblr.bot && dx.db > 0) || (pred_tblr.bot > truth_tblr.bot && dx.db < 0)) { - printf("✓"); - } else { - printf("𝒙"); - } - printf(", "); - if ((pred_tblr.left < truth_tblr.left && dx.dl > 0) || (pred_tblr.left > truth_tblr.left && dx.dl < 0)) { - printf("✓"); - } else { - printf("𝒙"); - } - printf(", "); - if ((pred_tblr.right < truth_tblr.right && dx.dr > 0) || (pred_tblr.right > truth_tblr.right && dx.dr < 0)) { - printf("✓"); - } else { - printf("𝒙"); - } - printf("\n"); - - printf("dx dt:%f", dx.dt); - printf(", db: %f", dx.db); - printf(", dl: %f", dx.dl); - printf(", dr: %f | ", dx.dr); -#endif - -#ifdef DEBUG_NAN - if (isnan(dx.dt)) { printf("dt isnan\n"); } - if (isnan(dx.db)) { printf("db isnan\n"); } - if (isnan(dx.dl)) { printf("dl isnan\n"); } - if (isnan(dx.dr)) { printf("dr isnan\n"); } -#endif - -// // No update if 0 or nan -// if (dx.dt == 0 || isnan(dx.dt)) { dx.dt = 1; } -// if (dx.db == 0 || isnan(dx.db)) { dx.db = 1; } -// if (dx.dl == 0 || isnan(dx.dl)) { dx.dl = 1; } -// if (dx.dr == 0 || isnan(dx.dr)) { dx.dr = 1; } -// -//#ifdef DEBUG_PRINTS -// printf("dx dt:%f (t: %f, p: %f)", dx.dt, gt_dt, p_dt); -// printf(", db: %f (t: %f, p: %f)", dx.db, gt_db, p_db); -// printf(", dl: %f (t: %f, p: %f)", 
dx.dl, gt_dl, p_dl); -// printf(", dr: %f (t: %f, p: %f) | ", dx.dr, gt_dr, p_dr); -//#endif */ - return ddx; -} - -float box_rmse(box a, box b) -{ - return sqrt(pow(a.x-b.x, 2) + - pow(a.y-b.y, 2) + - pow(a.w-b.w, 2) + - pow(a.h-b.h, 2)); -} - -dbox dintersect(box a, box b) -{ - float w = overlap(a.x, a.w, b.x, b.w); - float h = overlap(a.y, a.h, b.y, b.h); - dbox dover = derivative(a, b); - dbox di; - - di.dw = dover.dw*h; - di.dx = dover.dx*h; - di.dh = dover.dh*w; - di.dy = dover.dy*w; - - return di; -} - -dbox dunion(box a, box b) -{ - dbox du; - - dbox di = dintersect(a, b); - du.dw = a.h - di.dw; - du.dh = a.w - di.dh; - du.dx = -di.dx; - du.dy = -di.dy; - - return du; -} - - -void test_dunion() -{ - box a = {0, 0, 1, 1}; - box dxa= {0+.0001, 0, 1, 1}; - box dya= {0, 0+.0001, 1, 1}; - box dwa= {0, 0, 1+.0001, 1}; - box dha= {0, 0, 1, 1+.0001}; - - box b = {.5, .5, .2, .2}; - dbox di = dunion(a,b); - printf("Union: %f %f %f %f\n", di.dx, di.dy, di.dw, di.dh); - float inter = box_union(a, b); - float xinter = box_union(dxa, b); - float yinter = box_union(dya, b); - float winter = box_union(dwa, b); - float hinter = box_union(dha, b); - xinter = (xinter - inter)/(.0001); - yinter = (yinter - inter)/(.0001); - winter = (winter - inter)/(.0001); - hinter = (hinter - inter)/(.0001); - printf("Union Manual %f %f %f %f\n", xinter, yinter, winter, hinter); -} -void test_dintersect() -{ - box a = {0, 0, 1, 1}; - box dxa= {0+.0001, 0, 1, 1}; - box dya= {0, 0+.0001, 1, 1}; - box dwa= {0, 0, 1+.0001, 1}; - box dha= {0, 0, 1, 1+.0001}; - - box b = {.5, .5, .2, .2}; - dbox di = dintersect(a,b); - printf("Inter: %f %f %f %f\n", di.dx, di.dy, di.dw, di.dh); - float inter = box_intersection(a, b); - float xinter = box_intersection(dxa, b); - float yinter = box_intersection(dya, b); - float winter = box_intersection(dwa, b); - float hinter = box_intersection(dha, b); - xinter = (xinter - inter)/(.0001); - yinter = (yinter - inter)/(.0001); - winter = (winter - inter)/(.0001); 
- hinter = (hinter - inter)/(.0001); - printf("Inter Manual %f %f %f %f\n", xinter, yinter, winter, hinter); -} - -void test_box() -{ - test_dintersect(); - test_dunion(); - box a = {0, 0, 1, 1}; - box dxa= {0+.00001, 0, 1, 1}; - box dya= {0, 0+.00001, 1, 1}; - box dwa= {0, 0, 1+.00001, 1}; - box dha= {0, 0, 1, 1+.00001}; - - box b = {.5, 0, .2, .2}; - - float iou = box_iou(a,b); - iou = (1-iou)*(1-iou); - printf("%f\n", iou); - dbox d = diou(a, b); - printf("%f %f %f %f\n", d.dx, d.dy, d.dw, d.dh); - - float xiou = box_iou(dxa, b); - float yiou = box_iou(dya, b); - float wiou = box_iou(dwa, b); - float hiou = box_iou(dha, b); - xiou = ((1-xiou)*(1-xiou) - iou)/(.00001); - yiou = ((1-yiou)*(1-yiou) - iou)/(.00001); - wiou = ((1-wiou)*(1-wiou) - iou)/(.00001); - hiou = ((1-hiou)*(1-hiou) - iou)/(.00001); - printf("manual %f %f %f %f\n", xiou, yiou, wiou, hiou); -} - -dbox diou(box a, box b) -{ - float u = box_union(a, b); - float i = box_intersection(a, b); - dbox di = dintersect(a, b); - dbox du = dunion(a, b); - dbox dd = { 0,0,0,0 }; - - if (i <= 0 || 1) { - dd.dx = b.x - a.x; - dd.dy = b.y - a.y; - dd.dw = b.w - a.w; - dd.dh = b.h - a.h; - return dd; - } - - dd.dx = (di.dx*u - du.dx*i) / (u*u); - dd.dy = (di.dy*u - du.dy*i) / (u*u); - dd.dw = (di.dw*u - du.dw*i) / (u*u); - dd.dh = (di.dh*u - du.dh*i) / (u*u); - return dd; -} - -typedef struct{ - int index; - int class_id; - float **probs; -} sortable_bbox; - -int nms_comparator(const void *pa, const void *pb) -{ - sortable_bbox a = *(sortable_bbox *)pa; - sortable_bbox b = *(sortable_bbox *)pb; - float diff = a.probs[a.index][b.class_id] - b.probs[b.index][b.class_id]; - if(diff < 0) return 1; - else if(diff > 0) return -1; - return 0; -} - -void do_nms_sort_v2(box *boxes, float **probs, int total, int classes, float thresh) -{ - int i, j, k; - sortable_bbox* s = (sortable_bbox*)xcalloc(total, sizeof(sortable_bbox)); - - for(i = 0; i < total; ++i){ - s[i].index = i; - s[i].class_id = 0; - s[i].probs = probs; - } 
- - for(k = 0; k < classes; ++k){ - for(i = 0; i < total; ++i){ - s[i].class_id = k; - } - qsort(s, total, sizeof(sortable_bbox), nms_comparator); - for(i = 0; i < total; ++i){ - if(probs[s[i].index][k] == 0) continue; - box a = boxes[s[i].index]; - for(j = i+1; j < total; ++j){ - box b = boxes[s[j].index]; - if (box_iou(a, b) > thresh){ - probs[s[j].index][k] = 0; - } - } - } - } - free(s); -} - -int nms_comparator_v3(const void *pa, const void *pb) -{ - detection a = *(detection *)pa; - detection b = *(detection *)pb; - float diff = 0; - if (b.sort_class >= 0) { - diff = a.prob[b.sort_class] - b.prob[b.sort_class]; // there is already: prob = objectness*prob - } - else { - diff = a.objectness - b.objectness; - } - if (diff < 0) return 1; - else if (diff > 0) return -1; - return 0; -} - -void do_nms_obj(detection *dets, int total, int classes, float thresh) -{ - int i, j, k; - k = total - 1; - for (i = 0; i <= k; ++i) { - if (dets[i].objectness == 0) { - detection swap = dets[i]; - dets[i] = dets[k]; - dets[k] = swap; - --k; - --i; - } - } - total = k + 1; - - for (i = 0; i < total; ++i) { - dets[i].sort_class = -1; - } - - qsort(dets, total, sizeof(detection), nms_comparator_v3); - for (i = 0; i < total; ++i) { - if (dets[i].objectness == 0) continue; - box a = dets[i].bbox; - for (j = i + 1; j < total; ++j) { - if (dets[j].objectness == 0) continue; - box b = dets[j].bbox; - if (box_iou(a, b) > thresh) { - dets[j].objectness = 0; - for (k = 0; k < classes; ++k) { - dets[j].prob[k] = 0; - } - } - } - } -} - -void do_nms_sort(detection *dets, int total, int classes, float thresh) -{ - int i, j, k; - k = total - 1; - for (i = 0; i <= k; ++i) { - if (dets[i].objectness == 0) { - detection swap = dets[i]; - dets[i] = dets[k]; - dets[k] = swap; - --k; - --i; - } - } - total = k + 1; - - for (k = 0; k < classes; ++k) { - for (i = 0; i < total; ++i) { - dets[i].sort_class = k; - } - qsort(dets, total, sizeof(detection), nms_comparator_v3); - for (i = 0; i < total; ++i) 
{ - //printf(" k = %d, \t i = %d \n", k, i); - if (dets[i].prob[k] == 0) continue; - box a = dets[i].bbox; - for (j = i + 1; j < total; ++j) { - box b = dets[j].bbox; - if (box_iou(a, b) > thresh) { - dets[j].prob[k] = 0; - } - } - } - } -} - -void do_nms(box *boxes, float **probs, int total, int classes, float thresh) -{ - int i, j, k; - for(i = 0; i < total; ++i){ - int any = 0; - for(k = 0; k < classes; ++k) any = any || (probs[i][k] > 0); - if(!any) { - continue; - } - for(j = i+1; j < total; ++j){ - if (box_iou(boxes[i], boxes[j]) > thresh){ - for(k = 0; k < classes; ++k){ - if (probs[i][k] < probs[j][k]) probs[i][k] = 0; - else probs[j][k] = 0; - } - } - } - } -} - -// https://github.com/Zzh-tju/DIoU-darknet -// https://arxiv.org/abs/1911.08287 -void diounms_sort(detection *dets, int total, int classes, float thresh, NMS_KIND nms_kind, float beta1) -{ - int i, j, k; - k = total - 1; - for (i = 0; i <= k; ++i) { - if (dets[i].objectness == 0) { - detection swap = dets[i]; - dets[i] = dets[k]; - dets[k] = swap; - --k; - --i; - } - } - total = k + 1; - - for (k = 0; k < classes; ++k) { - for (i = 0; i < total; ++i) { - dets[i].sort_class = k; - } - qsort(dets, total, sizeof(detection), nms_comparator_v3); - for (i = 0; i < total; ++i) - { - if (dets[i].prob[k] == 0) continue; - box a = dets[i].bbox; - for (j = i + 1; j < total; ++j) { - box b = dets[j].bbox; - if (box_iou(a, b) > thresh && nms_kind == CORNERS_NMS) - { - float sum_prob = pow(dets[i].prob[k], 2) + pow(dets[j].prob[k], 2); - float alpha_prob = pow(dets[i].prob[k], 2) / sum_prob; - float beta_prob = pow(dets[j].prob[k], 2) / sum_prob; - //dets[i].bbox.x = (dets[i].bbox.x*alpha_prob + dets[j].bbox.x*beta_prob); - //dets[i].bbox.y = (dets[i].bbox.y*alpha_prob + dets[j].bbox.y*beta_prob); - //dets[i].bbox.w = (dets[i].bbox.w*alpha_prob + dets[j].bbox.w*beta_prob); - //dets[i].bbox.h = (dets[i].bbox.h*alpha_prob + dets[j].bbox.h*beta_prob); - /* - if (dets[j].points == YOLO_CENTER && (dets[i].points & 
dets[j].points) == 0) { - dets[i].bbox.x = (dets[i].bbox.x*alpha_prob + dets[j].bbox.x*beta_prob); - dets[i].bbox.y = (dets[i].bbox.y*alpha_prob + dets[j].bbox.y*beta_prob); - } - else if ((dets[i].points & dets[j].points) == 0) { - dets[i].bbox.w = (dets[i].bbox.w*alpha_prob + dets[j].bbox.w*beta_prob); - dets[i].bbox.h = (dets[i].bbox.h*alpha_prob + dets[j].bbox.h*beta_prob); - } - dets[i].points |= dets[j].points; - */ - dets[j].prob[k] = 0; - } - else if (box_diou(a, b) > thresh && nms_kind == GREEDY_NMS) { - dets[j].prob[k] = 0; - } - else { - if (box_diounms(a, b, beta1) > thresh && nms_kind == DIOU_NMS) { - dets[j].prob[k] = 0; - } - } - } - - //if ((nms_kind == CORNERS_NMS) && (dets[i].points != (YOLO_CENTER | YOLO_LEFT_TOP | YOLO_RIGHT_BOTTOM))) - // dets[i].prob[k] = 0; - } - } -} - -box encode_box(box b, box anchor) -{ - box encode; - encode.x = (b.x - anchor.x) / anchor.w; - encode.y = (b.y - anchor.y) / anchor.h; - encode.w = log2(b.w / anchor.w); - encode.h = log2(b.h / anchor.h); - return encode; -} - -box decode_box(box b, box anchor) -{ - box decode; - decode.x = b.x * anchor.w + anchor.x; - decode.y = b.y * anchor.h + anchor.y; - decode.w = pow(2., b.w) * anchor.w; - decode.h = pow(2., b.h) * anchor.h; - return decode; -} diff --git a/src/Detector/darknet/src/box.h b/src/Detector/darknet/src/box.h deleted file mode 100644 index 4b720653c..000000000 --- a/src/Detector/darknet/src/box.h +++ /dev/null @@ -1,59 +0,0 @@ -#ifndef BOX_H -#define BOX_H - -#include "darknet.h" - -//typedef struct{ -// float x, y, w, h; -//} box; - -typedef struct{ - float dx, dy, dw, dh; -} dbox; - -//typedef struct detection { -// box bbox; -// int classes; -// float *prob; -// float *mask; -// float objectness; -// int sort_class; -//} detection; - -typedef struct detection_with_class { - detection det; - // The most probable class id: the best class index in this->prob. 
- // Is filled temporary when processing results, otherwise not initialized - int best_class; -} detection_with_class; - -#ifdef __cplusplus -extern "C" { -#endif -box float_to_box(float *f); -box float_to_box_stride(float *f, int stride); -float box_iou(box a, box b); -float box_iou_kind(box a, box b, IOU_LOSS iou_kind); -float box_rmse(box a, box b); -dxrep dx_box_iou(box a, box b, IOU_LOSS iou_loss); -float box_giou(box a, box b); -float box_diou(box a, box b); -float box_ciou(box a, box b); -dbox diou(box a, box b); -boxabs to_tblr(box a); -void do_nms(box *boxes, float **probs, int total, int classes, float thresh); -void do_nms_sort_v2(box *boxes, float **probs, int total, int classes, float thresh); -//LIB_API void do_nms_sort(detection *dets, int total, int classes, float thresh); -//LIB_API void do_nms_obj(detection *dets, int total, int classes, float thresh); -//LIB_API void diounms_sort(detection *dets, int total, int classes, float thresh, NMS_KIND nms_kind, float beta1); -box decode_box(box b, box anchor); -box encode_box(box b, box anchor); - -// Creates array of detections with prob > thresh and fills best_class for them -// Return number of selected detections in *selected_detections_num -detection_with_class* get_actual_detections(detection *dets, int dets_num, float thresh, int* selected_detections_num, char **names); - -#ifdef __cplusplus -} -#endif -#endif diff --git a/src/Detector/darknet/src/captcha.c b/src/Detector/darknet/src/captcha.c deleted file mode 100644 index 0cc159152..000000000 --- a/src/Detector/darknet/src/captcha.c +++ /dev/null @@ -1,363 +0,0 @@ -#include "network.h" -#include "utils.h" -#include "parser.h" - -void fix_data_captcha(data d, int mask) -{ - matrix labels = d.y; - int i, j; - for(i = 0; i < d.y.rows; ++i){ - for(j = 0; j < d.y.cols; j += 2){ - if (mask){ - if(!labels.vals[i][j]){ - labels.vals[i][j] = SECRET_NUM; - labels.vals[i][j+1] = SECRET_NUM; - }else if(labels.vals[i][j+1]){ - labels.vals[i][j] = 0; - } - } 
else{ - if (labels.vals[i][j]) { - labels.vals[i][j+1] = 0; - } else { - labels.vals[i][j+1] = 1; - } - } - } - } -} - -void train_captcha(char *cfgfile, char *weightfile) -{ - srand(time(0)); - float avg_loss = -1; - char *base = basecfg(cfgfile); - printf("%s\n", base); - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); - int imgs = 1024; - int i = *net.seen/imgs; - int solved = 1; - list *plist; - char** labels = get_labels("data/captcha/reimgs.labels.list"); - if (solved){ - plist = get_paths("data/captcha/reimgs.solved.list"); - }else{ - plist = get_paths("data/captcha/reimgs.raw.list"); - } - char **paths = (char **)list_to_array(plist); - printf("%d\n", plist->size); - clock_t time; - pthread_t load_thread; - data train; - data buffer; - - load_args args = {0}; - args.w = net.w; - args.h = net.h; - args.paths = paths; - args.classes = 26; - args.n = imgs; - args.m = plist->size; - args.labels = labels; - args.d = &buffer; - args.type = CLASSIFICATION_DATA; - - load_thread = load_data_in_thread(args); - while(1){ - ++i; - time=clock(); - pthread_join(load_thread, 0); - train = buffer; - fix_data_captcha(train, solved); - - /* - image im = float_to_image(256, 256, 3, train.X.vals[114]); - show_image(im, "training"); - cvWaitKey(0); - */ - - load_thread = load_data_in_thread(args); - printf("Loaded: %lf seconds\n", sec(clock()-time)); - time=clock(); - float loss = train_network(net, train); - if(avg_loss == -1) avg_loss = loss; - avg_loss = avg_loss*.9 + loss*.1; - printf("%d: %f, %f avg, %lf seconds, %ld images\n", i, loss, avg_loss, sec(clock()-time), *net.seen); - free_data(train); - if(i%100==0){ - char buff[256]; - sprintf(buff, "imagenet_backup/%s_%d.weights", base, i); - save_weights(net, buff); - } - } -} - -void test_captcha(char *cfgfile, char *weightfile, char *filename) -{ - network net = 
parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - set_batch_network(&net, 1); - srand(2222222); - int i = 0; - char** names = get_labels("data/captcha/reimgs.labels.list"); - char buff[256]; - char *input = buff; - int indexes[26]; - while(1){ - if(filename){ - strncpy(input, filename, 256); - }else{ - //printf("Enter Image Path: "); - //fflush(stdout); - input = fgets(input, 256, stdin); - if(!input) return; - strtok(input, "\n"); - } - image im = load_image_color(input, net.w, net.h); - float *X = im.data; - float *predictions = network_predict(net, X); - top_predictions(net, 26, indexes); - //printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); - for(i = 0; i < 26; ++i){ - int index = indexes[i]; - if(i != 0) printf(", "); - printf("%s %f", names[index], predictions[index]); - } - printf("\n"); - fflush(stdout); - free_image(im); - if (filename) break; - } -} - -void valid_captcha(char *cfgfile, char *weightfile, char *filename) -{ - char** labels = get_labels("data/captcha/reimgs.labels.list"); - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - list* plist = get_paths("data/captcha/reimgs.fg.list"); - char **paths = (char **)list_to_array(plist); - int N = plist->size; - int outputs = net.outputs; - - set_batch_network(&net, 1); - srand(2222222); - int i, j; - for(i = 0; i < N; ++i){ - if (i%100 == 0) fprintf(stderr, "%d\n", i); - image im = load_image_color(paths[i], net.w, net.h); - float *X = im.data; - float *predictions = network_predict(net, X); - //printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); - int truth = -1; - for(j = 0; j < 13; ++j){ - if (strstr(paths[i], labels[j])) truth = j; - } - if (truth == -1){ - fprintf(stderr, "bad: %s\n", paths[i]); - return; - } - printf("%d, ", truth); - for(j = 0; j < outputs; ++j){ - if (j != 0) printf(", "); - printf("%f", predictions[j]); - } - printf("\n"); - fflush(stdout); - free_image(im); - 
if (filename) break; - } -} - -/* - void train_captcha(char *cfgfile, char *weightfile) - { - float avg_loss = -1; - srand(time(0)); - char *base = basecfg(cfgfile); - printf("%s\n", base); - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); - int imgs = 1024; - int i = net.seen/imgs; - list *plist = get_paths("/data/captcha/train.auto5"); - char **paths = (char **)list_to_array(plist); - printf("%d\n", plist->size); - clock_t time; - while(1){ - ++i; - time=clock(); - data train = load_data_captcha(paths, imgs, plist->size, 10, 200, 60); - translate_data_rows(train, -128); - scale_data_rows(train, 1./128); - printf("Loaded: %lf seconds\n", sec(clock()-time)); - time=clock(); - float loss = train_network(net, train); - net.seen += imgs; - if(avg_loss == -1) avg_loss = loss; - avg_loss = avg_loss*.9 + loss*.1; - printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), net.seen); - free_data(train); - if(i%10==0){ - char buff[256]; - sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.weights",base, i); - save_weights(net, buff); - } - } - } - - void decode_captcha(char *cfgfile, char *weightfile) - { - setbuf(stdout, NULL); - srand(time(0)); - network net = parse_network_cfg(cfgfile); - set_batch_network(&net, 1); - if(weightfile){ - load_weights(&net, weightfile); - } - char filename[256]; - while(1){ - printf("Enter filename: "); - fgets(filename, 256, stdin); - strtok(filename, "\n"); - image im = load_image_color(filename, 300, 57); - scale_image(im, 1./255.); - float *X = im.data; - float *predictions = network_predict(net, X); - image out = float_to_image(300, 57, 1, predictions); - show_image(out, "decoded"); -#ifdef OPENCV -cvWaitKey(0); -#endif -free_image(im); -} -} - -void encode_captcha(char *cfgfile, char *weightfile) -{ -float avg_loss = -1; -srand(time(0)); -char *base = 
basecfg(cfgfile); -printf("%s\n", base); -network net = parse_network_cfg(cfgfile); -if(weightfile){ - load_weights(&net, weightfile); -} -printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); -int imgs = 1024; -int i = net.seen/imgs; -list *plist = get_paths("/data/captcha/encode.list"); -char **paths = (char **)list_to_array(plist); -printf("%d\n", plist->size); -clock_t time; -while(1){ - ++i; - time=clock(); - data train = load_data_captcha_encode(paths, imgs, plist->size, 300, 57); - scale_data_rows(train, 1./255); - printf("Loaded: %lf seconds\n", sec(clock()-time)); - time=clock(); - float loss = train_network(net, train); - net.seen += imgs; - if(avg_loss == -1) avg_loss = loss; - avg_loss = avg_loss*.9 + loss*.1; - printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), net.seen); - free_matrix(train.X); - if(i%100==0){ - char buff[256]; - sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.weights",base, i); - save_weights(net, buff); - } -} -} - -void validate_captcha(char *cfgfile, char *weightfile) -{ - srand(time(0)); - char *base = basecfg(cfgfile); - printf("%s\n", base); - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - int numchars = 37; - list *plist = get_paths("/data/captcha/solved.hard"); - char **paths = (char **)list_to_array(plist); - int imgs = plist->size; - data valid = load_data_captcha(paths, imgs, 0, 10, 200, 60); - translate_data_rows(valid, -128); - scale_data_rows(valid, 1./128); - matrix pred = network_predict_data(net, valid); - int i, k; - int correct = 0; - int total = 0; - int accuracy = 0; - for(i = 0; i < imgs; ++i){ - int allcorrect = 1; - for(k = 0; k < 10; ++k){ - char truth = int_to_alphanum(max_index(valid.y.vals[i]+k*numchars, numchars)); - char prediction = int_to_alphanum(max_index(pred.vals[i]+k*numchars, numchars)); - if (truth != prediction) allcorrect=0; - if (truth != '.' 
&& truth == prediction) ++correct; - if (truth != '.' || truth != prediction) ++total; - } - accuracy += allcorrect; - } - printf("Word Accuracy: %f, Char Accuracy %f\n", (float)accuracy/imgs, (float)correct/total); - free_data(valid); -} - -void test_captcha(char *cfgfile, char *weightfile) -{ - setbuf(stdout, NULL); - srand(time(0)); - //char *base = basecfg(cfgfile); - //printf("%s\n", base); - network net = parse_network_cfg(cfgfile); - set_batch_network(&net, 1); - if(weightfile){ - load_weights(&net, weightfile); - } - char filename[256]; - while(1){ - //printf("Enter filename: "); - fgets(filename, 256, stdin); - strtok(filename, "\n"); - image im = load_image_color(filename, 200, 60); - translate_image(im, -128); - scale_image(im, 1/128.); - float *X = im.data; - float *predictions = network_predict(net, X); - print_letters(predictions, 10); - free_image(im); - } -} - */ -void run_captcha(int argc, char **argv) -{ - if(argc < 4){ - fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); - return; - } - - char *cfg = argv[3]; - char *weights = (argc > 4) ? argv[4] : 0; - char *filename = (argc > 5) ? 
argv[5]: 0; - if(0==strcmp(argv[2], "train")) train_captcha(cfg, weights); - else if(0==strcmp(argv[2], "test")) test_captcha(cfg, weights, filename); - else if(0==strcmp(argv[2], "valid")) valid_captcha(cfg, weights, filename); - //if(0==strcmp(argv[2], "test")) test_captcha(cfg, weights); - //else if(0==strcmp(argv[2], "encode")) encode_captcha(cfg, weights); - //else if(0==strcmp(argv[2], "decode")) decode_captcha(cfg, weights); - //else if(0==strcmp(argv[2], "valid")) validate_captcha(cfg, weights); -} diff --git a/src/Detector/darknet/src/cifar.c b/src/Detector/darknet/src/cifar.c deleted file mode 100644 index 1b87cb5f0..000000000 --- a/src/Detector/darknet/src/cifar.c +++ /dev/null @@ -1,271 +0,0 @@ -#include "network.h" -#include "utils.h" -#include "parser.h" -#include "option_list.h" -#include "blas.h" - -void train_cifar(char *cfgfile, char *weightfile) -{ - srand(time(0)); - float avg_loss = -1; - char *base = basecfg(cfgfile); - printf("%s\n", base); - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); - - char* backup_directory = "backup/"; - int classes = 10; - int N = 50000; - - char **labels = get_labels("data/cifar/labels.txt"); - int epoch = (*net.seen)/N; - data train = load_all_cifar10(); - while(get_current_batch(net) < net.max_batches || net.max_batches == 0){ - clock_t time=clock(); - - float loss = train_network_sgd(net, train, 1); - if(avg_loss == -1) avg_loss = loss; - avg_loss = avg_loss*.95 + loss*.05; - printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); - if(*net.seen/N > epoch){ - epoch = *net.seen/N; - char buff[256]; - sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); - save_weights(net, buff); - } - if(get_current_batch(net)%100 == 0){ - char 
buff[256]; - sprintf(buff, "%s/%s.backup",backup_directory,base); - save_weights(net, buff); - } - } - char buff[256]; - sprintf(buff, "%s/%s.weights", backup_directory, base); - save_weights(net, buff); - - free_network(net); - free_ptrs((void**)labels, classes); - free(base); - free_data(train); -} - -void train_cifar_distill(char *cfgfile, char *weightfile) -{ - srand(time(0)); - float avg_loss = -1; - char *base = basecfg(cfgfile); - printf("%s\n", base); - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); - - char* backup_directory = "backup/"; - int classes = 10; - int N = 50000; - - char **labels = get_labels("data/cifar/labels.txt"); - int epoch = (*net.seen)/N; - - data train = load_all_cifar10(); - matrix soft = csv_to_matrix("results/ensemble.csv"); - - float weight = .9; - scale_matrix(soft, weight); - scale_matrix(train.y, 1. - weight); - matrix_add_matrix(soft, train.y); - - while(get_current_batch(net) < net.max_batches || net.max_batches == 0){ - clock_t time=clock(); - - float loss = train_network_sgd(net, train, 1); - if(avg_loss == -1) avg_loss = loss; - avg_loss = avg_loss*.95 + loss*.05; - printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); - if(*net.seen/N > epoch){ - epoch = *net.seen/N; - char buff[256]; - sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); - save_weights(net, buff); - } - if(get_current_batch(net)%100 == 0){ - char buff[256]; - sprintf(buff, "%s/%s.backup",backup_directory,base); - save_weights(net, buff); - } - } - char buff[256]; - sprintf(buff, "%s/%s.weights", backup_directory, base); - save_weights(net, buff); - - free_network(net); - free_ptrs((void**)labels, classes); - free(base); - free_data(train); -} - -void 
test_cifar_multi(char *filename, char *weightfile) -{ - network net = parse_network_cfg(filename); - if(weightfile){ - load_weights(&net, weightfile); - } - set_batch_network(&net, 1); - srand(time(0)); - - float avg_acc = 0; - data test = load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin"); - - int i; - for(i = 0; i < test.X.rows; ++i){ - image im = float_to_image(32, 32, 3, test.X.vals[i]); - - float pred[10] = {0}; - - float *p = network_predict(net, im.data); - axpy_cpu(10, 1, p, 1, pred, 1); - flip_image(im); - p = network_predict(net, im.data); - axpy_cpu(10, 1, p, 1, pred, 1); - - int index = max_index(pred, 10); - int class_id = max_index(test.y.vals[i], 10); - if(index == class_id) avg_acc += 1; - free_image(im); - printf("%4d: %.2f%%\n", i, 100.*avg_acc/(i+1)); - } -} - -void test_cifar(char *filename, char *weightfile) -{ - network net = parse_network_cfg(filename); - if(weightfile){ - load_weights(&net, weightfile); - } - srand(time(0)); - - clock_t time; - float avg_acc = 0; - float avg_top5 = 0; - data test = load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin"); - - time=clock(); - - float *acc = network_accuracies(net, test, 2); - avg_acc += acc[0]; - avg_top5 += acc[1]; - printf("top1: %f, %lf seconds, %d images\n", avg_acc, sec(clock()-time), test.X.rows); - free_data(test); -} - -void extract_cifar() -{ -char *labels[] = {"airplane","automobile","bird","cat","deer","dog","frog","horse","ship","truck"}; - int i; - data train = load_all_cifar10(); - data test = load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin"); - for(i = 0; i < train.X.rows; ++i){ - image im = float_to_image(32, 32, 3, train.X.vals[i]); - int class_id = max_index(train.y.vals[i], 10); - char buff[256]; - sprintf(buff, "data/cifar/train/%d_%s",i,labels[class_id]); - save_image_png(im, buff); - } - for(i = 0; i < test.X.rows; ++i){ - image im = float_to_image(32, 32, 3, test.X.vals[i]); - int class_id = max_index(test.y.vals[i], 10); - 
char buff[256]; - sprintf(buff, "data/cifar/test/%d_%s",i,labels[class_id]); - save_image_png(im, buff); - } -} - -void test_cifar_csv(char *filename, char *weightfile) -{ - network net = parse_network_cfg(filename); - if(weightfile){ - load_weights(&net, weightfile); - } - srand(time(0)); - - data test = load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin"); - - matrix pred = network_predict_data(net, test); - - int i; - for(i = 0; i < test.X.rows; ++i){ - image im = float_to_image(32, 32, 3, test.X.vals[i]); - flip_image(im); - } - matrix pred2 = network_predict_data(net, test); - scale_matrix(pred, .5); - scale_matrix(pred2, .5); - matrix_add_matrix(pred2, pred); - - matrix_to_csv(pred); - fprintf(stderr, "Accuracy: %f\n", matrix_topk_accuracy(test.y, pred, 1)); - free_data(test); -} - -void test_cifar_csvtrain(char *filename, char *weightfile) -{ - network net = parse_network_cfg(filename); - if(weightfile){ - load_weights(&net, weightfile); - } - srand(time(0)); - - data test = load_all_cifar10(); - - matrix pred = network_predict_data(net, test); - - int i; - for(i = 0; i < test.X.rows; ++i){ - image im = float_to_image(32, 32, 3, test.X.vals[i]); - flip_image(im); - } - matrix pred2 = network_predict_data(net, test); - scale_matrix(pred, .5); - scale_matrix(pred2, .5); - matrix_add_matrix(pred2, pred); - - matrix_to_csv(pred); - fprintf(stderr, "Accuracy: %f\n", matrix_topk_accuracy(test.y, pred, 1)); - free_data(test); -} - -void eval_cifar_csv() -{ - data test = load_cifar10_data("data/cifar/cifar-10-batches-bin/test_batch.bin"); - - matrix pred = csv_to_matrix("results/combined.csv"); - fprintf(stderr, "%d %d\n", pred.rows, pred.cols); - - fprintf(stderr, "Accuracy: %f\n", matrix_topk_accuracy(test.y, pred, 1)); - free_data(test); - free_matrix(pred); -} - - -void run_cifar(int argc, char **argv) -{ - if(argc < 4){ - fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); - return; - } - - char *cfg 
= argv[3]; - char *weights = (argc > 4) ? argv[4] : 0; - if(0==strcmp(argv[2], "train")) train_cifar(cfg, weights); - else if(0==strcmp(argv[2], "extract")) extract_cifar(); - else if(0==strcmp(argv[2], "distill")) train_cifar_distill(cfg, weights); - else if(0==strcmp(argv[2], "test")) test_cifar(cfg, weights); - else if(0==strcmp(argv[2], "multi")) test_cifar_multi(cfg, weights); - else if(0==strcmp(argv[2], "csv")) test_cifar_csv(cfg, weights); - else if(0==strcmp(argv[2], "csvtrain")) test_cifar_csvtrain(cfg, weights); - else if(0==strcmp(argv[2], "eval")) eval_cifar_csv(); -} diff --git a/src/Detector/darknet/src/classifier.c b/src/Detector/darknet/src/classifier.c deleted file mode 100644 index f84e1dc2a..000000000 --- a/src/Detector/darknet/src/classifier.c +++ /dev/null @@ -1,1417 +0,0 @@ -#include "network.h" -#include "utils.h" -#include "parser.h" -#include "option_list.h" -#include "blas.h" -#include "assert.h" -#include "classifier.h" -#include "dark_cuda.h" -#ifdef WIN32 -#include -#include "gettimeofday.h" -#else -#include -#endif - -float validate_classifier_single(char *datacfg, char *filename, char *weightfile, network *existing_net, int topk_custom); - -float *get_regression_values(char **labels, int n) -{ - float* v = (float*)xcalloc(n, sizeof(float)); - int i; - for(i = 0; i < n; ++i){ - char *p = strchr(labels[i], ' '); - *p = 0; - v[i] = atof(p+1); - } - return v; -} - -void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int dontuse_opencv, int dont_show, int mjpeg_port, int calc_topk, int show_imgs, char* chart_path) -{ - int i; - - float avg_loss = -1; - float avg_contrastive_acc = 0; - char *base = basecfg(cfgfile); - printf("%s\n", base); - printf("%d\n", ngpus); - network* nets = (network*)xcalloc(ngpus, sizeof(network)); - - srand(time(0)); - int seed = rand(); - for(i = 0; i < ngpus; ++i){ - srand(seed); -#ifdef GPU - cuda_set_device(gpus[i]); -#endif - nets[i] = 
parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&nets[i], weightfile); - } - if (clear) { - *nets[i].seen = 0; - *nets[i].cur_iteration = 0; - } - nets[i].learning_rate *= ngpus; - } - srand(time(0)); - network net = nets[0]; - - int imgs = net.batch * net.subdivisions * ngpus; - - printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); - list *options = read_data_cfg(datacfg); - - char *backup_directory = option_find_str(options, "backup", "/backup/"); - char *label_list = option_find_str(options, "labels", "data/labels.list"); - char *train_list = option_find_str(options, "train", "data/train.list"); - int classes = option_find_int(options, "classes", 2); - int topk_data = option_find_int(options, "top", 5); - char topk_buff[10]; - sprintf(topk_buff, "top%d", topk_data); - layer l = net.layers[net.n - 1]; - if (classes != l.outputs && (l.type == SOFTMAX || l.type == COST)) { - printf("\n Error: num of filters = %d in the last conv-layer in cfg-file doesn't match to classes = %d in data-file \n", - l.outputs, classes); - getchar(); - } - - char **labels = get_labels(label_list); - if (net.unsupervised) { - free(labels); - labels = NULL; - } - list *plist = get_paths(train_list); - char **paths = (char **)list_to_array(plist); - printf("%d\n", plist->size); - int train_images_num = plist->size; - clock_t time; - - load_args args = {0}; - args.w = net.w; - args.h = net.h; - args.c = net.c; - args.threads = 32; - if (net.contrastive && args.threads > net.batch/2) args.threads = net.batch / 2; - args.hierarchy = net.hierarchy; - - args.contrastive = net.contrastive; - args.dontuse_opencv = dontuse_opencv; - args.min = net.min_crop; - args.max = net.max_crop; - args.flip = net.flip; - args.blur = net.blur; - args.angle = net.angle; - args.aspect = net.aspect; - args.exposure = net.exposure; - args.saturation = net.saturation; - args.hue = net.hue; - args.size = net.w > net.h ? 
net.w : net.h; - - args.label_smooth_eps = net.label_smooth_eps; - args.mixup = net.mixup; - if (dont_show && show_imgs) show_imgs = 2; - args.show_imgs = show_imgs; - - args.paths = paths; - args.classes = classes; - args.n = imgs; - args.m = train_images_num; - args.labels = labels; - args.type = CLASSIFICATION_DATA; - -#ifdef OPENCV - //args.threads = 3; - mat_cv* img = NULL; - float max_img_loss = net.max_chart_loss; - int number_of_lines = 100; - int img_size = 1000; - char windows_name[100]; - sprintf(windows_name, "chart_%s.png", base); - if (!dontuse_opencv) img = draw_train_chart(windows_name, max_img_loss, net.max_batches, number_of_lines, img_size, dont_show, chart_path); -#endif //OPENCV - - data train; - data buffer; - pthread_t load_thread; - args.d = &buffer; - load_thread = load_data(args); - - int iter_save = get_current_batch(net); - int iter_save_last = get_current_batch(net); - int iter_topk = get_current_batch(net); - float topk = 0; - - int count = 0; - double start, time_remaining, avg_time = -1, alpha_time = 0.01; - start = what_time_is_it_now(); - - while(get_current_batch(net) < net.max_batches || net.max_batches == 0){ - time=clock(); - - pthread_join(load_thread, 0); - train = buffer; - load_thread = load_data(args); - - printf("Loaded: %lf seconds\n", sec(clock()-time)); - time=clock(); - - float loss = 0; -#ifdef GPU - if(ngpus == 1){ - loss = train_network(net, train); - } else { - loss = train_networks(nets, ngpus, train, 4); - } -#else - loss = train_network(net, train); -#endif - if(avg_loss == -1 || isnan(avg_loss) || isinf(avg_loss)) avg_loss = loss; - avg_loss = avg_loss*.9 + loss*.1; - - i = get_current_batch(net); - - int calc_topk_for_each = iter_topk + 2 * train_images_num / (net.batch * net.subdivisions); // calculate TOPk for each 2 Epochs - calc_topk_for_each = fmax(calc_topk_for_each, net.burn_in); - calc_topk_for_each = fmax(calc_topk_for_each, 100); - if (i % 10 == 0) { - if (calc_topk) { - fprintf(stderr, "\n (next 
TOP%d calculation at %d iterations) ", topk_data, calc_topk_for_each); - if (topk > 0) fprintf(stderr, " Last accuracy TOP%d = %2.2f %% \n", topk_data, topk * 100); - } - - if (net.cudnn_half) { - if (i < net.burn_in * 3) fprintf(stderr, " Tensor Cores are disabled until the first %d iterations are reached.\n", 3 * net.burn_in); - else fprintf(stderr, " Tensor Cores are used.\n"); - } - } - - int draw_precision = 0; - if (calc_topk && (i >= calc_topk_for_each || i == net.max_batches)) { - iter_topk = i; - if (net.contrastive && l.type != SOFTMAX && l.type != COST) { - int k; - for (k = 0; k < net.n; ++k) if (net.layers[k].type == CONTRASTIVE) break; - topk = *(net.layers[k].loss) / 100; - sprintf(topk_buff, "Contr"); - } - else { - topk = validate_classifier_single(datacfg, cfgfile, weightfile, &net, topk_data); // calc TOP-n - printf("\n accuracy %s = %f \n", topk_buff, topk); - } - draw_precision = 1; - } - - time_remaining = ((net.max_batches - i) / ngpus) * (what_time_is_it_now() - start) / 60 / 60; - // set initial value, even if resume training from 10000 iteration - if (avg_time < 0) avg_time = time_remaining; - else avg_time = alpha_time * time_remaining + (1 - alpha_time) * avg_time; - start = what_time_is_it_now(); - printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images, %f hours left\n", get_current_batch(net), (float)(*net.seen)/ train_images_num, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen, avg_time); -#ifdef OPENCV - if (net.contrastive) { - float cur_con_acc = -1; - int k; - for (k = 0; k < net.n; ++k) - if (net.layers[k].type == CONTRASTIVE) cur_con_acc = *net.layers[k].loss; - if (cur_con_acc >= 0) avg_contrastive_acc = avg_contrastive_acc*0.99 + cur_con_acc * 0.01; - printf(" avg_contrastive_acc = %f \n", avg_contrastive_acc); - } - if (!dontuse_opencv) draw_train_loss(windows_name, img, img_size, avg_loss, max_img_loss, i, net.max_batches, topk, draw_precision, topk_buff, avg_contrastive_acc / 100, dont_show, 
mjpeg_port, avg_time); -#endif // OPENCV - - if (i >= (iter_save + 1000)) { - iter_save = i; -#ifdef GPU - if (ngpus != 1) sync_nets(nets, ngpus, 0); -#endif - char buff[256]; - sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); - save_weights(net, buff); - } - - if (i >= (iter_save_last + 100)) { - iter_save_last = i; -#ifdef GPU - if (ngpus != 1) sync_nets(nets, ngpus, 0); -#endif - char buff[256]; - sprintf(buff, "%s/%s_last.weights", backup_directory, base); - save_weights(net, buff); - } - free_data(train); - } -#ifdef GPU - if (ngpus != 1) sync_nets(nets, ngpus, 0); -#endif - char buff[256]; - sprintf(buff, "%s/%s_final.weights", backup_directory, base); - save_weights(net, buff); - -#ifdef OPENCV - release_mat(&img); - destroy_all_windows_cv(); -#endif - - pthread_join(load_thread, 0); - free_data(buffer); - - //free_network(net); - for (i = 0; i < ngpus; ++i) free_network(nets[i]); - free(nets); - - //free_ptrs((void**)labels, classes); - if(labels) free(labels); - free_ptrs((void**)paths, plist->size); - free_list(plist); - free(base); - - free_list_contents_kvp(options); - free_list(options); - -} - - -/* - void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int clear) - { - srand(time(0)); - float avg_loss = -1; - char *base = basecfg(cfgfile); - printf("%s\n", base); - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - if(clear) *net.seen = 0; - - int imgs = net.batch * net.subdivisions; - - printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); - list *options = read_data_cfg(datacfg); - - char *backup_directory = option_find_str(options, "backup", "/backup/"); - char *label_list = option_find_str(options, "labels", "data/labels.list"); - char *train_list = option_find_str(options, "train", "data/train.list"); - int classes = option_find_int(options, "classes", 2); - - char **labels = get_labels(label_list); - list *plist = 
get_paths(train_list); - char **paths = (char **)list_to_array(plist); - printf("%d\n", plist->size); - int N = plist->size; - clock_t time; - - load_args args = {0}; - args.w = net.w; - args.h = net.h; - args.threads = 8; - - args.min = net.min_crop; - args.max = net.max_crop; - args.flip = net.flip; - args.angle = net.angle; - args.aspect = net.aspect; - args.exposure = net.exposure; - args.saturation = net.saturation; - args.hue = net.hue; - args.size = net.w; - args.hierarchy = net.hierarchy; - - args.paths = paths; - args.classes = classes; - args.n = imgs; - args.m = N; - args.labels = labels; - args.type = CLASSIFICATION_DATA; - - data train; - data buffer; - pthread_t load_thread; - args.d = &buffer; - load_thread = load_data(args); - - int epoch = (*net.seen)/N; - while(get_current_batch(net) < net.max_batches || net.max_batches == 0){ - time=clock(); - - pthread_join(load_thread, 0); - train = buffer; - load_thread = load_data(args); - - printf("Loaded: %lf seconds\n", sec(clock()-time)); - time=clock(); - -#ifdef OPENCV -if(0){ -int u; -for(u = 0; u < imgs; ++u){ - image im = float_to_image(net.w, net.h, 3, train.X.vals[u]); - show_image(im, "loaded"); - cvWaitKey(0); -} -} -#endif - -float loss = train_network(net, train); -free_data(train); - -if(avg_loss == -1) avg_loss = loss; -avg_loss = avg_loss*.9 + loss*.1; -printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); -if(*net.seen/N > epoch){ - epoch = *net.seen/N; - char buff[256]; - sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); - save_weights(net, buff); -} -if(get_current_batch(net)%100 == 0){ - char buff[256]; - sprintf(buff, "%s/%s.backup",backup_directory,base); - save_weights(net, buff); -} -} -char buff[256]; -sprintf(buff, "%s/%s.weights", backup_directory, base); -save_weights(net, buff); - -free_network(net); -free_ptrs((void**)labels, classes); 
-free_ptrs((void**)paths, plist->size); -free_list(plist); -free(base); -} -*/ - -void validate_classifier_crop(char *datacfg, char *filename, char *weightfile) -{ - int i = 0; - network net = parse_network_cfg(filename); - if(weightfile){ - load_weights(&net, weightfile); - } - srand(time(0)); - - list *options = read_data_cfg(datacfg); - - char *label_list = option_find_str(options, "labels", "data/labels.list"); - char *valid_list = option_find_str(options, "valid", "data/train.list"); - int classes = option_find_int(options, "classes", 2); - int topk = option_find_int(options, "top", 1); - if (topk > classes) topk = classes; - - char **labels = get_labels(label_list); - list *plist = get_paths(valid_list); - - char **paths = (char **)list_to_array(plist); - int m = plist->size; - free_list(plist); - - clock_t time; - float avg_acc = 0; - float avg_topk = 0; - int splits = m/1000; - int num = (i+1)*m/splits - i*m/splits; - - data val, buffer; - - load_args args = {0}; - args.w = net.w; - args.h = net.h; - - args.paths = paths; - args.classes = classes; - args.n = num; - args.m = 0; - args.labels = labels; - args.d = &buffer; - args.type = OLD_CLASSIFICATION_DATA; - - pthread_t load_thread = load_data_in_thread(args); - for(i = 1; i <= splits; ++i){ - time=clock(); - - pthread_join(load_thread, 0); - val = buffer; - - num = (i+1)*m/splits - i*m/splits; - char **part = paths+(i*m/splits); - if(i != splits){ - args.paths = part; - load_thread = load_data_in_thread(args); - } - printf("Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock()-time)); - - time=clock(); - float *acc = network_accuracies(net, val, topk); - avg_acc += acc[0]; - avg_topk += acc[1]; - printf("%d: top 1: %f, top %d: %f, %lf seconds, %d images\n", i, avg_acc/i, topk, avg_topk/i, sec(clock()-time), val.X.rows); - free_data(val); - } -} - -void validate_classifier_10(char *datacfg, char *filename, char *weightfile) -{ - int i, j; - network net = parse_network_cfg(filename); - 
set_batch_network(&net, 1); - if(weightfile){ - load_weights(&net, weightfile); - } - srand(time(0)); - - list *options = read_data_cfg(datacfg); - - char *label_list = option_find_str(options, "labels", "data/labels.list"); - char *valid_list = option_find_str(options, "valid", "data/train.list"); - int classes = option_find_int(options, "classes", 2); - int topk = option_find_int(options, "top", 1); - if (topk > classes) topk = classes; - - char **labels = get_labels(label_list); - list *plist = get_paths(valid_list); - - char **paths = (char **)list_to_array(plist); - int m = plist->size; - free_list(plist); - - float avg_acc = 0; - float avg_topk = 0; - int* indexes = (int*)xcalloc(topk, sizeof(int)); - - for(i = 0; i < m; ++i){ - int class_id = -1; - char *path = paths[i]; - for(j = 0; j < classes; ++j){ - if(strstr(path, labels[j])){ - class_id = j; - break; - } - } - int w = net.w; - int h = net.h; - int shift = 32; - image im = load_image_color(paths[i], w+shift, h+shift); - image images[10]; - images[0] = crop_image(im, -shift, -shift, w, h); - images[1] = crop_image(im, shift, -shift, w, h); - images[2] = crop_image(im, 0, 0, w, h); - images[3] = crop_image(im, -shift, shift, w, h); - images[4] = crop_image(im, shift, shift, w, h); - flip_image(im); - images[5] = crop_image(im, -shift, -shift, w, h); - images[6] = crop_image(im, shift, -shift, w, h); - images[7] = crop_image(im, 0, 0, w, h); - images[8] = crop_image(im, -shift, shift, w, h); - images[9] = crop_image(im, shift, shift, w, h); - float* pred = (float*)xcalloc(classes, sizeof(float)); - for(j = 0; j < 10; ++j){ - float *p = network_predict(net, images[j].data); - if(net.hierarchy) hierarchy_predictions(p, net.outputs, net.hierarchy, 1); - axpy_cpu(classes, 1, p, 1, pred, 1); - free_image(images[j]); - } - free_image(im); - top_k(pred, classes, topk, indexes); - free(pred); - if(indexes[0] == class_id) avg_acc += 1; - for(j = 0; j < topk; ++j){ - if(indexes[j] == class_id) avg_topk += 1; - } - 
- printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); - } - free(indexes); -} - -void validate_classifier_full(char *datacfg, char *filename, char *weightfile) -{ - int i, j; - network net = parse_network_cfg(filename); - set_batch_network(&net, 1); - if(weightfile){ - load_weights(&net, weightfile); - } - srand(time(0)); - - list *options = read_data_cfg(datacfg); - - char *label_list = option_find_str(options, "labels", "data/labels.list"); - char *valid_list = option_find_str(options, "valid", "data/train.list"); - int classes = option_find_int(options, "classes", 2); - int topk = option_find_int(options, "top", 1); - if (topk > classes) topk = classes; - - char **labels = get_labels(label_list); - list *plist = get_paths(valid_list); - - char **paths = (char **)list_to_array(plist); - int m = plist->size; - free_list(plist); - - float avg_acc = 0; - float avg_topk = 0; - int* indexes = (int*)xcalloc(topk, sizeof(int)); - - int size = net.w; - for(i = 0; i < m; ++i){ - int class_id = -1; - char *path = paths[i]; - for(j = 0; j < classes; ++j){ - if(strstr(path, labels[j])){ - class_id = j; - break; - } - } - image im = load_image_color(paths[i], 0, 0); - image resized = resize_min(im, size); - resize_network(&net, resized.w, resized.h); - //show_image(im, "orig"); - //show_image(crop, "cropped"); - //cvWaitKey(0); - float *pred = network_predict(net, resized.data); - if(net.hierarchy) hierarchy_predictions(pred, net.outputs, net.hierarchy, 1); - - free_image(im); - free_image(resized); - top_k(pred, classes, topk, indexes); - - if(indexes[0] == class_id) avg_acc += 1; - for(j = 0; j < topk; ++j){ - if(indexes[j] == class_id) avg_topk += 1; - } - - printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); - } - free(indexes); -} - - -float validate_classifier_single(char *datacfg, char *filename, char *weightfile, network *existing_net, int topk_custom) -{ - int i, j; - network net; - int old_batch = -1; - if 
(existing_net) { - net = *existing_net; // for validation during training - old_batch = net.batch; - set_batch_network(&net, 1); - } - else { - net = parse_network_cfg_custom(filename, 1, 0); - if (weightfile) { - load_weights(&net, weightfile); - } - //set_batch_network(&net, 1); - fuse_conv_batchnorm(net); - calculate_binary_weights(net); - } - srand(time(0)); - - list *options = read_data_cfg(datacfg); - - char *label_list = option_find_str(options, "labels", "data/labels.list"); - char *leaf_list = option_find_str(options, "leaves", 0); - if(leaf_list) change_leaves(net.hierarchy, leaf_list); - char *valid_list = option_find_str(options, "valid", "data/train.list"); - int classes = option_find_int(options, "classes", 2); - int topk = option_find_int(options, "top", 1); - if (topk_custom > 0) topk = topk_custom; // for validation during training - if (topk > classes) topk = classes; - printf(" TOP calculation...\n"); - - char **labels = get_labels(label_list); - list *plist = get_paths(valid_list); - - char **paths = (char **)list_to_array(plist); - int m = plist->size; - free_list(plist); - - float avg_acc = 0; - float avg_topk = 0; - int* indexes = (int*)xcalloc(topk, sizeof(int)); - - for(i = 0; i < m; ++i){ - int class_id = -1; - char *path = paths[i]; - for(j = 0; j < classes; ++j){ - if(strstr(path, labels[j])){ - class_id = j; - break; - } - } - image im = load_image_color(paths[i], 0, 0); - image resized = resize_min(im, net.w); - image crop = crop_image(resized, (resized.w - net.w)/2, (resized.h - net.h)/2, net.w, net.h); - //show_image(im, "orig"); - //show_image(crop, "cropped"); - //cvWaitKey(0); - float *pred = network_predict(net, crop.data); - if(net.hierarchy) hierarchy_predictions(pred, net.outputs, net.hierarchy, 1); - - if(resized.data != im.data) free_image(resized); - free_image(im); - free_image(crop); - top_k(pred, classes, topk, indexes); - - if(indexes[0] == class_id) avg_acc += 1; - for(j = 0; j < topk; ++j){ - if(indexes[j] == 
class_id) avg_topk += 1; - } - - if (existing_net) printf("\r"); - else printf("\n"); - printf("%d: top 1: %f, top %d: %f", i, avg_acc/(i+1), topk, avg_topk/(i+1)); - } - free(indexes); - if (existing_net) { - set_batch_network(&net, old_batch); - } - float topk_result = avg_topk / i; - return topk_result; -} - -void validate_classifier_multi(char *datacfg, char *filename, char *weightfile) -{ - int i, j; - network net = parse_network_cfg(filename); - set_batch_network(&net, 1); - if(weightfile){ - load_weights(&net, weightfile); - } - srand(time(0)); - - list *options = read_data_cfg(datacfg); - - char *label_list = option_find_str(options, "labels", "data/labels.list"); - char *valid_list = option_find_str(options, "valid", "data/train.list"); - int classes = option_find_int(options, "classes", 2); - int topk = option_find_int(options, "top", 1); - if (topk > classes) topk = classes; - - char **labels = get_labels(label_list); - list *plist = get_paths(valid_list); - int scales[] = {224, 288, 320, 352, 384}; - int nscales = sizeof(scales)/sizeof(scales[0]); - - char **paths = (char **)list_to_array(plist); - int m = plist->size; - free_list(plist); - - float avg_acc = 0; - float avg_topk = 0; - int* indexes = (int*)xcalloc(topk, sizeof(int)); - - for(i = 0; i < m; ++i){ - int class_id = -1; - char *path = paths[i]; - for(j = 0; j < classes; ++j){ - if(strstr(path, labels[j])){ - class_id = j; - break; - } - } - float* pred = (float*)xcalloc(classes, sizeof(float)); - image im = load_image_color(paths[i], 0, 0); - for(j = 0; j < nscales; ++j){ - image r = resize_min(im, scales[j]); - resize_network(&net, r.w, r.h); - float *p = network_predict(net, r.data); - if(net.hierarchy) hierarchy_predictions(p, net.outputs, net.hierarchy, 1); - axpy_cpu(classes, 1, p, 1, pred, 1); - flip_image(r); - p = network_predict(net, r.data); - axpy_cpu(classes, 1, p, 1, pred, 1); - if(r.data != im.data) free_image(r); - } - free_image(im); - top_k(pred, classes, topk, indexes); - 
free(pred); - if(indexes[0] == class_id) avg_acc += 1; - for(j = 0; j < topk; ++j){ - if(indexes[j] == class_id) avg_topk += 1; - } - - printf("%d: top 1: %f, top %d: %f\n", i, avg_acc/(i+1), topk, avg_topk/(i+1)); - } - free(indexes); -} - -void try_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int layer_num) -{ - network net = parse_network_cfg_custom(cfgfile, 1, 0); - if(weightfile){ - load_weights(&net, weightfile); - } - set_batch_network(&net, 1); - srand(2222222); - - list *options = read_data_cfg(datacfg); - - char *name_list = option_find_str(options, "names", 0); - if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list"); - int classes = option_find_int(options, "classes", 2); - int top = option_find_int(options, "top", 1); - if (top > classes) top = classes; - - char **names = get_labels(name_list); - clock_t time; - int* indexes = (int*)xcalloc(top, sizeof(int)); - char buff[256]; - char *input = buff; - while(1){ - if(filename){ - strncpy(input, filename, 256); - }else{ - printf("Enter Image Path: "); - fflush(stdout); - input = fgets(input, 256, stdin); - if(!input) break; - strtok(input, "\n"); - } - image orig = load_image_color(input, 0, 0); - image r = resize_min(orig, 256); - image im = crop_image(r, (r.w - 224 - 1)/2 + 1, (r.h - 224 - 1)/2 + 1, 224, 224); - float mean[] = {0.48263312050943, 0.45230225481413, 0.40099074308742}; - float std[] = {0.22590347483426, 0.22120921437787, 0.22103996251583}; - float var[3]; - var[0] = std[0]*std[0]; - var[1] = std[1]*std[1]; - var[2] = std[2]*std[2]; - - normalize_cpu(im.data, mean, var, 1, 3, im.w*im.h); - - float *X = im.data; - time=clock(); - float *predictions = network_predict(net, X); - - layer l = net.layers[layer_num]; - int i; - for(i = 0; i < l.c; ++i){ - if(l.rolling_mean) printf("%f %f %f\n", l.rolling_mean[i], l.rolling_variance[i], l.scales[i]); - } -#ifdef GPU - cuda_pull_array(l.output_gpu, l.output, l.outputs); -#endif - for(i = 0; i < 
l.outputs; ++i){ - printf("%f\n", l.output[i]); - } - /* - - printf("\n\nWeights\n"); - for(i = 0; i < l.n*l.size*l.size*l.c; ++i){ - printf("%f\n", l.filters[i]); - } - - printf("\n\nBiases\n"); - for(i = 0; i < l.n; ++i){ - printf("%f\n", l.biases[i]); - } - */ - - top_predictions(net, top, indexes); - printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); - for(i = 0; i < top; ++i){ - int index = indexes[i]; - printf("%s: %f\n", names[index], predictions[index]); - } - free_image(im); - if (filename) break; - } - free(indexes); -} - -void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top) -{ - network net = parse_network_cfg_custom(cfgfile, 1, 0); - if(weightfile){ - load_weights(&net, weightfile); - } - set_batch_network(&net, 1); - srand(2222222); - - fuse_conv_batchnorm(net); - calculate_binary_weights(net); - - list *options = read_data_cfg(datacfg); - - char *name_list = option_find_str(options, "names", 0); - if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list"); - int classes = option_find_int(options, "classes", 2); - printf(" classes = %d, output in cfg = %d \n", classes, net.layers[net.n - 1].c); - layer l = net.layers[net.n - 1]; - if (classes != l.outputs && (l.type == SOFTMAX || l.type == COST)) { - printf("\n Error: num of filters = %d in the last conv-layer in cfg-file doesn't match to classes = %d in data-file \n", - l.outputs, classes); - getchar(); - } - if (top == 0) top = option_find_int(options, "top", 1); - if (top > classes) top = classes; - - int i = 0; - char **names = get_labels(name_list); - clock_t time; - int* indexes = (int*)xcalloc(top, sizeof(int)); - char buff[256]; - char *input = buff; - //int size = net.w; - while(1){ - if(filename){ - strncpy(input, filename, 256); - }else{ - printf("Enter Image Path: "); - fflush(stdout); - input = fgets(input, 256, stdin); - if(!input) break; - strtok(input, "\n"); - } - image im = load_image_color(input, 0, 
0); - image resized = resize_min(im, net.w); - image cropped = crop_image(resized, (resized.w - net.w)/2, (resized.h - net.h)/2, net.w, net.h); - printf("%d %d\n", cropped.w, cropped.h); - - float *X = cropped.data; - - double time = get_time_point(); - float *predictions = network_predict(net, X); - printf("%s: Predicted in %lf milli-seconds.\n", input, ((double)get_time_point() - time) / 1000); - - if(net.hierarchy) hierarchy_predictions(predictions, net.outputs, net.hierarchy, 0); - top_k(predictions, net.outputs, top, indexes); - - for(i = 0; i < top; ++i){ - int index = indexes[i]; - if(net.hierarchy) printf("%d, %s: %f, parent: %s \n",index, names[index], predictions[index], (net.hierarchy->parent[index] >= 0) ? names[net.hierarchy->parent[index]] : "Root"); - else printf("%s: %f\n",names[index], predictions[index]); - } - - free_image(cropped); - if (resized.data != im.data) { - free_image(resized); - } - free_image(im); - - if (filename) break; - } - free(indexes); - free_network(net); - free_list_contents_kvp(options); - free_list(options); -} - - -void label_classifier(char *datacfg, char *filename, char *weightfile) -{ - int i; - network net = parse_network_cfg(filename); - set_batch_network(&net, 1); - if(weightfile){ - load_weights(&net, weightfile); - } - srand(time(0)); - - list *options = read_data_cfg(datacfg); - - char *label_list = option_find_str(options, "names", "data/labels.list"); - char *test_list = option_find_str(options, "test", "data/train.list"); - int classes = option_find_int(options, "classes", 2); - - char **labels = get_labels(label_list); - list *plist = get_paths(test_list); - - char **paths = (char **)list_to_array(plist); - int m = plist->size; - free_list(plist); - - for(i = 0; i < m; ++i){ - image im = load_image_color(paths[i], 0, 0); - image resized = resize_min(im, net.w); - image crop = crop_image(resized, (resized.w - net.w)/2, (resized.h - net.h)/2, net.w, net.h); - float *pred = network_predict(net, crop.data); - - 
if(resized.data != im.data) free_image(resized); - free_image(im); - free_image(crop); - int ind = max_index(pred, classes); - - printf("%s\n", labels[ind]); - } -} - - -void test_classifier(char *datacfg, char *cfgfile, char *weightfile, int target_layer) -{ - int curr = 0; - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - srand(time(0)); - fuse_conv_batchnorm(net); - calculate_binary_weights(net); - - list *options = read_data_cfg(datacfg); - - char *test_list = option_find_str(options, "test", "data/test.list"); - int classes = option_find_int(options, "classes", 2); - - list *plist = get_paths(test_list); - - char **paths = (char **)list_to_array(plist); - int m = plist->size; - free_list(plist); - - clock_t time; - - data val, buffer; - - load_args args = {0}; - args.w = net.w; - args.h = net.h; - args.paths = paths; - args.classes = classes; - args.n = net.batch; - args.m = 0; - args.labels = 0; - args.d = &buffer; - args.type = OLD_CLASSIFICATION_DATA; - - pthread_t load_thread = load_data_in_thread(args); - for(curr = net.batch; curr < m; curr += net.batch){ - time=clock(); - - pthread_join(load_thread, 0); - val = buffer; - - if(curr < m){ - args.paths = paths + curr; - if (curr + net.batch > m) args.n = m - curr; - load_thread = load_data_in_thread(args); - } - fprintf(stderr, "Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock()-time)); - - time=clock(); - matrix pred = network_predict_data(net, val); - - int i, j; - if (target_layer >= 0){ - //layer l = net.layers[target_layer]; - } - - for(i = 0; i < pred.rows; ++i){ - printf("%s", paths[curr-net.batch+i]); - for(j = 0; j < pred.cols; ++j){ - printf("\t%g", pred.vals[i][j]); - } - printf("\n"); - } - - free_matrix(pred); - - fprintf(stderr, "%lf seconds, %d images, %d total\n", sec(clock()-time), val.X.rows, curr); - free_data(val); - } -} - - -void threat_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char 
*filename) -{ -#ifdef OPENCV - float threat = 0; - float roll = .2; - - printf("Classifier Demo\n"); - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - set_batch_network(&net, 1); - list *options = read_data_cfg(datacfg); - - srand(2222222); - cap_cv * cap; - - if (filename) { - //cap = cvCaptureFromFile(filename); - cap = get_capture_video_stream(filename); - } - else { - //cap = cvCaptureFromCAM(cam_index); - cap = get_capture_webcam(cam_index); - } - - int classes = option_find_int(options, "classes", 2); - int top = option_find_int(options, "top", 1); - if (top > classes) top = classes; - - char *name_list = option_find_str(options, "names", 0); - char **names = get_labels(name_list); - - int* indexes = (int*)xcalloc(top, sizeof(int)); - - if(!cap) error("Couldn't connect to webcam.\n"); - create_window_cv("Threat", 0, 512, 512); - float fps = 0; - int i; - - int count = 0; - - while(1){ - ++count; - struct timeval tval_before, tval_after, tval_result; - gettimeofday(&tval_before, NULL); - - //image in = get_image_from_stream(cap); - image in = get_image_from_stream_cpp(cap); - if(!in.data) break; - image in_s = resize_image(in, net.w, net.h); - - image out = in; - int x1 = out.w / 20; - int y1 = out.h / 20; - int x2 = 2*x1; - int y2 = out.h - out.h/20; - - int border = .01*out.h; - int h = y2 - y1 - 2*border; - int w = x2 - x1 - 2*border; - - float *predictions = network_predict(net, in_s.data); - float curr_threat = 0; - if(1){ - curr_threat = predictions[0] * 0 + - predictions[1] * .6 + - predictions[2]; - } else { - curr_threat = predictions[218] + - predictions[539] + - predictions[540] + - predictions[368] + - predictions[369] + - predictions[370]; - } - threat = roll * curr_threat + (1-roll) * threat; - - draw_box_width(out, x2 + border, y1 + .02*h, x2 + .5 * w, y1 + .02*h + border, border, 0,0,0); - if(threat > .97) { - draw_box_width(out, x2 + .5 * w + border, - y1 + .02*h - 2*border, - x2 + .5 * w + 
6*border, - y1 + .02*h + 3*border, 3*border, 1,0,0); - } - draw_box_width(out, x2 + .5 * w + border, - y1 + .02*h - 2*border, - x2 + .5 * w + 6*border, - y1 + .02*h + 3*border, .5*border, 0,0,0); - draw_box_width(out, x2 + border, y1 + .42*h, x2 + .5 * w, y1 + .42*h + border, border, 0,0,0); - if(threat > .57) { - draw_box_width(out, x2 + .5 * w + border, - y1 + .42*h - 2*border, - x2 + .5 * w + 6*border, - y1 + .42*h + 3*border, 3*border, 1,1,0); - } - draw_box_width(out, x2 + .5 * w + border, - y1 + .42*h - 2*border, - x2 + .5 * w + 6*border, - y1 + .42*h + 3*border, .5*border, 0,0,0); - - draw_box_width(out, x1, y1, x2, y2, border, 0,0,0); - for(i = 0; i < threat * h ; ++i){ - float ratio = (float) i / h; - float r = (ratio < .5) ? (2*(ratio)) : 1; - float g = (ratio < .5) ? 1 : 1 - 2*(ratio - .5); - draw_box_width(out, x1 + border, y2 - border - i, x2 - border, y2 - border - i, 1, r, g, 0); - } - top_predictions(net, top, indexes); - char buff[256]; - sprintf(buff, "tmp/threat_%06d", count); - //save_image(out, buff); - -#ifndef _WIN32 - printf("\033[2J"); - printf("\033[1;1H"); -#endif - printf("\nFPS:%.0f\n",fps); - - for(i = 0; i < top; ++i){ - int index = indexes[i]; - printf("%.1f%%: %s\n", predictions[index]*100, names[index]); - } - - if(1){ - show_image(out, "Threat"); - wait_key_cv(10); - } - free_image(in_s); - free_image(in); - - gettimeofday(&tval_after, NULL); - timersub(&tval_after, &tval_before, &tval_result); - float curr = 1000000.f/((long int)tval_result.tv_usec); - fps = .9*fps + .1*curr; - } -#endif -} - - -void gun_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename) -{ -#ifdef OPENCV_DISABLE - int bad_cats[] = {218, 539, 540, 1213, 1501, 1742, 1911, 2415, 4348, 19223, 368, 369, 370, 1133, 1200, 1306, 2122, 2301, 2537, 2823, 3179, 3596, 3639, 4489, 5107, 5140, 5289, 6240, 6631, 6762, 7048, 7171, 7969, 7984, 7989, 8824, 8927, 9915, 10270, 10448, 13401, 15205, 18358, 18894, 18895, 19249, 19697}; - - 
printf("Classifier Demo\n"); - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - set_batch_network(&net, 1); - list *options = read_data_cfg(datacfg); - - srand(2222222); - CvCapture * cap; - - if (filename) { - //cap = cvCaptureFromFile(filename); - cap = get_capture_video_stream(filename); - } - else { - //cap = cvCaptureFromCAM(cam_index); - cap = get_capture_webcam(cam_index); - } - - int classes = option_find_int(options, "classes", 2); - int top = option_find_int(options, "top", 1); - if (top > classes) top = classes; - - char *name_list = option_find_str(options, "names", 0); - char **names = get_labels(name_list); - - int* indexes = (int*)xcalloc(top, sizeof(int)); - - if(!cap) error("Couldn't connect to webcam.\n"); - cvNamedWindow("Threat Detection", CV_WINDOW_NORMAL); - cvResizeWindow("Threat Detection", 512, 512); - float fps = 0; - int i; - - while(1){ - struct timeval tval_before, tval_after, tval_result; - gettimeofday(&tval_before, NULL); - - //image in = get_image_from_stream(cap); - image in = get_image_from_stream_cpp(cap); - image in_s = resize_image(in, net.w, net.h); - show_image(in, "Threat Detection"); - - float *predictions = network_predict(net, in_s.data); - top_predictions(net, top, indexes); - - printf("\033[2J"); - printf("\033[1;1H"); - - int threat = 0; - for(i = 0; i < sizeof(bad_cats)/sizeof(bad_cats[0]); ++i){ - int index = bad_cats[i]; - if(predictions[index] > .01){ - printf("Threat Detected!\n"); - threat = 1; - break; - } - } - if(!threat) printf("Scanning...\n"); - for(i = 0; i < sizeof(bad_cats)/sizeof(bad_cats[0]); ++i){ - int index = bad_cats[i]; - if(predictions[index] > .01){ - printf("%s\n", names[index]); - } - } - - free_image(in_s); - free_image(in); - - cvWaitKey(10); - - gettimeofday(&tval_after, NULL); - timersub(&tval_after, &tval_before, &tval_result); - float curr = 1000000.f/((long int)tval_result.tv_usec); - fps = .9*fps + .1*curr; - } -#endif -} - -void 
demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename, int benchmark, int benchmark_layers) -{ -#ifdef OPENCV - printf("Classifier Demo\n"); - network net = parse_network_cfg_custom(cfgfile, 1, 0); - if(weightfile){ - load_weights(&net, weightfile); - } - net.benchmark_layers = benchmark_layers; - set_batch_network(&net, 1); - list *options = read_data_cfg(datacfg); - - fuse_conv_batchnorm(net); - calculate_binary_weights(net); - - srand(2222222); - cap_cv * cap; - - if(filename){ - cap = get_capture_video_stream(filename); - }else{ - cap = get_capture_webcam(cam_index); - } - - int classes = option_find_int(options, "classes", 2); - int top = option_find_int(options, "top", 1); - if (top > classes) top = classes; - - char *name_list = option_find_str(options, "names", 0); - char **names = get_labels(name_list); - - int* indexes = (int*)xcalloc(top, sizeof(int)); - - if(!cap) error("Couldn't connect to webcam.\n"); - if (!benchmark) create_window_cv("Classifier", 0, 512, 512); - float fps = 0; - int i; - - double start_time = get_time_point(); - float avg_fps = 0; - int frame_counter = 0; - - while(1){ - struct timeval tval_before, tval_after, tval_result; - gettimeofday(&tval_before, NULL); - - //image in = get_image_from_stream(cap); - image in_s, in; - if (!benchmark) { - in = get_image_from_stream_cpp(cap); - in_s = resize_image(in, net.w, net.h); - show_image(in, "Classifier"); - } - else { - static image tmp; - if (!tmp.data) tmp = make_image(net.w, net.h, 3); - in_s = tmp; - } - - double time = get_time_point(); - float *predictions = network_predict(net, in_s.data); - double frame_time_ms = (get_time_point() - time)/1000; - frame_counter++; - - if(net.hierarchy) hierarchy_predictions(predictions, net.outputs, net.hierarchy, 1); - top_predictions(net, top, indexes); - -#ifndef _WIN32 - printf("\033[2J"); - printf("\033[1;1H"); -#endif - - - if (!benchmark) { - printf("\rFPS: %.2f (use -benchmark command line flag 
for correct measurement)\n", fps); - for (i = 0; i < top; ++i) { - int index = indexes[i]; - printf("%.1f%%: %s\n", predictions[index] * 100, names[index]); - } - printf("\n"); - - free_image(in_s); - free_image(in); - - int c = wait_key_cv(10);// cvWaitKey(10); - if (c == 27 || c == 1048603) break; - } - else { - printf("\rFPS: %.2f \t AVG_FPS = %.2f ", fps, avg_fps); - } - - //gettimeofday(&tval_after, NULL); - //timersub(&tval_after, &tval_before, &tval_result); - //float curr = 1000000.f/((long int)tval_result.tv_usec); - float curr = 1000.f / frame_time_ms; - if (fps == 0) fps = curr; - else fps = .9*fps + .1*curr; - - float spent_time = (get_time_point() - start_time) / 1000000; - if (spent_time >= 3.0f) { - //printf(" spent_time = %f \n", spent_time); - avg_fps = frame_counter / spent_time; - frame_counter = 0; - start_time = get_time_point(); - } - } -#endif -} - - -void run_classifier(int argc, char **argv) -{ - if(argc < 4){ - fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); - return; - } - - int mjpeg_port = find_int_arg(argc, argv, "-mjpeg_port", -1); - char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); - int *gpus = 0; - int gpu = 0; - int ngpus = 0; - if(gpu_list){ - printf("%s\n", gpu_list); - int len = strlen(gpu_list); - ngpus = 1; - int i; - for(i = 0; i < len; ++i){ - if (gpu_list[i] == ',') ++ngpus; - } - gpus = (int*)xcalloc(ngpus, sizeof(int)); - for(i = 0; i < ngpus; ++i){ - gpus[i] = atoi(gpu_list); - gpu_list = strchr(gpu_list, ',')+1; - } - } else { - gpu = gpu_index; - gpus = &gpu; - ngpus = 1; - } - - int dont_show = find_arg(argc, argv, "-dont_show"); - int benchmark = find_arg(argc, argv, "-benchmark"); - int benchmark_layers = find_arg(argc, argv, "-benchmark_layers"); - if (benchmark_layers) benchmark = 1; - int dontuse_opencv = find_arg(argc, argv, "-dontuse_opencv"); - int show_imgs = find_arg(argc, argv, "-show_imgs"); - int calc_topk = find_arg(argc, argv, "-topk"); - int 
cam_index = find_int_arg(argc, argv, "-c", 0); - int top = find_int_arg(argc, argv, "-t", 0); - int clear = find_arg(argc, argv, "-clear"); - char *data = argv[3]; - char *cfg = argv[4]; - char *weights = (argc > 5) ? argv[5] : 0; - char *filename = (argc > 6) ? argv[6]: 0; - char *layer_s = (argc > 7) ? argv[7]: 0; - int layer = layer_s ? atoi(layer_s) : -1; - char* chart_path = find_char_arg(argc, argv, "-chart", 0); - if(0==strcmp(argv[2], "predict")) predict_classifier(data, cfg, weights, filename, top); - else if(0==strcmp(argv[2], "try")) try_classifier(data, cfg, weights, filename, atoi(layer_s)); - else if(0==strcmp(argv[2], "train")) train_classifier(data, cfg, weights, gpus, ngpus, clear, dontuse_opencv, dont_show, mjpeg_port, calc_topk, show_imgs, chart_path); - else if(0==strcmp(argv[2], "demo")) demo_classifier(data, cfg, weights, cam_index, filename, benchmark, benchmark_layers); - else if(0==strcmp(argv[2], "gun")) gun_classifier(data, cfg, weights, cam_index, filename); - else if(0==strcmp(argv[2], "threat")) threat_classifier(data, cfg, weights, cam_index, filename); - else if(0==strcmp(argv[2], "test")) test_classifier(data, cfg, weights, layer); - else if(0==strcmp(argv[2], "label")) label_classifier(data, cfg, weights); - else if(0==strcmp(argv[2], "valid")) validate_classifier_single(data, cfg, weights, NULL, -1); - else if(0==strcmp(argv[2], "validmulti")) validate_classifier_multi(data, cfg, weights); - else if(0==strcmp(argv[2], "valid10")) validate_classifier_10(data, cfg, weights); - else if(0==strcmp(argv[2], "validcrop")) validate_classifier_crop(data, cfg, weights); - else if(0==strcmp(argv[2], "validfull")) validate_classifier_full(data, cfg, weights); - - if (gpus && gpu_list && ngpus > 1) free(gpus); -} diff --git a/src/Detector/darknet/src/classifier.h b/src/Detector/darknet/src/classifier.h deleted file mode 100644 index d94417d3f..000000000 --- a/src/Detector/darknet/src/classifier.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef 
CLASSIFIER_H -#define CLASSIFIER_H - -#include "list.h" -#ifdef __cplusplus -extern "C" { -#endif -list *read_data_cfg(char *filename); -#ifdef __cplusplus -} -#endif -#endif diff --git a/src/Detector/darknet/src/coco.c b/src/Detector/darknet/src/coco.c deleted file mode 100644 index 8c386a224..000000000 --- a/src/Detector/darknet/src/coco.c +++ /dev/null @@ -1,416 +0,0 @@ -#include - -#include "network.h" -#include "detection_layer.h" -#include "cost_layer.h" -#include "utils.h" -#include "parser.h" -#include "box.h" -#include "demo.h" - -char *coco_classes[] = {"person","bicycle","car","motorcycle","airplane","bus","train","truck","boat","traffic light","fire hydrant","stop sign","parking meter","bench","bird","cat","dog","horse","sheep","cow","elephant","bear","zebra","giraffe","backpack","umbrella","handbag","tie","suitcase","frisbee","skis","snowboard","sports ball","kite","baseball bat","baseball glove","skateboard","surfboard","tennis racket","bottle","wine glass","cup","fork","knife","spoon","bowl","banana","apple","sandwich","orange","broccoli","carrot","hot dog","pizza","donut","cake","chair","couch","potted plant","bed","dining table","toilet","tv","laptop","mouse","remote","keyboard","cell phone","microwave","oven","toaster","sink","refrigerator","book","clock","vase","scissors","teddy bear","hair drier","toothbrush"}; - -int coco_ids[] = {1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90}; - -void train_coco(char *cfgfile, char *weightfile) -{ - //char *train_images = "/home/pjreddie/data/voc/test/train.txt"; - //char *train_images = "/home/pjreddie/data/coco/train.txt"; - char *train_images = "data/coco.trainval.txt"; - //char *train_images = "data/bags.train.list"; - char* backup_directory = "backup/"; - srand(time(0)); - char *base = basecfg(cfgfile); - printf("%s\n", 
base); - float avg_loss = -1; - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); - int imgs = net.batch*net.subdivisions; - int i = *net.seen/imgs; - data train, buffer; - - - layer l = net.layers[net.n - 1]; - - int side = l.side; - int classes = l.classes; - float jitter = l.jitter; - - list *plist = get_paths(train_images); - //int N = plist->size; - char **paths = (char **)list_to_array(plist); - - load_args args = {0}; - args.w = net.w; - args.h = net.h; - args.paths = paths; - args.n = imgs; - args.m = plist->size; - args.classes = classes; - args.jitter = jitter; - args.num_boxes = side; - args.d = &buffer; - args.type = REGION_DATA; - - args.angle = net.angle; - args.exposure = net.exposure; - args.saturation = net.saturation; - args.hue = net.hue; - - pthread_t load_thread = load_data_in_thread(args); - clock_t time; - //while(i*imgs < N*120){ - while(get_current_batch(net) < net.max_batches){ - i += 1; - time=clock(); - pthread_join(load_thread, 0); - train = buffer; - load_thread = load_data_in_thread(args); - - printf("Loaded: %lf seconds\n", sec(clock()-time)); - - /* - image im = float_to_image(net.w, net.h, 3, train.X.vals[113]); - image copy = copy_image(im); - draw_coco(copy, train.y.vals[113], 7, "truth"); - cvWaitKey(0); - free_image(copy); - */ - - time=clock(); - float loss = train_network(net, train); - if (avg_loss < 0) avg_loss = loss; - avg_loss = avg_loss*.9 + loss*.1; - - printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); - if(i%1000==0 || (i < 1000 && i%100 == 0)){ - char buff[256]; - sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); - save_weights(net, buff); - } - if(i%100==0){ - char buff[256]; - sprintf(buff, "%s/%s.backup", backup_directory, base); - save_weights(net, buff); - } - 
free_data(train); - } - char buff[256]; - sprintf(buff, "%s/%s_final.weights", backup_directory, base); - save_weights(net, buff); -} - -void print_cocos(FILE *fp, int image_id, box *boxes, float **probs, int num_boxes, int classes, int w, int h) -{ - int i, j; - for(i = 0; i < num_boxes; ++i){ - float xmin = boxes[i].x - boxes[i].w/2.; - float xmax = boxes[i].x + boxes[i].w/2.; - float ymin = boxes[i].y - boxes[i].h/2.; - float ymax = boxes[i].y + boxes[i].h/2.; - - if (xmin < 0) xmin = 0; - if (ymin < 0) ymin = 0; - if (xmax > w) xmax = w; - if (ymax > h) ymax = h; - - float bx = xmin; - float by = ymin; - float bw = xmax - xmin; - float bh = ymax - ymin; - - for(j = 0; j < classes; ++j){ - if (probs[i][j]) fprintf(fp, "{\"image_id\":%d, \"category_id\":%d, \"bbox\":[%f, %f, %f, %f], \"score\":%f},\n", image_id, coco_ids[j], bx, by, bw, bh, probs[i][j]); - } - } -} - -int get_coco_image_id(char *filename) -{ - char *p = strrchr(filename, '_'); - return atoi(p+1); -} - -void validate_coco(char *cfgfile, char *weightfile) -{ - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - set_batch_network(&net, 1); - fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); - srand(time(0)); - - char *base = "results/"; - list *plist = get_paths("data/coco_val_5k.list"); - //list *plist = get_paths("/home/pjreddie/data/people-art/test.txt"); - //list *plist = get_paths("/home/pjreddie/data/voc/test/2007_test.txt"); - char **paths = (char **)list_to_array(plist); - - layer l = net.layers[net.n-1]; - int classes = l.classes; - int side = l.side; - - int j; - char buff[1024]; - snprintf(buff, 1024, "%s/coco_results.json", base); - FILE *fp = fopen(buff, "w"); - fprintf(fp, "[\n"); - - box* boxes = (box*)xcalloc(side * side * l.n, sizeof(box)); - float** probs = (float**)xcalloc(side * side * l.n, sizeof(float*)); - for(j = 0; j < side*side*l.n; ++j) probs[j] = 
(float*)xcalloc(classes, sizeof(float)); - - int m = plist->size; - int i=0; - int t; - - float thresh = .01; - int nms = 1; - float iou_thresh = .5; - - int nthreads = 8; - image* val = (image*)xcalloc(nthreads, sizeof(image)); - image* val_resized = (image*)xcalloc(nthreads, sizeof(image)); - image* buf = (image*)xcalloc(nthreads, sizeof(image)); - image* buf_resized = (image*)xcalloc(nthreads, sizeof(image)); - pthread_t* thr = (pthread_t*)xcalloc(nthreads, sizeof(pthread_t)); - - load_args args = {0}; - args.w = net.w; - args.h = net.h; - args.type = IMAGE_DATA; - - for(t = 0; t < nthreads; ++t){ - args.path = paths[i+t]; - args.im = &buf[t]; - args.resized = &buf_resized[t]; - thr[t] = load_data_in_thread(args); - } - time_t start = time(0); - for(i = nthreads; i < m+nthreads; i += nthreads){ - fprintf(stderr, "%d\n", i); - for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ - pthread_join(thr[t], 0); - val[t] = buf[t]; - val_resized[t] = buf_resized[t]; - } - for(t = 0; t < nthreads && i+t < m; ++t){ - args.path = paths[i+t]; - args.im = &buf[t]; - args.resized = &buf_resized[t]; - thr[t] = load_data_in_thread(args); - } - for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ - char *path = paths[i+t-nthreads]; - int image_id = get_coco_image_id(path); - float *X = val_resized[t].data; - network_predict(net, X); - int w = val[t].w; - int h = val[t].h; - get_detection_boxes(l, w, h, thresh, probs, boxes, 0); - if (nms) do_nms_sort_v2(boxes, probs, side*side*l.n, classes, iou_thresh); - print_cocos(fp, image_id, boxes, probs, side*side*l.n, classes, w, h); - free_image(val[t]); - free_image(val_resized[t]); - } - } -#ifdef WIN32 - fseek(fp, -3, SEEK_CUR); -#else - fseek(fp, -2, SEEK_CUR); -#endif - fprintf(fp, "\n]\n"); - fclose(fp); - - if (val) free(val); - if (val_resized) free(val_resized); - if (buf) free(buf); - if (buf_resized) free(buf_resized); - if (thr) free(thr); - - fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start)); -} - 
-void validate_coco_recall(char *cfgfile, char *weightfile) -{ - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - set_batch_network(&net, 1); - fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); - srand(time(0)); - - char *base = "results/comp4_det_test_"; - list* plist = get_paths("data/voc/test/2007_test.txt"); - char **paths = (char **)list_to_array(plist); - - layer l = net.layers[net.n-1]; - int classes = l.classes; - int side = l.side; - - int j, k; - /* unused code,why? - FILE** fps = (FILE**)xcalloc(classes, sizeof(FILE*)); - for(j = 0; j < classes; ++j){ - char buff[1024]; - snprintf(buff, 1024, "%s%s.txt", base, coco_classes[j]); - fps[j] = fopen(buff, "w"); - } - */ - box* boxes = (box*)xcalloc(side * side * l.n, sizeof(box)); - float** probs = (float**)xcalloc(side * side * l.n, sizeof(float*)); - for(j = 0; j < side*side*l.n; ++j) { - probs[j] = (float*)xcalloc(classes, sizeof(float)); - } - - int m = plist->size; - int i=0; - - float thresh = .001; - int nms = 0; - float iou_thresh = .5; - float nms_thresh = .5; - - int total = 0; - int correct = 0; - int proposals = 0; - float avg_iou = 0; - - for(i = 0; i < m; ++i){ - char *path = paths[i]; - image orig = load_image_color(path, 0, 0); - image sized = resize_image(orig, net.w, net.h); - char *id = basecfg(path); - network_predict(net, sized.data); - get_detection_boxes(l, 1, 1, thresh, probs, boxes, 1); - if (nms) do_nms(boxes, probs, side*side*l.n, 1, nms_thresh); - - char labelpath[4096]; - replace_image_to_label(path, labelpath); - - int num_labels = 0; - box_label *truth = read_boxes(labelpath, &num_labels); - for(k = 0; k < side*side*l.n; ++k){ - if(probs[k][0] > thresh){ - ++proposals; - } - } - for (j = 0; j < num_labels; ++j) { - ++total; - box t = {truth[j].x, truth[j].y, truth[j].w, truth[j].h}; - float best_iou = 0; - for(k = 0; k < side*side*l.n; ++k){ - float iou = 
box_iou(boxes[k], t); - if(probs[k][0] > thresh && iou > best_iou){ - best_iou = iou; - } - } - avg_iou += best_iou; - if(best_iou > iou_thresh){ - ++correct; - } - } - - fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals/(i+1), avg_iou*100/total, 100.*correct/total); - - //if (fps) free(fps); - if (id) free(id); - free_image(orig); - free_image(sized); - } - free(boxes); - for(j = 0; j < side*side*l.n; ++j) { - free(probs[j]); - } - free(probs); -} - -void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh) -{ - image **alphabet = load_alphabet(); - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - detection_layer l = net.layers[net.n-1]; - set_batch_network(&net, 1); - srand(2222222); - float nms = .4; - clock_t time; - char buff[256]; - char *input = buff; - int j; - box* boxes = (box*)xcalloc(l.side * l.side * l.n, sizeof(box)); - float** probs = (float**)xcalloc(l.side * l.side * l.n, sizeof(float*)); - for(j = 0; j < l.side*l.side*l.n; ++j) { - probs[j] = (float*)xcalloc(l.classes, sizeof(float)); - } - while(1){ - if(filename){ - strncpy(input, filename, 256); - } else { - printf("Enter Image Path: "); - fflush(stdout); - input = fgets(input, 256, stdin); - if(!input) break; - strtok(input, "\n"); - } - image im = load_image_color(input,0,0); - image sized = resize_image(im, net.w, net.h); - float *X = sized.data; - time=clock(); - network_predict(net, X); - printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); - get_detection_boxes(l, 1, 1, thresh, probs, boxes, 0); - if (nms) do_nms_sort_v2(boxes, probs, l.side*l.side*l.n, l.classes, nms); - draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, coco_classes, alphabet, 80); - save_image(im, "prediction"); - show_image(im, "predictions"); - free_image(im); - free_image(sized); - - wait_until_press_key_cv(); - destroy_all_windows_cv(); - - if (filename) 
break; - } - free(boxes); - for(j = 0; j < l.side*l.side*l.n; ++j) { - free(probs[j]); - } - free(probs); -} - -void run_coco(int argc, char **argv) -{ - int dont_show = find_arg(argc, argv, "-dont_show"); - int mjpeg_port = find_int_arg(argc, argv, "-mjpeg_port", -1); - int json_port = find_int_arg(argc, argv, "-json_port", -1); - char *out_filename = find_char_arg(argc, argv, "-out_filename", 0); - char *prefix = find_char_arg(argc, argv, "-prefix", 0); - float thresh = find_float_arg(argc, argv, "-thresh", .2); - float hier_thresh = find_float_arg(argc, argv, "-hier", .5); - int cam_index = find_int_arg(argc, argv, "-c", 0); - int frame_skip = find_int_arg(argc, argv, "-s", 0); - int ext_output = find_arg(argc, argv, "-ext_output"); - - if(argc < 4){ - fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); - return; - } - - char *cfg = argv[3]; - char *weights = (argc > 4) ? argv[4] : 0; - char *filename = (argc > 5) ? argv[5]: 0; - if(0==strcmp(argv[2], "test")) test_coco(cfg, weights, filename, thresh); - else if(0==strcmp(argv[2], "train")) train_coco(cfg, weights); - else if(0==strcmp(argv[2], "valid")) validate_coco(cfg, weights); - else if(0==strcmp(argv[2], "recall")) validate_coco_recall(cfg, weights); - else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, hier_thresh, cam_index, filename, coco_classes, 80, 1, frame_skip, - prefix, out_filename, mjpeg_port, 0, json_port, dont_show, ext_output, 0, 0, 0, 0, 0); -} diff --git a/src/Detector/darknet/src/col2im.c b/src/Detector/darknet/src/col2im.c deleted file mode 100644 index 4b8cb592e..000000000 --- a/src/Detector/darknet/src/col2im.c +++ /dev/null @@ -1,95 +0,0 @@ -#include -#include -#include -#include "col2im.h" -void col2im_add_pixel(float *im, int height, int width, int channels, - int row, int col, int channel, int pad, float val) -{ - row -= pad; - col -= pad; - - if (row < 0 || col < 0 || - row >= height || col >= width) return; - im[col + 
width*(row + height*channel)] += val; -} -//This one might be too, can't remember. -void col2im_cpu(float* data_col, - int channels, int height, int width, - int ksize, int stride, int pad, float* data_im) -{ - int c,h,w; - int height_col = (height + 2*pad - ksize) / stride + 1; - int width_col = (width + 2*pad - ksize) / stride + 1; - - int channels_col = channels * ksize * ksize; - for (c = 0; c < channels_col; ++c) { - int w_offset = c % ksize; - int h_offset = (c / ksize) % ksize; - int c_im = c / ksize / ksize; - for (h = 0; h < height_col; ++h) { - for (w = 0; w < width_col; ++w) { - int im_row = h_offset + h * stride; - int im_col = w_offset + w * stride; - int col_index = (c * height_col + h) * width_col + w; - float val = data_col[col_index]; - col2im_add_pixel(data_im, height, width, channels, - im_row, im_col, c_im, pad, val); - } - } - } -} -// ---------------------------------------- -void caffe_set(const int N, const float alpha, float* Y) { - if (alpha == 0) { - memset(Y, 0, sizeof(float) * N); // NOLINT(caffe/alt_fn) - return; - } - int i; - for (i = 0; i < N; ++i) { - Y[i] = alpha; - } -} - -inline static int is_a_ge_zero_and_a_lt_b(int a, int b) { - return (unsigned)(a) < (unsigned)(b); -} - -// https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cpp -void col2im_cpu_ext(const float* data_col, const int channels, - const int height, const int width, const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, - const int stride_h, const int stride_w, - const int dilation_h, const int dilation_w, - float* data_im) -{ - caffe_set(height * width * channels, 0.0F, data_im); - const int output_h = (height + 2 * pad_h - - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; - const int output_w = (width + 2 * pad_w - - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; - const int channel_size = height * width; - int channel, kernel_row, kernel_col, output_rows, output_col; - for (channel = channels; channel--; data_im += 
channel_size) { - for (kernel_row = 0; kernel_row < kernel_h; kernel_row++) { - for (kernel_col = 0; kernel_col < kernel_w; kernel_col++) { - int input_row = -pad_h + kernel_row * dilation_h; - for (output_rows = output_h; output_rows; output_rows--) { - if (!is_a_ge_zero_and_a_lt_b(input_row, height)) { - data_col += output_w; - } - else { - int input_col = -pad_w + kernel_col * dilation_w; - for (output_col = output_w; output_col; output_col--) { - if (is_a_ge_zero_and_a_lt_b(input_col, width)) { - data_im[input_row * width + input_col] += *data_col; - } - data_col++; - input_col += stride_w; - } - } - input_row += stride_h; - } - } - } - } -} \ No newline at end of file diff --git a/src/Detector/darknet/src/col2im.h b/src/Detector/darknet/src/col2im.h deleted file mode 100644 index 984f7c4b7..000000000 --- a/src/Detector/darknet/src/col2im.h +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef COL2IM_H -#define COL2IM_H - -#ifdef __cplusplus -extern "C" { -#endif -void col2im_cpu(float* data_col, - int channels, int height, int width, - int ksize, int stride, int pad, float* data_im); - -void col2im_cpu_ext(const float* data_col, const int channels, - const int height, const int width, const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, - const int stride_h, const int stride_w, - const int dilation_h, const int dilation_w, - float* data_im); - -#ifdef GPU -void col2im_ongpu(float *data_col, - int channels, int height, int width, - int ksize, int stride, int pad, float *data_im); - - -void col2im_gpu_ext(const float* data_col, const int channels, - const int height, const int width, const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, const int stride_h, - const int stride_w, const int dilation_h, const int dilation_w, - float* data_im); -#endif -#ifdef __cplusplus -} -#endif -#endif diff --git a/src/Detector/darknet/src/col2im_kernels.cu b/src/Detector/darknet/src/col2im_kernels.cu deleted file mode 100644 index 
0e07bc37a..000000000 --- a/src/Detector/darknet/src/col2im_kernels.cu +++ /dev/null @@ -1,136 +0,0 @@ -#include -#include -#include - -#include "col2im.h" -#include "dark_cuda.h" - -// src: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu -// You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE - -__global__ void col2im_gpu_kernel(const int n, const float* data_col, - const int height, const int width, const int ksize, - const int pad, - const int stride, - const int height_col, const int width_col, - float *data_im) { - int index = blockIdx.x*blockDim.x+threadIdx.x; - for(; index < n; index += blockDim.x*gridDim.x){ - float val = 0; - int w = index % width + pad; - int h = (index / width) % height + pad; - int c = index / (width * height); - // compute the start and end of the output - int w_col_start = (w < ksize) ? 0 : (w - ksize) / stride + 1; - int w_col_end = min(w / stride + 1, width_col); - int h_col_start = (h < ksize) ? 0 : (h - ksize) / stride + 1; - int h_col_end = min(h / stride + 1, height_col); - // equivalent implementation - int offset = - (c * ksize * ksize + h * ksize + w) * height_col * width_col; - int coeff_h_col = (1 - stride * ksize * height_col) * width_col; - int coeff_w_col = (1 - stride * height_col * width_col); - for (int h_col = h_col_start; h_col < h_col_end; ++h_col) { - for (int w_col = w_col_start; w_col < w_col_end; ++w_col) { - val += data_col[offset + h_col * coeff_h_col + w_col * coeff_w_col]; - } - } - data_im[index] += val; - } -} - -void col2im_ongpu(float *data_col, - int channels, int height, int width, - int ksize, int stride, int pad, float *data_im){ - // We are going to launch channels * height_col * width_col kernels, each - // kernel responsible for copying a single-channel grid. 
- int height_col = (height + 2 * pad - ksize) / stride + 1; - int width_col = (width + 2 * pad - ksize) / stride + 1; - int num_kernels = channels * height * width; - col2im_gpu_kernel<<<(num_kernels+BLOCK-1)/BLOCK, - BLOCK, 0, get_cuda_stream() >>>( - num_kernels, data_col, height, width, ksize, pad, - stride, height_col, - width_col, data_im); - - CHECK_CUDA(cudaPeekAtLastError()); -} -// ----------------------------------------- - -// CUDA: use 512 threads per block -const int CAFFE_CUDA_NUM_THREADS = 512; - -// CUDA: number of blocks for threads. -inline int CAFFE_GET_BLOCKS(const int N) { - return (N + CAFFE_CUDA_NUM_THREADS - 1) / CAFFE_CUDA_NUM_THREADS; -} - -// CUDA: grid stride looping -#define CUDA_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; \ - i < (n); \ - i += blockDim.x * gridDim.x) - -// https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu -__global__ void col2im_gpu_kernel_ext(const int n, const float* data_col, - const int height, const int width, const int channels, - const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, - const int stride_h, const int stride_w, - const int dilation_h, const int dilation_w, - const int height_col, const int width_col, - float* data_im) { - CUDA_KERNEL_LOOP(index, n) { - float val = 0; - const int w_im = index % width + pad_w; - const int h_im = (index / width) % height + pad_h; - const int c_im = index / (width * height); - int kernel_extent_w = (kernel_w - 1) * dilation_w + 1; - int kernel_extent_h = (kernel_h - 1) * dilation_h + 1; - // compute the start and end of the output - const int w_col_start = - (w_im < kernel_extent_w) ? 0 : (w_im - kernel_extent_w) / stride_w + 1; - const int w_col_end = min(w_im / stride_w + 1, width_col); - const int h_col_start = - (h_im < kernel_extent_h) ? 
0 : (h_im - kernel_extent_h) / stride_h + 1; - const int h_col_end = min(h_im / stride_h + 1, height_col); - // TODO: use LCM of stride and dilation to avoid unnecessary loops - for (int h_col = h_col_start; h_col < h_col_end; h_col += 1) { - for (int w_col = w_col_start; w_col < w_col_end; w_col += 1) { - int h_k = (h_im - h_col * stride_h); - int w_k = (w_im - w_col * stride_w); - if (h_k % dilation_h == 0 && w_k % dilation_w == 0) { - h_k /= dilation_h; - w_k /= dilation_w; - int data_col_index = (((c_im * kernel_h + h_k) * kernel_w + w_k) * - height_col + h_col) * width_col + w_col; - val += data_col[data_col_index]; - } - } - } - data_im[index] = val; - } -} - -void col2im_gpu_ext(const float* data_col, const int channels, - const int height, const int width, const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, const int stride_h, - const int stride_w, const int dilation_h, const int dilation_w, - float* data_im) -{ - int height_col = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / - stride_h + 1; - int width_col = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / - stride_w + 1; - int num_kernels = channels * height * width; - // To avoid involving atomic operations, we will launch one kernel per - // bottom dimension, and then in the kernel add up the top dimensions. 
- // NOLINT_NEXT_LINE(whitespace/operators) - col2im_gpu_kernel_ext<< > >( - num_kernels, data_col, height, width, channels, kernel_h, kernel_w, - pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, - height_col, width_col, data_im); - - CHECK_CUDA(cudaPeekAtLastError()); -} \ No newline at end of file diff --git a/src/Detector/darknet/src/compare.c b/src/Detector/darknet/src/compare.c deleted file mode 100644 index 62edabe96..000000000 --- a/src/Detector/darknet/src/compare.c +++ /dev/null @@ -1,352 +0,0 @@ -#include - -#include "network.h" -#include "detection_layer.h" -#include "cost_layer.h" -#include "utils.h" -#include "parser.h" -#include "box.h" - -void train_compare(char *cfgfile, char *weightfile) -{ - srand(time(0)); - float avg_loss = -1; - char *base = basecfg(cfgfile); - char* backup_directory = "backup/"; - printf("%s\n", base); - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); - int imgs = 1024; - list *plist = get_paths("data/compare.train.list"); - char **paths = (char **)list_to_array(plist); - int N = plist->size; - printf("%d\n", N); - clock_t time; - pthread_t load_thread; - data train; - data buffer; - - load_args args = {0}; - args.w = net.w; - args.h = net.h; - args.paths = paths; - args.classes = 20; - args.n = imgs; - args.m = N; - args.d = &buffer; - args.type = COMPARE_DATA; - - load_thread = load_data_in_thread(args); - int epoch = *net.seen/N; - int i = 0; - while(1){ - ++i; - time=clock(); - pthread_join(load_thread, 0); - train = buffer; - - load_thread = load_data_in_thread(args); - printf("Loaded: %lf seconds\n", sec(clock()-time)); - time=clock(); - float loss = train_network(net, train); - if(avg_loss == -1) avg_loss = loss; - avg_loss = avg_loss*.9 + loss*.1; - printf("%.3f: %f, %f avg, %lf seconds, %ld images\n", (float)*net.seen/N, loss, avg_loss, sec(clock()-time), 
*net.seen); - free_data(train); - if(i%100 == 0){ - char buff[256]; - sprintf(buff, "%s/%s_%d_minor_%d.weights",backup_directory,base, epoch, i); - save_weights(net, buff); - } - if(*net.seen/N > epoch){ - epoch = *net.seen/N; - i = 0; - char buff[256]; - sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); - save_weights(net, buff); - if(epoch%22 == 0) net.learning_rate *= .1; - } - } - pthread_join(load_thread, 0); - free_data(buffer); - free_network(net); - free_ptrs((void**)paths, plist->size); - free_list(plist); - free(base); -} - -void validate_compare(char *filename, char *weightfile) -{ - int i = 0; - network net = parse_network_cfg(filename); - if(weightfile){ - load_weights(&net, weightfile); - } - srand(time(0)); - - list *plist = get_paths("data/compare.val.list"); - //list *plist = get_paths("data/compare.val.old"); - char **paths = (char **)list_to_array(plist); - int N = plist->size/2; - free_list(plist); - - clock_t time; - int correct = 0; - int total = 0; - int splits = 10; - int num = (i+1)*N/splits - i*N/splits; - - data val, buffer; - - load_args args = {0}; - args.w = net.w; - args.h = net.h; - args.paths = paths; - args.classes = 20; - args.n = num; - args.m = 0; - args.d = &buffer; - args.type = COMPARE_DATA; - - pthread_t load_thread = load_data_in_thread(args); - for(i = 1; i <= splits; ++i){ - time=clock(); - - pthread_join(load_thread, 0); - val = buffer; - - num = (i+1)*N/splits - i*N/splits; - char **part = paths+(i*N/splits); - if(i != splits){ - args.paths = part; - load_thread = load_data_in_thread(args); - } - printf("Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock()-time)); - - time=clock(); - matrix pred = network_predict_data(net, val); - int j,k; - for(j = 0; j < val.y.rows; ++j){ - for(k = 0; k < 20; ++k){ - if(val.y.vals[j][k*2] != val.y.vals[j][k*2+1]){ - ++total; - if((val.y.vals[j][k*2] < val.y.vals[j][k*2+1]) == (pred.vals[j][k*2] < pred.vals[j][k*2+1])){ - ++correct; - } - } - } - } - 
free_matrix(pred); - printf("%d: Acc: %f, %lf seconds, %d images\n", i, (float)correct/total, sec(clock()-time), val.X.rows); - free_data(val); - } -} - -typedef struct { - network net; - char *filename; - int class_id; - int classes; - float elo; - float *elos; -} sortable_bbox; - -int total_compares = 0; -int current_class_id = 0; - -int elo_comparator(const void*a, const void *b) -{ - sortable_bbox box1 = *(sortable_bbox*)a; - sortable_bbox box2 = *(sortable_bbox*)b; - if(box1.elos[current_class_id] == box2.elos[current_class_id]) return 0; - if(box1.elos[current_class_id] > box2.elos[current_class_id]) return -1; - return 1; -} - -int bbox_comparator(const void *a, const void *b) -{ - ++total_compares; - sortable_bbox box1 = *(sortable_bbox*)a; - sortable_bbox box2 = *(sortable_bbox*)b; - network net = box1.net; - int class_id = box1.class_id; - - image im1 = load_image_color(box1.filename, net.w, net.h); - image im2 = load_image_color(box2.filename, net.w, net.h); - float* X = (float*)xcalloc(net.w * net.h * net.c, sizeof(float)); - memcpy(X, im1.data, im1.w*im1.h*im1.c*sizeof(float)); - memcpy(X+im1.w*im1.h*im1.c, im2.data, im2.w*im2.h*im2.c*sizeof(float)); - float *predictions = network_predict(net, X); - - free_image(im1); - free_image(im2); - free(X); - if (predictions[class_id*2] > predictions[class_id*2+1]){ - return 1; - } - return -1; -} - -void bbox_update(sortable_bbox *a, sortable_bbox *b, int class_id, int result) -{ - int k = 32; - float EA = 1./(1+pow(10, (b->elos[class_id] - a->elos[class_id])/400.)); - float EB = 1./(1+pow(10, (a->elos[class_id] - b->elos[class_id])/400.)); - float SA = result ? 1 : 0; - float SB = result ? 
0 : 1; - a->elos[class_id] += k*(SA - EA); - b->elos[class_id] += k*(SB - EB); -} - -void bbox_fight(network net, sortable_bbox *a, sortable_bbox *b, int classes, int class_id) -{ - image im1 = load_image_color(a->filename, net.w, net.h); - image im2 = load_image_color(b->filename, net.w, net.h); - float* X = (float*)xcalloc(net.w * net.h * net.c, sizeof(float)); - memcpy(X, im1.data, im1.w*im1.h*im1.c*sizeof(float)); - memcpy(X+im1.w*im1.h*im1.c, im2.data, im2.w*im2.h*im2.c*sizeof(float)); - float *predictions = network_predict(net, X); - ++total_compares; - - int i; - for(i = 0; i < classes; ++i){ - if(class_id < 0 || class_id == i){ - int result = predictions[i*2] > predictions[i*2+1]; - bbox_update(a, b, i, result); - } - } - - free_image(im1); - free_image(im2); - free(X); -} - -void SortMaster3000(char *filename, char *weightfile) -{ - int i = 0; - network net = parse_network_cfg(filename); - if(weightfile){ - load_weights(&net, weightfile); - } - srand(time(0)); - set_batch_network(&net, 1); - - list *plist = get_paths("data/compare.sort.list"); - //list *plist = get_paths("data/compare.val.old"); - char **paths = (char **)list_to_array(plist); - int N = plist->size; - free_list(plist); - sortable_bbox* boxes = (sortable_bbox*)xcalloc(N, sizeof(sortable_bbox)); - printf("Sorting %d boxes...\n", N); - for(i = 0; i < N; ++i){ - boxes[i].filename = paths[i]; - boxes[i].net = net; - boxes[i].class_id = 7; - boxes[i].elo = 1500; - } - clock_t time=clock(); - qsort(boxes, N, sizeof(sortable_bbox), bbox_comparator); - for(i = 0; i < N; ++i){ - printf("%s\n", boxes[i].filename); - } - printf("Sorted in %d compares, %f secs\n", total_compares, sec(clock()-time)); -} - -void BattleRoyaleWithCheese(char *filename, char *weightfile) -{ - int classes = 20; - int i,j; - network net = parse_network_cfg(filename); - if(weightfile){ - load_weights(&net, weightfile); - } - srand(time(0)); - set_batch_network(&net, 1); - - list *plist = get_paths("data/compare.sort.list"); - 
//list *plist = get_paths("data/compare.small.list"); - //list *plist = get_paths("data/compare.cat.list"); - //list *plist = get_paths("data/compare.val.old"); - char **paths = (char **)list_to_array(plist); - int N = plist->size; - int total = N; - free_list(plist); - sortable_bbox* boxes = (sortable_bbox*)xcalloc(N, sizeof(sortable_bbox)); - printf("Battling %d boxes...\n", N); - for(i = 0; i < N; ++i){ - boxes[i].filename = paths[i]; - boxes[i].net = net; - boxes[i].classes = classes; - boxes[i].elos = (float*)xcalloc(classes, sizeof(float)); - for(j = 0; j < classes; ++j){ - boxes[i].elos[j] = 1500; - } - } - int round; - clock_t time=clock(); - for(round = 1; round <= 4; ++round){ - clock_t round_time=clock(); - printf("Round: %d\n", round); - shuffle(boxes, N, sizeof(sortable_bbox)); - for(i = 0; i < N/2; ++i){ - bbox_fight(net, boxes+i*2, boxes+i*2+1, classes, -1); - } - printf("Round: %f secs, %d remaining\n", sec(clock()-round_time), N); - } - - int class_id; - - for (class_id = 0; class_id < classes; ++class_id){ - - N = total; - current_class_id = class_id; - qsort(boxes, N, sizeof(sortable_bbox), elo_comparator); - N /= 2; - - for(round = 1; round <= 100; ++round){ - clock_t round_time=clock(); - printf("Round: %d\n", round); - - sorta_shuffle(boxes, N, sizeof(sortable_bbox), 10); - for(i = 0; i < N/2; ++i){ - bbox_fight(net, boxes+i*2, boxes+i*2+1, classes, class_id); - } - qsort(boxes, N, sizeof(sortable_bbox), elo_comparator); - if(round <= 20) N = (N*9/10)/2*2; - - printf("Round: %f secs, %d remaining\n", sec(clock()-round_time), N); - } - char buff[256]; - sprintf(buff, "results/battle_%d.log", class_id); - FILE *outfp = fopen(buff, "w"); - for(i = 0; i < N; ++i){ - fprintf(outfp, "%s %f\n", boxes[i].filename, boxes[i].elos[class_id]); - } - fclose(outfp); - } - printf("Tournament in %d compares, %f secs\n", total_compares, sec(clock()-time)); -} - -void run_compare(int argc, char **argv) -{ - if(argc < 4){ - fprintf(stderr, "usage: %s %s 
[train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); - return; - } - - char *cfg = argv[3]; - char *weights = (argc > 4) ? argv[4] : 0; - //char *filename = (argc > 5) ? argv[5]: 0; - if(0==strcmp(argv[2], "train")) train_compare(cfg, weights); - else if(0==strcmp(argv[2], "valid")) validate_compare(cfg, weights); - else if(0==strcmp(argv[2], "sort")) SortMaster3000(cfg, weights); - else if(0==strcmp(argv[2], "battle")) BattleRoyaleWithCheese(cfg, weights); - /* - else if(0==strcmp(argv[2], "train")) train_coco(cfg, weights); - else if(0==strcmp(argv[2], "extract")) extract_boxes(cfg, weights); - else if(0==strcmp(argv[2], "valid")) validate_recall(cfg, weights); - */ -} diff --git a/src/Detector/darknet/src/connected_layer.c b/src/Detector/darknet/src/connected_layer.c deleted file mode 100644 index 25a5ffa1e..000000000 --- a/src/Detector/darknet/src/connected_layer.c +++ /dev/null @@ -1,447 +0,0 @@ -#include "connected_layer.h" -#include "batchnorm_layer.h" -#include "convolutional_layer.h" -#include "utils.h" -#include "dark_cuda.h" -#include "blas.h" -#include "gemm.h" - -#include -#include -#include -#include - -size_t get_connected_workspace_size(layer l) -{ -#ifdef CUDNN - return get_convolutional_workspace_size(l); - /* - if (gpu_index >= 0) { - size_t most = 0; - size_t s = 0; - CHECK_CUDNN(cudnnGetConvolutionForwardWorkspaceSize(cudnn_handle(), - l.srcTensorDesc, - l.weightDesc, - l.convDesc, - l.dstTensorDesc, - l.fw_algo, - &s)); - if (s > most) most = s; - CHECK_CUDNN(cudnnGetConvolutionBackwardFilterWorkspaceSize(cudnn_handle(), - l.srcTensorDesc, - l.ddstTensorDesc, - l.convDesc, - l.dweightDesc, - l.bf_algo, - &s)); - if (s > most) most = s; - CHECK_CUDNN(cudnnGetConvolutionBackwardDataWorkspaceSize(cudnn_handle(), - l.weightDesc, - l.ddstTensorDesc, - l.convDesc, - l.dsrcTensorDesc, - l.bd_algo, - &s)); - if (s > most) most = s; - return most; - } - */ -#endif - return 0; -} - -connected_layer make_connected_layer(int batch, int 
steps, int inputs, int outputs, ACTIVATION activation, int batch_normalize) -{ - int total_batch = batch*steps; - int i; - connected_layer l = { (LAYER_TYPE)0 }; - l.type = CONNECTED; - - l.inputs = inputs; - l.outputs = outputs; - l.batch= batch; - l.batch_normalize = batch_normalize; - l.h = 1; - l.w = 1; - l.c = inputs; - l.out_h = 1; - l.out_w = 1; - l.out_c = outputs; - l.n = l.out_c; - l.size = 1; - l.stride = l.stride_x = l.stride_y = 1; - l.pad = 0; - l.activation = activation; - l.learning_rate_scale = 1; - l.groups = 1; - l.dilation = 1; - - l.output = (float*)xcalloc(total_batch * outputs, sizeof(float)); - l.delta = (float*)xcalloc(total_batch * outputs, sizeof(float)); - - l.weight_updates = (float*)xcalloc(inputs * outputs, sizeof(float)); - l.bias_updates = (float*)xcalloc(outputs, sizeof(float)); - - l.weights = (float*)xcalloc(outputs * inputs, sizeof(float)); - l.biases = (float*)xcalloc(outputs, sizeof(float)); - - l.forward = forward_connected_layer; - l.backward = backward_connected_layer; - l.update = update_connected_layer; - - //float scale = 1./sqrt(inputs); - float scale = sqrt(2.f/inputs); - for(i = 0; i < outputs*inputs; ++i){ - l.weights[i] = scale*rand_uniform(-1, 1); - } - - for(i = 0; i < outputs; ++i){ - l.biases[i] = 0; - } - - if(batch_normalize){ - l.scales = (float*)xcalloc(outputs, sizeof(float)); - l.scale_updates = (float*)xcalloc(outputs, sizeof(float)); - for(i = 0; i < outputs; ++i){ - l.scales[i] = 1; - } - - l.mean = (float*)xcalloc(outputs, sizeof(float)); - l.mean_delta = (float*)xcalloc(outputs, sizeof(float)); - l.variance = (float*)xcalloc(outputs, sizeof(float)); - l.variance_delta = (float*)xcalloc(outputs, sizeof(float)); - - l.rolling_mean = (float*)xcalloc(outputs, sizeof(float)); - l.rolling_variance = (float*)xcalloc(outputs, sizeof(float)); - - l.x = (float*)xcalloc(total_batch * outputs, sizeof(float)); - l.x_norm = (float*)xcalloc(total_batch * outputs, sizeof(float)); - } - -#ifdef GPU - l.forward_gpu = 
forward_connected_layer_gpu; - l.backward_gpu = backward_connected_layer_gpu; - l.update_gpu = update_connected_layer_gpu; - - l.weights_gpu = cuda_make_array(l.weights, outputs*inputs); - l.biases_gpu = cuda_make_array(l.biases, outputs); - - l.weight_updates_gpu = cuda_make_array(l.weight_updates, outputs*inputs); - l.bias_updates_gpu = cuda_make_array(l.bias_updates, outputs); - - l.output_gpu = cuda_make_array(l.output, outputs*total_batch); - l.delta_gpu = cuda_make_array(l.delta, outputs*total_batch); - if (batch_normalize) { - l.scales_gpu = cuda_make_array(l.scales, outputs); - l.scale_updates_gpu = cuda_make_array(l.scale_updates, outputs); - - l.mean_gpu = cuda_make_array(l.mean, outputs); - l.variance_gpu = cuda_make_array(l.variance, outputs); - - l.rolling_mean_gpu = cuda_make_array(l.mean, outputs); - l.rolling_variance_gpu = cuda_make_array(l.variance, outputs); - - l.mean_delta_gpu = cuda_make_array(l.mean, outputs); - l.variance_delta_gpu = cuda_make_array(l.variance, outputs); - - l.x_gpu = cuda_make_array(l.output, total_batch*outputs); - l.x_norm_gpu = cuda_make_array(l.output, total_batch*outputs); - } -#ifdef CUDNN - create_convolutional_cudnn_tensors(&l); - cudnn_convolutional_setup(&l, cudnn_fastest, 0); // cudnn_fastest, cudnn_smallest - l.workspace_size = get_connected_workspace_size(l); -#endif // CUDNN -#endif // GPU - fprintf(stderr, "connected %4d -> %4d\n", inputs, outputs); - return l; -} - -void update_connected_layer(connected_layer l, int batch, float learning_rate, float momentum, float decay) -{ - axpy_cpu(l.outputs, learning_rate/batch, l.bias_updates, 1, l.biases, 1); - scal_cpu(l.outputs, momentum, l.bias_updates, 1); - - if(l.batch_normalize){ - axpy_cpu(l.outputs, learning_rate/batch, l.scale_updates, 1, l.scales, 1); - scal_cpu(l.outputs, momentum, l.scale_updates, 1); - } - - axpy_cpu(l.inputs*l.outputs, -decay*batch, l.weights, 1, l.weight_updates, 1); - axpy_cpu(l.inputs*l.outputs, learning_rate/batch, l.weight_updates, 
1, l.weights, 1); - scal_cpu(l.inputs*l.outputs, momentum, l.weight_updates, 1); -} - -void forward_connected_layer(connected_layer l, network_state state) -{ - int i; - fill_cpu(l.outputs*l.batch, 0, l.output, 1); - int m = l.batch; - int k = l.inputs; - int n = l.outputs; - float *a = state.input; - float *b = l.weights; - float *c = l.output; - gemm(0,1,m,n,k,1,a,k,b,k,1,c,n); - if(l.batch_normalize){ - if(state.train){ - mean_cpu(l.output, l.batch, l.outputs, 1, l.mean); - variance_cpu(l.output, l.mean, l.batch, l.outputs, 1, l.variance); - - scal_cpu(l.outputs, .95f, l.rolling_mean, 1); - axpy_cpu(l.outputs, .05f, l.mean, 1, l.rolling_mean, 1); - scal_cpu(l.outputs, .95f, l.rolling_variance, 1); - axpy_cpu(l.outputs, .05f, l.variance, 1, l.rolling_variance, 1); - - copy_cpu(l.outputs*l.batch, l.output, 1, l.x, 1); - normalize_cpu(l.output, l.mean, l.variance, l.batch, l.outputs, 1); - copy_cpu(l.outputs*l.batch, l.output, 1, l.x_norm, 1); - } else { - normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, l.batch, l.outputs, 1); - } - scale_bias(l.output, l.scales, l.batch, l.outputs, 1); - } - for(i = 0; i < l.batch; ++i){ - axpy_cpu(l.outputs, 1, l.biases, 1, l.output + i*l.outputs, 1); - } - activate_array(l.output, l.outputs*l.batch, l.activation); -} - -void backward_connected_layer(connected_layer l, network_state state) -{ - int i; - gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); - for(i = 0; i < l.batch; ++i){ - axpy_cpu(l.outputs, 1, l.delta + i*l.outputs, 1, l.bias_updates, 1); - } - if(l.batch_normalize){ - backward_scale_cpu(l.x_norm, l.delta, l.batch, l.outputs, 1, l.scale_updates); - - scale_bias(l.delta, l.scales, l.batch, l.outputs, 1); - - mean_delta_cpu(l.delta, l.variance, l.batch, l.outputs, 1, l.mean_delta); - variance_delta_cpu(l.x, l.delta, l.mean, l.variance, l.batch, l.outputs, 1, l.variance_delta); - normalize_delta_cpu(l.x, l.mean, l.variance, l.mean_delta, l.variance_delta, l.batch, l.outputs, 1, l.delta); 
- } - - int m = l.outputs; - int k = l.batch; - int n = l.inputs; - float *a = l.delta; - float *b = state.input; - float *c = l.weight_updates; - gemm(1,0,m,n,k,1,a,m,b,n,1,c,n); - - m = l.batch; - k = l.outputs; - n = l.inputs; - - a = l.delta; - b = l.weights; - c = state.delta; - - if(c) gemm(0,0,m,n,k,1,a,k,b,n,1,c,n); -} - - -void denormalize_connected_layer(layer l) -{ - int i, j; - for(i = 0; i < l.outputs; ++i){ - float scale = l.scales[i]/sqrt(l.rolling_variance[i] + .000001f); - for(j = 0; j < l.inputs; ++j){ - l.weights[i*l.inputs + j] *= scale; - } - l.biases[i] -= l.rolling_mean[i] * scale; - l.scales[i] = 1; - l.rolling_mean[i] = 0; - l.rolling_variance[i] = 1; - } -} - - -void statistics_connected_layer(layer l) -{ - if(l.batch_normalize){ - printf("Scales "); - print_statistics(l.scales, l.outputs); - /* - printf("Rolling Mean "); - print_statistics(l.rolling_mean, l.outputs); - printf("Rolling Variance "); - print_statistics(l.rolling_variance, l.outputs); - */ - } - printf("Biases "); - print_statistics(l.biases, l.outputs); - printf("Weights "); - print_statistics(l.weights, l.outputs); -} - -#ifdef GPU - -void pull_connected_layer(connected_layer l) -{ - cuda_pull_array(l.weights_gpu, l.weights, l.inputs*l.outputs); - cuda_pull_array(l.biases_gpu, l.biases, l.outputs); - cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.inputs*l.outputs); - cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.outputs); - if (l.batch_normalize){ - cuda_pull_array(l.scales_gpu, l.scales, l.outputs); - cuda_pull_array(l.rolling_mean_gpu, l.rolling_mean, l.outputs); - cuda_pull_array(l.rolling_variance_gpu, l.rolling_variance, l.outputs); - } - CHECK_CUDA(cudaPeekAtLastError()); -} - -void push_connected_layer(connected_layer l) -{ - cuda_push_array(l.weights_gpu, l.weights, l.inputs*l.outputs); - cuda_push_array(l.biases_gpu, l.biases, l.outputs); - cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.inputs*l.outputs); - 
cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.outputs); - if (l.batch_normalize){ - cuda_push_array(l.scales_gpu, l.scales, l.outputs); - cuda_push_array(l.rolling_mean_gpu, l.rolling_mean, l.outputs); - cuda_push_array(l.rolling_variance_gpu, l.rolling_variance, l.outputs); - } - CHECK_CUDA(cudaPeekAtLastError()); -} - -void update_connected_layer_gpu(connected_layer l, int batch, float learning_rate_init, float momentum, float decay, float loss_scale) -{ - float learning_rate = learning_rate_init * l.learning_rate_scale; - - // Loss scale for Mixed-Precision on Tensor-Cores - if (loss_scale != 1.0) { - scal_ongpu(l.inputs*l.outputs, 1.0 / loss_scale, l.weight_updates_gpu, 1); - scal_ongpu(l.outputs, 1.0 / loss_scale, l.bias_updates_gpu, 1); - scal_ongpu(l.outputs, 1.0 / loss_scale, l.scale_updates_gpu, 1); - } - - axpy_ongpu(l.outputs, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1); - scal_ongpu(l.outputs, momentum, l.bias_updates_gpu, 1); - - if(l.batch_normalize){ - axpy_ongpu(l.outputs, learning_rate/batch, l.scale_updates_gpu, 1, l.scales_gpu, 1); - scal_ongpu(l.outputs, momentum, l.scale_updates_gpu, 1); - } - - axpy_ongpu(l.inputs*l.outputs, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1); - axpy_ongpu(l.inputs*l.outputs, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1); - scal_ongpu(l.inputs*l.outputs, momentum, l.weight_updates_gpu, 1); -} - -void forward_connected_layer_gpu(connected_layer l, network_state state) -{ - fill_ongpu(l.outputs*l.batch, 0, l.output_gpu, 1); - - int m = l.batch; - int k = l.inputs; - int n = l.outputs; - float * a = state.input; - float * b = l.weights_gpu; - float * c = l.output_gpu; -#ifdef CUDNN - float one = 1; // alpha[0], beta[0] - float alpha = 1, beta = 0; - - CHECK_CUDNN(cudnnConvolutionForward(cudnn_handle(), - &alpha, //&one, - l.srcTensorDesc, - state.input, - l.weightDesc, - l.weights_gpu, - l.convDesc, - l.fw_algo, - state.workspace, - l.workspace_size, - &beta, 
//&one, - l.dstTensorDesc, - l.output_gpu)); -#else // CUDNN - gemm_ongpu(0,1,m,n,k,1,a,k,b,k,1,c,n); -#endif // CUDNN - - if (l.batch_normalize) { - forward_batchnorm_layer_gpu(l, state); - } - else { - add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.outputs, 1); - } - //for(i = 0; i < l.batch; ++i) axpy_ongpu(l.outputs, 1, l.biases_gpu, 1, l.output_gpu + i*l.outputs, 1); - activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation); -} - -void backward_connected_layer_gpu(connected_layer l, network_state state) -{ - int i; - constrain_ongpu(l.outputs*l.batch, 1, l.delta_gpu, 1); - gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); - for(i = 0; i < l.batch; ++i){ - axpy_ongpu(l.outputs, 1, l.delta_gpu + i*l.outputs, 1, l.bias_updates_gpu, 1); - } - - if(l.batch_normalize){ - backward_batchnorm_layer_gpu(l, state); - } - -#ifdef CUDNN_DISABLED - float one = 1; - // calculate conv weight updates - // if used: beta=1 then loss decreases faster - CHECK_CUDNN(cudnnConvolutionBackwardFilter(cudnn_handle(), - &one, - l.srcTensorDesc, - state.input, - l.ddstTensorDesc, - l.delta_gpu, - l.convDesc, - l.bf_algo, - state.workspace, - l.workspace_size, - &one, - l.dweightDesc, - l.weight_updates_gpu)); - - if (state.delta) { - // http://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/index.html#cudnnConvolutionBackwardData - // calculate delta for the next layer - - CHECK_CUDNN(cudnnConvolutionBackwardData(cudnn_handle(), - &one, - l.weightDesc, - l.weights_gpu, - l.ddstTensorDesc, - l.delta_gpu, - l.convDesc, - l.bd_algo, - state.workspace, - l.workspace_size, - &one, - l.dsrcTensorDesc, - state.delta)); - } -#else // CUDNN - - int m = l.outputs; - int k = l.batch; - int n = l.inputs; - float * a = l.delta_gpu; - float * b = state.input; - float * c = l.weight_updates_gpu; - - gemm_ongpu(1,0,m,n,k,1,a,m,b,n,1,c,n); - - m = l.batch; - k = l.outputs; - n = l.inputs; - - a = l.delta_gpu; - b = l.weights_gpu; - c = state.delta; - 
- if(c) gemm_ongpu(0,0,m,n,k,1,a,k,b,n,1,c,n); -#endif // CUDNN -} -#endif diff --git a/src/Detector/darknet/src/connected_layer.h b/src/Detector/darknet/src/connected_layer.h deleted file mode 100644 index 284c5125e..000000000 --- a/src/Detector/darknet/src/connected_layer.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef CONNECTED_LAYER_H -#define CONNECTED_LAYER_H - -#include "activations.h" -#include "layer.h" -#include "network.h" - -typedef layer connected_layer; - -#ifdef __cplusplus -extern "C" { -#endif -connected_layer make_connected_layer(int batch, int steps, int inputs, int outputs, ACTIVATION activation, int batch_normalize); -size_t get_connected_workspace_size(layer l); - -void forward_connected_layer(connected_layer layer, network_state state); -void backward_connected_layer(connected_layer layer, network_state state); -void update_connected_layer(connected_layer layer, int batch, float learning_rate, float momentum, float decay); -void denormalize_connected_layer(layer l); -void statistics_connected_layer(layer l); - -#ifdef GPU -void forward_connected_layer_gpu(connected_layer layer, network_state state); -void backward_connected_layer_gpu(connected_layer layer, network_state state); -void update_connected_layer_gpu(connected_layer layer, int batch, float learning_rate, float momentum, float decay, float loss_scale); -void push_connected_layer(connected_layer layer); -void pull_connected_layer(connected_layer layer); -#endif - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/Detector/darknet/src/conv_lstm_layer.c b/src/Detector/darknet/src/conv_lstm_layer.c deleted file mode 100644 index 72e7eac38..000000000 --- a/src/Detector/darknet/src/conv_lstm_layer.c +++ /dev/null @@ -1,1497 +0,0 @@ -// Page 4: https://arxiv.org/abs/1506.04214v2 -// Page 3: https://arxiv.org/pdf/1705.06368v3.pdf -// https://wikimedia.org/api/rest_v1/media/math/render/svg/1edbece2559479959fe829e9c6657efb380debe7 - -#include "conv_lstm_layer.h" -#include "connected_layer.h" 
-#include "convolutional_layer.h" -#include "utils.h" -#include "dark_cuda.h" -#include "blas.h" -#include "gemm.h" - -#include -#include -#include -#include - -static void increment_layer(layer *l, int steps) -{ - int num = l->outputs*l->batch*steps; - l->output += num; - l->delta += num; - l->x += num; - l->x_norm += num; - -#ifdef GPU - l->output_gpu += num; - l->delta_gpu += num; - l->x_gpu += num; - l->x_norm_gpu += num; -#endif -} - - -layer make_conv_lstm_layer(int batch, int h, int w, int c, int output_filters, int groups, int steps, int size, int stride, int dilation, int pad, ACTIVATION activation, int batch_normalize, int peephole, int xnor, int bottleneck, int train) -{ - fprintf(stderr, "CONV_LSTM Layer: %d x %d x %d image, %d filters\n", h, w, c, output_filters); - /* - batch = batch / steps; - layer l = { (LAYER_TYPE)0 }; - l.batch = batch; - l.type = LSTM; - l.steps = steps; - l.inputs = inputs; - l.out_w = 1; - l.out_h = 1; - l.out_c = outputs; - */ - batch = batch / steps; - layer l = { (LAYER_TYPE)0 }; - l.train = train; - l.batch = batch; - l.type = CONV_LSTM; - l.bottleneck = bottleneck; - l.steps = steps; - l.size = size; - l.stride = stride; - l.dilation = dilation; - l.pad = pad; - l.h = h; - l.w = w; - l.c = c; - l.groups = groups; - l.out_c = output_filters; - l.inputs = h * w * c; - l.xnor = xnor; - l.peephole = peephole; - - // U - l.uf = (layer*)xcalloc(1, sizeof(layer)); - *(l.uf) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0, 0, train); - l.uf->batch = batch; - if (l.workspace_size < l.uf->workspace_size) l.workspace_size = l.uf->workspace_size; - - l.ui = (layer*)xcalloc(1, sizeof(layer)); - *(l.ui) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0, 0, train); - l.ui->batch = batch; - if 
(l.workspace_size < l.ui->workspace_size) l.workspace_size = l.ui->workspace_size; - - l.ug = (layer*)xcalloc(1, sizeof(layer)); - *(l.ug) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0, 0, train); - l.ug->batch = batch; - if (l.workspace_size < l.ug->workspace_size) l.workspace_size = l.ug->workspace_size; - - l.uo = (layer*)xcalloc(1, sizeof(layer)); - *(l.uo) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0, 0, train); - l.uo->batch = batch; - if (l.workspace_size < l.uo->workspace_size) l.workspace_size = l.uo->workspace_size; - - if (l.bottleneck) { - // bottleneck-conv with 2x channels - l.wf = (layer*)xcalloc(1, sizeof(layer)); - l.wi = (layer*)xcalloc(1, sizeof(layer)); - l.wg = (layer*)xcalloc(1, sizeof(layer)); - l.wo = (layer*)xcalloc(1, sizeof(layer)); - *(l.wf) = make_convolutional_layer(batch, steps, h, w, output_filters*2, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0, 0, train); - l.wf->batch = batch; - if (l.workspace_size < l.wf->workspace_size) l.workspace_size = l.wf->workspace_size; - } - else { - // W - l.wf = (layer*)xcalloc(1, sizeof(layer)); - *(l.wf) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0, 0, train); - l.wf->batch = batch; - if (l.workspace_size < l.wf->workspace_size) l.workspace_size = l.wf->workspace_size; - - l.wi = (layer*)xcalloc(1, sizeof(layer)); - *(l.wi) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0, 0, train); - l.wi->batch = batch; - if 
(l.workspace_size < l.wi->workspace_size) l.workspace_size = l.wi->workspace_size; - - l.wg = (layer*)xcalloc(1, sizeof(layer)); - *(l.wg) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0, 0, train); - l.wg->batch = batch; - if (l.workspace_size < l.wg->workspace_size) l.workspace_size = l.wg->workspace_size; - - l.wo = (layer*)xcalloc(1, sizeof(layer)); - *(l.wo) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0, 0, train); - l.wo->batch = batch; - if (l.workspace_size < l.wo->workspace_size) l.workspace_size = l.wo->workspace_size; - } - - // V - l.vf = (layer*)xcalloc(1, sizeof(layer)); - if (l.peephole) { - *(l.vf) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0, 0, train); - l.vf->batch = batch; - if (l.workspace_size < l.vf->workspace_size) l.workspace_size = l.vf->workspace_size; - } - - l.vi = (layer*)xcalloc(1, sizeof(layer)); - if (l.peephole) { - *(l.vi) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0, 0, train); - l.vi->batch = batch; - if (l.workspace_size < l.vi->workspace_size) l.workspace_size = l.vi->workspace_size; - } - - l.vo = (layer*)xcalloc(1, sizeof(layer)); - if (l.peephole) { - *(l.vo) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0, 0, train); - l.vo->batch = batch; - if (l.workspace_size < l.vo->workspace_size) l.workspace_size = l.vo->workspace_size; - } - - - l.batch_normalize = 
batch_normalize; - - l.out_h = l.uo->out_h; - l.out_w = l.uo->out_w; - l.outputs = l.uo->outputs; - int outputs = l.outputs; - l.inputs = w*h*c; - - if (!l.bottleneck) assert(l.wo->outputs == l.uo->outputs); - assert(l.wf->outputs == l.uf->outputs); - - l.output = (float*)xcalloc(outputs * batch * steps, sizeof(float)); - //l.state = (float*)xcalloc(outputs * batch, sizeof(float)); - - l.forward = forward_conv_lstm_layer; - l.update = update_conv_lstm_layer; - l.backward = backward_conv_lstm_layer; - - l.prev_state_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); - l.prev_cell_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); - l.cell_cpu = (float*)xcalloc(batch*outputs*steps, sizeof(float)); - - l.f_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); - l.i_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); - l.g_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); - l.o_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); - l.c_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); - l.stored_c_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); - l.h_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); - l.stored_h_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); - l.temp_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); - l.temp2_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); - l.temp3_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); - l.dc_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); - l.dh_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); - - /* - { - int k; - for (k = 0; k < l.uf->n; ++k) { - l.uf->biases[k] = 2; // ~0.9 - l.ui->biases[k] = -22; // ~0.1 - l.uo->biases[k] = 5; // ~1.0 - } -#ifdef GPU - cuda_push_array(l.uf->biases_gpu, l.uf->biases, l.n); - cuda_push_array(l.ui->biases_gpu, l.ui->biases, l.n); - cuda_push_array(l.uo->biases_gpu, l.uo->biases, l.n); -#endif// GPU - } - */ - -#ifdef GPU - l.forward_gpu = forward_conv_lstm_layer_gpu; - l.backward_gpu = backward_conv_lstm_layer_gpu; - l.update_gpu = 
update_conv_lstm_layer_gpu; - - //l.state_gpu = cuda_make_array(l.state, batch*l.outputs); - - l.output_gpu = cuda_make_array(0, batch*outputs*steps); - l.delta_gpu = cuda_make_array(0, batch*l.outputs*steps); - - l.prev_state_gpu = cuda_make_array(0, batch*outputs); - l.prev_cell_gpu = cuda_make_array(0, batch*outputs); - l.cell_gpu = cuda_make_array(0, batch*outputs*steps); - - l.f_gpu = cuda_make_array(0, batch*outputs); - l.i_gpu = cuda_make_array(0, batch*outputs); - l.g_gpu = cuda_make_array(0, batch*outputs); - l.o_gpu = cuda_make_array(0, batch*outputs); - l.c_gpu = cuda_make_array(0, batch*outputs); - if (l.bottleneck) { - l.bottelneck_hi_gpu = cuda_make_array(0, batch*outputs * 2); - l.bottelneck_delta_gpu = cuda_make_array(0, batch*outputs * 2); - } - l.h_gpu = cuda_make_array(0, batch*outputs); - l.stored_c_gpu = cuda_make_array(0, batch*outputs); - l.stored_h_gpu = cuda_make_array(0, batch*outputs); - l.temp_gpu = cuda_make_array(0, batch*outputs); - l.temp2_gpu = cuda_make_array(0, batch*outputs); - l.temp3_gpu = cuda_make_array(0, batch*outputs); - l.dc_gpu = cuda_make_array(0, batch*outputs); - l.dh_gpu = cuda_make_array(0, batch*outputs); - l.last_prev_state_gpu = cuda_make_array(0, l.batch*l.outputs); - l.last_prev_cell_gpu = cuda_make_array(0, l.batch*l.outputs); -#endif - - l.bflops = l.uf->bflops + l.ui->bflops + l.ug->bflops + l.uo->bflops + - l.wf->bflops + l.wi->bflops + l.wg->bflops + l.wo->bflops + - l.vf->bflops + l.vi->bflops + l.vo->bflops; - - if(l.peephole) l.bflops += 12 * l.outputs*l.batch / 1000000000.; - else l.bflops += 9 * l.outputs*l.batch / 1000000000.; - - return l; -} - -layer make_history_layer(int batch, int h, int w, int c, int history_size, int steps, int train) -{ - layer l = { (LAYER_TYPE)0 }; - l.train = train; - l.batch = batch; - l.type = HISTORY; - l.steps = steps; - l.history_size = history_size; - l.h = h; - l.w = w; - l.c = c; - l.out_h = h; - l.out_w = w; - l.out_c = c * history_size; - l.inputs = h * w * c; - 
l.outputs = h * w * c * history_size; - - l.forward = forward_history_layer; - l.backward = backward_history_layer; - - fprintf(stderr, "HISTORY b = %d, s = %2d, steps = %2d %4d x%4d x%4d -> %4d x%4d x%4d \n", l.batch / l.steps, l.history_size, l.steps, w, h, c, l.out_w, l.out_h, l.out_c); - - l.output = (float*)xcalloc(l.batch * l.outputs, sizeof(float)); - l.delta = (float*)xcalloc(l.batch * l.outputs, sizeof(float)); - - l.prev_state_cpu = (float*)xcalloc(l.batch*l.outputs, sizeof(float)); - -#ifdef GPU - - l.forward_gpu = forward_history_layer_gpu; - l.backward_gpu = backward_history_layer_gpu; - - l.output_gpu = cuda_make_array(0, l.batch * l.outputs); - l.delta_gpu = cuda_make_array(0, l.batch * l.outputs); - - l.prev_state_gpu = cuda_make_array(0, l.batch*l.outputs); - -#endif // GPU - - //l.batch = 4; - //l.steps = 1; - - return l; -} - -void forward_history_layer(layer l, network_state state) -{ - if (l.steps == 1) { - copy_cpu(l.inputs*l.batch, state.input, 1, l.output, 1); - return; - } - - const int batch = l.batch / l.steps; - - float *prev_output = l.prev_state_cpu; - - int i; - for (i = 0; i < l.steps; ++i) { - // shift cell - int shift_size = l.inputs * (l.history_size - 1); - int output_sift = l.inputs; - - int b; - for (b = 0; b < batch; ++b) { - int input_start = b*l.inputs + i*l.inputs*batch; - int output_start = b*l.outputs + i*l.outputs*batch; - float *input = state.input + input_start; - float *output = l.output + output_start; - - copy_cpu(shift_size, prev_output + b*l.outputs, 1, output + output_sift, 1); - - copy_cpu(l.inputs, input, 1, output, 1); - } - prev_output = l.output + i*l.outputs*batch; - } - - int output_start = (l.steps-1)*l.outputs*batch; - copy_cpu(batch*l.outputs, l.output + output_start, 1, l.prev_state_cpu, 1); -} - -void backward_history_layer(layer l, network_state state) -{ - if (l.steps == 1) { - axpy_cpu(l.inputs*l.batch, 1, l.delta, 1, state.delta, 1); - return; - } - - const int batch = l.batch / l.steps; - - // 
l.delta -> state.delta - int i; - for (i = 0; i < l.steps; ++i) { - int b; - for (b = 0; b < batch; ++b) { - int input_start = b*l.inputs + i*l.inputs*batch; - int output_start = b*l.outputs + i*l.outputs*batch; - float *state_delta = state.delta + input_start; - float *l_delta = l.delta + output_start; - - //copy_cpu(l.inputs, l_delta, 1, state_delta, 1); - axpy_cpu(l.inputs, 1, l_delta, 1, state_delta, 1); - } - } -} - -#ifdef GPU -void forward_history_layer_gpu(const layer l, network_state state) -{ - if (l.steps == 1) { - simple_copy_ongpu(l.inputs*l.batch, state.input, l.output_gpu); - return; - } - - const int batch = l.batch / l.steps; - - //int copy_size = l.inputs*batch*l.steps; - //printf(" copy_size = %d, inputs = %d, batch = %d, steps = %d, l.history_size = %d \n", copy_size, l.inputs, batch, l.steps, l.history_size); - //simple_copy_ongpu(copy_size, state.input, l.output_gpu); - //return; - - //fill_ongpu(batch*l.outputs, 0, l.prev_state_gpu, 1); - float *prev_output = l.prev_state_gpu; - - int i; - for (i = 0; i < l.steps; ++i) { - // shift cell - int shift_size = l.inputs * (l.history_size - 1); - int output_sift = l.inputs; - - int b; - for (b = 0; b < batch; ++b) { - //printf(" hist-fw: i = %d, b = %d \n", i, b); - - int input_start = b*l.inputs + i*l.inputs*batch; - int output_start = b*l.outputs + i*l.outputs*batch; - float *input = state.input + input_start; - float *output = l.output_gpu + output_start; - - //copy_cpu(shift_size, prev_output + b*l.outputs, 1, output + output_sift, 1); - simple_copy_ongpu(shift_size, prev_output + b*l.outputs, output + output_sift); - - //copy_cpu(l.inputs, input, 1, output, 1); - simple_copy_ongpu(l.inputs, input, output); - - int h; - for (h = 1; h < l.history_size; ++h) { - //scal_ongpu(l.inputs, (l.history_size - h)/ (float)l.history_size, output + h*l.inputs, 1); - //scal_ongpu(l.inputs, 0, output + h*l.inputs, 1); - } - } - prev_output = l.output_gpu + i*l.outputs*batch; - } - - int output_start = (l.steps 
- 1)*l.outputs*batch; - //copy_cpu(batch*l.outputs, l.output + output_start, 1, l.prev_state_cpu, 1); - simple_copy_ongpu(batch*l.outputs, l.output_gpu + output_start, l.prev_state_gpu); -} - -void backward_history_layer_gpu(const layer l, network_state state) -{ - if (l.steps == 1) { - axpy_ongpu(l.inputs*l.batch, 1, l.delta_gpu, 1, state.delta, 1); - return; - } - - const int batch = l.batch / l.steps; - - //int copy_size = l.inputs*batch*l.steps; - //printf(" copy_size = %d, inputs = %d, batch = %d, steps = %d, l.history_size = %d \n", copy_size, l.inputs, batch, l.steps, l.history_size); - //axpy_ongpu(copy_size, 1, l.delta_gpu, 1, state.delta, 1); - //return; - - // l.delta -> state.delta - int i; - for (i = 0; i < l.steps; ++i) { - int b; - for (b = 0; b < batch; ++b) { - //printf(" hist-bw: i = %d, b = %d \n", i, b); - - int input_start = b*l.inputs + i*l.inputs*batch; - int output_start = b*l.outputs + i*l.outputs*batch; - float *state_delta = state.delta + input_start; - float *l_delta = l.delta_gpu + output_start; - - //copy_cpu(l.inputs, l_delta, 1, state_delta, 1); - axpy_ongpu(l.inputs, 1, l_delta, 1, state_delta, 1); - } - } -} -#endif - - -void update_conv_lstm_layer(layer l, int batch, float learning_rate, float momentum, float decay) -{ - if (l.peephole) { - update_convolutional_layer(*(l.vf), batch, learning_rate, momentum, decay); - update_convolutional_layer(*(l.vi), batch, learning_rate, momentum, decay); - update_convolutional_layer(*(l.vo), batch, learning_rate, momentum, decay); - } - update_convolutional_layer(*(l.wf), batch, learning_rate, momentum, decay); - update_convolutional_layer(*(l.wi), batch, learning_rate, momentum, decay); - update_convolutional_layer(*(l.wg), batch, learning_rate, momentum, decay); - update_convolutional_layer(*(l.wo), batch, learning_rate, momentum, decay); - update_convolutional_layer(*(l.uf), batch, learning_rate, momentum, decay); - update_convolutional_layer(*(l.ui), batch, learning_rate, momentum, decay); 
- update_convolutional_layer(*(l.ug), batch, learning_rate, momentum, decay); - update_convolutional_layer(*(l.uo), batch, learning_rate, momentum, decay); -} - -void resize_conv_lstm_layer(layer *l, int w, int h) -{ - if (l->peephole) { - resize_convolutional_layer(l->vf, w, h); - if (l->workspace_size < l->vf->workspace_size) l->workspace_size = l->vf->workspace_size; - - resize_convolutional_layer(l->vi, w, h); - if (l->workspace_size < l->vi->workspace_size) l->workspace_size = l->vi->workspace_size; - - resize_convolutional_layer(l->vo, w, h); - if (l->workspace_size < l->vo->workspace_size) l->workspace_size = l->vo->workspace_size; - } - - resize_convolutional_layer(l->wf, w, h); - if (l->workspace_size < l->wf->workspace_size) l->workspace_size = l->wf->workspace_size; - - resize_convolutional_layer(l->wi, w, h); - if (l->workspace_size < l->wi->workspace_size) l->workspace_size = l->wi->workspace_size; - - resize_convolutional_layer(l->wg, w, h); - if (l->workspace_size < l->wg->workspace_size) l->workspace_size = l->wg->workspace_size; - - resize_convolutional_layer(l->wo, w, h); - if (l->workspace_size < l->wo->workspace_size) l->workspace_size = l->wo->workspace_size; - - - resize_convolutional_layer(l->uf, w, h); - if (l->workspace_size < l->uf->workspace_size) l->workspace_size = l->uf->workspace_size; - - resize_convolutional_layer(l->ui, w, h); - if (l->workspace_size < l->ui->workspace_size) l->workspace_size = l->ui->workspace_size; - - resize_convolutional_layer(l->ug, w, h); - if (l->workspace_size < l->ug->workspace_size) l->workspace_size = l->ug->workspace_size; - - resize_convolutional_layer(l->uo, w, h); - if (l->workspace_size < l->uo->workspace_size) l->workspace_size = l->uo->workspace_size; - - l->w = w; - l->h = h; - l->out_h = l->wo->out_h; - l->out_w = l->wo->out_w; - l->outputs = l->wo->outputs; - int outputs = l->outputs; - l->inputs = w*h*l->c; - int steps = l->steps; - int batch = l->batch; - - assert(l->wo->outputs == 
l->uo->outputs); - - l->output = (float*)xrealloc(l->output, outputs * batch * steps * sizeof(float)); - //l->state = (float*)xrealloc(l->state, outputs * batch * sizeof(float)); - - l->prev_state_cpu = (float*)xrealloc(l->prev_state_cpu, batch*outputs * sizeof(float)); - l->prev_cell_cpu = (float*)xrealloc(l->prev_cell_cpu, batch*outputs * sizeof(float)); - l->cell_cpu = (float*)xrealloc(l->cell_cpu, batch*outputs*steps * sizeof(float)); - - l->f_cpu = (float*)xrealloc(l->f_cpu, batch*outputs * sizeof(float)); - l->i_cpu = (float*)xrealloc(l->i_cpu, batch*outputs * sizeof(float)); - l->g_cpu = (float*)xrealloc(l->g_cpu, batch*outputs * sizeof(float)); - l->o_cpu = (float*)xrealloc(l->o_cpu, batch*outputs * sizeof(float)); - l->c_cpu = (float*)xrealloc(l->c_cpu, batch*outputs * sizeof(float)); - l->h_cpu = (float*)xrealloc(l->h_cpu, batch*outputs * sizeof(float)); - l->temp_cpu = (float*)xrealloc(l->temp_cpu, batch*outputs * sizeof(float)); - l->temp2_cpu = (float*)xrealloc(l->temp2_cpu, batch*outputs * sizeof(float)); - l->temp3_cpu = (float*)xrealloc(l->temp3_cpu, batch*outputs * sizeof(float)); - l->dc_cpu = (float*)xrealloc(l->dc_cpu, batch*outputs * sizeof(float)); - l->dh_cpu = (float*)xrealloc(l->dh_cpu, batch*outputs * sizeof(float)); - l->stored_c_cpu = (float*)xrealloc(l->stored_c_cpu, batch*outputs * sizeof(float)); - l->stored_h_cpu = (float*)xrealloc(l->stored_h_cpu, batch*outputs * sizeof(float)); - -#ifdef GPU - //if (l->state_gpu) cudaFree(l->state_gpu); - //l->state_gpu = cuda_make_array(l->state, batch*l->outputs); - - if (l->output_gpu) cudaFree(l->output_gpu); - l->output_gpu = cuda_make_array(0, batch*outputs*steps); - - if (l->delta_gpu) cudaFree(l->delta_gpu); - l->delta_gpu = cuda_make_array(0, batch*outputs*steps); - - if (l->prev_state_gpu) cudaFree(l->prev_state_gpu); - l->prev_state_gpu = cuda_make_array(0, batch*outputs); - - if (l->prev_cell_gpu) cudaFree(l->prev_cell_gpu); - l->prev_cell_gpu = cuda_make_array(0, batch*outputs); - - if 
(l->cell_gpu) cudaFree(l->cell_gpu); - l->cell_gpu = cuda_make_array(0, batch*outputs*steps); - - if (l->f_gpu) cudaFree(l->f_gpu); - l->f_gpu = cuda_make_array(0, batch*outputs); - - if (l->i_gpu) cudaFree(l->i_gpu); - l->i_gpu = cuda_make_array(0, batch*outputs); - - if (l->g_gpu) cudaFree(l->g_gpu); - l->g_gpu = cuda_make_array(0, batch*outputs); - - if (l->o_gpu) cudaFree(l->o_gpu); - l->o_gpu = cuda_make_array(0, batch*outputs); - - if (l->c_gpu) cudaFree(l->c_gpu); - l->c_gpu = cuda_make_array(0, batch*outputs); - - if (l->h_gpu) cudaFree(l->h_gpu); - l->h_gpu = cuda_make_array(0, batch*outputs); - - if (l->temp_gpu) cudaFree(l->temp_gpu); - l->temp_gpu = cuda_make_array(0, batch*outputs); - - if (l->temp2_gpu) cudaFree(l->temp2_gpu); - l->temp2_gpu = cuda_make_array(0, batch*outputs); - - if (l->temp3_gpu) cudaFree(l->temp3_gpu); - l->temp3_gpu = cuda_make_array(0, batch*outputs); - - if (l->dc_gpu) cudaFree(l->dc_gpu); - l->dc_gpu = cuda_make_array(0, batch*outputs); - - if (l->dh_gpu) cudaFree(l->dh_gpu); - l->dh_gpu = cuda_make_array(0, batch*outputs); - - if (l->stored_c_gpu) cudaFree(l->stored_c_gpu); - l->stored_c_gpu = cuda_make_array(0, batch*outputs); - - if (l->stored_h_gpu) cudaFree(l->stored_h_gpu); - l->stored_h_gpu = cuda_make_array(0, batch*outputs); - - if (l->last_prev_state_gpu) cudaFree(l->last_prev_state_gpu); - l->last_prev_state_gpu = cuda_make_array(0, batch*outputs); - - if (l->last_prev_cell_gpu) cudaFree(l->last_prev_cell_gpu); - l->last_prev_cell_gpu = cuda_make_array(0, batch*outputs); -#endif -} - -void free_state_conv_lstm(layer l) -{ - int i; - for (i = 0; i < l.outputs * l.batch; ++i) l.h_cpu[i] = 0; - for (i = 0; i < l.outputs * l.batch; ++i) l.c_cpu[i] = 0; - -#ifdef GPU - cuda_push_array(l.h_gpu, l.h_cpu, l.outputs * l.batch); - cuda_push_array(l.c_gpu, l.c_cpu, l.outputs * l.batch); - - //fill_ongpu(l.outputs * l.batch, 0, l.dc_gpu, 1); // dont use - //fill_ongpu(l.outputs * l.batch, 0, l.dh_gpu, 1); // dont use -#endif // 
GPU -} - -void randomize_state_conv_lstm(layer l) -{ - int i; - for (i = 0; i < l.outputs * l.batch; ++i) l.h_cpu[i] = rand_uniform(-1, 1); - for (i = 0; i < l.outputs * l.batch; ++i) l.c_cpu[i] = rand_uniform(-1, 1); - -#ifdef GPU - cuda_push_array(l.h_gpu, l.h_cpu, l.outputs * l.batch); - cuda_push_array(l.c_gpu, l.c_cpu, l.outputs * l.batch); -#endif // GPU -} - - -void remember_state_conv_lstm(layer l) -{ - memcpy(l.stored_c_cpu, l.c_cpu, l.outputs * l.batch * sizeof(float)); - memcpy(l.stored_h_cpu, l.h_cpu, l.outputs * l.batch * sizeof(float)); - -#ifdef GPU - copy_ongpu(l.outputs*l.batch, l.c_gpu, 1, l.stored_c_gpu, 1); - copy_ongpu(l.outputs*l.batch, l.h_gpu, 1, l.stored_h_gpu, 1); -#endif // GPU -} - -void restore_state_conv_lstm(layer l) -{ - memcpy(l.c_cpu, l.stored_c_cpu, l.outputs * l.batch * sizeof(float)); - memcpy(l.h_cpu, l.stored_h_cpu, l.outputs * l.batch * sizeof(float)); - -#ifdef GPU - copy_ongpu(l.outputs*l.batch, l.stored_c_gpu, 1, l.c_gpu, 1); - copy_ongpu(l.outputs*l.batch, l.stored_h_gpu, 1, l.h_gpu, 1); -#endif // GPU -} - -void forward_conv_lstm_layer(layer l, network_state state) -{ - network_state s = { 0 }; - s.train = state.train; - s.workspace = state.workspace; - s.net = state.net; - int i; - layer vf = *(l.vf); - layer vi = *(l.vi); - layer vo = *(l.vo); - - layer wf = *(l.wf); - layer wi = *(l.wi); - layer wg = *(l.wg); - layer wo = *(l.wo); - - layer uf = *(l.uf); - layer ui = *(l.ui); - layer ug = *(l.ug); - layer uo = *(l.uo); - - if (state.train) { - if (l.peephole) { - fill_cpu(l.outputs * l.batch * l.steps, 0, vf.delta, 1); - fill_cpu(l.outputs * l.batch * l.steps, 0, vi.delta, 1); - fill_cpu(l.outputs * l.batch * l.steps, 0, vo.delta, 1); - } - - fill_cpu(l.outputs * l.batch * l.steps, 0, wf.delta, 1); - fill_cpu(l.outputs * l.batch * l.steps, 0, wi.delta, 1); - fill_cpu(l.outputs * l.batch * l.steps, 0, wg.delta, 1); - fill_cpu(l.outputs * l.batch * l.steps, 0, wo.delta, 1); - - fill_cpu(l.outputs * l.batch * l.steps, 0, 
uf.delta, 1); - fill_cpu(l.outputs * l.batch * l.steps, 0, ui.delta, 1); - fill_cpu(l.outputs * l.batch * l.steps, 0, ug.delta, 1); - fill_cpu(l.outputs * l.batch * l.steps, 0, uo.delta, 1); - - fill_cpu(l.outputs * l.batch * l.steps, 0, l.delta, 1); - } - - for (i = 0; i < l.steps; ++i) - { - if (l.peephole) { - assert(l.outputs == vf.out_w * vf.out_h * vf.out_c); - s.input = l.c_cpu; - forward_convolutional_layer(vf, s); - forward_convolutional_layer(vi, s); - // vo below - } - - assert(l.outputs == wf.out_w * wf.out_h * wf.out_c); - assert(wf.c == l.out_c && wi.c == l.out_c && wg.c == l.out_c && wo.c == l.out_c); - - s.input = l.h_cpu; - forward_convolutional_layer(wf, s); - forward_convolutional_layer(wi, s); - forward_convolutional_layer(wg, s); - forward_convolutional_layer(wo, s); - - assert(l.inputs == uf.w * uf.h * uf.c); - assert(uf.c == l.c && ui.c == l.c && ug.c == l.c && uo.c == l.c); - - s.input = state.input; - forward_convolutional_layer(uf, s); - forward_convolutional_layer(ui, s); - forward_convolutional_layer(ug, s); - forward_convolutional_layer(uo, s); - - // f = wf + uf + vf - copy_cpu(l.outputs*l.batch, wf.output, 1, l.f_cpu, 1); - axpy_cpu(l.outputs*l.batch, 1, uf.output, 1, l.f_cpu, 1); - if (l.peephole) axpy_cpu(l.outputs*l.batch, 1, vf.output, 1, l.f_cpu, 1); - - // i = wi + ui + vi - copy_cpu(l.outputs*l.batch, wi.output, 1, l.i_cpu, 1); - axpy_cpu(l.outputs*l.batch, 1, ui.output, 1, l.i_cpu, 1); - if (l.peephole) axpy_cpu(l.outputs*l.batch, 1, vi.output, 1, l.i_cpu, 1); - - // g = wg + ug - copy_cpu(l.outputs*l.batch, wg.output, 1, l.g_cpu, 1); - axpy_cpu(l.outputs*l.batch, 1, ug.output, 1, l.g_cpu, 1); - - activate_array(l.f_cpu, l.outputs*l.batch, LOGISTIC); - activate_array(l.i_cpu, l.outputs*l.batch, LOGISTIC); - activate_array(l.g_cpu, l.outputs*l.batch, TANH); - - // c = f*c + i*g - copy_cpu(l.outputs*l.batch, l.i_cpu, 1, l.temp_cpu, 1); - mul_cpu(l.outputs*l.batch, l.g_cpu, 1, l.temp_cpu, 1); - mul_cpu(l.outputs*l.batch, l.f_cpu, 
1, l.c_cpu, 1); - axpy_cpu(l.outputs*l.batch, 1, l.temp_cpu, 1, l.c_cpu, 1); - - // o = wo + uo + vo(c_new) - if (l.peephole) { - s.input = l.c_cpu; - forward_convolutional_layer(vo, s); - } - copy_cpu(l.outputs*l.batch, wo.output, 1, l.o_cpu, 1); - axpy_cpu(l.outputs*l.batch, 1, uo.output, 1, l.o_cpu, 1); - if (l.peephole) axpy_cpu(l.outputs*l.batch, 1, vo.output, 1, l.o_cpu, 1); - activate_array(l.o_cpu, l.outputs*l.batch, LOGISTIC); - - // h = o * tanh(c) - copy_cpu(l.outputs*l.batch, l.c_cpu, 1, l.h_cpu, 1); - activate_array(l.h_cpu, l.outputs*l.batch, TANH); - mul_cpu(l.outputs*l.batch, l.o_cpu, 1, l.h_cpu, 1); - - if (l.state_constrain) constrain_cpu(l.outputs*l.batch, l.state_constrain, l.c_cpu); - fix_nan_and_inf_cpu(l.c_cpu, l.outputs*l.batch); - fix_nan_and_inf_cpu(l.h_cpu, l.outputs*l.batch); - - copy_cpu(l.outputs*l.batch, l.c_cpu, 1, l.cell_cpu, 1); - copy_cpu(l.outputs*l.batch, l.h_cpu, 1, l.output, 1); - - state.input += l.inputs*l.batch; - l.output += l.outputs*l.batch; - l.cell_cpu += l.outputs*l.batch; - - if (l.peephole) { - increment_layer(&vf, 1); - increment_layer(&vi, 1); - increment_layer(&vo, 1); - } - - increment_layer(&wf, 1); - increment_layer(&wi, 1); - increment_layer(&wg, 1); - increment_layer(&wo, 1); - - increment_layer(&uf, 1); - increment_layer(&ui, 1); - increment_layer(&ug, 1); - increment_layer(&uo, 1); - } -} - -void backward_conv_lstm_layer(layer l, network_state state) -{ - network_state s = { 0 }; - s.train = state.train; - s.workspace = state.workspace; - int i; - layer vf = *(l.vf); - layer vi = *(l.vi); - layer vo = *(l.vo); - - layer wf = *(l.wf); - layer wi = *(l.wi); - layer wg = *(l.wg); - layer wo = *(l.wo); - - layer uf = *(l.uf); - layer ui = *(l.ui); - layer ug = *(l.ug); - layer uo = *(l.uo); - - if (l.peephole) { - increment_layer(&vf, l.steps - 1); - increment_layer(&vi, l.steps - 1); - increment_layer(&vo, l.steps - 1); - } - - increment_layer(&wf, l.steps - 1); - increment_layer(&wi, l.steps - 1); - 
increment_layer(&wg, l.steps - 1); - increment_layer(&wo, l.steps - 1); - - increment_layer(&uf, l.steps - 1); - increment_layer(&ui, l.steps - 1); - increment_layer(&ug, l.steps - 1); - increment_layer(&uo, l.steps - 1); - - state.input += l.inputs*l.batch*(l.steps - 1); - if (state.delta) state.delta += l.inputs*l.batch*(l.steps - 1); - - l.output += l.outputs*l.batch*(l.steps - 1); - l.cell_cpu += l.outputs*l.batch*(l.steps - 1); - l.delta += l.outputs*l.batch*(l.steps - 1); - - for (i = l.steps - 1; i >= 0; --i) { - if (i != 0) copy_cpu(l.outputs*l.batch, l.cell_cpu - l.outputs*l.batch, 1, l.prev_cell_cpu, 1); - copy_cpu(l.outputs*l.batch, l.cell_cpu, 1, l.c_cpu, 1); - if (i != 0) copy_cpu(l.outputs*l.batch, l.output - l.outputs*l.batch, 1, l.prev_state_cpu, 1); - copy_cpu(l.outputs*l.batch, l.output, 1, l.h_cpu, 1); - - l.dh_cpu = (i == 0) ? 0 : l.delta - l.outputs*l.batch; - - // f = wf + uf + vf - copy_cpu(l.outputs*l.batch, wf.output, 1, l.f_cpu, 1); - axpy_cpu(l.outputs*l.batch, 1, uf.output, 1, l.f_cpu, 1); - if (l.peephole) axpy_cpu(l.outputs*l.batch, 1, vf.output, 1, l.f_cpu, 1); - - // i = wi + ui + vi - copy_cpu(l.outputs*l.batch, wi.output, 1, l.i_cpu, 1); - axpy_cpu(l.outputs*l.batch, 1, ui.output, 1, l.i_cpu, 1); - if (l.peephole) axpy_cpu(l.outputs*l.batch, 1, vi.output, 1, l.i_cpu, 1); - - // g = wg + ug - copy_cpu(l.outputs*l.batch, wg.output, 1, l.g_cpu, 1); - axpy_cpu(l.outputs*l.batch, 1, ug.output, 1, l.g_cpu, 1); - - // o = wo + uo + vo - copy_cpu(l.outputs*l.batch, wo.output, 1, l.o_cpu, 1); - axpy_cpu(l.outputs*l.batch, 1, uo.output, 1, l.o_cpu, 1); - if (l.peephole) axpy_cpu(l.outputs*l.batch, 1, vo.output, 1, l.o_cpu, 1); - - activate_array(l.f_cpu, l.outputs*l.batch, LOGISTIC); - activate_array(l.i_cpu, l.outputs*l.batch, LOGISTIC); - activate_array(l.g_cpu, l.outputs*l.batch, TANH); - activate_array(l.o_cpu, l.outputs*l.batch, LOGISTIC); - - copy_cpu(l.outputs*l.batch, l.delta, 1, l.temp3_cpu, 1); - - copy_cpu(l.outputs*l.batch, 
l.c_cpu, 1, l.temp_cpu, 1); - activate_array(l.temp_cpu, l.outputs*l.batch, TANH); - - copy_cpu(l.outputs*l.batch, l.temp3_cpu, 1, l.temp2_cpu, 1); - mul_cpu(l.outputs*l.batch, l.o_cpu, 1, l.temp2_cpu, 1); - - gradient_array(l.temp_cpu, l.outputs*l.batch, TANH, l.temp2_cpu); - axpy_cpu(l.outputs*l.batch, 1, l.dc_cpu, 1, l.temp2_cpu, 1); - // temp = tanh(c) - // temp2 = delta * o * grad_tanh(tanh(c)) - // temp3 = delta - - copy_cpu(l.outputs*l.batch, l.c_cpu, 1, l.temp_cpu, 1); - activate_array(l.temp_cpu, l.outputs*l.batch, TANH); - mul_cpu(l.outputs*l.batch, l.temp3_cpu, 1, l.temp_cpu, 1); - gradient_array(l.o_cpu, l.outputs*l.batch, LOGISTIC, l.temp_cpu); - // delta for o(w,u,v): temp = delta * tanh(c) * grad_logistic(o) - // delta for c,f,i,g(w,u,v): temp2 = delta * o * grad_tanh(tanh(c)) + delta_c(???) - // delta for output: temp3 = delta - - // o - // delta for O(w,u,v): temp = delta * tanh(c) * grad_logistic(o) - if (l.peephole) { - copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, vo.delta, 1); - s.input = l.cell_cpu; - //s.delta = l.dc_cpu; - backward_convolutional_layer(vo, s); - } - - copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, wo.delta, 1); - s.input = l.prev_state_cpu; - //s.delta = l.dh_cpu; - backward_convolutional_layer(wo, s); - - copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, uo.delta, 1); - s.input = state.input; - s.delta = state.delta; - backward_convolutional_layer(uo, s); - - // g - copy_cpu(l.outputs*l.batch, l.temp2_cpu, 1, l.temp_cpu, 1); - mul_cpu(l.outputs*l.batch, l.i_cpu, 1, l.temp_cpu, 1); - gradient_array(l.g_cpu, l.outputs*l.batch, TANH, l.temp_cpu); - // delta for c,f,i,g(w,u,v): temp2 = (delta * o * grad_tanh(tanh(c)) + delta_c(???)) * g * grad_logistic(i) - - copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, wg.delta, 1); - s.input = l.prev_state_cpu; - //s.delta = l.dh_cpu; - backward_convolutional_layer(wg, s); - - copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, ug.delta, 1); - s.input = state.input; - s.delta = state.delta; - 
backward_convolutional_layer(ug, s); - - // i - copy_cpu(l.outputs*l.batch, l.temp2_cpu, 1, l.temp_cpu, 1); - mul_cpu(l.outputs*l.batch, l.g_cpu, 1, l.temp_cpu, 1); - gradient_array(l.i_cpu, l.outputs*l.batch, LOGISTIC, l.temp_cpu); - // delta for c,f,i,g(w,u,v): temp2 = (delta * o * grad_tanh(tanh(c)) + delta_c(???)) * g * grad_logistic(i) - - if (l.peephole) { - copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, vi.delta, 1); - s.input = l.prev_cell_cpu; - //s.delta = l.dc_cpu; - backward_convolutional_layer(vi, s); - } - - copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, wi.delta, 1); - s.input = l.prev_state_cpu; - //s.delta = l.dh_cpu; - backward_convolutional_layer(wi, s); - - copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, ui.delta, 1); - s.input = state.input; - s.delta = state.delta; - backward_convolutional_layer(ui, s); - - // f - copy_cpu(l.outputs*l.batch, l.temp2_cpu, 1, l.temp_cpu, 1); - mul_cpu(l.outputs*l.batch, l.prev_cell_cpu, 1, l.temp_cpu, 1); - gradient_array(l.f_cpu, l.outputs*l.batch, LOGISTIC, l.temp_cpu); - // delta for c,f,i,g(w,u,v): temp2 = (delta * o * grad_tanh(tanh(c)) + delta_c(???)) * c * grad_logistic(f) - - if (l.peephole) { - copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, vf.delta, 1); - s.input = l.prev_cell_cpu; - //s.delta = l.dc_cpu; - backward_convolutional_layer(vf, s); - } - - copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, wf.delta, 1); - s.input = l.prev_state_cpu; - //s.delta = l.dh_cpu; - backward_convolutional_layer(wf, s); - - copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, uf.delta, 1); - s.input = state.input; - s.delta = state.delta; - backward_convolutional_layer(uf, s); - - copy_cpu(l.outputs*l.batch, l.temp2_cpu, 1, l.temp_cpu, 1); - mul_cpu(l.outputs*l.batch, l.f_cpu, 1, l.temp_cpu, 1); - copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, l.dc_cpu, 1); - - state.input -= l.inputs*l.batch; - if (state.delta) state.delta -= l.inputs*l.batch; - l.output -= l.outputs*l.batch; - l.cell_cpu -= l.outputs*l.batch; - l.delta -= l.outputs*l.batch; - - if 
(l.peephole) { - increment_layer(&vf, -1); - increment_layer(&vi, -1); - increment_layer(&vo, -1); - } - - increment_layer(&wf, -1); - increment_layer(&wi, -1); - increment_layer(&wg, -1); - increment_layer(&wo, -1); - - increment_layer(&uf, -1); - increment_layer(&ui, -1); - increment_layer(&ug, -1); - increment_layer(&uo, -1); - } -} - -#ifdef GPU -void pull_conv_lstm_layer(layer l) -{ - if (l.peephole) { - pull_convolutional_layer(*(l.vf)); - pull_convolutional_layer(*(l.vi)); - pull_convolutional_layer(*(l.vo)); - } - pull_convolutional_layer(*(l.wf)); - if (!l.bottleneck) { - pull_convolutional_layer(*(l.wi)); - pull_convolutional_layer(*(l.wg)); - pull_convolutional_layer(*(l.wo)); - } - pull_convolutional_layer(*(l.uf)); - pull_convolutional_layer(*(l.ui)); - pull_convolutional_layer(*(l.ug)); - pull_convolutional_layer(*(l.uo)); -} - -void push_conv_lstm_layer(layer l) -{ - if (l.peephole) { - push_convolutional_layer(*(l.vf)); - push_convolutional_layer(*(l.vi)); - push_convolutional_layer(*(l.vo)); - } - push_convolutional_layer(*(l.wf)); - if (!l.bottleneck) { - push_convolutional_layer(*(l.wi)); - push_convolutional_layer(*(l.wg)); - push_convolutional_layer(*(l.wo)); - } - push_convolutional_layer(*(l.uf)); - push_convolutional_layer(*(l.ui)); - push_convolutional_layer(*(l.ug)); - push_convolutional_layer(*(l.uo)); -} - -void update_conv_lstm_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay, float loss_scale) -{ - if (l.peephole) { - update_convolutional_layer_gpu(*(l.vf), batch, learning_rate, momentum, decay, loss_scale); - update_convolutional_layer_gpu(*(l.vi), batch, learning_rate, momentum, decay, loss_scale); - update_convolutional_layer_gpu(*(l.vo), batch, learning_rate, momentum, decay, loss_scale); - } - update_convolutional_layer_gpu(*(l.wf), batch, learning_rate, momentum, decay, loss_scale); - if (!l.bottleneck) { - update_convolutional_layer_gpu(*(l.wi), batch, learning_rate, momentum, decay, loss_scale); - 
update_convolutional_layer_gpu(*(l.wg), batch, learning_rate, momentum, decay, loss_scale); - update_convolutional_layer_gpu(*(l.wo), batch, learning_rate, momentum, decay, loss_scale); - } - update_convolutional_layer_gpu(*(l.uf), batch, learning_rate, momentum, decay, loss_scale); - update_convolutional_layer_gpu(*(l.ui), batch, learning_rate, momentum, decay, loss_scale); - update_convolutional_layer_gpu(*(l.ug), batch, learning_rate, momentum, decay, loss_scale); - update_convolutional_layer_gpu(*(l.uo), batch, learning_rate, momentum, decay, loss_scale); -} - -void forward_conv_lstm_layer_gpu(layer l, network_state state) -{ - network_state s = { 0 }; - s.train = state.train; - s.workspace = state.workspace; - s.net = state.net; - if (!state.train) s.index = state.index; // don't use TC for training (especially without cuda_convert_f32_to_f16() ) - int i; - layer vf = *(l.vf); - layer vi = *(l.vi); - layer vo = *(l.vo); - - layer wf = *(l.wf); - layer wi = *(l.wi); - layer wg = *(l.wg); - layer wo = *(l.wo); - - layer uf = *(l.uf); - layer ui = *(l.ui); - layer ug = *(l.ug); - layer uo = *(l.uo); - - if (state.train) { - if (l.peephole) { - fill_ongpu(l.outputs * l.batch * l.steps, 0, vf.delta_gpu, 1); - fill_ongpu(l.outputs * l.batch * l.steps, 0, vi.delta_gpu, 1); - fill_ongpu(l.outputs * l.batch * l.steps, 0, vo.delta_gpu, 1); - } - - fill_ongpu(l.outputs * l.batch * l.steps, 0, wf.delta_gpu, 1); - if (!l.bottleneck) { - fill_ongpu(l.outputs * l.batch * l.steps, 0, wi.delta_gpu, 1); - fill_ongpu(l.outputs * l.batch * l.steps, 0, wg.delta_gpu, 1); - fill_ongpu(l.outputs * l.batch * l.steps, 0, wo.delta_gpu, 1); - } - - fill_ongpu(l.outputs * l.batch * l.steps, 0, uf.delta_gpu, 1); - fill_ongpu(l.outputs * l.batch * l.steps, 0, ui.delta_gpu, 1); - fill_ongpu(l.outputs * l.batch * l.steps, 0, ug.delta_gpu, 1); - fill_ongpu(l.outputs * l.batch * l.steps, 0, uo.delta_gpu, 1); - - fill_ongpu(l.outputs * l.batch * l.steps, 0, l.delta_gpu, 1); - } - - for (i = 0; i 
< l.steps; ++i) - { - if (l.peephole) { - assert(l.outputs == vf.out_w * vf.out_h * vf.out_c); - s.input = l.c_gpu; - forward_convolutional_layer_gpu(vf, s); - forward_convolutional_layer_gpu(vi, s); - // vo below - } - - if (l.bottleneck) { - // l.bottelneck_hi_gpu size is 2x - simple_copy_ongpu(l.outputs*l.batch, l.h_gpu, l.bottelneck_hi_gpu); - simple_copy_ongpu(l.outputs*l.batch, state.input, l.bottelneck_hi_gpu + l.outputs*l.batch); - s.input = l.bottelneck_hi_gpu; - forward_convolutional_layer_gpu(wf, s); // 2x input channels - activate_array_ongpu(wf.output_gpu, l.outputs*l.batch, l.lstm_activation); - s.input = wf.output_gpu; - } - else { - assert(l.outputs == wf.out_w * wf.out_h * wf.out_c); - assert(wf.c == l.out_c && wi.c == l.out_c && wg.c == l.out_c && wo.c == l.out_c); - - s.input = l.h_gpu; - forward_convolutional_layer_gpu(wf, s); - forward_convolutional_layer_gpu(wi, s); - forward_convolutional_layer_gpu(wg, s); - forward_convolutional_layer_gpu(wo, s); - - s.input = state.input; - } - - assert(l.inputs == uf.w * uf.h * uf.c); - assert(uf.c == l.c && ui.c == l.c && ug.c == l.c && uo.c == l.c); - - forward_convolutional_layer_gpu(uf, s); - forward_convolutional_layer_gpu(ui, s); - forward_convolutional_layer_gpu(ug, s); - forward_convolutional_layer_gpu(uo, s); - - // f = wf + uf + vf - add_3_arrays_activate((l.bottleneck)?NULL:wf.output_gpu, uf.output_gpu, (l.peephole)?vf.output_gpu:NULL, l.outputs*l.batch, LOGISTIC, l.f_gpu); - //copy_ongpu(l.outputs*l.batch, wf.output_gpu, 1, l.f_gpu, 1); - //axpy_ongpu(l.outputs*l.batch, 1, uf.output_gpu, 1, l.f_gpu, 1); - //if (l.peephole) axpy_ongpu(l.outputs*l.batch, 1, vf.output_gpu, 1, l.f_gpu, 1); - //activate_array_ongpu(l.f_gpu, l.outputs*l.batch, LOGISTIC); - - // i = wi + ui + vi - add_3_arrays_activate((l.bottleneck)?NULL:wi.output_gpu, ui.output_gpu, (l.peephole) ? 
vi.output_gpu : NULL, l.outputs*l.batch, LOGISTIC, l.i_gpu); - //copy_ongpu(l.outputs*l.batch, wi.output_gpu, 1, l.i_gpu, 1); - //axpy_ongpu(l.outputs*l.batch, 1, ui.output_gpu, 1, l.i_gpu, 1); - //if (l.peephole) axpy_ongpu(l.outputs*l.batch, 1, vi.output_gpu, 1, l.i_gpu, 1); - //activate_array_ongpu(l.i_gpu, l.outputs*l.batch, LOGISTIC); - - // g = wg + ug - add_3_arrays_activate((l.bottleneck)?NULL:wg.output_gpu, ug.output_gpu, NULL, l.outputs*l.batch, l.lstm_activation, l.g_gpu); - //copy_ongpu(l.outputs*l.batch, wg.output_gpu, 1, l.g_gpu, 1); - //axpy_ongpu(l.outputs*l.batch, 1, ug.output_gpu, 1, l.g_gpu, 1); - //activate_array_ongpu(l.g_gpu, l.outputs*l.batch, TANH); - - // c = f*c + i*g - sum_of_mults(l.f_gpu, l.c_gpu, l.i_gpu, l.g_gpu, l.outputs*l.batch, l.c_gpu); // decreases mAP??? - //copy_ongpu(l.outputs*l.batch, l.i_gpu, 1, l.temp_gpu, 1); - //mul_ongpu(l.outputs*l.batch, l.g_gpu, 1, l.temp_gpu, 1); - //mul_ongpu(l.outputs*l.batch, l.f_gpu, 1, l.c_gpu, 1); - //axpy_ongpu(l.outputs*l.batch, 1, l.temp_gpu, 1, l.c_gpu, 1); - - // o = wo + uo + vo(c_new) - if (l.peephole) { - s.input = l.c_gpu; - forward_convolutional_layer_gpu(vo, s); - } - add_3_arrays_activate((l.bottleneck)?NULL:wo.output_gpu, uo.output_gpu, (l.peephole) ? 
vo.output_gpu : NULL, l.outputs*l.batch, LOGISTIC, l.o_gpu); - //copy_ongpu(l.outputs*l.batch, wo.output_gpu, 1, l.o_gpu, 1); - //axpy_ongpu(l.outputs*l.batch, 1, uo.output_gpu, 1, l.o_gpu, 1); - //if (l.peephole) axpy_ongpu(l.outputs*l.batch, 1, vo.output_gpu, 1, l.o_gpu, 1); - //activate_array_ongpu(l.o_gpu, l.outputs*l.batch, LOGISTIC); - - // h = o * tanh(c) - activate_and_mult(l.c_gpu, l.o_gpu, l.outputs*l.batch, l.lstm_activation, l.h_gpu); - //simple_copy_ongpu(l.outputs*l.batch, l.c_gpu, l.h_gpu); - //activate_array_ongpu(l.h_gpu, l.outputs*l.batch, TANH); - //mul_ongpu(l.outputs*l.batch, l.o_gpu, 1, l.h_gpu, 1); - - fix_nan_and_inf(l.c_gpu, l.outputs*l.batch); // should be fix_nan_and_inf() - fix_nan_and_inf(l.h_gpu, l.outputs*l.batch); // should be fix_nan_and_inf() - if (l.state_constrain) constrain_ongpu(l.outputs*l.batch, l.state_constrain, l.c_gpu, 1); - - if(state.train) simple_copy_ongpu(l.outputs*l.batch, l.c_gpu, l.cell_gpu); - simple_copy_ongpu(l.outputs*l.batch, l.h_gpu, l.output_gpu); // is required for both Detection and Training - - if (l.shortcut) { - // partial residual connection - if (l.bottleneck) axpy_ongpu(l.outputs*l.batch/2, 1, wf.output_gpu, 1, l.output_gpu, 1); - //else axpy_ongpu(l.outputs*l.batch, 1, l.f_gpu, 1, l.output_gpu, 1); - } - - state.input += l.inputs*l.batch; - l.output_gpu += l.outputs*l.batch; - l.cell_gpu += l.outputs*l.batch; - - if (l.peephole) { - increment_layer(&vf, 1); - increment_layer(&vi, 1); - increment_layer(&vo, 1); - } - - increment_layer(&wf, 1); - increment_layer(&wi, 1); - increment_layer(&wg, 1); - increment_layer(&wo, 1); - - increment_layer(&uf, 1); - increment_layer(&ui, 1); - increment_layer(&ug, 1); - increment_layer(&uo, 1); - } -} - -void backward_conv_lstm_layer_gpu(layer l, network_state state) -{ - float *last_output = l.output_gpu + l.outputs*l.batch*(l.steps - 1); - float *last_cell = l.cell_gpu + l.outputs*l.batch*(l.steps - 1); - - network_state s = { 0 }; - s.train = state.train; - 
s.workspace = state.workspace; - s.net = state.net; - int i; - layer vf = *(l.vf); - layer vi = *(l.vi); - layer vo = *(l.vo); - - layer wf = *(l.wf); - layer wi = *(l.wi); - layer wg = *(l.wg); - layer wo = *(l.wo); - - layer uf = *(l.uf); - layer ui = *(l.ui); - layer ug = *(l.ug); - layer uo = *(l.uo); - - if (l.peephole) { - increment_layer(&vf, l.steps - 1); - increment_layer(&vi, l.steps - 1); - increment_layer(&vo, l.steps - 1); - } - - increment_layer(&wf, l.steps - 1); - increment_layer(&wi, l.steps - 1); - increment_layer(&wg, l.steps - 1); - increment_layer(&wo, l.steps - 1); - - increment_layer(&uf, l.steps - 1); - increment_layer(&ui, l.steps - 1); - increment_layer(&ug, l.steps - 1); - increment_layer(&uo, l.steps - 1); - - state.input += l.inputs*l.batch*(l.steps - 1); - if (state.delta) state.delta += l.inputs*l.batch*(l.steps - 1); - - l.output_gpu += l.outputs*l.batch*(l.steps - 1); - l.cell_gpu += l.outputs*l.batch*(l.steps - 1); - l.delta_gpu += l.outputs*l.batch*(l.steps - 1); - - //fill_ongpu(l.outputs * l.batch, 0, l.dc_gpu, 1); // dont use - const int sequence = get_sequence_value(state.net); - - for (i = l.steps - 1; i >= 0; --i) { - if (i != 0) simple_copy_ongpu(l.outputs*l.batch, l.cell_gpu - l.outputs*l.batch, l.prev_cell_gpu); - //else fill_ongpu(l.outputs * l.batch, 0, l.prev_cell_gpu, 1); // dont use - else if (state.net.current_subdivision % sequence != 0) simple_copy_ongpu(l.outputs*l.batch, l.last_prev_cell_gpu, l.prev_cell_gpu); - - simple_copy_ongpu(l.outputs*l.batch, l.cell_gpu, l.c_gpu); - - if (i != 0) simple_copy_ongpu(l.outputs*l.batch, l.output_gpu - l.outputs*l.batch, l.prev_state_gpu); - //else fill_ongpu(l.outputs * l.batch, 0, l.prev_state_gpu, 1); // dont use - else if (state.net.current_subdivision % sequence != 0) simple_copy_ongpu(l.outputs*l.batch, l.last_prev_state_gpu, l.prev_state_gpu); - - simple_copy_ongpu(l.outputs*l.batch, l.output_gpu, l.h_gpu); - - l.dh_gpu = (i == 0) ? 
0 : l.delta_gpu - l.outputs*l.batch; - - // f = wf + uf + vf - add_3_arrays_activate((l.bottleneck) ? NULL : wf.output_gpu, uf.output_gpu, (l.peephole) ? vf.output_gpu : NULL, l.outputs*l.batch, LOGISTIC, l.f_gpu); - //copy_ongpu(l.outputs*l.batch, wf.output_gpu, 1, l.f_gpu, 1); - //axpy_ongpu(l.outputs*l.batch, 1, uf.output_gpu, 1, l.f_gpu, 1); - //if (l.peephole) axpy_ongpu(l.outputs*l.batch, 1, vf.output_gpu, 1, l.f_gpu, 1); - //activate_array_ongpu(l.f_gpu, l.outputs*l.batch, LOGISTIC); - - // i = wi + ui + vi - add_3_arrays_activate((l.bottleneck) ? NULL : wi.output_gpu, ui.output_gpu, (l.peephole) ? vi.output_gpu : NULL, l.outputs*l.batch, LOGISTIC, l.i_gpu); - //copy_ongpu(l.outputs*l.batch, wi.output_gpu, 1, l.i_gpu, 1); - //axpy_ongpu(l.outputs*l.batch, 1, ui.output_gpu, 1, l.i_gpu, 1); - //if (l.peephole) axpy_ongpu(l.outputs*l.batch, 1, vi.output_gpu, 1, l.i_gpu, 1); - //activate_array_ongpu(l.i_gpu, l.outputs*l.batch, LOGISTIC); - - // g = wg + ug - add_3_arrays_activate((l.bottleneck) ? NULL : wg.output_gpu, ug.output_gpu, NULL, l.outputs*l.batch, l.lstm_activation, l.g_gpu); // TANH - //copy_ongpu(l.outputs*l.batch, wg.output_gpu, 1, l.g_gpu, 1); - //axpy_ongpu(l.outputs*l.batch, 1, ug.output_gpu, 1, l.g_gpu, 1); - //activate_array_ongpu(l.g_gpu, l.outputs*l.batch, l.lstm_activation); - - // o = wo + uo + vo - add_3_arrays_activate((l.bottleneck) ? NULL : wo.output_gpu, uo.output_gpu, (l.peephole) ? 
vo.output_gpu : NULL, l.outputs*l.batch, LOGISTIC, l.o_gpu); - //copy_ongpu(l.outputs*l.batch, wo.output_gpu, 1, l.o_gpu, 1); - //axpy_ongpu(l.outputs*l.batch, 1, uo.output_gpu, 1, l.o_gpu, 1); - //if (l.peephole) axpy_ongpu(l.outputs*l.batch, 1, vo.output_gpu, 1, l.o_gpu, 1); - //activate_array_ongpu(l.o_gpu, l.outputs*l.batch, LOGISTIC); - - - simple_copy_ongpu(l.outputs*l.batch, l.delta_gpu, l.temp3_gpu); // temp3 = delta - - simple_copy_ongpu(l.outputs*l.batch, l.c_gpu, l.temp_gpu); - activate_array_ongpu(l.temp_gpu, l.outputs*l.batch, l.lstm_activation); // temp = tanh(c) - - simple_copy_ongpu(l.outputs*l.batch, l.temp3_gpu, l.temp2_gpu); - mul_ongpu(l.outputs*l.batch, l.o_gpu, 1, l.temp2_gpu, 1); // temp2 = delta * o - - gradient_array_ongpu(l.temp_gpu, l.outputs*l.batch, l.lstm_activation, l.temp2_gpu); // temp2 = delta * o * grad_tanh(tanh(c)) - //??? - axpy_ongpu(l.outputs*l.batch, 1, l.dc_gpu, 1, l.temp2_gpu, 1); // temp2 = delta * o * grad_tanh(tanh(c)) + delta_c(???) - // temp = tanh(c) - // temp2 = delta * o * grad_tanh(tanh(c)) + delta_c(???) - // temp3 = delta - - simple_copy_ongpu(l.outputs*l.batch, l.c_gpu, l.temp_gpu); - activate_array_ongpu(l.temp_gpu, l.outputs*l.batch, l.lstm_activation); // temp = tanh(c) - - mul_ongpu(l.outputs*l.batch, l.temp3_gpu, 1, l.temp_gpu, 1); // temp = delta * tanh(c) - gradient_array_ongpu(l.o_gpu, l.outputs*l.batch, LOGISTIC, l.temp_gpu); // temp = delta * tanh(c) * grad_logistic(o) - // delta for o(w,u,v): temp = delta * tanh(c) * grad_logistic(o) - // delta for c,f,i,g(w,u,v): temp2 = delta * o * grad_tanh(tanh(c)) + delta_c(???) 
- // delta for output: temp3 = delta - - // o - // delta for O(w,u,v): temp = delta * tanh(c) * grad_logistic(o) - if (l.peephole) { - simple_copy_ongpu(l.outputs*l.batch, l.temp_gpu, vo.delta_gpu); - s.input = l.cell_gpu; - //s.delta = l.dc_gpu; - backward_convolutional_layer_gpu(vo, s); - } - - if (!l.bottleneck) { - simple_copy_ongpu(l.outputs*l.batch, l.temp_gpu, wo.delta_gpu); - s.input = l.prev_state_gpu; - s.delta = l.temp3_gpu;// s.delta = l.dh_gpu; - fill_ongpu(l.outputs * l.batch, 0, l.temp3_gpu, 1); - backward_convolutional_layer_gpu(wo, s); - } - - simple_copy_ongpu(l.outputs*l.batch, l.temp_gpu, uo.delta_gpu); - if (l.bottleneck) { - s.input = wf.output_gpu; - s.delta = wf.delta_gpu; - } - else { - s.input = state.input; - s.delta = state.delta; - } - backward_convolutional_layer_gpu(uo, s); - - // g - simple_copy_ongpu(l.outputs*l.batch, l.temp2_gpu, l.temp_gpu); - mul_ongpu(l.outputs*l.batch, l.i_gpu, 1, l.temp_gpu, 1); - gradient_array_ongpu(l.g_gpu, l.outputs*l.batch, l.lstm_activation, l.temp_gpu); - // delta for c,f,i,g(w,u,v): temp = (delta * o * grad_tanh(tanh(c)) + delta_c(???)) * i * grad_tanh(g) - - if (!l.bottleneck) { - simple_copy_ongpu(l.outputs*l.batch, l.temp_gpu, wg.delta_gpu); - s.input = l.prev_state_gpu; - s.delta = l.temp3_gpu;// s.delta = l.dh_gpu; // comment this - backward_convolutional_layer_gpu(wg, s); // lead to nan - } - - simple_copy_ongpu(l.outputs*l.batch, l.temp_gpu, ug.delta_gpu); - if (l.bottleneck) { - s.input = wf.output_gpu; - s.delta = wf.delta_gpu; - } - else { - s.input = state.input; - s.delta = state.delta; - } - backward_convolutional_layer_gpu(ug, s); - - // i - simple_copy_ongpu(l.outputs*l.batch, l.temp2_gpu, l.temp_gpu); - mul_ongpu(l.outputs*l.batch, l.g_gpu, 1, l.temp_gpu, 1); - gradient_array_ongpu(l.i_gpu, l.outputs*l.batch, LOGISTIC, l.temp_gpu); - // delta for c,f,i,g(w,u,v): temp = (delta * o * grad_tanh(tanh(c)) + delta_c(???)) * g * grad_logistic(i) - - if (l.peephole) { - 
simple_copy_ongpu(l.outputs*l.batch, l.temp_gpu, vi.delta_gpu); - s.input = l.prev_cell_gpu; - //s.delta = l.dc_gpu; - backward_convolutional_layer_gpu(vi, s); - } - - if (!l.bottleneck) { - simple_copy_ongpu(l.outputs*l.batch, l.temp_gpu, wi.delta_gpu); - s.input = l.prev_state_gpu; - s.delta = l.temp3_gpu;// s.delta = l.dh_gpu; // comment this - backward_convolutional_layer_gpu(wi, s); // lead to nan (after 1000 it) - } - - simple_copy_ongpu(l.outputs*l.batch, l.temp_gpu, ui.delta_gpu); - if (l.bottleneck) { - s.input = wf.output_gpu; - s.delta = wf.delta_gpu; - } - else { - s.input = state.input; - s.delta = state.delta; - } - backward_convolutional_layer_gpu(ui, s); - - // f - simple_copy_ongpu(l.outputs*l.batch, l.temp2_gpu, l.temp_gpu); - mul_ongpu(l.outputs*l.batch, l.prev_cell_gpu, 1, l.temp_gpu, 1); - gradient_array_ongpu(l.f_gpu, l.outputs*l.batch, LOGISTIC, l.temp_gpu); - // delta for c,f,i,g(w,u,v): temp = (delta * o * grad_tanh(tanh(c)) + delta_c(???)) * c * grad_logistic(f) - - if (l.peephole) { - simple_copy_ongpu(l.outputs*l.batch, l.temp_gpu, vf.delta_gpu); - s.input = l.prev_cell_gpu; - //s.delta = l.dc_gpu; - backward_convolutional_layer_gpu(vf, s); - } - - simple_copy_ongpu(l.outputs*l.batch, l.temp_gpu, uf.delta_gpu); - if (l.bottleneck) { - s.input = wf.output_gpu; - s.delta = wf.delta_gpu; - } - else { - s.input = state.input; - s.delta = state.delta; - } - backward_convolutional_layer_gpu(uf, s); - - - if (l.bottleneck) { - // l.bottelneck_hi_gpu size is 2x - simple_copy_ongpu(l.outputs*l.batch, l.prev_state_gpu, l.bottelneck_hi_gpu); - simple_copy_ongpu(l.outputs*l.batch, state.input, l.bottelneck_hi_gpu + l.outputs*l.batch); - fill_ongpu(l.outputs * l.batch * 2, 0, l.bottelneck_delta_gpu, 1); - s.input = l.bottelneck_hi_gpu; - s.delta = l.bottelneck_delta_gpu; - if (l.shortcut) axpy_ongpu(l.outputs*l.batch/2, 1, l.delta_gpu, 1, wf.delta_gpu, 1); // partial residual connection - gradient_array_ongpu(wf.output_gpu, l.outputs*l.batch, 
l.lstm_activation, wf.delta_gpu); - - reset_nan_and_inf(wf.delta_gpu, l.outputs*l.batch); - constrain_ongpu(l.outputs*l.batch, 1, wf.delta_gpu, 1); - } - else { - s.input = l.prev_state_gpu; - simple_copy_ongpu(l.outputs*l.batch, l.temp_gpu, wf.delta_gpu); - s.delta = l.temp3_gpu;// s.delta = l.dh_gpu; - } - - // WF - backward_convolutional_layer_gpu(wf, s); - - if (l.bottleneck) { - reset_nan_and_inf(l.bottelneck_delta_gpu, l.outputs*l.batch*2); - //constrain_ongpu(l.outputs*l.batch*2, 1, l.bottelneck_delta_gpu, 1); - if (l.dh_gpu) axpy_ongpu(l.outputs*l.batch, l.time_normalizer, l.bottelneck_delta_gpu, 1, l.dh_gpu, 1); - axpy_ongpu(l.outputs*l.batch, 1, l.bottelneck_delta_gpu + l.outputs*l.batch, 1, state.delta, 1); // lead to nan - } - else { - axpy_ongpu(l.outputs*l.batch, l.time_normalizer, l.temp3_gpu, 1, l.dh_gpu, 1); - } - - // c - simple_copy_ongpu(l.outputs*l.batch, l.temp2_gpu, l.temp_gpu); - mul_ongpu(l.outputs*l.batch, l.f_gpu, 1, l.temp_gpu, 1); - simple_copy_ongpu(l.outputs*l.batch, l.temp_gpu, l.dc_gpu); - reset_nan_and_inf(l.dc_gpu, l.outputs*l.batch); - if (i != 0) reset_nan_and_inf(l.dh_gpu, l.outputs*l.batch); - // delta for c,f,i,g(w,u,v): delta_c = temp = (delta * o * grad_tanh(tanh(c)) + delta_c(???)) * f // (grad_linear(c)==1) - - state.input -= l.inputs*l.batch; - if (state.delta) state.delta -= l.inputs*l.batch; // new delta: state.delta = prev_layer.delta_gpu; - l.output_gpu -= l.outputs*l.batch; - l.cell_gpu -= l.outputs*l.batch; - l.delta_gpu -= l.outputs*l.batch; - - if (l.peephole) { - increment_layer(&vf, -1); - increment_layer(&vi, -1); - increment_layer(&vo, -1); - } - - increment_layer(&wf, -1); - increment_layer(&wi, -1); - increment_layer(&wg, -1); - increment_layer(&wo, -1); - - increment_layer(&uf, -1); - increment_layer(&ui, -1); - increment_layer(&ug, -1); - increment_layer(&uo, -1); - } - - simple_copy_ongpu(l.outputs*l.batch, last_output, l.last_prev_state_gpu); - simple_copy_ongpu(l.outputs*l.batch, last_cell, 
l.last_prev_cell_gpu); - - // free state after each 100 iterations - //if (get_current_batch(state.net) % 100) free_state_conv_lstm(l); // dont use -} -#endif diff --git a/src/Detector/darknet/src/conv_lstm_layer.h b/src/Detector/darknet/src/conv_lstm_layer.h deleted file mode 100644 index fae59f149..000000000 --- a/src/Detector/darknet/src/conv_lstm_layer.h +++ /dev/null @@ -1,40 +0,0 @@ -#ifndef CONV_LSTM_LAYER_H -#define CONV_LSTM_LAYER_H - -#include "activations.h" -#include "layer.h" -#include "network.h" -#define USET - -#ifdef __cplusplus -extern "C" { -#endif -layer make_conv_lstm_layer(int batch, int h, int w, int c, int output_filters, int groups, int steps, int size, int stride, int dilation, int pad, ACTIVATION activation, int batch_normalize, int peephole, int xnor, int bottleneck, int train); -void resize_conv_lstm_layer(layer *l, int w, int h); -void free_state_conv_lstm(layer l); -void randomize_state_conv_lstm(layer l); -void remember_state_conv_lstm(layer l); -void restore_state_conv_lstm(layer l); - -void forward_conv_lstm_layer(layer l, network_state state); -void backward_conv_lstm_layer(layer l, network_state state); -void update_conv_lstm_layer(layer l, int batch, float learning_rate, float momentum, float decay); - -layer make_history_layer(int batch, int h, int w, int c, int history_size, int steps, int train); -void forward_history_layer(layer l, network_state state); -void backward_history_layer(layer l, network_state state); - -#ifdef GPU -void forward_conv_lstm_layer_gpu(layer l, network_state state); -void backward_conv_lstm_layer_gpu(layer l, network_state state); -void update_conv_lstm_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay, float loss_scale); - -void forward_history_layer_gpu(const layer l, network_state state); -void backward_history_layer_gpu(const layer l, network_state state); -#endif - -#ifdef __cplusplus -} -#endif - -#endif // CONV_LSTM_LAYER_H diff --git 
a/src/Detector/darknet/src/convolutional_kernels.cu b/src/Detector/darknet/src/convolutional_kernels.cu deleted file mode 100644 index b1aa4e698..000000000 --- a/src/Detector/darknet/src/convolutional_kernels.cu +++ /dev/null @@ -1,1404 +0,0 @@ -#include -#include -#include - -#include "convolutional_layer.h" -#include "batchnorm_layer.h" -#include "gemm.h" -#include "blas.h" -#include "im2col.h" -#include "col2im.h" -#include "utils.h" -#include "dark_cuda.h" -#include "box.h" - - -__global__ void binarize_kernel(float *x, int n, float *binary) -{ - int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (i >= n) return; - binary[i] = (x[i] >= 0) ? 1 : -1; -} - -void binarize_gpu(float *x, int n, float *binary) -{ - binarize_kernel<<>>(x, n, binary); - CHECK_CUDA(cudaPeekAtLastError()); -} - -__global__ void binarize_input_kernel(float *input, int n, int size, float *binary) -{ - int s = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (s >= size) return; - int i = 0; - float mean = 0; - for(i = 0; i < n; ++i){ - mean += fabs(input[i*size + s]); - } - mean = mean / n; - for(i = 0; i < n; ++i){ - binary[i*size + s] = (input[i*size + s] > 0) ? mean : -mean; - } -} - -void binarize_input_gpu(float *input, int n, int size, float *binary) -{ - binarize_input_kernel<<>>(input, n, size, binary); - CHECK_CUDA(cudaPeekAtLastError()); -} - -__global__ void binarize_weights_kernel(float *weights, int n, int size, float *binary) -{ - int f = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (f >= n) return; - int i = 0; - float mean = 0; - for (i = 0; i < size; ++i) { - mean += fabs(weights[f*size + i]); - } - mean = mean / size; - for (i = 0; i < size; ++i) { - binary[f*size + i] = (weights[f*size + i] > 0) ? 
mean : -mean; - //binary[f*size + i] = weights[f*size + i]; - } -} - -void binarize_weights_gpu(float *weights, int n, int size, float *binary) -{ - binarize_weights_kernel << > >(weights, n, size, binary); - CHECK_CUDA(cudaPeekAtLastError()); -} - - -__global__ void set_zero_kernel(float *src, int size) -{ - int i = blockIdx.x * blockDim.x + threadIdx.x; - if (i < size) src[i] = 0; -} - -__inline__ __device__ -float warpAllReduceSum(float val) { - for (int mask = WARP_SIZE / 2; mask > 0; mask /= 2) -#if CUDART_VERSION >= 9000 - val += __shfl_xor_sync(0xffffffff, val, mask); -#else - val += __shfl_xor(val, mask); -#endif - return val; -} - -// only if (size % 32 == 0) -__global__ void reduce_kernel(float *weights, int n, int size, float *mean_arr_gpu) -{ - int i = blockIdx.x * blockDim.x + threadIdx.x; - int f = i / size; - if (f >= n) return; - float warp_mean = warpAllReduceSum(fabs(weights[i])); - if(i % 32 == 0) - atomicAdd(&mean_arr_gpu[f], warp_mean / size); -} - -__global__ void binarize_weights_mean_kernel(float *weights, int n, int size, float *binary, float *mean_arr_gpu) -{ - int i = blockIdx.x * blockDim.x + threadIdx.x; - int f = i / size; - if (f >= n) return; - float mean = mean_arr_gpu[f]; - binary[i] = (weights[i] > 0) ? 
mean : -mean; -} - -void fast_binarize_weights_gpu(float *weights, int n, int size, float *binary, float *mean_arr_gpu) -{ - if (size % 32 == 0) { - size_t gridsize = n * size; - const int num_blocks = get_number_of_blocks(gridsize, BLOCK);// gridsize / BLOCK + 1; - - set_zero_kernel << <(n/BLOCK + 1), BLOCK, 0, get_cuda_stream() >> > (mean_arr_gpu, n); - reduce_kernel << > > (weights, n, size, mean_arr_gpu); - binarize_weights_mean_kernel << > > (weights, n, size, binary, mean_arr_gpu); - CHECK_CUDA(cudaPeekAtLastError()); - } - else { - binarize_weights_gpu(weights, n, size, binary); - } -} - - -__global__ void cuda_f32_to_f16(float* input_f32, size_t size, half *output_f16) -{ - int idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx < size) output_f16[idx] = __float2half(input_f32[idx]); - //if (idx < size) output_f16[idx] = __float2half_rn(input_f32[idx]); // can't be compiled on Linux without casting - // __float2half_ru, __float2half_rd, __float2half_rz, __float2half_rn - //if (idx < size) *((unsigned short *)output_f16 + idx) = __float2half(input_f32[idx]); -} - -void cuda_convert_f32_to_f16(float* input_f32, size_t size, float *output_f16) { - cuda_f32_to_f16 <<< get_number_of_blocks(size, BLOCK), BLOCK, 0, get_cuda_stream() >>> (input_f32, size, (half *)output_f16); - CHECK_CUDA(cudaPeekAtLastError()); -} - -__global__ void cuda_f16_to_f32(half* input_f16, size_t size, float *output_f32) -{ - int idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx < size) output_f32[idx] = __half2float(input_f16[idx]); - //if (idx < size) output_f32[idx] = __half2float(*((unsigned short *)input_f16 + idx)); -} - -void cuda_convert_f16_to_f32(float* input_f16, size_t size, float *output_f32) { - cuda_f16_to_f32 <<< get_number_of_blocks(size, BLOCK), BLOCK, 0, get_cuda_stream() >>> ((half *)input_f16, size, output_f32); - CHECK_CUDA(cudaPeekAtLastError()); -} - -half *cuda_make_f16_from_f32_array(float *src, size_t n) -{ - half *dst16; - size_t size = sizeof(half)*n; 
- CHECK_CUDA(cudaMalloc((void **)&dst16, size)); - if (src) { - assert(n > 0); - cuda_convert_f32_to_f16(src, n, (float *)dst16); - } - if (!dst16) error("Cuda malloc failed\n"); - return dst16; -} - -void forward_convolutional_layer_gpu(convolutional_layer l, network_state state) -{ - //fill_ongpu(l.outputs*l.batch, 0, l.output_gpu, 1); - if(l.binary){ - binarize_weights_gpu(l.weights_gpu, l.n, (l.c / l.groups)*l.size*l.size, l.binary_weights_gpu); - swap_binary(&l); - } - - if(l.xnor){ - if (!l.align_bit_weights_gpu || state.train) { - //binarize_weights_gpu(l.weights_gpu, l.n, (l.c / l.groups)*l.size*l.size, l.binary_weights_gpu); - - fast_binarize_weights_gpu(l.weights_gpu, l.n, (l.c / l.groups)*l.size*l.size, l.binary_weights_gpu, l.mean_arr_gpu); - } - - if (l.align_bit_weights_gpu && !state.train && l.c >= 32 && l.stride_x == l.stride_y) - { - //return; - //cudaError_t status = cudaSuccess; - //int input_size = l.c*l.h*l.w*l.batch; - - int m = l.n / l.groups; - int k = l.size*l.size*l.c / l.groups; - int n = l.out_w*l.out_h; - //float * a = l.weights_gpu; - - // int i, j; - // for(i = 0; i < l.batch; ++i){ - // for (j = 0; j < l.groups; ++j) { - - int ldb_align = l.lda_align; - size_t new_ldb = k + (ldb_align - k%ldb_align); // (k / 8 + 1) * 8; - //size_t t_intput_size = new_ldb * n; - //size_t t_bit_input_size = t_intput_size / 8;// +1; - - if (l.c % 32 == 0) - { - //printf("\n\n l.index = %d, l.w = %d, l.c = %d, l.n = %d, l.stride = %d, l.pad = %d - new XNOR \n", l.index, l.w, l.c, l.n, l.stride, l.pad); - //printf("l.align_workspace_size = %d, (l.c * l.w * l.h) = %d \n", l.align_workspace_size, (l.c * l.w * l.h)); - - //float *intput_cpu = (float *)calloc(l.inputs, sizeof(float)); - // state.input - //cudaMemcpy(intput_cpu, state.input, l.inputs * sizeof(float), cudaMemcpyDefault); - - int ldb_align = l.lda_align; - size_t new_ldb = k + (ldb_align - k%ldb_align); // (k / 8 + 1) * 8; - //size_t t_intput_size = new_ldb * l.bit_align;// n; - //size_t 
t_bit_input_size = t_intput_size / 8;// +1; - - const int new_c = l.c / 32; - - //float *re_packed_input = (float *)calloc(l.c * l.w * l.h, sizeof(float)); - //uint32_t *bin_re_packed_input = (uint32_t *)calloc(new_c * l.w * l.h + 1, sizeof(uint32_t)); - - // float32x4 by channel (as in cuDNN) - //repack_input(intput_cpu, re_packed_input, l.w, l.h, l.c); - - - // 32 x floats -> 1 x uint32_t - //float_to_bit(re_packed_input, (uint8_t *)bin_re_packed_input, l.c * l.w * l.h); - - //cudaDeviceSynchronize(); - //start_timer(); - - repack_input_gpu_bin(state.input, (uint32_t *)l.align_workspace_gpu, l.w, l.h, l.c); - - //repack_input_gpu(state.input, state.workspace, l.w, l.h, l.c); - - // 32 x floats -> 1 x uint32_t - //float_to_bit_gpu(state.workspace, (unsigned char *)l.align_workspace_gpu, l.c * l.w * l.h);// l.align_workspace_size); - - //cudaDeviceSynchronize(); - //stop_timer_and_show_name("repack_input_gpu + float_to_bit_gpu"); - - //free(re_packed_input); - - // slow - convolution the packed inputs and weights: float x 32 by channel (as in cuDNN) - //convolution_repacked((uint32_t *)bin_re_packed_input, (uint32_t *)l.align_bit_weights, l.output, - // l.w, l.h, l.c, l.n, l.size, l.pad, l.new_lda, l.mean_arr); - - // // then exit from if() - - //float *b = state.workspace; - //float *b = (float *)calloc(100 * 1024 * 1024, sizeof(float)); - //float *c = l.output; - //memset(c, 0, l.outputs * sizeof(float)); - - - //im2col_cpu_custom((float *)bin_re_packed_input, new_c, l.h, l.w, l.size, l.stride, l.pad, b); - - //cudaMemcpy(l.align_workspace_gpu, bin_re_packed_input, (new_c * l.w * l.h + 1) * sizeof(uint32_t), cudaMemcpyDefault); - - //start_timer(); - im2col_ongpu(l.align_workspace_gpu, new_c, l.h, l.w, l.size, l.stride, l.pad, state.workspace); - //cudaDeviceSynchronize(); - //stop_timer_and_show_name("im2col_ongpu"); - - //free(bin_re_packed_input); - - int new_k = l.size*l.size*l.c / 32; - - // good for (l.c == 64) - //gemm_nn_bin_32bit_packed(m, n, new_k, 1, - 
// l.align_bit_weights, l.new_lda/32, - // b, n, - // c, n, l.mean_arr); - - // // then exit from if() - - - //size_t new_ldb = k + (ldb_align - k%ldb_align); // (k / 8 + 1) * 8; - //size_t t_intput_size = new_ldb * l.bit_align;// n; - //size_t t_bit_input_size = t_intput_size / 8;// +1; - - //char *t_bit_input = (char *)calloc(t_bit_input_size, sizeof(char)); - //transpose_uint32((uint32_t *)b, (uint32_t *)t_bit_input, new_k, n, n, new_ldb); - //cudaMemcpy(l.transposed_align_workspace_gpu, t_bit_input, t_bit_input_size * sizeof(char), cudaMemcpyDefault); - - //cudaMemcpy(state.workspace, b, t_bit_input_size * sizeof(char), cudaMemcpyDefault); - //printf("\n n = %d, n % 32 = %d, new_ldb = %d, new_ldb % 32 = %d \n", n, n % 32, new_ldb, new_ldb % 32); - - //start_timer(); - transpose_uint32_gpu((uint32_t *)state.workspace, (uint32_t *)l.transposed_align_workspace_gpu, new_k, n, n, new_ldb); - //cudaDeviceSynchronize(); - //stop_timer_and_show_name("transpose_uint32_gpu"); - - //cudaDeviceSynchronize(); - //stop_timer_and_show_name("repack_input_gpu_bin + im2col_ongpu + transpose_uint32_gpu_2"); - - //start_timer(); - gemm_nn_custom_bin_mean_transposed_gpu(m, n, k, - (unsigned char *)l.align_bit_weights_gpu, new_ldb, (unsigned char *)l.transposed_align_workspace_gpu, - new_ldb, l.output_gpu, n, l.mean_arr_gpu, l.biases_gpu, l.activation == LEAKY, - l.bin_conv_shortcut_in_gpu, l.bin_conv_shortcut_out_gpu); - //cudaDeviceSynchronize(); - //stop_timer_and_show_name("gemm_nn_custom_bin_mean_transposed_gpu"); - - - // the main GEMM function - //gemm_nn_custom_bin_mean_transposed(m, n, k, 1, (uint8_t *)l.align_bit_weights, new_ldb, (uint8_t *)t_bit_input, new_ldb, c, n, l.mean_arr); - - //add_bias(l.output, l.biases, l.batch, l.n, l.out_h*l.out_w); - - //cudaMemcpy(l.output_gpu, l.output, l.outputs * sizeof(float), cudaMemcpyDefault); - - - // // alternative GEMM - //gemm_nn_bin_transposed_32bit_packed(m, n, new_k, 1, - // l.align_bit_weights, l.new_lda/32, - // 
t_bit_input, new_ldb / 32, - // c, n, l.mean_arr); - - //free(t_bit_input); - - //free(b); - } - else - { - //printf("\n\n l.index = %d, l.w = %d, l.c = %d, l.n = %d, l.stride = %d, l.pad = %d - old XNOR \n", l.index, l.w, l.c, l.n, l.stride, l.pad); - //cudaDeviceSynchronize(); - - int i = 0; - /* - // if (l.stride == 1 && l.c >= 256 && l.size > 1) - if (l.stride == 1 && l.c >= 1024 && l.size > 1 && 0)// && l.w >= 13) // disabled - { - // stride=1 only - //start_timer(); - im2col_align_bin_ongpu(state.input + i*l.c*l.h*l.w, l.c, l.h, l.w, l.size, l.stride, l.pad, state.workspace, l.bit_align); - //cudaDeviceSynchronize(); - //stop_timer_and_show_name("im2col_align_bin_ongpu"); - } - else*/ - { - //start_timer(); - im2col_align_ongpu(state.input + i*l.c*l.h*l.w, l.c, l.h, l.w, l.size, l.stride, l.pad, l.align_workspace_gpu, l.bit_align); - //cudaDeviceSynchronize(); - //stop_timer_and_show_name("im2col_align_ongpu"); - //getchar(); - - // should be optimized - //start_timer(); - float_to_bit_gpu(l.align_workspace_gpu, (unsigned char *)state.workspace, l.align_workspace_size); - //cudaDeviceSynchronize(); - //stop_timer_and_show_name("float_to_bit_gpu"); - } - //start_timer(); - transpose_bin_gpu((unsigned char *)state.workspace, (unsigned char *)l.transposed_align_workspace_gpu, k, n, l.bit_align, new_ldb, 8); - //cudaDeviceSynchronize(); - //stop_timer_and_show_name("transpose_bin_gpu"); - - //cudaDeviceSynchronize(); - //stop_timer_and_show_name("im2col_align_ongpu + float_to_bit_gpu + transpose_bin_gpu"); - - // should be optimized - //if(0) {//if (k > 1000) { // sequentially input-shared - BAD - // gemm_nn_custom_bin_mean_transposed_sequentially_gpu(m, n, k, - // (unsigned char *)l.align_bit_weights_gpu, new_ldb, (unsigned char *)l.transposed_align_workspace_gpu, new_ldb, l.output_gpu, n, l.mean_arr_gpu); - //} - //else { // coalescing & weights-shared-memory - GOOD - //start_timer(); - gemm_nn_custom_bin_mean_transposed_gpu(m, n, k, - (unsigned char 
*)l.align_bit_weights_gpu, new_ldb, (unsigned char *)l.transposed_align_workspace_gpu, - new_ldb, l.output_gpu, n, l.mean_arr_gpu, l.biases_gpu, l.activation == LEAKY, - l.bin_conv_shortcut_in_gpu, l.bin_conv_shortcut_out_gpu); - //cudaDeviceSynchronize(); - //stop_timer_and_show_name("gemm_nn_custom_bin_mean_transposed_gpu"); - //} - //cudaDeviceSynchronize(); - //check_error(status); - //getchar(); - } - - - /* - { - float_to_bit_gpu(state.input, (unsigned char *)l.align_workspace_gpu, input_size); - convolve_bin_gpu(l.align_workspace_gpu, (float *)l.align_bit_weights_gpu, l.output_gpu, l.w, l.h, l.c, l.n, l.size, l.pad, l.new_lda, l.mean_arr_gpu); - - //convolve_gpu(state.input, l.weights_gpu, l.output_gpu, l.w, l.h, l.c, l.n, l.size, l.pad); - - //cudaDeviceSynchronize(); - //check_error(status); - - add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, l.out_w*l.out_h); - } - */ - - //add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, l.out_w*l.out_h); - if (l.activation == SWISH) activate_array_swish_ongpu(l.output_gpu, l.outputs*l.batch, l.activation_input_gpu, l.output_gpu); - else if (l.activation == MISH) activate_array_mish_ongpu(l.output_gpu, l.outputs*l.batch, l.activation_input_gpu, l.output_gpu); - else if (l.activation == HARD_MISH) activate_array_hard_mish_ongpu(l.output_gpu, l.outputs*l.batch, l.activation_input_gpu, l.output_gpu); - else if (l.activation == NORM_CHAN) activate_array_normalize_channels_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output_gpu); - else if (l.activation == NORM_CHAN_SOFTMAX) activate_array_normalize_channels_softmax_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output_gpu, 0); - else if (l.activation == NORM_CHAN_SOFTMAX_MAXVAL) activate_array_normalize_channels_softmax_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output_gpu, 1); - else if (l.activation != LINEAR && l.activation != LEAKY) activate_array_ongpu(l.output_gpu, 
l.outputs*l.batch, l.activation); - //if(l.activation != LINEAR && l.activation != LEAKY) activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation); - //if (l.binary || l.xnor) swap_binary(&l); - //cudaDeviceSynchronize(); - return; - } - } - - if (l.xnor) { - swap_binary(&l); - binarize_gpu(state.input, l.c*l.h*l.w*l.batch, l.binary_input_gpu); - state.input = l.binary_input_gpu; - } - - //fill_ongpu(l.outputs*l.batch, 0, l.output_gpu, 1); - -#ifdef CUDNN - //float one = 1; // alpha[0], beta[0] is float for HALF and FLOAT - float alpha = 1, beta = 0; - -//#ifdef CUDNN_HALF - //if (state.use_mixed_precision) { - int iteration_num = get_current_iteration(state.net); // (*state.net.seen) / (state.net.batch*state.net.subdivisions); - if (state.index != 0 && state.net.cudnn_half && !l.xnor && (!state.train || (iteration_num > 3 * state.net.burn_in) && state.net.loss_scale != 1) && - (l.c / l.groups) % 8 == 0 && l.n % 8 == 0 && l.groups <= 1 && l.size > 1) - { - //printf("\n CUDNN_HALF!!! state.index = %d \n", state.index); - - // Note: For improved performance it is advised to use beta[0] = 0.0. - // For Tensor Core: cudnnSetConvolutionMathType() where cudnnMathType_t mathType = CUDNN_TENSOR_OP_MATH; - // 1. or CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM and use CUDNN_DATA_HALF - // 2. 
or CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED - // More: http://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/index.html#tensor_ops - - const size_t input16_size = l.batch*l.c*l.w*l.h; - const size_t output16_size = l.batch*l.out_c*l.out_h*l.out_w; - - if (*state.net.max_input16_size < input16_size) { - //printf("\n input16_size: cur = %zu \t max = %zu \n", input16_size, *state.net.max_input16_size); - *state.net.max_input16_size = input16_size; - if (*state.net.input16_gpu) cuda_free(*state.net.input16_gpu); - assert(*state.net.max_input16_size > 0); - *state.net.input16_gpu = (float *)cuda_make_f16_from_f32_array(NULL, *state.net.max_input16_size); - } - float *input16 = *state.net.input16_gpu; - - if (*state.net.max_output16_size < output16_size) { - *state.net.max_output16_size = output16_size; - if (*state.net.output16_gpu) cuda_free(*state.net.output16_gpu); - assert(*state.net.max_output16_size > 0); - *state.net.output16_gpu = (float *)cuda_make_f16_from_f32_array(NULL, *state.net.max_output16_size); - } - float *output16 = *state.net.output16_gpu; - - assert(input16_size > 0); - cuda_convert_f32_to_f16(state.input, input16_size, input16); - - //fill_ongpu(output16_size / 2, 0, (float *)output16, 1); - CHECK_CUDNN(cudnnConvolutionForward(cudnn_handle(), - &alpha, - l.srcTensorDesc16, - input16, - l.weightDesc16, - l.weights_gpu16, - l.convDesc, - l.fw_algo16, - state.workspace, - l.workspace_size, - &beta, - l.dstTensorDesc16, - output16)); - - - if (l.batch_normalize) - { - if (state.train && !state.net.adversarial) // Training - { - simple_copy_ongpu(l.outputs*l.batch / 2, output16, l.x_gpu); - //copy_ongpu(l.outputs*l.batch / 2, output16, 1, l.x_gpu, 1); - //cudaMemcpyAsync(l.x_gpu, output16, l.outputs*l.batch*sizeof(half), cudaMemcpyDefault, get_cuda_stream()); - float one = 1.0f; - float zero = 0.0f; - // Batch-normalization can still take FP16 inputs and outputs, saving half the bandwidth - // compared to FP32, it's just that the statistics 
and value adjustment should be done in FP32. - CHECK_CUDNN(cudnnBatchNormalizationForwardTraining(cudnn_handle(), - CUDNN_BATCHNORM_SPATIAL, - &one, - &zero, - l.normDstTensorDescF16, - l.x_gpu, // input - l.normDstTensorDescF16, - output16, // output - l.normTensorDesc, - l.scales_gpu, // input - l.biases_gpu, // input - .01, - l.rolling_mean_gpu, // input/output (should be FP32) - l.rolling_variance_gpu, // input/output (should be FP32) - .00001, - l.mean_gpu, // output (should be FP32) - optional cache to speedup cudnnBatchNormalizationBackward() - l.variance_gpu)); // output (should be FP32) - optional cache to speedup cudnnBatchNormalizationBackward() - - cuda_convert_f16_to_f32(output16, output16_size, l.output_gpu); - //forward_batchnorm_layer_gpu(l, state); - } - else // Detection - { - cuda_convert_f16_to_f32(output16, output16_size, l.output_gpu); - normalize_gpu(l.output_gpu, l.rolling_mean_gpu, l.rolling_variance_gpu, l.batch, l.out_c, l.out_h*l.out_w); - scale_bias_gpu(l.output_gpu, l.scales_gpu, l.batch, l.out_c, l.out_h*l.out_w); - add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.out_c, l.out_w*l.out_h); - } - } - else // BIAS only - { - cuda_convert_f16_to_f32(output16, output16_size, l.output_gpu); - add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, l.out_w*l.out_h); - } - } - else { - - //#else - /* - int input_nan_inf = is_nan_or_inf(state.input, l.inputs * l.batch); - printf("\n is_nan_or_inf(state.input) = %d \n", input_nan_inf); - if (input_nan_inf) getchar(); - - int weights_nan_inf = is_nan_or_inf(l.weights_gpu, l.nweights); - printf("\n is_nan_or_inf(l.weights_gpu) = %d \n", weights_nan_inf); - if (weights_nan_inf) getchar(); - */ - - CHECK_CUDNN(cudnnConvolutionForward(cudnn_handle(), - &alpha, //&one, - l.srcTensorDesc, - state.input, - l.weightDesc, - l.weights_gpu, - l.convDesc, - l.fw_algo, - state.workspace, - l.workspace_size, - &beta, //&one, - l.dstTensorDesc, - l.output_gpu)); - - //cudaDeviceSynchronize(); - if 
(l.batch_normalize) { - forward_batchnorm_layer_gpu(l, state); - } - else { - add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, l.out_w*l.out_h); - } - //#endif // CUDNN_HALF - } - - -#else - fill_ongpu(l.outputs*l.batch, 0, l.output_gpu, 1); - - int i, j; - int m = l.n / l.groups; - int k = l.size*l.size*l.c / l.groups; - int n = l.out_w*l.out_h; - for(i = 0; i < l.batch; ++i){ - for (j = 0; j < l.groups; ++j) { - //float *im = state.input + i*l.c*l.h*l.w; - float *im = state.input + (i*l.groups + j)*l.c / l.groups*l.h*l.w; - float *a = l.weights_gpu + j*l.nweights / l.groups; - float *b = state.workspace; - float *c = l.output_gpu + (i*l.groups + j)*n*m; - if (l.size == 1 && l.stride == 1 && l.dilation == 1) { - b = im; - } - else { - //im2col_ongpu(im, l.c / l.groups, l.h, l.w, l.size, l.stride, l.pad, state.workspace); - - im2col_gpu_ext(im, // input - l.c / l.groups, // input channels - l.h, l.w, // input size (h, w) - l.size, l.size, // kernel size (h, w) - l.pad * l.dilation, l.pad * l.dilation, // padding (h, w) - l.stride_y, l.stride_x, // stride (h, w) - l.dilation, l.dilation, // dilation (h, w) - state.workspace); // output - - } - //gemm_ongpu(0, 0, m, n, k, 1., a, k, b, n, 1., c + i*m*n, n); - gemm_ongpu(0, 0, m, n, k, 1, a, k, b, n, 1, c, n); - } - } - - if (l.batch_normalize) { - forward_batchnorm_layer_gpu(l, state); - } - else { - add_bias_gpu(l.output_gpu, l.biases_gpu, l.batch, l.n, l.out_w*l.out_h); - } -#endif - -//#ifndef CUDNN_HALF -//#endif // no CUDNN_HALF - - if (l.activation == SWISH) activate_array_swish_ongpu(l.output_gpu, l.outputs*l.batch, l.activation_input_gpu, l.output_gpu); - else if (l.activation == MISH) activate_array_mish_ongpu(l.output_gpu, l.outputs*l.batch, l.activation_input_gpu, l.output_gpu); - else if (l.activation == HARD_MISH) activate_array_hard_mish_ongpu(l.output_gpu, l.outputs*l.batch, l.activation_input_gpu, l.output_gpu); - else if (l.activation == NORM_CHAN) 
activate_array_normalize_channels_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output_gpu); - else if (l.activation == NORM_CHAN_SOFTMAX) activate_array_normalize_channels_softmax_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output_gpu, 0); - else if (l.activation == NORM_CHAN_SOFTMAX_MAXVAL) activate_array_normalize_channels_softmax_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output_gpu, 1); - else if (l.activation != LINEAR) activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation); - //if(l.dot > 0) dot_error_gpu(l); - if(l.binary || l.xnor) swap_binary(&l); - //cudaDeviceSynchronize(); // for correct profiling of performance - - if (state.net.try_fix_nan) { - fix_nan_and_inf(l.output_gpu, l.outputs*l.batch); - } - - if(l.assisted_excitation && state.train) assisted_excitation_forward_gpu(l, state); - - if (l.antialiasing) { - network_state s = { 0 }; - s.train = state.train; - s.workspace = state.workspace; - s.net = state.net; - if (!state.train) s.index = state.index; // don't use TC for training (especially without cuda_convert_f32_to_f16() ) - s.input = l.output_gpu; - forward_convolutional_layer_gpu(*(l.input_layer), s); - simple_copy_ongpu(l.outputs*l.batch, l.output_gpu, l.input_antialiasing_gpu); - simple_copy_ongpu(l.input_layer->outputs*l.input_layer->batch, l.input_layer->output_gpu, l.output_gpu); - } - - if (l.coordconv) { - coord_conv_gpu(l.output_gpu, l.outputs*l.batch, l.out_w, l.out_h, l.out_c, l.batch, 0); - } -} - -void backward_convolutional_layer_gpu(convolutional_layer l, network_state state) -{ - if (l.coordconv) { - coord_conv_gpu(l.delta_gpu, l.outputs*l.batch, l.out_w, l.out_h, l.out_c, l.batch, 1); - } - - if (l.antialiasing) { - network_state s = { 0 }; - s.train = state.train; - s.workspace = state.workspace; - s.net = state.net; - s.delta = l.delta_gpu; // s.delta will be returned to l.delta_gpu - s.input = 
l.input_antialiasing_gpu; - //if (!state.train) s.index = state.index; // don't use TC for training (especially without cuda_convert_f32_to_f16() ) - simple_copy_ongpu(l.input_layer->outputs*l.input_layer->batch, l.delta_gpu, l.input_layer->delta_gpu); - backward_convolutional_layer_gpu(*(l.input_layer), s); - - simple_copy_ongpu(l.outputs*l.batch, l.input_antialiasing_gpu, l.output_gpu); - } - - if(state.net.try_fix_nan) constrain_ongpu(l.outputs*l.batch, 1, l.delta_gpu, 1); - - if (l.activation == SWISH) gradient_array_swish_ongpu(l.output_gpu, l.outputs*l.batch, l.activation_input_gpu, l.delta_gpu); - else if (l.activation == MISH) gradient_array_mish_ongpu(l.outputs*l.batch, l.activation_input_gpu, l.delta_gpu); - else if (l.activation == HARD_MISH) gradient_array_hard_mish_ongpu(l.outputs*l.batch, l.activation_input_gpu, l.delta_gpu); - else if (l.activation == NORM_CHAN_SOFTMAX || l.activation == NORM_CHAN_SOFTMAX_MAXVAL) gradient_array_normalize_channels_softmax_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.delta_gpu); - else if (l.activation == NORM_CHAN) gradient_array_normalize_channels_ongpu(l.output_gpu, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.delta_gpu); - else gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); - - if (!l.batch_normalize) - backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.n, l.out_w*l.out_h); - -//#ifndef CUDNN_HALF - //if(l.batch_normalize){ - // backward_batchnorm_layer_gpu(l, state); - //} else { - // //backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.n, l.out_w*l.out_h); - //} -//#endif // no CUDNN_HALF - float *original_input = state.input; - - if(l.xnor) state.input = l.binary_input_gpu; -#ifdef CUDNN - float one = 1.f; - float alpha = 1, beta = 0; - -//#ifdef CUDNN_HALF - int iteration_num = get_current_iteration(state.net); //(*state.net.seen) / (state.net.batch*state.net.subdivisions); - if (state.index != 0 && 
state.net.cudnn_half && !l.xnor && (!state.train || (iteration_num > 3 * state.net.burn_in) && state.net.loss_scale != 1) && - (l.c / l.groups) % 8 == 0 && l.n % 8 == 0 && l.groups <= 1 && l.size > 1) - { - const size_t input16_size = l.batch*l.c*l.w*l.h; - const size_t delta16_size = l.batch*l.n*l.out_w*l.out_h; - - if (*state.net.max_input16_size < input16_size) { - *state.net.max_input16_size = input16_size; - if (*state.net.input16_gpu) cuda_free(*state.net.input16_gpu); - assert(*state.net.max_input16_size > 0); - *state.net.input16_gpu = (float *)cuda_make_f16_from_f32_array(NULL, *state.net.max_input16_size); - } - float *input16 = *state.net.input16_gpu; - - if (*state.net.max_output16_size < delta16_size) { - *state.net.max_output16_size = delta16_size; - if (*state.net.output16_gpu) cuda_free(*state.net.output16_gpu); - assert(*state.net.max_output16_size > 0); - *state.net.output16_gpu = (float *)cuda_make_f16_from_f32_array(NULL, *state.net.max_output16_size); - } - float *delta16 = *state.net.output16_gpu; - - assert(input16_size > 0); - assert(delta16_size > 0); - cuda_convert_f32_to_f16(state.input, input16_size, input16); - cuda_convert_f32_to_f16(l.delta_gpu, delta16_size, delta16); - - if (l.batch_normalize) { - //if (!state.train) { - // l.mean_gpu = l.rolling_mean_gpu; - // l.variance_gpu = l.rolling_variance_gpu; - //} - float one = 1.0f; - float zero = 0.0f; - CHECK_CUDNN(cudnnBatchNormalizationBackward(cudnn_handle(), - CUDNN_BATCHNORM_SPATIAL, - &one, - &zero, - &one, - &one, - l.normDstTensorDescF16, - l.x_gpu, // input (input in BN-forward-inference) - l.normDstTensorDescF16, - delta16, // input - l.normDstTensorDescF16, - l.output_gpu, //l.x_norm_gpu, // output (new delta) - l.normTensorDesc, - l.scales_gpu, // input (should be FP32) - l.scale_updates_gpu, // output (should be FP32) - l.bias_updates_gpu, // output (should be FP32) - .00001, - l.mean_gpu, // input (should be FP32) - l.variance_gpu)); // input (should be FP32) - - 
simple_copy_ongpu(l.outputs*l.batch / 2, l.output_gpu, delta16); - //copy_ongpu(l.outputs*l.batch / 2, l.x_norm_gpu, 1, delta16, 1); - //cudaMemcpyAsync(delta16, l.x_norm_gpu, l.outputs*l.batch * sizeof(half), cudaMemcpyDefault, get_cuda_stream()); - } - else - { - //backward_bias_gpu(l.bias_updates_gpu, l.delta_gpu, l.batch, l.n, l.out_w*l.out_h); - } - - // convert input: state.input (x), l.delta_gpu (y) from fp32 to fp16 - // get output: l.weight_updates_gpu (dw) and convert it to fp32 (ONLY if it is fp16) - - // calculate conv weight updates - // Already: l.weight_updates_gpu = (l.weight_updates_gpu - l.weight*decay*batch*subdivision)*momentum - // so we should copy f32 to f16, or compute: f16=(w_up - w*d*b*s)*m - assert((l.nweights) > 0); - cuda_convert_f32_to_f16(l.weight_updates_gpu, l.nweights, l.weight_updates_gpu16); - - if (!state.net.adversarial && !l.train_only_bn) { - CHECK_CUDNN(cudnnConvolutionBackwardFilter(cudnn_handle(), - &one, - l.srcTensorDesc16, - input16, //state.input, - l.ddstTensorDesc16, - delta16, //l.delta_gpu, - l.convDesc, - l.bf_algo16, - state.workspace, - l.workspace_size, - &one, - l.dweightDesc16, - l.weight_updates_gpu16)); // l.weight_updates_gpu); - - cuda_convert_f16_to_f32(l.weight_updates_gpu16, l.nweights, l.weight_updates_gpu); - } - - if (state.delta) { - if (l.binary || l.xnor) swap_binary(&l); - - // http://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/index.html#cudnnConvolutionBackwardData - // calculate delta for the next layer - // convert input: l.weights_gpu (w), l.delta_gpu (dy) from fp32 to fp16 - // get output: state.delta (dx) and convert it to fp32 (ONLY if it is fp16) - CHECK_CUDNN(cudnnConvolutionBackwardData(cudnn_handle(), - &alpha, - l.weightDesc16, - l.weights_gpu16, //l.weights_gpu, - l.ddstTensorDesc16, - delta16, //l.delta_gpu, - l.convDesc, - l.bd_algo16, - state.workspace, - l.workspace_size, - &beta, - l.dsrcTensorDesc16, - input16)); // state.delta); - - 
cuda_convert_f16_to_f32(input16, input16_size, state.delta); - - if (l.binary || l.xnor) swap_binary(&l); - if (l.xnor) gradient_array_ongpu(original_input, l.batch*l.c*l.h*l.w, HARDTAN, state.delta); - } - } - else { - //#else // CUDNN_HALF - - if(l.batch_normalize){ - backward_batchnorm_layer_gpu(l, state); - } - - if (!state.net.adversarial && !l.train_only_bn) { - - float *old_input = state.input; - /* - if (l.reverse) { - if (*state.net.max_output16_size < l.inputs*l.batch) { - *state.net.max_output16_size = l.inputs*l.batch; - if (*state.net.output16_gpu) cuda_free(*state.net.output16_gpu); - assert(*state.net.max_output16_size > 0); - *state.net.output16_gpu = cuda_make_array(NULL, *state.net.max_output16_size); - } - mult_inverse_array_gpu(state.input, *state.net.output16_gpu, l.inputs*l.batch, l.reverse); - state.input = *state.net.output16_gpu; - } - */ - - - // calculate conv weight updates - // if used: beta=1 then loss decreases faster - CHECK_CUDNN(cudnnConvolutionBackwardFilter(cudnn_handle(), - &one, - l.srcTensorDesc, - state.input, - l.ddstTensorDesc, - l.delta_gpu, - l.convDesc, - l.bf_algo, - state.workspace, - l.workspace_size, - &one, - l.dweightDesc, - l.weight_updates_gpu)); - - state.input = old_input; - } - - - if (state.delta) { - if (l.binary || l.xnor) swap_binary(&l); - - float *old_weights = l.weights_gpu; - - if (l.reverse) { - if (*state.net.max_output16_size < l.nweights) { - *state.net.max_output16_size = l.nweights; - if (*state.net.output16_gpu && *state.net.max_output16_size > 0) cuda_free(*state.net.output16_gpu); - assert(*state.net.max_output16_size > 0); - *state.net.output16_gpu = cuda_make_array(NULL, l.nweights); - } - mult_inverse_array_gpu(l.weights_gpu, *state.net.output16_gpu, l.nweights, l.reverse); - l.weights_gpu = *state.net.output16_gpu; - } - - - - // http://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/index.html#cudnnConvolutionBackwardData - // calculate delta for the next layer - 
CHECK_CUDNN(cudnnConvolutionBackwardData(cudnn_handle(), - &one, - l.weightDesc, - l.weights_gpu, - l.ddstTensorDesc, - l.delta_gpu, - l.convDesc, - l.bd_algo, - state.workspace, - l.workspace_size, - &one, - l.dsrcTensorDesc, - state.delta)); - - l.weights_gpu = old_weights; - - if (l.binary || l.xnor) swap_binary(&l); - if (l.xnor) gradient_array_ongpu(original_input, l.batch*l.c*l.h*l.w, HARDTAN, state.delta); - } - } - -//#endif // CUDNN_HALF - -#else // CUDNN - if (l.batch_normalize) { - backward_batchnorm_layer_gpu(l, state); - } - - int m = l.n / l.groups; - int n = l.size*l.size*l.c / l.groups; - int k = l.out_w*l.out_h; - - int i, j; - for(i = 0; i < l.batch; ++i){ - for (j = 0; j < l.groups; ++j) { - float * a = l.delta_gpu + (i*l.groups + j)*m*k; - float * b = state.workspace; - float * c = l.weight_updates_gpu + j*l.nweights / l.groups; - - float *im = state.input + (i*l.groups + j)*l.c / l.groups*l.h*l.w; - - if (!state.net.adversarial && !l.train_only_bn) { - //im2col_ongpu(im, l.c / l.groups, l.h, l.w, l.size, l.stride, l.pad, state.workspace); - im2col_gpu_ext(im, // input - l.c / l.groups, // input channels - l.h, l.w, // input size (h, w) - l.size, l.size, // kernel size (h, w) - l.pad * l.dilation, l.pad * l.dilation, // padding (h, w) - l.stride_y, l.stride_x, // stride (h, w) - l.dilation, l.dilation, // dilation (h, w) - state.workspace); // output - //gemm_ongpu(0, 1, m, n, k, 1, a + i*m*k, k, b, k, 1, c, n); - gemm_ongpu(0, 1, m, n, k, 1, a, k, b, k, 1, c, n); - } - - if (state.delta) { - if (l.binary || l.xnor) swap_binary(&l); - float * a = l.weights_gpu + j*l.nweights / l.groups; - float * b = l.delta_gpu + (i*l.groups + j)*m*k; - float * c = state.workspace; - - //gemm_ongpu(1, 0, n, k, m, 1, a, n, b + i*k*m, k, 0, c, k); - gemm_ongpu(1, 0, n, k, m, 1, a, n, b, k, 0, c, k); - - - float *delta = state.delta + (i*l.groups + j)*l.c / l.groups*l.h*l.w; - - //col2im_ongpu(state.workspace, l.c / l.groups, l.h, l.w, l.size, l.stride, l.pad, 
delta); - col2im_gpu_ext( - state.workspace, // input - l.c / l.groups, // input channels - l.h, l.w, // input size (h, w) - l.size, l.size, // kernel size (h, w) - l.pad * l.dilation, l.pad * l.dilation, // padding size (h, w) - l.stride_y, l.stride_x, // stride size (h, w) - l.dilation, l.dilation, // dilation size (h, w) - delta); // output (delta) - - if (l.binary || l.xnor) { - swap_binary(&l); - } - if (l.xnor) gradient_array_ongpu(original_input + i*l.c*l.h*l.w, l.c*l.h*l.w, HARDTAN, state.delta + i*l.c*l.h*l.w); - } - } - } -#endif - if (state.net.try_fix_nan) { - if (state.delta) { - reset_nan_and_inf(state.delta, l.inputs * l.batch); - } - int size = l.nweights; - reset_nan_and_inf(l.weight_updates_gpu, size); - fix_nan_and_inf(l.weights_gpu, size); - } -} - -__global__ void calc_avg_activation_kernel(float *src, float *dst, int size, int channels, int batches) -{ - int i = blockIdx.x * blockDim.x + threadIdx.x; - int xy = i % size; - int b = i / size; - - if (i < size*batches) { - dst[i] = 0; - for (int c = 0; c < channels; ++c) { - dst[i] += src[xy + size*(c + channels*b)]; - } - dst[i] = dst[i] / channels; - } -} - -void calc_avg_activation_gpu(float *src, float *dst, int size, int channels, int batches) -{ - const int num_blocks = get_number_of_blocks(size*batches, BLOCK); - - calc_avg_activation_kernel << > > (src, dst, size, channels, batches); -} - - -__global__ void assisted_activation_kernel(float alpha, float *output, float *gt_gpu, float *a_avg_gpu, int size, int channels, int batches) -{ - int i = blockIdx.x * blockDim.x + threadIdx.x; - int xy = i % size; - int b = i / size; - - if (b < batches) { - for (int c = 0; c < channels; ++c) { - output[xy + size*(c + channels*b)] += alpha * gt_gpu[i] * a_avg_gpu[i]; - //output[xy + size*(c + channels*b)] += gt_gpu[i] * a_avg_gpu[i]; - //output[xy + size*(c + channels*b)] += gt_gpu[i] * output[xy + size*(c + channels*b)]; - //output[xy + size*(c + channels*b)] = a_avg_gpu[i]; - } - } -} - -void 
assisted_activation_gpu(float alpha, float *output, float *gt_gpu, float *a_avg_gpu, int size, int channels, int batches) -{ - const int num_blocks = get_number_of_blocks(size*batches, BLOCK); - - assisted_activation_kernel << > > (alpha, output, gt_gpu, a_avg_gpu, size, channels, batches); -} - - -__global__ void assisted_activation2_kernel(float alpha, float *output, float *gt_gpu, float *a_avg_gpu, int size, int channels, int batches) -{ - int i = blockIdx.x * blockDim.x + threadIdx.x; - int xy = i % size; - int b = i / size; - float beta = 1 - alpha; - - if (b < batches) { - for (int c = 0; c < channels; ++c) { - if(gt_gpu[i] == 0) - output[xy + size*(c + channels*b)] *= beta; - - } - } -} - -void assisted_activation2_gpu(float alpha, float *output, float *gt_gpu, float *a_avg_gpu, int size, int channels, int batches) -{ - const int num_blocks = get_number_of_blocks(size*batches, BLOCK); - - assisted_activation2_kernel << > > (alpha, output, gt_gpu, a_avg_gpu, size, channels, batches); -} - -void assisted_excitation_forward_gpu(convolutional_layer l, network_state state) -{ - const int iteration_num = get_current_iteration(state.net); //(*state.net.seen) / (state.net.batch*state.net.subdivisions); - - // epoch - //const float epoch = (float)(*state.net.seen) / state.net.train_images_num; - - // calculate alpha - //const float alpha = (1 + cos(3.141592 * iteration_num)) / (2 * state.net.max_batches); - //const float alpha = (1 + cos(3.141592 * epoch)) / (2 * state.net.max_batches); - float alpha = (1 + cos(3.141592 * iteration_num / state.net.max_batches)) / 2; - //float alpha = (1 + cos(3.141592 * iteration_num / state.net.max_batches)); - - if (l.assisted_excitation == 1) { - if (iteration_num > state.net.max_batches / 2) return; - } - else { - if (iteration_num < state.net.burn_in) return; - else - if (iteration_num > l.assisted_excitation) return; - else - alpha = (1 + cos(3.141592 * iteration_num / (state.net.burn_in + l.assisted_excitation))) / 2; // from 
1 to 0 - } - - //printf("\n epoch = %f, alpha = %f, seen = %d, max_batches = %d, train_images_num = %d \n", - // epoch, alpha, (*state.net.seen), state.net.max_batches, state.net.train_images_num); - - //const int size = l.outputs * l.batch; - - float *a_avg = (float *)calloc(l.out_w * l.out_h * l.batch, sizeof(float)); - float *gt = (float *)calloc(l.out_w * l.out_h * l.batch, sizeof(float)); - - int b; - int w, h; - - l.max_boxes = state.net.num_boxes; - l.truths = l.max_boxes*(4 + 1); - - int num_truth = l.batch*l.truths; - float *truth_cpu = (float *)calloc(num_truth, sizeof(float)); - cuda_pull_array(state.truth, truth_cpu, num_truth); - //cudaStreamSynchronize(get_cuda_stream()); - //CHECK_CUDA(cudaPeekAtLastError()); - - for (b = 0; b < l.batch; ++b) - { - // calculate G - int t; - for (t = 0; t < state.net.num_boxes; ++t) { - box truth = float_to_box_stride(truth_cpu + t*(4 + 1) + b*l.truths, 1); - if (!truth.x) break; // continue; - float beta = 0; - //float beta = 1 - alpha; // from 0 to 1 - float dw = (1 - truth.w) * beta; - float dh = (1 - truth.h) * beta; - //printf(" alpha = %f, beta = %f, truth.w = %f, dw = %f, tw+dw = %f, l.out_w = %d \n", alpha, beta, truth.w, dw, truth.w+dw, l.out_w); - - int left = floor((truth.x - (dw + truth.w) / 2) * l.out_w); - int right = ceil((truth.x + (dw + truth.w) / 2) * l.out_w); - int top = floor((truth.y - (dh + truth.h) / 2) * l.out_h); - int bottom = ceil((truth.y + (dh + truth.h) / 2) * l.out_h); - if (left < 0) left = 0; - if (top < 0) top = 0; - if (right > l.out_w) right = l.out_w; - if (bottom > l.out_h) bottom = l.out_h; - - for (w = left; w <= right; w++) { - for (h = top; h < bottom; h++) { - gt[w + l.out_w * h + l.out_w*l.out_h*b] = 1; - } - } - } - } - - cuda_push_array(l.gt_gpu, gt, l.out_w * l.out_h * l.batch); - //cudaStreamSynchronize(get_cuda_stream()); - //CHECK_CUDA(cudaPeekAtLastError()); - - // calc avg_output on GPU - for whole batch - calc_avg_activation_gpu(l.output_gpu, l.a_avg_gpu, l.out_w * 
l.out_h, l.out_c, l.batch); - //cudaStreamSynchronize(get_cuda_stream()); - //CHECK_CUDA(cudaPeekAtLastError()); - - // calc new output - //assisted_activation2_gpu(1, l.output_gpu, l.gt_gpu, l.a_avg_gpu, l.out_w * l.out_h, l.out_c, l.batch); // AE3: gt increases (beta = 1 - alpha = 0) - //assisted_activation2_gpu(alpha, l.output_gpu, l.gt_gpu, l.a_avg_gpu, l.out_w * l.out_h, l.out_c, l.batch); - assisted_activation_gpu(alpha, l.output_gpu, l.gt_gpu, l.a_avg_gpu, l.out_w * l.out_h, l.out_c, l.batch); - //cudaStreamSynchronize(get_cuda_stream()); - //CHECK_CUDA(cudaPeekAtLastError()); - - - - /* - for (b = 0; b < l.batch; ++b) - { - // calculate average A - for (w = 0; w < l.out_w; w++) { - for (h = 0; h < l.out_h; h++) { - for (c = 0; c < l.out_c; c++) { - a_avg[w + l.out_w*(h + l.out_h*b)] += l.output[w + l.out_w*(h + l.out_h*(c + l.out_c*b))]; - } - a_avg[w + l.out_w*(h + l.out_h*b)] /= l.out_c; // a_avg / d - } - } - } - - // change activation - for (b = 0; b < l.batch; ++b) - { - for (w = 0; w < l.out_w; w++) { - for (h = 0; h < l.out_h; h++) { - for (c = 0; c < l.out_c; c++) - { - // a = a + alpha(t) + e(c,i,j) = a + alpha(t) + g(i,j) * avg_a(i,j) / channels - l.output[w + l.out_w*(h + l.out_h*(c + l.out_c*b))] += - alpha * - g[w + l.out_w*(h + l.out_h*b)] * - a_avg[w + l.out_w*(h + l.out_h*b)]; - - //l.output[w + l.out_w*(h + l.out_h*(c + l.out_c*b))] = - // alpha * g[w + l.out_w*(h + l.out_h*b)] * a_avg[w + l.out_w*(h + l.out_h*b)]; - } - } - } - } - */ - - if (0) // visualize ground truth - { -#ifdef OPENCV - cuda_pull_array(l.output_gpu, l.output, l.outputs * l.batch); - cudaStreamSynchronize(get_cuda_stream()); - CHECK_CUDA(cudaPeekAtLastError()); - - for (b = 0; b < l.batch; ++b) - { - printf(" Assisted Excitation alpha = %f \n", alpha); - image img = float_to_image(l.out_w, l.out_h, 1, >[l.out_w*l.out_h*b]); - char buff[100]; - sprintf(buff, "a_excitation_gt_%d", b); - show_image_cv(img, buff); - - //image img2 = float_to_image(l.out_w, l.out_h, 1, 
&l.output[l.out_w*l.out_h*l.out_c*b]); - image img2 = float_to_image_scaled(l.out_w, l.out_h, 1, &l.output[l.out_w*l.out_h*l.out_c*b]); - char buff2[100]; - sprintf(buff2, "a_excitation_output_%d", b); - show_image_cv(img2, buff2); - - /* - int c = l.out_c; - if (c > 4) c = 4; - image img3 = float_to_image(l.out_w, l.out_h, c, &l.output[l.out_w*l.out_h*l.out_c*b]); - image dc = collapse_image_layers(img3, 1); - char buff3[100]; - sprintf(buff3, "a_excitation_act_collapsed_%d", b); - show_image_cv(dc, buff3); - */ - - wait_key_cv(5); - } - wait_until_press_key_cv(); -#endif // OPENCV - } - - free(truth_cpu); - free(gt); - free(a_avg); -} - -void pull_convolutional_layer(convolutional_layer l) -{ - cuda_pull_array_async(l.weights_gpu, l.weights, l.nweights); - cuda_pull_array_async(l.biases_gpu, l.biases, l.n); - cuda_pull_array_async(l.weight_updates_gpu, l.weight_updates, l.nweights); - cuda_pull_array_async(l.bias_updates_gpu, l.bias_updates, l.n); - if (l.batch_normalize){ - cuda_pull_array_async(l.scales_gpu, l.scales, l.n); - cuda_pull_array_async(l.rolling_mean_gpu, l.rolling_mean, l.n); - cuda_pull_array_async(l.rolling_variance_gpu, l.rolling_variance, l.n); - } - if (l.adam){ - cuda_pull_array_async(l.m_gpu, l.m, l.nweights); - cuda_pull_array_async(l.v_gpu, l.v, l.nweights); - } - CHECK_CUDA(cudaPeekAtLastError()); - cudaStreamSynchronize(get_cuda_stream()); -} - -void push_convolutional_layer(convolutional_layer l) -{ - cuda_push_array(l.weights_gpu, l.weights, l.nweights); -#ifdef CUDNN_HALF - assert(l.nweights > 0); - cuda_convert_f32_to_f16(l.weights_gpu, l.nweights, l.weights_gpu16); -#endif - cuda_push_array(l.biases_gpu, l.biases, l.n); - if (l.train) { - cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.nweights); - cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.n); - } - if (l.batch_normalize){ - cuda_push_array(l.scales_gpu, l.scales, l.n); - cuda_push_array(l.rolling_mean_gpu, l.rolling_mean, l.n); - 
cuda_push_array(l.rolling_variance_gpu, l.rolling_variance, l.n); - } - if (l.adam){ - cuda_push_array(l.m_gpu, l.m, l.nweights); - cuda_push_array(l.v_gpu, l.v, l.nweights); - } - CHECK_CUDA(cudaPeekAtLastError()); -} - -void update_convolutional_layer_gpu(layer l, int batch, float learning_rate_init, float momentum, float decay, float loss_scale) -{ - - /* - for (int angle = 0; angle < 360; angle++) { - printf(" angle = %d \n", angle); - smooth_rotate_weights_kernel(l.weights_gpu, l.weight_deform_gpu, l.nweights, l.n, l.size, angle, 0); - - cuda_pull_array(l.weight_deform_gpu, l.weights, l.nweights); - visualize_convolutional_layer(l, "weights", NULL); - wait_key_cv(10); - } - */ - - if (l.deform) { - - //for (l.angle = 0; l.angle < 360; l.angle += 1) - //{ - //stretch_weights_gpu(l.weight_updates_gpu, l.weight_deform_gpu, l.nweights, l.n, l.size, l.angle/180, 1); - //else simple_copy_ongpu(l.nweights, l.weight_updates_gpu, l.weight_deform_gpu); - - if (l.rotate) rotate_weights_gpu(l.weight_updates_gpu, l.weight_deform_gpu, l.nweights, l.n, l.size, 1); - else if (l.sway) sway_and_flip_weights_gpu(l.weight_updates_gpu, l.weight_deform_gpu, l.nweights, l.n, l.size, l.angle, 1); - else if (l.stretch) stretch_weights_gpu(l.weight_updates_gpu, l.weight_deform_gpu, l.nweights, l.n, l.size, 0, 1); - else if (l.stretch_sway) stretch_sway_flip_weights_gpu(l.weight_updates_gpu, l.weight_deform_gpu, l.nweights, l.n, l.size, l.angle, 1); - - //simple_copy_ongpu(l.nweights, l.weight_updates_gpu, l.weight_deform_gpu); - - reduce_and_expand_array_gpu(l.weight_deform_gpu, l.weight_updates_gpu, l.nweights, 4); - - //printf(" angle = %f \n", l.angle); - //cuda_pull_array(l.weight_deform_gpu, l.weights, l.nweights); - //visualize_convolutional_layer(l, "weights", NULL); - //wait_key_cv(10); - //} - - } - - // Loss scale for Mixed-Precision on Tensor-Cores - float learning_rate = learning_rate_init*l.learning_rate_scale / loss_scale; - //float momentum = a.momentum; - //float decay 
= a.decay; - //int batch = a.batch; - - - reset_nan_and_inf(l.weight_updates_gpu, l.nweights); - fix_nan_and_inf(l.weights_gpu, l.nweights); - - // Gradient Centralization - if (l.grad_centr && l.batch_normalize) { - // weights[filters][channels][height][width] - // for(filters) w[f] = w[f] - mean(w[c][h][w]) - gradient_centralization_gpu(l.size, l.size, l.c / l.groups, l.n, l.weight_updates_gpu); - } - - - if (l.adam) { - //adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, a.B1, a.B2, a.eps, decay, learning_rate, l.nweights, batch, a.t); - adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, l.B1, l.B2, l.eps, decay, learning_rate, l.nweights, batch, l.t); - - adam_update_gpu(l.biases_gpu, l.bias_updates_gpu, l.bias_m_gpu, l.bias_v_gpu, l.B1, l.B2, l.eps, decay, learning_rate, l.n, batch, l.t); - if (l.scales_gpu) { - adam_update_gpu(l.scales_gpu, l.scale_updates_gpu, l.scale_m_gpu, l.scale_v_gpu, l.B1, l.B2, l.eps, decay, learning_rate, l.n, batch, l.t); - } - } - else { - //axpy_ongpu(l.nweights, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1); - //axpy_ongpu(l.nweights, learning_rate / batch, l.weight_updates_gpu, 1, l.weights_gpu, 1); - //scal_ongpu(l.nweights, momentum, l.weight_updates_gpu, 1); - axpy_ongpu(l.nweights, -decay*batch*loss_scale, l.weights_gpu, 1, l.weight_updates_gpu, 1); - axpy_ongpu(l.nweights, learning_rate / batch, l.weight_updates_gpu, 1, l.weights_gpu, 1); - scal_ongpu(l.nweights, momentum, l.weight_updates_gpu, 1); - - axpy_ongpu(l.n, learning_rate / batch, l.bias_updates_gpu, 1, l.biases_gpu, 1); - scal_ongpu(l.n, momentum, l.bias_updates_gpu, 1); - - if (l.scales_gpu) { - axpy_ongpu(l.n, learning_rate / batch, l.scale_updates_gpu, 1, l.scales_gpu, 1); - scal_ongpu(l.n, momentum, l.scale_updates_gpu, 1); - } - } - - if (l.deform) { - //for (l.angle = 0; l.angle < 360; l.angle += 4) - //{ - expand_array_gpu(l.weights_gpu, l.weight_deform_gpu, l.nweights, 4); - - 
//simple_copy_ongpu(l.nweights, l.weight_deform_gpu, l.weights_gpu); - - if (l.rotate) rotate_weights_gpu(l.weight_deform_gpu, l.weights_gpu, l.nweights, l.n, l.size, 0); - else if (l.sway) sway_and_flip_weights_gpu(l.weight_deform_gpu, l.weights_gpu, l.nweights, l.n, l.size, l.angle, 0); - else if (l.stretch) stretch_weights_gpu(l.weight_deform_gpu, l.weights_gpu, l.nweights, l.n, l.size, 0, 0); - else if (l.stretch_sway) stretch_sway_flip_weights_gpu(l.weight_deform_gpu, l.weights_gpu, l.nweights, l.n, l.size, l.angle, 0); - - //printf(" angle = %f, reverse = %d \n", l.angle, 0); - //cuda_pull_array(l.weights_gpu, l.weights, l.nweights); - //visualize_convolutional_layer(l, "weights", NULL); - //wait_key_cv(10); - //} - } - - if (l.clip) { - constrain_ongpu(l.nweights, l.clip, l.weights_gpu, 1); - } -} - - - -/* -void update_convolutional_layer_gpu(convolutional_layer layer, int batch, float learning_rate, float momentum, float decay) -{ - int size = layer.size*layer.size*layer.c*layer.n; - axpy_ongpu(layer.n, learning_rate/batch, layer.bias_updates_gpu, 1, layer.biases_gpu, 1); - scal_ongpu(layer.n, momentum, layer.bias_updates_gpu, 1); - - if(layer.scales_gpu){ - axpy_ongpu(layer.n, learning_rate/batch, layer.scale_updates_gpu, 1, layer.scales_gpu, 1); - scal_ongpu(layer.n, momentum, layer.scale_updates_gpu, 1); - } - - if(layer.adam){ - scal_ongpu(size, layer.B1, layer.m_gpu, 1); - scal_ongpu(size, layer.B2, layer.v_gpu, 1); - - axpy_ongpu(size, -decay*batch, layer.weights_gpu, 1, layer.weight_updates_gpu, 1); - - axpy_ongpu(size, -(1-layer.B1), layer.weight_updates_gpu, 1, layer.m_gpu, 1); - mul_ongpu(size, layer.weight_updates_gpu, 1, layer.weight_updates_gpu, 1); - axpy_ongpu(size, (1-layer.B2), layer.weight_updates_gpu, 1, layer.v_gpu, 1); - - adam_gpu(size, layer.weights_gpu, layer.m_gpu, layer.v_gpu, layer.B1, layer.B2, learning_rate/batch, layer.eps, layer.t+1); - fill_ongpu(size, 0, layer.weight_updates_gpu, 1); - }else{ - axpy_ongpu(size, 
-decay*batch, layer.weights_gpu, 1, layer.weight_updates_gpu, 1); // wu = wu - w*decay*batch - axpy_ongpu(size, learning_rate/batch, layer.weight_updates_gpu, 1, layer.weights_gpu, 1); // w = w + wu*lr/batch - scal_ongpu(size, momentum, layer.weight_updates_gpu, 1); // wu = wu*momentum // wu = (wu - w*decay*batch)*momentum - // w = w + (wu - w*decay*batch)*lr/batch = w + wu*lr/batch - w*decay*lr = w*(1-decay*lr) + wu*lr/batch - //wu_prev = (wu_old - w_old*decay*batch)*momentum - - - //weights_update = weights_update_new + (weights_update_old - weights_old*decay*batch)*momentum - weights_new*decay*batch = - // = weights_update_new + weights_update_old*momentum - weights_old*decay*batch*momentum - weights_new*decay*batch - // = weights_update_new + weights_update_old*momentum - (weights_old*momentum + weights_new)*decay*batch - - //------------- RESULT -------------- - // weights_update = weights_update_new + weights_update_old*momentum - (weights_old*momentum + weights_new)*decay*batch - //----------------------------------- - - // weights_newest = weights_new + (weights_update_new + weights_update_old*momentum - (weights_old*momentum + weights_new)*decay*batch)*lr/batch - // = weights_new + weights_update_new*lr/batch + weights_update_old*momentum*lr/batch - weights_old*momentum*decay*batch*lr/batch - weights_new*decay*batch*lr/batch - // = weights_new + weights_update_new*lr/batch + weights_update_old*momentum*lr/batch - weights_old*momentum*decay*lr - weights_new*decay*lr - // = weights_new*(1 - decay*lr) - weights_old*momentum*decay*lr + (weights_update_new + weights_update_old*momentum)*lr/batch - - //------------- RESULT -------------- - // weights_newest = weights_new*(1 - decay*lr) - weights_old*momentum*(decay*lr) + (weights_update_new + weights_update_old*momentum)*lr/batch = - // = weights_new - (weights_new + weights_old*momentum)*decay*lr + (weights_update_new + weights_update_old*momentum)*lr / batch - //----------------------------------- - } -} -*/ 
diff --git a/src/Detector/darknet/src/convolutional_layer.c b/src/Detector/darknet/src/convolutional_layer.c deleted file mode 100644 index 1d52dd1d2..000000000 --- a/src/Detector/darknet/src/convolutional_layer.c +++ /dev/null @@ -1,1690 +0,0 @@ -#include "convolutional_layer.h" -#include "utils.h" -#include "batchnorm_layer.h" -#include "im2col.h" -#include "col2im.h" -#include "blas.h" -#include "gemm.h" -#include "box.h" -#include -#include - -#ifdef AI2 -#include "xnor_layer.h" -#endif - -#ifdef __cplusplus -#define PUT_IN_REGISTER -#else -#define PUT_IN_REGISTER register -#endif - -#ifndef AI2 -#define AI2 0 -void forward_xnor_layer(layer l, network_state state); -#endif - -void swap_binary(convolutional_layer *l) -{ - float *swap = l->weights; - l->weights = l->binary_weights; - l->binary_weights = swap; - - #ifdef GPU - swap = l->weights_gpu; - l->weights_gpu = l->binary_weights_gpu; - l->binary_weights_gpu = swap; - #endif -} - -void binarize_weights(float *weights, int n, int size, float *binary) -{ - int i, f; - for(f = 0; f < n; ++f){ - float mean = 0; - for(i = 0; i < size; ++i){ - mean += fabs(weights[f*size + i]); - } - mean = mean / size; - for(i = 0; i < size; ++i){ - binary[f*size + i] = (weights[f*size + i] > 0) ? mean: -mean; - } - } -} - -void binarize_cpu(float *input, int n, float *binary) -{ - int i; - for(i = 0; i < n; ++i){ - binary[i] = (input[i] > 0) ? 1 : -1; - } -} - -void binarize_input(float *input, int n, int size, float *binary) -{ - int i, s; - for(s = 0; s < size; ++s){ - float mean = 0; - for(i = 0; i < n; ++i){ - mean += fabs(input[i*size + s]); - } - mean = mean / n; - for(i = 0; i < n; ++i){ - binary[i*size + s] = (input[i*size + s] > 0) ? 
mean : -mean; - } - } -} - -int convolutional_out_height(convolutional_layer l) -{ - return (l.h + 2*l.pad - l.size) / l.stride_y + 1; -} - -int convolutional_out_width(convolutional_layer l) -{ - return (l.w + 2*l.pad - l.size) / l.stride_x + 1; -} - -image get_convolutional_image(convolutional_layer l) -{ - int h,w,c; - h = convolutional_out_height(l); - w = convolutional_out_width(l); - c = l.n; - return float_to_image(w,h,c,l.output); -} - -image get_convolutional_delta(convolutional_layer l) -{ - int h,w,c; - h = convolutional_out_height(l); - w = convolutional_out_width(l); - c = l.n; - return float_to_image(w,h,c,l.delta); -} - -size_t get_workspace_size32(layer l){ -#ifdef CUDNN - if(gpu_index >= 0){ - size_t most = 0; - size_t s = 0; - CHECK_CUDNN(cudnnGetConvolutionForwardWorkspaceSize(cudnn_handle(), - l.srcTensorDesc, - l.weightDesc, - l.convDesc, - l.dstTensorDesc, - l.fw_algo, - &s)); - if (s > most) most = s; - CHECK_CUDNN(cudnnGetConvolutionBackwardFilterWorkspaceSize(cudnn_handle(), - l.srcTensorDesc, - l.ddstTensorDesc, - l.convDesc, - l.dweightDesc, - l.bf_algo, - &s)); - if (s > most && l.train) most = s; - CHECK_CUDNN(cudnnGetConvolutionBackwardDataWorkspaceSize(cudnn_handle(), - l.weightDesc, - l.ddstTensorDesc, - l.convDesc, - l.dsrcTensorDesc, - l.bd_algo, - &s)); - if (s > most && l.train) most = s; - return most; - } - #endif - if (l.xnor) { - size_t re_packed_input_size = l.c * l.w * l.h * sizeof(float); - size_t workspace_size = (size_t)l.bit_align*l.size*l.size*l.c * sizeof(float); - if (workspace_size < re_packed_input_size) workspace_size = re_packed_input_size; - return workspace_size; - } - return (size_t)l.out_h*l.out_w*l.size*l.size*(l.c / l.groups)*sizeof(float); -} - -size_t get_workspace_size16(layer l) { -#if defined(CUDNN) && defined(CUDNN_HALF) - if (gpu_index >= 0) { - size_t most = 0; - size_t s = 0; - CHECK_CUDNN(cudnnGetConvolutionForwardWorkspaceSize(cudnn_handle(), - l.srcTensorDesc16, - l.weightDesc16, - l.convDesc, - 
l.dstTensorDesc16, - l.fw_algo16, - &s)); - if (s > most) most = s; - CHECK_CUDNN(cudnnGetConvolutionBackwardFilterWorkspaceSize(cudnn_handle(), - l.srcTensorDesc16, - l.ddstTensorDesc16, - l.convDesc, - l.dweightDesc16, - l.bf_algo16, - &s)); - if (s > most && l.train) most = s; - CHECK_CUDNN(cudnnGetConvolutionBackwardDataWorkspaceSize(cudnn_handle(), - l.weightDesc16, - l.ddstTensorDesc16, - l.convDesc, - l.dsrcTensorDesc16, - l.bd_algo16, - &s)); - if (s > most && l.train) most = s; - return most; - } -#endif - return 0; - //if (l.xnor) return (size_t)l.bit_align*l.size*l.size*l.c * sizeof(float); - //return (size_t)l.out_h*l.out_w*l.size*l.size*l.c * sizeof(float); -} - -size_t get_convolutional_workspace_size(layer l) { - size_t workspace_size = get_workspace_size32(l); - size_t workspace_size16 = get_workspace_size16(l); - if (workspace_size16 > workspace_size) workspace_size = workspace_size16; - return workspace_size; -} -#ifdef GPU -#ifdef CUDNN -void create_convolutional_cudnn_tensors(layer *l) -{ - CHECK_CUDNN(cudnnCreateTensorDescriptor(&l->normTensorDesc)); - - CHECK_CUDNN(cudnnCreateTensorDescriptor(&l->normDstTensorDesc)); - CHECK_CUDNN(cudnnCreateTensorDescriptor(&l->srcTensorDesc)); - CHECK_CUDNN(cudnnCreateTensorDescriptor(&l->dstTensorDesc)); - CHECK_CUDNN(cudnnCreateFilterDescriptor(&l->weightDesc)); - CHECK_CUDNN(cudnnCreateTensorDescriptor(&l->dsrcTensorDesc)); - CHECK_CUDNN(cudnnCreateTensorDescriptor(&l->ddstTensorDesc)); - CHECK_CUDNN(cudnnCreateFilterDescriptor(&l->dweightDesc)); - - CHECK_CUDNN(cudnnCreateTensorDescriptor(&l->normDstTensorDescF16)); - CHECK_CUDNN(cudnnCreateTensorDescriptor(&l->srcTensorDesc16)); - CHECK_CUDNN(cudnnCreateTensorDescriptor(&l->dstTensorDesc16)); - CHECK_CUDNN(cudnnCreateFilterDescriptor(&l->weightDesc16)); - CHECK_CUDNN(cudnnCreateTensorDescriptor(&l->dsrcTensorDesc16)); - CHECK_CUDNN(cudnnCreateTensorDescriptor(&l->ddstTensorDesc16)); - CHECK_CUDNN(cudnnCreateFilterDescriptor(&l->dweightDesc16)); - - 
CHECK_CUDNN(cudnnCreateConvolutionDescriptor(&l->convDesc)); -} - -void cudnn_convolutional_setup(layer *l, int cudnn_preference, size_t workspace_size_specify) -{ - -// CUDNN_HALF - // TRUE_HALF_CONFIG is only supported on architectures with true fp16 support (compute capability 5.3 and 6.0): - // Tegra X1, Jetson TX1, DRIVE CX, DRIVE PX, Quadro GP100, Tesla P100 - // PSEUDO_HALF_CONFIG is required for Tensor Cores - our case! - - cudnnDataType_t data_type = CUDNN_DATA_FLOAT; - -#if(CUDNN_MAJOR >= 7) - // Tensor Core uses CUDNN_TENSOR_OP_MATH instead of CUDNN_DEFAULT_MATH - // For *_ALGO_WINOGRAD_NONFUSED can be used CUDNN_DATA_FLOAT - // otherwise Input, Filter and Output descriptors (xDesc, yDesc, wDesc, dxDesc, dyDesc and dwDesc as applicable) have dataType = CUDNN_DATA_HALF - // Three techniques for training using Mixed-precision: https://devblogs.nvidia.com/mixed-precision-training-deep-neural-networks/ - // 1. Accumulation into FP32 - // 2. Loss Scaling - required only for: activation gradients. We do not use. - // 3. FP32 Master Copy of Weights - // More: http://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/index.html#tensor_ops - if (l->groups < 1) l->groups = 1; - if (l->stride_x < 1) l->stride_x = 1; - if (l->stride_y < 1) l->stride_y = 1; - CHECK_CUDNN(cudnnSetConvolutionGroupCount(l->convDesc, l->groups)); - CHECK_CUDNN(cudnnSetConvolutionMathType(l->convDesc, CUDNN_TENSOR_OP_MATH)); -#if((CUDNN_MAJOR*10 + CUDNN_MINOR) >= 72) // cuDNN >= 7.2 - //CHECK_CUDNN(cudnnSetConvolutionMathType(l->convDesc, CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION)); // reduces the speed of regular and group convolution -#endif -#else //if(CUDNN_MAJOR >= 7) - if (l->groups > 1) { - error("CUDNN < 7 doesn't support groups, please upgrade!"); - } -#endif - - // INT8_CONFIG, INT8_EXT_CONFIG, INT8x4_CONFIG and INT8x4_EXT_CONFIG are only supported - // on architectures with DP4A support (compute capability 6.1 and later). 
- //cudnnDataType_t data_type = CUDNN_DATA_INT8; - - // backward delta - CHECK_CUDNN(cudnnSetTensor4dDescriptor(l->dsrcTensorDesc, CUDNN_TENSOR_NCHW, data_type, l->batch, l->c, l->h, l->w)); - CHECK_CUDNN(cudnnSetTensor4dDescriptor(l->ddstTensorDesc, CUDNN_TENSOR_NCHW, data_type, l->batch, l->out_c, l->out_h, l->out_w)); - CHECK_CUDNN(cudnnSetFilter4dDescriptor(l->dweightDesc, data_type, CUDNN_TENSOR_NCHW, l->n, l->c / l->groups, l->size, l->size)); - - // forward - CHECK_CUDNN(cudnnSetTensor4dDescriptor(l->srcTensorDesc, CUDNN_TENSOR_NCHW, data_type, l->batch, l->c, l->h, l->w)); - CHECK_CUDNN(cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, data_type, l->batch, l->out_c, l->out_h, l->out_w)); - CHECK_CUDNN(cudnnSetFilter4dDescriptor(l->weightDesc, data_type, CUDNN_TENSOR_NCHW, l->n, l->c / l->groups, l->size, l->size)); - - // backward delta - CHECK_CUDNN(cudnnSetTensor4dDescriptor(l->dsrcTensorDesc16, CUDNN_TENSOR_NCHW, CUDNN_DATA_HALF, l->batch, l->c, l->h, l->w)); - CHECK_CUDNN(cudnnSetTensor4dDescriptor(l->ddstTensorDesc16, CUDNN_TENSOR_NCHW, CUDNN_DATA_HALF, l->batch, l->out_c, l->out_h, l->out_w)); - CHECK_CUDNN(cudnnSetFilter4dDescriptor(l->dweightDesc16, CUDNN_DATA_HALF, CUDNN_TENSOR_NCHW, l->n, l->c / l->groups, l->size, l->size)); - - // forward - CHECK_CUDNN(cudnnSetTensor4dDescriptor(l->srcTensorDesc16, CUDNN_TENSOR_NCHW, CUDNN_DATA_HALF, l->batch, l->c, l->h, l->w)); - CHECK_CUDNN(cudnnSetTensor4dDescriptor(l->dstTensorDesc16, CUDNN_TENSOR_NCHW, CUDNN_DATA_HALF, l->batch, l->out_c, l->out_h, l->out_w)); - CHECK_CUDNN(cudnnSetFilter4dDescriptor(l->weightDesc16, CUDNN_DATA_HALF, CUDNN_TENSOR_NCHW, l->n, l->c / l->groups, l->size, l->size)); - - // batch norm - CHECK_CUDNN(cudnnSetTensor4dDescriptor(l->normDstTensorDescF16, CUDNN_TENSOR_NCHW, CUDNN_DATA_HALF, l->batch, l->out_c, l->out_h, l->out_w)); - - // batch norm - CHECK_CUDNN(cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1)); - 
CHECK_CUDNN(cudnnSetTensor4dDescriptor(l->normDstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w)); - - //printf("\n l->dilation = %d, l->pad = %d, l->size = %d, l->stride = %d, l->stride_x = %d, l->stride_y = %d, l->groups = %d, l->w = %d, l->h = %d, l->c = %d, l->n = %d, l->out_w = %d, l->out_h = %d, l->out_c = %d, l->batch = %d, data_type = %d \n", - // l->dilation, l->pad, l->size, l->stride, l->stride_x, l->stride_y, l->groups, l->w, l->h, l->c, l->n, l->out_w, l->out_h, l->out_c, l->batch, data_type); -#if(CUDNN_MAJOR >= 6) - CHECK_CUDNN(cudnnSetConvolution2dDescriptor(l->convDesc, l->pad * l->dilation, l->pad * l->dilation, l->stride_y, l->stride_x, l->dilation, l->dilation, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT)); // cudnn >= 6.0 -#else - CHECK_CUDNN(cudnnSetConvolution2dDescriptor(l->convDesc, l->pad * l->dilation, l->pad * l->dilation, l->stride_y, l->stride_x, l->dilation, l->dilation, CUDNN_CROSS_CORRELATION)); // cudnn 5.1 -#endif - - -#if CUDNN_MAJOR >= 8 - - if (cudnn_preference == cudnn_smallest) - { - workspace_size_specify = 0; - } - - size_t free_memory, total_memory; - int requested_algo_count = 0, returned_algo_count = 0; - int found_conv_algorithm = 0; - float min_time = 1000000; // 1000 sec - - // FWD - cudnnConvolutionFwdAlgoPerf_t conv_fwd_results[100]; - CHECK_CUDNN(cudnnGetConvolutionForwardAlgorithmMaxCount(cudnn_handle(), &requested_algo_count)); - - CHECK_CUDNN(cudnnGetConvolutionForwardAlgorithm_v7(cudnn_handle(), - l->srcTensorDesc, - l->weightDesc, - l->convDesc, - l->dstTensorDesc, - requested_algo_count, // (cudnnConvolutionFwdPreference_t)forward_algo, - &returned_algo_count, // workspace_size_specify, - conv_fwd_results)); - - CHECK_CUDA(cudaMemGetInfo(&free_memory, &total_memory)); - - found_conv_algorithm = 0; - min_time = 1000000; // 1000 sec - for (int i = 0; i < returned_algo_count; i++) - { - if (conv_fwd_results[i].status == CUDNN_STATUS_SUCCESS && - conv_fwd_results[i].algo != 
CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED && - conv_fwd_results[i].memory < free_memory && - (conv_fwd_results[i].memory <= workspace_size_specify || cudnn_preference == cudnn_fastest) && - conv_fwd_results[i].time < min_time) - { - found_conv_algorithm = 1; - l->fw_algo = conv_fwd_results[i].algo; - min_time = conv_fwd_results[i].time; - //printf(" - cuDNN FWD algo: %d, time = %f ms \n", l->fw_algo, min_time); - } - } - - if (!found_conv_algorithm) { - printf(" Error: cuDNN isn't found FWD algo for convolution.\n"); - getchar(); - exit(0); - } - //printf(" cuDNN FWD algo: %d, time = %f ms \n", l->fw_algo, min_time); - - // Bwd-Data - cudnnConvolutionBwdDataAlgoPerf_t conv_bwd_data_results[100]; - CHECK_CUDNN(cudnnGetConvolutionBackwardDataAlgorithmMaxCount(cudnn_handle(), &requested_algo_count)); - - CHECK_CUDNN(cudnnGetConvolutionBackwardDataAlgorithm_v7(cudnn_handle(), - l->weightDesc, - l->ddstTensorDesc, - l->convDesc, - l->dsrcTensorDesc, - requested_algo_count, // (cudnnConvolutionFwdPreference_t)forward_algo, - &returned_algo_count, // workspace_size_specify, - &conv_bwd_data_results[0])); - - CHECK_CUDA(cudaMemGetInfo(&free_memory, &total_memory)); - - found_conv_algorithm = 0; - min_time = 1000000; // 1000 sec - for (int i = 0; i < returned_algo_count; i++) - { - if (conv_bwd_data_results[i].status == CUDNN_STATUS_SUCCESS && - conv_bwd_data_results[i].memory < free_memory && - (conv_bwd_data_results[i].memory <= workspace_size_specify || cudnn_preference == cudnn_fastest) && - conv_bwd_data_results[i].time < min_time) - { - found_conv_algorithm = 1; - l->bd_algo = conv_bwd_data_results[i].algo; - min_time = conv_bwd_data_results[i].time; - } - } - - if (!found_conv_algorithm) { - printf(" Error: cuDNN isn't found BWD-data algo for convolution.\n"); - getchar(); - exit(0); - } - //printf(" cuDNN BWD-data algo: %d \n", l->bd_algo); - - // Bwd-Filters - cudnnConvolutionBwdFilterAlgoPerf_t conv_bwd_filter_results[100]; - 
CHECK_CUDNN(cudnnGetConvolutionBackwardFilterAlgorithmMaxCount(cudnn_handle(), &requested_algo_count)); - - CHECK_CUDNN(cudnnGetConvolutionBackwardFilterAlgorithm_v7(cudnn_handle(), - l->srcTensorDesc, - l->ddstTensorDesc, - l->convDesc, - l->dweightDesc, - requested_algo_count, // (cudnnConvolutionFwdPreference_t)forward_algo, - &returned_algo_count, // workspace_size_specify, - &conv_bwd_filter_results[0])); - - CHECK_CUDA(cudaMemGetInfo(&free_memory, &total_memory)); - - found_conv_algorithm = 0; - min_time = 1000000; // 1000 sec - for (int i = 0; i < returned_algo_count; i++) - { - if (conv_bwd_filter_results[i].status == CUDNN_STATUS_SUCCESS && - conv_bwd_filter_results[i].memory < free_memory && - (conv_bwd_filter_results[i].memory <= workspace_size_specify || cudnn_preference == cudnn_fastest) && - conv_bwd_filter_results[i].time < min_time) - { - found_conv_algorithm = 1; - l->bf_algo = conv_bwd_filter_results[i].algo; - min_time = conv_bwd_filter_results[i].time; - } - } - - if (!found_conv_algorithm) { - printf(" Error: cuDNN isn't found BWD-filter algo for convolution.\n"); - getchar(); - exit(0); - } - //printf(" cuDNN BWD-filter algo: %d \n", l->bf_algo); - -#else // CUDNN_MAJOR >= 8 - - int forward_algo = CUDNN_CONVOLUTION_FWD_PREFER_FASTEST; - int backward_algo = CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST; - int backward_filter = CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST; - if (cudnn_preference == cudnn_smallest) - { - forward_algo = CUDNN_CONVOLUTION_FWD_NO_WORKSPACE; - backward_algo = CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE; - backward_filter = CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE; - printf(" CUDNN-slow "); - } - if (cudnn_preference == cudnn_specify) - { - forward_algo = CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT; - backward_algo = CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT; - backward_filter = CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT; - //printf(" CUDNN-specified %zu ", workspace_size_specify); - } - - 
CHECK_CUDNN(cudnnGetConvolutionForwardAlgorithm(cudnn_handle(), - l->srcTensorDesc, - l->weightDesc, - l->convDesc, - l->dstTensorDesc, - (cudnnConvolutionFwdPreference_t)forward_algo, - workspace_size_specify, - &l->fw_algo)); - - CHECK_CUDNN(cudnnGetConvolutionBackwardDataAlgorithm(cudnn_handle(), - l->weightDesc, - l->ddstTensorDesc, - l->convDesc, - l->dsrcTensorDesc, - (cudnnConvolutionBwdDataPreference_t)backward_algo, - workspace_size_specify, - &l->bd_algo)); - - CHECK_CUDNN(cudnnGetConvolutionBackwardFilterAlgorithm(cudnn_handle(), - l->srcTensorDesc, - l->ddstTensorDesc, - l->convDesc, - l->dweightDesc, - (cudnnConvolutionBwdFilterPreference_t)backward_filter, - workspace_size_specify, - &l->bf_algo)); -#endif // CUDNN_MAJOR >= 8 - - - //if (data_type == CUDNN_DATA_HALF) - { - // HALF-16 if(data_type == CUDNN_DATA_HALF) - l->fw_algo16 = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM; - l->bd_algo16 = CUDNN_CONVOLUTION_BWD_DATA_ALGO_1; - l->bf_algo16 = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1; - - // FLOAT-32 if(data_type == CUDNN_DATA_FLOAT) - //l->fw_algo16 = CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED; - //l->bd_algo16 = CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED; - //l->bf_algo16 = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED; - } -} -#endif -#endif - - -void free_convolutional_batchnorm(convolutional_layer *l) -{ - if (!l->share_layer) { - if (l->scales) free(l->scales), l->scales = NULL; - if (l->scale_updates) free(l->scale_updates), l->scale_updates = NULL; - if (l->mean) free(l->mean), l->mean = NULL; - if (l->variance) free(l->variance), l->variance = NULL; - if (l->mean_delta) free(l->mean_delta), l->mean_delta = NULL; - if (l->variance_delta) free(l->variance_delta), l->variance_delta = NULL; - if (l->rolling_mean) free(l->rolling_mean), l->rolling_mean = NULL; - if (l->rolling_variance) free(l->rolling_variance), l->rolling_variance = NULL; - if (l->x) free(l->x), l->x = NULL; - if (l->x_norm) free(l->x_norm), l->x_norm = NULL; - 
-#ifdef GPU - if (l->scales_gpu) cuda_free(l->scales_gpu), l->scales_gpu = NULL; - if (l->scale_updates_gpu) cuda_free(l->scale_updates_gpu), l->scale_updates_gpu = NULL; - if (l->mean_gpu) cuda_free(l->mean_gpu), l->mean_gpu = NULL; - if (l->variance_gpu) cuda_free(l->variance_gpu), l->variance_gpu = NULL; - if (l->mean_delta_gpu) cuda_free(l->mean_delta_gpu), l->mean_delta_gpu = NULL; - if (l->variance_delta_gpu) cuda_free(l->variance_delta_gpu), l->variance_delta_gpu = NULL; - if (l->rolling_mean_gpu) cuda_free(l->rolling_mean_gpu), l->rolling_mean_gpu = NULL; - if (l->rolling_variance_gpu) cuda_free(l->rolling_variance_gpu), l->rolling_variance_gpu = NULL; - if (l->x_gpu) cuda_free(l->x_gpu), l->x_gpu = NULL; - if (l->x_norm_gpu) cuda_free(l->x_norm_gpu), l->x_norm_gpu = NULL; -#endif - } -} - -convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, int c, int n, int groups, int size, int stride_x, int stride_y, int dilation, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam, int use_bin_output, int index, int antialiasing, convolutional_layer *share_layer, int assisted_excitation, int deform, int train) -{ - int total_batch = batch*steps; - int i; - convolutional_layer l = { (LAYER_TYPE)0 }; - l.type = CONVOLUTIONAL; - l.train = train; - - if (xnor) groups = 1; // disable groups for XNOR-net - if (groups < 1) groups = 1; - - const int blur_stride_x = stride_x; - const int blur_stride_y = stride_y; - l.antialiasing = antialiasing; - if (antialiasing) { - stride_x = stride_y = l.stride = l.stride_x = l.stride_y = 1; // use stride=1 in host-layer - } - - l.deform = deform; - l.assisted_excitation = assisted_excitation; - l.share_layer = share_layer; - l.index = index; - l.h = h; - l.w = w; - l.c = c; - l.groups = groups; - l.n = n; - l.binary = binary; - l.xnor = xnor; - l.use_bin_output = use_bin_output; - l.batch = batch; - l.steps = steps; - l.stride = stride_x; - l.stride_x = stride_x; - 
l.stride_y = stride_y; - l.dilation = dilation; - l.size = size; - l.pad = padding; - l.batch_normalize = batch_normalize; - l.learning_rate_scale = 1; - l.nweights = (c / groups) * n * size * size; - - if (l.share_layer) { - if (l.size != l.share_layer->size || l.nweights != l.share_layer->nweights || l.c != l.share_layer->c || l.n != l.share_layer->n) { - printf(" Layer size, nweights, channels or filters don't match for the share_layer"); - getchar(); - } - - l.weights = l.share_layer->weights; - l.weight_updates = l.share_layer->weight_updates; - - l.biases = l.share_layer->biases; - l.bias_updates = l.share_layer->bias_updates; - } - else { - l.weights = (float*)xcalloc(l.nweights, sizeof(float)); - l.biases = (float*)xcalloc(n, sizeof(float)); - - if (train) { - l.weight_updates = (float*)xcalloc(l.nweights, sizeof(float)); - l.bias_updates = (float*)xcalloc(n, sizeof(float)); - - l.weights_ema = (float*)xcalloc(l.nweights, sizeof(float)); - l.biases_ema = (float*)xcalloc(n, sizeof(float)); - } - } - - // float scale = 1./sqrt(size*size*c); - float scale = sqrt(2./(size*size*c/groups)); - if (l.activation == NORM_CHAN || l.activation == NORM_CHAN_SOFTMAX || l.activation == NORM_CHAN_SOFTMAX_MAXVAL) { - for (i = 0; i < l.nweights; ++i) l.weights[i] = 1; // rand_normal(); - } - else { - for (i = 0; i < l.nweights; ++i) l.weights[i] = scale*rand_uniform(-1, 1); // rand_normal(); - } - int out_h = convolutional_out_height(l); - int out_w = convolutional_out_width(l); - l.out_h = out_h; - l.out_w = out_w; - l.out_c = n; - l.outputs = l.out_h * l.out_w * l.out_c; - l.inputs = l.w * l.h * l.c; - l.activation = activation; - - l.output = (float*)xcalloc(total_batch*l.outputs, sizeof(float)); -#ifndef GPU - if (train) l.delta = (float*)xcalloc(total_batch*l.outputs, sizeof(float)); -#endif // not GPU - - l.forward = forward_convolutional_layer; - l.backward = backward_convolutional_layer; - l.update = update_convolutional_layer; - if(binary){ - l.binary_weights = 
(float*)xcalloc(l.nweights, sizeof(float)); - l.cweights = (char*)xcalloc(l.nweights, sizeof(char)); - l.scales = (float*)xcalloc(n, sizeof(float)); - } - if(xnor){ - l.binary_weights = (float*)xcalloc(l.nweights, sizeof(float)); - l.binary_input = (float*)xcalloc(l.inputs * l.batch, sizeof(float)); - - int align = 32;// 8; - int src_align = l.out_h*l.out_w; - l.bit_align = src_align + (align - src_align % align); - - l.mean_arr = (float*)xcalloc(l.n, sizeof(float)); - - const size_t new_c = l.c / 32; - size_t in_re_packed_input_size = new_c * l.w * l.h + 1; - l.bin_re_packed_input = (uint32_t*)xcalloc(in_re_packed_input_size, sizeof(uint32_t)); - - l.lda_align = 256; // AVX2 - int k = l.size*l.size*l.c; - size_t k_aligned = k + (l.lda_align - k%l.lda_align); - size_t t_bit_input_size = k_aligned * l.bit_align / 8; - l.t_bit_input = (char*)xcalloc(t_bit_input_size, sizeof(char)); - } - - if(batch_normalize){ - if (l.share_layer) { - l.scales = l.share_layer->scales; - l.scale_updates = l.share_layer->scale_updates; - l.mean = l.share_layer->mean; - l.variance = l.share_layer->variance; - l.mean_delta = l.share_layer->mean_delta; - l.variance_delta = l.share_layer->variance_delta; - l.rolling_mean = l.share_layer->rolling_mean; - l.rolling_variance = l.share_layer->rolling_variance; - } - else { - l.scales = (float*)xcalloc(n, sizeof(float)); - for (i = 0; i < n; ++i) { - l.scales[i] = 1; - } - if (train) { - l.scales_ema = (float*)xcalloc(n, sizeof(float)); - l.scale_updates = (float*)xcalloc(n, sizeof(float)); - - l.mean = (float*)xcalloc(n, sizeof(float)); - l.variance = (float*)xcalloc(n, sizeof(float)); - - l.mean_delta = (float*)xcalloc(n, sizeof(float)); - l.variance_delta = (float*)xcalloc(n, sizeof(float)); - } - l.rolling_mean = (float*)xcalloc(n, sizeof(float)); - l.rolling_variance = (float*)xcalloc(n, sizeof(float)); - } - -#ifndef GPU - if (train) { - l.x = (float*)xcalloc(total_batch * l.outputs, sizeof(float)); - l.x_norm = 
(float*)xcalloc(total_batch * l.outputs, sizeof(float)); - } -#endif // not GPU - } - -#ifndef GPU - if (l.activation == SWISH || l.activation == MISH || l.activation == HARD_MISH) l.activation_input = (float*)calloc(total_batch*l.outputs, sizeof(float)); -#endif // not GPU - - if(adam){ - l.adam = 1; - l.m = (float*)xcalloc(l.nweights, sizeof(float)); - l.v = (float*)xcalloc(l.nweights, sizeof(float)); - l.bias_m = (float*)xcalloc(n, sizeof(float)); - l.scale_m = (float*)xcalloc(n, sizeof(float)); - l.bias_v = (float*)xcalloc(n, sizeof(float)); - l.scale_v = (float*)xcalloc(n, sizeof(float)); - } - -#ifdef GPU - - - l.forward_gpu = forward_convolutional_layer_gpu; - l.backward_gpu = backward_convolutional_layer_gpu; - l.update_gpu = update_convolutional_layer_gpu; - - if(gpu_index >= 0){ - - if (train && (l.activation == SWISH || l.activation == MISH || l.activation == HARD_MISH)) { - l.activation_input_gpu = cuda_make_array(l.activation_input, total_batch*l.outputs); - } - - if (l.deform) l.weight_deform_gpu = cuda_make_array(NULL, l.nweights); - - if (adam) { - l.m_gpu = cuda_make_array(l.m, l.nweights); - l.v_gpu = cuda_make_array(l.v, l.nweights); - l.bias_m_gpu = cuda_make_array(l.bias_m, n); - l.bias_v_gpu = cuda_make_array(l.bias_v, n); - l.scale_m_gpu = cuda_make_array(l.scale_m, n); - l.scale_v_gpu = cuda_make_array(l.scale_v, n); - } - if (l.share_layer) { - l.weights_gpu = l.share_layer->weights_gpu; - l.weight_updates_gpu = l.share_layer->weight_updates_gpu; - l.weights_gpu16 = l.share_layer->weights_gpu16; - l.weight_updates_gpu16 = l.share_layer->weight_updates_gpu16; - l.biases_gpu = l.share_layer->biases_gpu; - l.bias_updates_gpu = l.share_layer->bias_updates_gpu; - } - else { - l.weights_gpu = cuda_make_array(l.weights, l.nweights); - if (train) l.weight_updates_gpu = cuda_make_array(l.weight_updates, l.nweights); -#ifdef CUDNN_HALF - l.weights_gpu16 = cuda_make_array(NULL, l.nweights / 2 + 1); - if (train) l.weight_updates_gpu16 = 
cuda_make_array(NULL, l.nweights / 2 + 1); -#endif // CUDNN_HALF - l.biases_gpu = cuda_make_array(l.biases, n); - if (train) l.bias_updates_gpu = cuda_make_array(l.bias_updates, n); - } - - l.output_gpu = cuda_make_array(l.output, total_batch*out_h*out_w*n); - if (train) l.delta_gpu = cuda_make_array(l.delta, total_batch*out_h*out_w*n); - - if(binary){ - l.binary_weights_gpu = cuda_make_array(l.weights, l.nweights); - } - if(xnor){ - l.binary_weights_gpu = cuda_make_array(l.weights, l.nweights); - l.mean_arr_gpu = cuda_make_array(0, l.n); - l.binary_input_gpu = cuda_make_array(0, l.inputs*l.batch); - } - - if(batch_normalize){ - if (l.share_layer) { - l.scales_gpu = l.share_layer->scales_gpu; - l.scale_updates_gpu = l.share_layer->scale_updates_gpu; - l.mean_gpu = l.share_layer->mean_gpu; - l.variance_gpu = l.share_layer->variance_gpu; - l.rolling_mean_gpu = l.share_layer->rolling_mean_gpu; - l.rolling_variance_gpu = l.share_layer->rolling_variance_gpu; - l.mean_delta_gpu = l.share_layer->mean_delta_gpu; - l.variance_delta_gpu = l.share_layer->variance_delta_gpu; - } - else { - l.scales_gpu = cuda_make_array(l.scales, n); - - if (train) { - l.scale_updates_gpu = cuda_make_array(l.scale_updates, n); - - l.mean_gpu = cuda_make_array(l.mean, n); - l.variance_gpu = cuda_make_array(l.variance, n); - l.m_cbn_avg_gpu = cuda_make_array(l.mean, n); - l.v_cbn_avg_gpu = cuda_make_array(l.variance, n); -#ifndef CUDNN - l.mean_delta_gpu = cuda_make_array(l.mean, n); - l.variance_delta_gpu = cuda_make_array(l.variance, n); -#endif // CUDNN - } - - l.rolling_mean_gpu = cuda_make_array(l.mean, n); - l.rolling_variance_gpu = cuda_make_array(l.variance, n); - } - - if (train) { - l.x_gpu = cuda_make_array(l.output, total_batch*out_h*out_w*n); -#ifndef CUDNN - l.x_norm_gpu = cuda_make_array(l.output, total_batch*out_h*out_w*n); -#endif // CUDNN - } - } - - if (l.assisted_excitation) - { - const int size = l.out_w * l.out_h * l.batch; - l.gt_gpu = cuda_make_array(NULL, size); - 
l.a_avg_gpu = cuda_make_array(NULL, size); - } -#ifdef CUDNN - create_convolutional_cudnn_tensors(&l); - cudnn_convolutional_setup(&l, cudnn_fastest, 0); -#endif // CUDNN - } -#endif // GPU - l.workspace_size = get_convolutional_workspace_size(l); - - //fprintf(stderr, "conv %5d %2d x%2d /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c); - l.bflops = (2.0 * l.nweights * l.out_h*l.out_w) / 1000000000.; - if (l.xnor) l.bflops = l.bflops / 32; - if (l.xnor && l.use_bin_output) fprintf(stderr, "convXB"); - else if (l.xnor) fprintf(stderr, "convX "); - else if (l.share_layer) fprintf(stderr, "convS "); - else if (l.assisted_excitation) fprintf(stderr, "convAE"); - else fprintf(stderr, "conv "); - - if (groups > 1) fprintf(stderr, "%5d/%4d ", n, groups); - else fprintf(stderr, "%5d ", n); - - if (stride_x != stride_y) fprintf(stderr, "%2dx%2d/%2dx%2d ", size, size, stride_x, stride_y); - else { - if (dilation > 1) fprintf(stderr, "%2d x%2d/%2d(%1d)", size, size, stride_x, dilation); - else fprintf(stderr, "%2d x%2d/%2d ", size, size, stride_x); - } - - fprintf(stderr, "%4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", w, h, c, l.out_w, l.out_h, l.out_c, l.bflops); - - //fprintf(stderr, "%5d/%2d %2d x%2d /%2d(%d)%4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", n, groups, size, size, stride, dilation, w, h, c, l.out_w, l.out_h, l.out_c, l.bflops); - - if (l.antialiasing) { - printf("AA: "); - l.input_layer = (layer*)calloc(1, sizeof(layer)); - int blur_size = 3; - int blur_pad = blur_size / 2; - if (l.antialiasing == 2) { - blur_size = 2; - blur_pad = 0; - } - *(l.input_layer) = make_convolutional_layer(batch, steps, out_h, out_w, n, n, n, blur_size, blur_stride_x, blur_stride_y, 1, blur_pad, LINEAR, 0, 0, 0, 0, 0, index, 0, NULL, 0, 0, train); - const int blur_nweights = n * blur_size * blur_size; // (n / n) * n * blur_size * blur_size; - int i; - if (blur_size == 2) { - for (i = 0; i < blur_nweights; i += (blur_size*blur_size)) { - 
l.input_layer->weights[i + 0] = 1 / 4.f; - l.input_layer->weights[i + 1] = 1 / 4.f; - l.input_layer->weights[i + 2] = 1 / 4.f; - l.input_layer->weights[i + 3] = 1 / 4.f; - } - } - else { - for (i = 0; i < blur_nweights; i += (blur_size*blur_size)) { - l.input_layer->weights[i + 0] = 1 / 16.f; - l.input_layer->weights[i + 1] = 2 / 16.f; - l.input_layer->weights[i + 2] = 1 / 16.f; - - l.input_layer->weights[i + 3] = 2 / 16.f; - l.input_layer->weights[i + 4] = 4 / 16.f; - l.input_layer->weights[i + 5] = 2 / 16.f; - - l.input_layer->weights[i + 6] = 1 / 16.f; - l.input_layer->weights[i + 7] = 2 / 16.f; - l.input_layer->weights[i + 8] = 1 / 16.f; - } - } - for (i = 0; i < n; ++i) l.input_layer->biases[i] = 0; -#ifdef GPU - if (gpu_index >= 0) { - l.input_antialiasing_gpu = cuda_make_array(NULL, l.batch*l.outputs); - push_convolutional_layer(*(l.input_layer)); - } -#endif // GPU - } - - return l; -} - -void denormalize_convolutional_layer(convolutional_layer l) -{ - int i, j; - for(i = 0; i < l.n; ++i){ - float scale = l.scales[i]/sqrt(l.rolling_variance[i] + .00001); - for(j = 0; j < l.nweights; ++j){ - l.weights[i*l.nweights + j] *= scale; - } - l.biases[i] -= l.rolling_mean[i] * scale; - l.scales[i] = 1; - l.rolling_mean[i] = 0; - l.rolling_variance[i] = 1; - } -} - -void test_convolutional_layer() -{ - convolutional_layer l = make_convolutional_layer(1, 1, 5, 5, 3, 2, 1, 5, 2, 2, 1, 1, LEAKY, 1, 0, 0, 0, 0, 0, 0, NULL, 0, 0, 0); - l.batch_normalize = 1; - float data[] = {1,1,1,1,1, - 1,1,1,1,1, - 1,1,1,1,1, - 1,1,1,1,1, - 1,1,1,1,1, - 2,2,2,2,2, - 2,2,2,2,2, - 2,2,2,2,2, - 2,2,2,2,2, - 2,2,2,2,2, - 3,3,3,3,3, - 3,3,3,3,3, - 3,3,3,3,3, - 3,3,3,3,3, - 3,3,3,3,3}; - network_state state = {0}; - state.input = data; - forward_convolutional_layer(l, state); -} - -void resize_convolutional_layer(convolutional_layer *l, int w, int h) -{ - int total_batch = l->batch*l->steps; - int old_w = l->w; - int old_h = l->h; - l->w = w; - l->h = h; - int out_w = 
convolutional_out_width(*l); - int out_h = convolutional_out_height(*l); - - l->out_w = out_w; - l->out_h = out_h; - - l->outputs = l->out_h * l->out_w * l->out_c; - l->inputs = l->w * l->h * l->c; - - - l->output = (float*)xrealloc(l->output, total_batch * l->outputs * sizeof(float)); - if (l->train) { - l->delta = (float*)xrealloc(l->delta, total_batch * l->outputs * sizeof(float)); - - if (l->batch_normalize) { - l->x = (float*)xrealloc(l->x, total_batch * l->outputs * sizeof(float)); - l->x_norm = (float*)xrealloc(l->x_norm, total_batch * l->outputs * sizeof(float)); - } - } - - if (l->xnor) { - //l->binary_input = realloc(l->inputs*l->batch, sizeof(float)); - } - - if (l->activation == SWISH || l->activation == MISH || l->activation == HARD_MISH) l->activation_input = (float*)realloc(l->activation_input, total_batch*l->outputs * sizeof(float)); -#ifdef GPU - if (old_w < w || old_h < h || l->dynamic_minibatch) { - if (l->train) { - cuda_free(l->delta_gpu); - l->delta_gpu = cuda_make_array(l->delta, total_batch*l->outputs); - } - - cuda_free(l->output_gpu); - l->output_gpu = cuda_make_array(l->output, total_batch*l->outputs); - - if (l->batch_normalize) { - cuda_free(l->x_gpu); - l->x_gpu = cuda_make_array(l->output, total_batch*l->outputs); - -#ifndef CUDNN - cuda_free(l->x_norm_gpu); - l->x_norm_gpu = cuda_make_array(l->output, total_batch*l->outputs); -#endif // CUDNN - } - - if (l->xnor) { - cuda_free(l->binary_input_gpu); - l->binary_input_gpu = cuda_make_array(0, l->inputs*l->batch); - } - - if (l->activation == SWISH || l->activation == MISH || l->activation == HARD_MISH) { - cuda_free(l->activation_input_gpu); - l->activation_input_gpu = cuda_make_array(l->activation_input, total_batch*l->outputs); - } - - if (l->assisted_excitation) - { - cuda_free(l->gt_gpu); - cuda_free(l->a_avg_gpu); - - const int size = l->out_w * l->out_h * l->batch; - l->gt_gpu = cuda_make_array(NULL, size); - l->a_avg_gpu = cuda_make_array(NULL, size); - } - } -#ifdef CUDNN - 
cudnn_convolutional_setup(l, cudnn_fastest, 0); -#endif -#endif - l->workspace_size = get_convolutional_workspace_size(*l); - -#ifdef CUDNN - // check for excessive memory consumption - size_t free_byte; - size_t total_byte; - CHECK_CUDA(cudaMemGetInfo(&free_byte, &total_byte)); - if (l->workspace_size > free_byte || l->workspace_size >= total_byte / 2) { - printf(" used slow CUDNN algo without Workspace! Need memory: %zu, available: %zu\n", l->workspace_size, (free_byte < total_byte/2) ? free_byte : total_byte/2); - cudnn_convolutional_setup(l, cudnn_smallest, 0); - l->workspace_size = get_convolutional_workspace_size(*l); - } -#endif -} - -void set_specified_workspace_limit(convolutional_layer *l, size_t workspace_size_limit) -{ -#ifdef CUDNN - size_t free_byte; - size_t total_byte; - CHECK_CUDA(cudaMemGetInfo(&free_byte, &total_byte)); - cudnn_convolutional_setup(l, cudnn_specify, workspace_size_limit); - l->workspace_size = get_convolutional_workspace_size(*l); - //printf("Set specified workspace limit for cuDNN: %zu, available: %zu, workspace = %zu \n", workspace_size_limit, free_byte, l->workspace_size); -#endif // CUDNN -} - -void add_bias(float *output, float *biases, int batch, int n, int size) -{ - int i,j,b; - for(b = 0; b < batch; ++b){ - for(i = 0; i < n; ++i){ - for(j = 0; j < size; ++j){ - output[(b*n + i)*size + j] += biases[i]; - } - } - } -} - -void scale_bias(float *output, float *scales, int batch, int n, int size) -{ - int i,j,b; - for(b = 0; b < batch; ++b){ - for(i = 0; i < n; ++i){ - for(j = 0; j < size; ++j){ - output[(b*n + i)*size + j] *= scales[i]; - } - } - } -} - -void backward_bias(float *bias_updates, float *delta, int batch, int n, int size) -{ - int i,b; - for(b = 0; b < batch; ++b){ - for(i = 0; i < n; ++i){ - bias_updates[i] += sum_array(delta+size*(i+b*n), size); - } - } -} - -void gemm_nn_custom(int M, int N, int K, float ALPHA, - float *A, int lda, - float *B, int ldb, - float *C, int ldc) -{ - int i, j, k; - for (i = 0; i < 
M; ++i) { - for (k = 0; k < K; ++k) { - PUT_IN_REGISTER float A_PART = ALPHA * A[i * lda + k]; - //printf("\n weight = %f \n", A_PART); - for (j = 0; j < N; ++j) { - C[i*ldc + j] += A_PART*B[k*ldb + j]; - } - } - } -} - - -void get_mean_array(float *src, size_t size, size_t filters, float *mean_arr) { - size_t i, counter; - counter = 0; - for (i = 0; i < size; i += size / filters) { - mean_arr[counter++] = fabs(src[i]); - } -} - -/* -void float_to_bit(float *src, unsigned char *dst, size_t size) { - - size_t dst_size = size / 8 + 1; - memset(dst, 0, dst_size); - size_t i, dst_i, dst_shift; - for (i = 0; i < size; ++i) { - if (src[i] > 0) set_bit(dst, i); - } -} -*/ - -void bit_to_float(unsigned char *src, float *dst, size_t size, size_t filters, float *mean_arr) { - memset(dst, 0, size *sizeof(float)); - size_t i; - - for (i = 0; i < size; ++i) { - float mean_val = 1; - if(mean_arr != NULL) mean_val = fabs(mean_arr[i / (size / filters)]); - if(get_bit(src, i)) dst[i] = mean_val; - else dst[i] = -mean_val; - } -} - -void binary_align_weights(convolutional_layer *l) -{ - int m = l->n; // (l->n / l->groups) - int k = l->size*l->size*l->c; // ->size*l->size*(l->c / l->groups) - size_t new_lda = k + (l->lda_align - k % l->lda_align); // (k / 8 + 1) * 8; - l->new_lda = new_lda; - - binarize_weights(l->weights, m, k, l->binary_weights); - - size_t align_weights_size = new_lda * m; - l->align_bit_weights_size = align_weights_size / 8 + 1; - float* align_weights = (float*)xcalloc(align_weights_size, sizeof(float)); - l->align_bit_weights = (char*)xcalloc(l->align_bit_weights_size, sizeof(char)); - - size_t i, j; - // align A without transpose - for (i = 0; i < m; ++i) { - for (j = 0; j < k; ++j) { - align_weights[i*new_lda + j] = l->binary_weights[i*k + j]; - } - } - - - if (l->c % 32 == 0) - //if(gpu_index < 0 && l->stride == 1 && l->pad == 1 && l->c % 32 == 0) - //if (l->stride == 1 && l->pad == 1 && l->c % 32 == 0) - { - int fil, chan; - const int items_per_filter = l->c 
* l->size * l->size; - //const int dst_items_per_filter = new_lda; - for (fil = 0; fil < l->n; ++fil) - { - for (chan = 0; chan < l->c; chan += 32) - { - const int items_per_channel = l->size*l->size; - for (i = 0; i < items_per_channel; ++i) - { - //uint32_t val = 0; - int c_pack; - for (c_pack = 0; c_pack < 32; ++c_pack) { - float src = l->binary_weights[fil*items_per_filter + (chan + c_pack)*items_per_channel + i]; - - //align_weights[fil*items_per_filter + chan*items_per_channel + i * 32 + c_pack] = src; - - align_weights[fil*new_lda + chan*items_per_channel + i*32 + c_pack] = src; - //val |= (src << c); - } - - } - } - } - - //printf("\n l.index = %d \t aw[0] = %f, aw[1] = %f, aw[2] = %f, aw[3] = %f \n", l->index, align_weights[0], align_weights[1], align_weights[2], align_weights[3]); - //memcpy(l->binary_weights, align_weights, (l->size * l->size * l->c * l->n) * sizeof(float)); - - float_to_bit(align_weights, (unsigned char*)l->align_bit_weights, align_weights_size); - - //if (l->n >= 32) - if(gpu_index >= 0) - { - //int M = l->n; - //int N = l->out_w*l->out_h; - //printf("\n M = %d, N = %d, M %% 8 = %d, N %% 8 = %d - weights \n", M, N, M % 8, N % 8); - //printf("\n l.w = %d, l.c = %d, l.n = %d \n", l->w, l->c, l->n); - for (i = 0; i < align_weights_size / 8; ++i) l->align_bit_weights[i] = ~(l->align_bit_weights[i]); - } - - - - get_mean_array(l->binary_weights, m*k, l->n, l->mean_arr); - //get_mean_array(l->binary_weights, m*new_lda, l->n, l->mean_arr); - } - else { - float_to_bit(align_weights, (unsigned char*)l->align_bit_weights, align_weights_size); - - get_mean_array(l->binary_weights, m*k, l->n, l->mean_arr); - } - - //l->mean_arr = calloc(l->n, sizeof(float)); - - //get_mean_array(align_weights, align_weights_size, l->n, l->mean_arr); - - - - -#ifdef GPU - cudaError_t status; - l->align_workspace_size = l->bit_align * l->size * l->size * l->c; - status = cudaMalloc((void **)&l->align_workspace_gpu, l->align_workspace_size * sizeof(float)); - status 
= cudaMalloc((void **)&l->transposed_align_workspace_gpu, l->align_workspace_size * sizeof(float)); - CHECK_CUDA(status); - - //l->align_bit_weights_gpu = cuda_make_array(l->align_bit_weights, l->align_bit_weights_size * sizeof(char)/sizeof(float)); - status = cudaMalloc((void **)&l->align_bit_weights_gpu, l->align_bit_weights_size); - CHECK_CUDA(status); - status = cudaMemcpy(l->align_bit_weights_gpu, l->align_bit_weights, l->align_bit_weights_size, cudaMemcpyHostToDevice); - CHECK_CUDA(status); - status = cudaMemcpy(l->binary_weights_gpu, l->binary_weights, m*k * sizeof(float), cudaMemcpyHostToDevice); - CHECK_CUDA(status); - - //l->mean_arr_gpu = cuda_make_array(l->mean_arr, l->n); - cuda_push_array(l->mean_arr_gpu, l->mean_arr, l->n); - CHECK_CUDA(cudaDeviceSynchronize()); -#endif // GPU - - free(align_weights); -} - -// binary transpose -size_t binary_transpose_align_input(int k, int n, float *b, char **t_bit_input, size_t ldb_align, int bit_align) -{ - size_t new_ldb = k + (ldb_align - k%ldb_align); // (k / 8 + 1) * 8; - //printf("\n n = %d, bit_align = %d \n", n, bit_align); - size_t t_intput_size = new_ldb * bit_align;// n; - size_t t_bit_input_size = t_intput_size / 8;// +1; - - memset(*t_bit_input, 0, t_bit_input_size * sizeof(char)); - //int src_size = k * bit_align; - - // b - [bit_align, k] - [l.bit_align, l.size*l.size*l.c] = src_size - // t_input - [bit_align, k] - [n', k] - // t_bit_input - [new_ldb, n] - [k', n] - - //transpose_bin(t_input, *t_bit_input, k, n, bit_align, new_ldb, 8); - transpose_bin((uint32_t*)b, (uint32_t*)*t_bit_input, k, n, bit_align, new_ldb, 8); - - return t_intput_size; -} - - -void forward_convolutional_layer(convolutional_layer l, network_state state) -{ - int out_h = convolutional_out_height(l); - int out_w = convolutional_out_width(l); - int i, j; - - fill_cpu(l.outputs*l.batch, 0, l.output, 1); - - if (l.xnor && (!l.align_bit_weights || state.train)) { - if (!l.align_bit_weights || state.train) { - 
binarize_weights(l.weights, l.n, l.nweights, l.binary_weights); - //printf("\n binarize_weights l.align_bit_weights = %p \n", l.align_bit_weights); - } - swap_binary(&l); - binarize_cpu(state.input, l.c*l.h*l.w*l.batch, l.binary_input); - state.input = l.binary_input; - } - - int m = l.n / l.groups; - int k = l.size*l.size*l.c / l.groups; - int n = out_h*out_w; - - static int u = 0; - u++; - - for(i = 0; i < l.batch; ++i) - { - for (j = 0; j < l.groups; ++j) - { - float *a = l.weights +j*l.nweights / l.groups; - float *b = state.workspace; - float *c = l.output +(i*l.groups + j)*n*m; - - //gemm(0,0,m,n,k,1,a,k,b,n,1,c,n); - //gemm_nn_custom(m, n, k, 1, a, k, b, n, c, n); - if (l.xnor && l.align_bit_weights && !state.train && l.stride_x == l.stride_y) - { - memset(b, 0, l.bit_align*l.size*l.size*l.c * sizeof(float)); - - if (l.c % 32 == 0) - { - //printf(" l.index = %d - new XNOR \n", l.index); - - int ldb_align = l.lda_align; - size_t new_ldb = k + (ldb_align - k%ldb_align); // (k / 8 + 1) * 8; - //size_t t_intput_size = new_ldb * l.bit_align;// n; - //size_t t_bit_input_size = t_intput_size / 8;// +1; - - int re_packed_input_size = l.c * l.w * l.h; - memset(state.workspace, 0, re_packed_input_size * sizeof(float)); - - const size_t new_c = l.c / 32; - size_t in_re_packed_input_size = new_c * l.w * l.h + 1; - memset(l.bin_re_packed_input, 0, in_re_packed_input_size * sizeof(uint32_t)); - - //float *re_packed_input = calloc(l.c * l.w * l.h, sizeof(float)); - //uint32_t *bin_re_packed_input = calloc(new_c * l.w * l.h + 1, sizeof(uint32_t)); - - // float32x4 by channel (as in cuDNN) - repack_input(state.input, state.workspace, l.w, l.h, l.c); - - // 32 x floats -> 1 x uint32_t - float_to_bit(state.workspace, (unsigned char *)l.bin_re_packed_input, l.c * l.w * l.h); - - //free(re_packed_input); - - // slow - convolution the packed inputs and weights: float x 32 by channel (as in cuDNN) - //convolution_repacked((uint32_t *)bin_re_packed_input, (uint32_t 
*)l.align_bit_weights, l.output, - // l.w, l.h, l.c, l.n, l.size, l.pad, l.new_lda, l.mean_arr); - - // // then exit from if() - - - im2col_cpu_custom((float *)l.bin_re_packed_input, new_c, l.h, l.w, l.size, l.stride, l.pad, state.workspace); - //im2col_cpu((float *)bin_re_packed_input, new_c, l.h, l.w, l.size, l.stride, l.pad, b); - - //free(bin_re_packed_input); - - int new_k = l.size*l.size*l.c / 32; - - // good for (l.c == 64) - //gemm_nn_bin_32bit_packed(m, n, new_k, 1, - // l.align_bit_weights, l.new_lda/32, - // b, n, - // c, n, l.mean_arr); - - // // then exit from if() - - transpose_uint32((uint32_t *)state.workspace, (uint32_t*)l.t_bit_input, new_k, n, n, new_ldb); - - // the main GEMM function - gemm_nn_custom_bin_mean_transposed(m, n, k, 1, (unsigned char*)l.align_bit_weights, new_ldb, (unsigned char*)l.t_bit_input, new_ldb, c, n, l.mean_arr); - - // // alternative GEMM - //gemm_nn_bin_transposed_32bit_packed(m, n, new_k, 1, - // l.align_bit_weights, l.new_lda/32, - // t_bit_input, new_ldb / 32, - // c, n, l.mean_arr); - - //free(t_bit_input); - - } - else - { // else (l.c % 32 != 0) - - //-------------------------------------------------------- - //printf(" l.index = %d - old XNOR \n", l.index); - - //im2col_cpu_custom_align(state.input, l.c, l.h, l.w, l.size, l.stride, l.pad, b, l.bit_align); - im2col_cpu_custom_bin(state.input, l.c, l.h, l.w, l.size, l.stride, l.pad, state.workspace, l.bit_align); - - //size_t output_size = l.outputs; - //float *count_output = calloc(output_size, sizeof(float)); - //size_t bit_output_size = output_size / 8 + 1; - //char *bit_output = calloc(bit_output_size, sizeof(char)); - - //size_t intput_size = n * k; // (out_h*out_w) X (l.size*l.size*l.c) : after im2col() - //size_t bit_input_size = intput_size / 8 + 1; - //char *bit_input = calloc(bit_input_size, sizeof(char)); - - //size_t weights_size = k * m; //l.size*l.size*l.c*l.n; // l.nweights - //size_t bit_weights_size = weights_size / 8 + 1; - - //char *bit_weights = 
calloc(bit_weights_size, sizeof(char)); - //float *mean_arr = calloc(l.n, sizeof(float)); - - // transpose B from NxK to KxN (x-axis (ldb = l.size*l.size*l.c) - should be multiple of 8 bits) - { - //size_t ldb_align = 256; // 256 bit for AVX2 - int ldb_align = l.lda_align; - size_t new_ldb = k + (ldb_align - k%ldb_align); - size_t t_intput_size = binary_transpose_align_input(k, n, state.workspace, &l.t_bit_input, ldb_align, l.bit_align); - - // 5x times faster than gemm()-float32 - gemm_nn_custom_bin_mean_transposed(m, n, k, 1, (unsigned char*)l.align_bit_weights, new_ldb, (unsigned char*)l.t_bit_input, new_ldb, c, n, l.mean_arr); - - //gemm_nn_custom_bin_mean_transposed(m, n, k, 1, bit_weights, k, t_bit_input, new_ldb, c, n, mean_arr); - - //free(t_input); - //free(t_bit_input); - //} - } - - } - - add_bias(l.output, l.biases, l.batch, l.n, out_h*out_w); - - //activate_array(l.output, m*n*l.batch, l.activation); - if (l.activation == SWISH) activate_array_swish(l.output, l.outputs*l.batch, l.activation_input, l.output); - else if (l.activation == MISH) activate_array_mish(l.output, l.outputs*l.batch, l.activation_input, l.output); - else if (l.activation == HARD_MISH) activate_array_hard_mish(l.output, l.outputs*l.batch, l.activation_input, l.output); - else if (l.activation == NORM_CHAN) activate_array_normalize_channels(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output); - else if (l.activation == NORM_CHAN_SOFTMAX) activate_array_normalize_channels_softmax(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output, 0); - else if (l.activation == NORM_CHAN_SOFTMAX_MAXVAL) activate_array_normalize_channels_softmax(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output, 1); - else activate_array_cpu_custom(l.output, m*n*l.batch, l.activation); - return; - - } - else { - //printf(" l.index = %d - FP32 \n", l.index); - float *im = state.input + (i*l.groups + j)*(l.c / l.groups)*l.h*l.w; - if (l.size == 1 && 
l.stride == 1 && l.dilation == 1) { - b = im; - } - else { - //im2col_cpu(im, l.c / l.groups, l.h, l.w, l.size, l.stride, l.pad, b); - - im2col_cpu_ext(im, // input - l.c / l.groups, // input channels - l.h, l.w, // input size (h, w) - l.size, l.size, // kernel size (h, w) - l.pad * l.dilation, l.pad * l.dilation, // padding (h, w) - l.stride_y, l.stride_x, // stride (h, w) - l.dilation, l.dilation, // dilation (h, w) - b); // output - - } - - gemm(0, 0, m, n, k, 1, a, k, b, n, 1, c, n); - // bit-count to float - } - //c += n*m; - //state.input += l.c*l.h*l.w; - } - } - - if(l.batch_normalize){ - forward_batchnorm_layer(l, state); - } - else { - add_bias(l.output, l.biases, l.batch, l.n, out_h*out_w); - } - - //activate_array(l.output, m*n*l.batch, l.activation); - if (l.activation == SWISH) activate_array_swish(l.output, l.outputs*l.batch, l.activation_input, l.output); - else if (l.activation == MISH) activate_array_mish(l.output, l.outputs*l.batch, l.activation_input, l.output); - else if (l.activation == HARD_MISH) activate_array_hard_mish(l.output, l.outputs*l.batch, l.activation_input, l.output); - else if (l.activation == NORM_CHAN) activate_array_normalize_channels(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output); - else if (l.activation == NORM_CHAN_SOFTMAX) activate_array_normalize_channels_softmax(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output, 0); - else if (l.activation == NORM_CHAN_SOFTMAX_MAXVAL) activate_array_normalize_channels_softmax(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.output, 1); - else activate_array_cpu_custom(l.output, l.outputs*l.batch, l.activation); - - if(l.binary || l.xnor) swap_binary(&l); - - //visualize_convolutional_layer(l, "conv_visual", NULL); - //wait_until_press_key_cv(); - - if(l.assisted_excitation && state.train) assisted_excitation_forward(l, state); - - if (l.antialiasing) { - network_state s = { 0 }; - s.train = state.train; - s.workspace = 
state.workspace; - s.net = state.net; - s.input = l.output; - forward_convolutional_layer(*(l.input_layer), s); - //simple_copy_ongpu(l.outputs*l.batch, l.output, l.input_antialiasing); - memcpy(l.output, l.input_layer->output, l.input_layer->outputs * l.input_layer->batch * sizeof(float)); - } -} - -void assisted_excitation_forward(convolutional_layer l, network_state state) -{ - const int iteration_num = (*state.net.seen) / (state.net.batch*state.net.subdivisions); - - // epoch - //const float epoch = (float)(*state.net.seen) / state.net.train_images_num; - - // calculate alpha - //const float alpha = (1 + cos(3.141592 * iteration_num)) / (2 * state.net.max_batches); - //const float alpha = (1 + cos(3.141592 * epoch)) / (2 * state.net.max_batches); - float alpha = (1 + cos(3.141592 * iteration_num / state.net.max_batches)); - - if (l.assisted_excitation > 1) { - if (iteration_num > l.assisted_excitation) alpha = 0; - else alpha = (1 + cos(3.141592 * iteration_num / l.assisted_excitation)); - } - - //printf("\n epoch = %f, alpha = %f, seen = %d, max_batches = %d, train_images_num = %d \n", - // epoch, alpha, (*state.net.seen), state.net.max_batches, state.net.train_images_num); - - float *a_avg = (float *)xcalloc(l.out_w * l.out_h * l.batch, sizeof(float)); - float *g = (float *)xcalloc(l.out_w * l.out_h * l.batch, sizeof(float)); - - int b; - int w, h, c; - - l.max_boxes = state.net.num_boxes; - l.truths = l.max_boxes*(4 + 1); - - for (b = 0; b < l.batch; ++b) - { - // calculate G - int t; - for (t = 0; t < state.net.num_boxes; ++t) { - box truth = float_to_box_stride(state.truth + t*(4 + 1) + b*l.truths, 1); - if (!truth.x) break; // continue; - - int left = floor((truth.x - truth.w / 2) * l.out_w); - int right = ceil((truth.x + truth.w / 2) * l.out_w); - int top = floor((truth.y - truth.h / 2) * l.out_h); - int bottom = ceil((truth.y + truth.h / 2) * l.out_h); - - for (w = left; w <= right; w++) { - for (h = top; h < bottom; h++) { - g[w + l.out_w * h + 
l.out_w*l.out_h*b] = 1; - } - } - } - } - - for (b = 0; b < l.batch; ++b) - { - // calculate average A - for (w = 0; w < l.out_w; w++) { - for (h = 0; h < l.out_h; h++) { - for (c = 0; c < l.out_c; c++) { - a_avg[w + l.out_w*(h + l.out_h*b)] += l.output[w + l.out_w*(h + l.out_h*(c + l.out_c*b))]; - } - a_avg[w + l.out_w*(h + l.out_h*b)] /= l.out_c; // a_avg / d - } - } - } - - // change activation - for (b = 0; b < l.batch; ++b) - { - for (w = 0; w < l.out_w; w++) { - for (h = 0; h < l.out_h; h++) { - for (c = 0; c < l.out_c; c++) - { - // a = a + alpha(t) + e(c,i,j) = a + alpha(t) + g(i,j) * avg_a(i,j) / channels - l.output[w + l.out_w*(h + l.out_h*(c + l.out_c*b))] += - alpha * - g[w + l.out_w*(h + l.out_h*b)] * - a_avg[w + l.out_w*(h + l.out_h*b)]; - - //l.output[w + l.out_w*(h + l.out_h*(c + l.out_c*b))] = - // alpha * g[w + l.out_w*(h + l.out_h*b)] * a_avg[w + l.out_w*(h + l.out_h*b)]; - } - } - } - } - - if(0) // visualize ground truth - { -#ifdef OPENCV - for (b = 0; b < l.batch; ++b) - { - image img = float_to_image(l.out_w, l.out_h, 1, &g[l.out_w*l.out_h*b]); - char buff[100]; - sprintf(buff, "a_excitation_%d", b); - show_image_cv(img, buff); - - image img2 = float_to_image(l.out_w, l.out_h, 1, &l.output[l.out_w*l.out_h*l.out_c*b]); - char buff2[100]; - sprintf(buff2, "a_excitation_act_%d", b); - show_image_cv(img2, buff2); - wait_key_cv(5); - } - wait_until_press_key_cv(); -#endif // OPENCV - } - - free(g); - free(a_avg); -} - - -void backward_convolutional_layer(convolutional_layer l, network_state state) -{ - int i, j; - int m = l.n / l.groups; - int n = l.size*l.size*l.c / l.groups; - int k = l.out_w*l.out_h; - - if (l.activation == SWISH) gradient_array_swish(l.output, l.outputs*l.batch, l.activation_input, l.delta); - else if (l.activation == MISH) gradient_array_mish(l.outputs*l.batch, l.activation_input, l.delta); - else if (l.activation == HARD_MISH) gradient_array_hard_mish(l.outputs*l.batch, l.activation_input, l.delta); - else if (l.activation 
== NORM_CHAN_SOFTMAX || l.activation == NORM_CHAN_SOFTMAX_MAXVAL) gradient_array_normalize_channels_softmax(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.delta); - else if (l.activation == NORM_CHAN) gradient_array_normalize_channels(l.output, l.outputs*l.batch, l.batch, l.out_c, l.out_w*l.out_h, l.delta); - else gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); - - if (l.batch_normalize) { - backward_batchnorm_layer(l, state); - } - else { - backward_bias(l.bias_updates, l.delta, l.batch, l.n, k); - } - - for (i = 0; i < l.batch; ++i) { - for (j = 0; j < l.groups; ++j) { - float *a = l.delta + (i*l.groups + j)*m*k; - float *b = state.workspace; - float *c = l.weight_updates + j*l.nweights / l.groups; - - float *im = state.input + (i*l.groups + j)* (l.c / l.groups)*l.h*l.w; - - //im2col_cpu(im, l.c / l.groups, l.h, l.w, l.size, l.stride, l.pad, b); - im2col_cpu_ext( - im, // input - l.c / l.groups, // input channels - l.h, l.w, // input size (h, w) - l.size, l.size, // kernel size (h, w) - l.pad * l.dilation, l.pad * l.dilation, // padding (h, w) - l.stride_y, l.stride_x, // stride (h, w) - l.dilation, l.dilation, // dilation (h, w) - b); // output - - gemm(0, 1, m, n, k, 1, a, k, b, k, 1, c, n); - - if (state.delta) { - a = l.weights + j*l.nweights / l.groups; - b = l.delta + (i*l.groups + j)*m*k; - c = state.workspace; - - gemm(1, 0, n, k, m, 1, a, n, b, k, 0, c, k); - - //col2im_cpu(state.workspace, l.c / l.groups, l.h, l.w, l.size, l.stride, - // l.pad, state.delta + (i*l.groups + j)*l.c / l.groups*l.h*l.w); - - col2im_cpu_ext( - state.workspace, // input - l.c / l.groups, // input channels (h, w) - l.h, l.w, // input size (h, w) - l.size, l.size, // kernel size (h, w) - l.pad * l.dilation, l.pad * l.dilation, // padding (h, w) - l.stride_y, l.stride_x, // stride (h, w) - l.dilation, l.dilation, // dilation (h, w) - state.delta + (i*l.groups + j)* (l.c / l.groups)*l.h*l.w); // output (delta) - } - } - } -} - -void 
update_convolutional_layer(convolutional_layer l, int batch, float learning_rate_init, float momentum, float decay) -{ - float learning_rate = learning_rate_init*l.learning_rate_scale; - //float momentum = a.momentum; - //float decay = a.decay; - //int batch = a.batch; - - axpy_cpu(l.nweights, -decay*batch, l.weights, 1, l.weight_updates, 1); - axpy_cpu(l.nweights, learning_rate / batch, l.weight_updates, 1, l.weights, 1); - scal_cpu(l.nweights, momentum, l.weight_updates, 1); - - axpy_cpu(l.n, learning_rate / batch, l.bias_updates, 1, l.biases, 1); - scal_cpu(l.n, momentum, l.bias_updates, 1); - - if (l.scales) { - axpy_cpu(l.n, learning_rate / batch, l.scale_updates, 1, l.scales, 1); - scal_cpu(l.n, momentum, l.scale_updates, 1); - } -} - - - -image get_convolutional_weight(convolutional_layer l, int i) -{ - int h = l.size; - int w = l.size; - int c = l.c / l.groups; - return float_to_image(w, h, c, l.weights + i*h*w*c); -} - -void rgbgr_weights(convolutional_layer l) -{ - int i; - for (i = 0; i < l.n; ++i) { - image im = get_convolutional_weight(l, i); - if (im.c == 3) { - rgbgr_image(im); - } - } -} - -void rescale_weights(convolutional_layer l, float scale, float trans) -{ - int i; - for (i = 0; i < l.n; ++i) { - image im = get_convolutional_weight(l, i); - if (im.c == 3) { - scale_image(im, scale); - float sum = sum_array(im.data, im.w*im.h*im.c); - l.biases[i] += sum*trans; - } - } -} - -image *get_weights(convolutional_layer l) -{ - image *weights = (image *)xcalloc(l.n, sizeof(image)); - int i; - for (i = 0; i < l.n; ++i) { - weights[i] = copy_image(get_convolutional_weight(l, i)); - normalize_image(weights[i]); - /* - char buff[256]; - sprintf(buff, "filter%d", i); - save_image(weights[i], buff); - */ - } - //error("hey"); - return weights; -} - -image *visualize_convolutional_layer(convolutional_layer l, char *window, image *prev_weights) -{ - image *single_weights = get_weights(l); - show_images(single_weights, l.n, window); - - image delta = 
get_convolutional_image(l); - image dc = collapse_image_layers(delta, 1); - char buff[256]; - sprintf(buff, "%s: Output", window); - show_image(dc, buff); - //save_image(dc, buff); - free_image(dc); - return single_weights; -} - diff --git a/src/Detector/darknet/src/convolutional_layer.h b/src/Detector/darknet/src/convolutional_layer.h deleted file mode 100644 index e83ca873c..000000000 --- a/src/Detector/darknet/src/convolutional_layer.h +++ /dev/null @@ -1,68 +0,0 @@ -#ifndef CONVOLUTIONAL_LAYER_H -#define CONVOLUTIONAL_LAYER_H - -#include "dark_cuda.h" -#include "image.h" -#include "activations.h" -#include "layer.h" -#include "network.h" - -typedef layer convolutional_layer; - -#ifdef __cplusplus -extern "C" { -#endif -#ifdef GPU -void forward_convolutional_layer_gpu(convolutional_layer layer, network_state state); -void backward_convolutional_layer_gpu(convolutional_layer layer, network_state state); -void update_convolutional_layer_gpu(convolutional_layer layer, int batch, float learning_rate, float momentum, float decay, float loss_scale); - -void push_convolutional_layer(convolutional_layer layer); -void pull_convolutional_layer(convolutional_layer layer); - -void add_bias_gpu(float *output, float *biases, int batch, int n, int size); -void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size); -#ifdef CUDNN -void cudnn_convolutional_setup(layer *l, int cudnn_preference, size_t workspace_size_specify); -void create_convolutional_cudnn_tensors(layer *l); -void cuda_convert_f32_to_f16(float* input_f32, size_t size, float *output_f16); -#endif -#endif -void free_convolutional_batchnorm(convolutional_layer *l); - -size_t get_convolutional_workspace_size(layer l); -convolutional_layer make_convolutional_layer(int batch, int steps, int h, int w, int c, int n, int groups, int size, int stride_x, int stride_y, int dilation, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam, int use_bin_output, int 
index, int antialiasing, convolutional_layer *share_layer, int assisted_excitation, int deform, int train); -void denormalize_convolutional_layer(convolutional_layer l); -void set_specified_workspace_limit(convolutional_layer *l, size_t workspace_size_limit); -void resize_convolutional_layer(convolutional_layer *layer, int w, int h); -void forward_convolutional_layer(const convolutional_layer layer, network_state state); -void update_convolutional_layer(convolutional_layer layer, int batch, float learning_rate, float momentum, float decay); -image *visualize_convolutional_layer(convolutional_layer layer, char *window, image *prev_weights); -void binarize_weights(float *weights, int n, int size, float *binary); -void swap_binary(convolutional_layer *l); -void binarize_weights2(float *weights, int n, int size, char *binary, float *scales); - -void binary_align_weights(convolutional_layer *l); - -void backward_convolutional_layer(convolutional_layer layer, network_state state); - -void add_bias(float *output, float *biases, int batch, int n, int size); -void backward_bias(float *bias_updates, float *delta, int batch, int n, int size); - -image get_convolutional_image(convolutional_layer layer); -image get_convolutional_delta(convolutional_layer layer); -image get_convolutional_weight(convolutional_layer layer, int i); - - -int convolutional_out_height(convolutional_layer layer); -int convolutional_out_width(convolutional_layer layer); -void rescale_weights(convolutional_layer l, float scale, float trans); -void rgbgr_weights(convolutional_layer l); -void assisted_excitation_forward(convolutional_layer l, network_state state); -void assisted_excitation_forward_gpu(convolutional_layer l, network_state state); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/Detector/darknet/src/cost_layer.c b/src/Detector/darknet/src/cost_layer.c deleted file mode 100644 index ed1cc1344..000000000 --- a/src/Detector/darknet/src/cost_layer.c +++ /dev/null @@ -1,148 +0,0 @@ 
-#include "cost_layer.h" -#include "utils.h" -#include "dark_cuda.h" -#include "blas.h" -#include -#include -#include -#include - -COST_TYPE get_cost_type(char *s) -{ - if (strcmp(s, "sse")==0) return SSE; - if (strcmp(s, "masked")==0) return MASKED; - if (strcmp(s, "smooth")==0) return SMOOTH; - fprintf(stderr, "Couldn't find cost type %s, going with SSE\n", s); - return SSE; -} - -char *get_cost_string(COST_TYPE a) -{ - switch(a){ - case SSE: - return "sse"; - case MASKED: - return "masked"; - case SMOOTH: - return "smooth"; - default: - return "sse"; - } -} - -cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type, float scale) -{ - fprintf(stderr, "cost %4d\n", inputs); - cost_layer l = { (LAYER_TYPE)0 }; - l.type = COST; - - l.scale = scale; - l.batch = batch; - l.inputs = inputs; - l.outputs = inputs; - l.cost_type = cost_type; - l.delta = (float*)xcalloc(inputs * batch, sizeof(float)); - l.output = (float*)xcalloc(inputs * batch, sizeof(float)); - l.cost = (float*)xcalloc(1, sizeof(float)); - - l.forward = forward_cost_layer; - l.backward = backward_cost_layer; - #ifdef GPU - l.forward_gpu = forward_cost_layer_gpu; - l.backward_gpu = backward_cost_layer_gpu; - - l.delta_gpu = cuda_make_array(l.delta, inputs*batch); - l.output_gpu = cuda_make_array(l.output, inputs*batch); - #endif - return l; -} - -void resize_cost_layer(cost_layer *l, int inputs) -{ - l->inputs = inputs; - l->outputs = inputs; - l->delta = (float*)xrealloc(l->delta, inputs * l->batch * sizeof(float)); - l->output = (float*)xrealloc(l->output, inputs * l->batch * sizeof(float)); -#ifdef GPU - cuda_free(l->delta_gpu); - cuda_free(l->output_gpu); - l->delta_gpu = cuda_make_array(l->delta, inputs*l->batch); - l->output_gpu = cuda_make_array(l->output, inputs*l->batch); -#endif -} - -void forward_cost_layer(cost_layer l, network_state state) -{ - if (!state.truth) return; - if(l.cost_type == MASKED){ - int i; - for(i = 0; i < l.batch*l.inputs; ++i){ - if(state.truth[i] == 
SECRET_NUM) state.input[i] = SECRET_NUM; - } - } - if(l.cost_type == SMOOTH){ - smooth_l1_cpu(l.batch*l.inputs, state.input, state.truth, l.delta, l.output); - } else { - l2_cpu(l.batch*l.inputs, state.input, state.truth, l.delta, l.output); - } - l.cost[0] = sum_array(l.output, l.batch*l.inputs); -} - -void backward_cost_layer(const cost_layer l, network_state state) -{ - axpy_cpu(l.batch*l.inputs, l.scale, l.delta, 1, state.delta, 1); -} - -#ifdef GPU - -void pull_cost_layer(cost_layer l) -{ - cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs); -} - -void push_cost_layer(cost_layer l) -{ - cuda_push_array(l.delta_gpu, l.delta, l.batch*l.inputs); -} - -int float_abs_compare (const void * a, const void * b) -{ - float fa = *(const float*) a; - if(fa < 0) fa = -fa; - float fb = *(const float*) b; - if(fb < 0) fb = -fb; - return (fa > fb) - (fa < fb); -} - -void forward_cost_layer_gpu(cost_layer l, network_state state) -{ - if (!state.truth) return; - if (l.cost_type == MASKED) { - mask_ongpu(l.batch*l.inputs, state.input, SECRET_NUM, state.truth); - } - - if(l.cost_type == SMOOTH){ - smooth_l1_gpu(l.batch*l.inputs, state.input, state.truth, l.delta_gpu, l.output_gpu); - } else { - l2_gpu(l.batch*l.inputs, state.input, state.truth, l.delta_gpu, l.output_gpu); - } - - if(l.ratio){ - cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs); - qsort(l.delta, l.batch*l.inputs, sizeof(float), float_abs_compare); - int n = (1-l.ratio) * l.batch*l.inputs; - float thresh = l.delta[n]; - thresh = 0; - printf("%f\n", thresh); - supp_ongpu(l.batch*l.inputs, thresh, l.delta_gpu, 1); - } - - cuda_pull_array(l.output_gpu, l.output, l.batch*l.inputs); - l.cost[0] = sum_array(l.output, l.batch*l.inputs); -} - -void backward_cost_layer_gpu(const cost_layer l, network_state state) -{ - axpy_ongpu(l.batch*l.inputs, l.scale, l.delta_gpu, 1, state.delta, 1); -} -#endif diff --git a/src/Detector/darknet/src/cost_layer.h b/src/Detector/darknet/src/cost_layer.h deleted file mode 100644 
index b350003e0..000000000 --- a/src/Detector/darknet/src/cost_layer.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef COST_LAYER_H -#define COST_LAYER_H -#include "layer.h" -#include "network.h" - -typedef layer cost_layer; - -#ifdef __cplusplus -extern "C" { -#endif -COST_TYPE get_cost_type(char *s); -char *get_cost_string(COST_TYPE a); -cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type, float scale); -void forward_cost_layer(const cost_layer l, network_state state); -void backward_cost_layer(const cost_layer l, network_state state); -void resize_cost_layer(cost_layer *l, int inputs); - -#ifdef GPU -void forward_cost_layer_gpu(cost_layer l, network_state state); -void backward_cost_layer_gpu(const cost_layer l, network_state state); -#endif - -#ifdef __cplusplus -} -#endif -#endif diff --git a/src/Detector/darknet/src/cpu_gemm.c b/src/Detector/darknet/src/cpu_gemm.c deleted file mode 100644 index ca1a8e424..000000000 --- a/src/Detector/darknet/src/cpu_gemm.c +++ /dev/null @@ -1,96 +0,0 @@ -//#include "mini_blas.h" -#ifdef __cplusplus -#define PUT_IN_REGISTER -#else -#define PUT_IN_REGISTER register -#endif - -void cpu_gemm_nn(int TA, int TB, int M, int N, int K, float ALPHA, - float *A, int lda, - float *B, int ldb, - float BETA, - float *C, int ldc) -{ - int i,j,k; - for(i = 0; i < M; ++i){ - for(k = 0; k < K; ++k){ - PUT_IN_REGISTER float A_PART = ALPHA * A[i * lda + k]; - for(j = 0; j < N; ++j){ - C[i*ldc+j] += A_PART*B[k*ldb+j]; - } - } - } -} - -void cpu_gemm_nt(int TA, int TB, int M, int N, int K, float ALPHA, - float *A, int lda, - float *B, int ldb, - float BETA, - float *C, int ldc) -{ - int i,j,k; - for(i = 0; i < M; ++i){ - for(j = 0; j < N; ++j){ - PUT_IN_REGISTER float sum = 0; - for(k = 0; k < K; ++k){ - sum += ALPHA*A[i*lda+k]*B[k+j*ldb]; - } - C[i*ldc+j] += sum; - } - } -} - -void cpu_gemm_tn(int TA, int TB, int M, int N, int K, float ALPHA, - float *A, int lda, - float *B, int ldb, - float BETA, - float *C, int ldc) -{ - int i,j,k; - 
for(i = 0; i < M; ++i){ - for(k = 0; k < K; ++k){ - PUT_IN_REGISTER float A_PART = ALPHA * A[k * lda + i]; - for(j = 0; j < N; ++j){ - C[i*ldc+j] += A_PART*B[k*ldb+j]; - } - } - } -} -void cpu_gemm_tt(int TA, int TB, int M, int N, int K, float ALPHA, - float *A, int lda, - float *B, int ldb, - float BETA, - float *C, int ldc) -{ - int i,j,k; - for(i = 0; i < M; ++i){ - for(j = 0; j < N; ++j){ - for(k = 0; k < K; ++k){ - C[i*ldc+j] += ALPHA*A[i+k*lda]*B[k+j*ldb]; - } - } - } -} - - -void cpu_gemm(int TA, int TB, int M, int N, int K, float ALPHA, - float *A, int lda, - float *B, int ldb, - float BETA, - float *C, int ldc) -{ - int i, j; - for(i = 0; i < M; ++i){ - for(j = 0; j < N; ++j){ - C[i*ldc + j] *= BETA; - } - } - if(!TA && !TB) - cpu_gemm_nn( TA, TB, M, N, K, ALPHA,A,lda, B, ldb,BETA,C,ldc); - else if(TA && !TB) - cpu_gemm_tn( TA, TB, M, N, K, ALPHA,A,lda, B, ldb,BETA,C,ldc); - else if(!TA && TB) - cpu_gemm_nt( TA, TB, M, N, K, ALPHA,A,lda, B, ldb,BETA,C,ldc); - else - cpu_gemm_tt( TA, TB, M, N, K, ALPHA,A,lda, B, ldb,BETA,C,ldc); -} diff --git a/src/Detector/darknet/src/crnn_layer.c b/src/Detector/darknet/src/crnn_layer.c deleted file mode 100644 index 84646b4af..000000000 --- a/src/Detector/darknet/src/crnn_layer.c +++ /dev/null @@ -1,383 +0,0 @@ -#include "crnn_layer.h" -#include "convolutional_layer.h" -#include "utils.h" -#include "dark_cuda.h" -#include "blas.h" -#include "gemm.h" - -#include -#include -#include -#include - -static void increment_layer(layer *l, int steps) -{ - int num = l->outputs*l->batch*steps; - l->output += num; - l->delta += num; - l->x += num; - l->x_norm += num; - -#ifdef GPU - l->output_gpu += num; - l->delta_gpu += num; - l->x_gpu += num; - l->x_norm_gpu += num; -#endif -} - -layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int groups, int steps, int size, int stride, int dilation, int pad, ACTIVATION activation, int batch_normalize, int xnor, int train) -{ - fprintf(stderr, "CRNN 
Layer: %d x %d x %d image, %d filters\n", h,w,c,output_filters); - batch = batch / steps; - layer l = { (LAYER_TYPE)0 }; - l.train = train; - l.batch = batch; - l.type = CRNN; - l.steps = steps; - l.size = size; - l.stride = stride; - l.dilation = dilation; - l.pad = pad; - l.h = h; - l.w = w; - l.c = c; - l.groups = groups; - l.out_c = output_filters; - l.inputs = h * w * c; - l.hidden = h * w * hidden_filters; - l.xnor = xnor; - - l.state = (float*)xcalloc(l.hidden * l.batch * (l.steps + 1), sizeof(float)); - - l.input_layer = (layer*)xcalloc(1, sizeof(layer)); - *(l.input_layer) = make_convolutional_layer(batch, steps, h, w, c, hidden_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0, 0, train); - l.input_layer->batch = batch; - if (l.workspace_size < l.input_layer->workspace_size) l.workspace_size = l.input_layer->workspace_size; - - l.self_layer = (layer*)xcalloc(1, sizeof(layer)); - *(l.self_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, hidden_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0, 0, train); - l.self_layer->batch = batch; - if (l.workspace_size < l.self_layer->workspace_size) l.workspace_size = l.self_layer->workspace_size; - - l.output_layer = (layer*)xcalloc(1, sizeof(layer)); - *(l.output_layer) = make_convolutional_layer(batch, steps, h, w, hidden_filters, output_filters, groups, size, stride, stride, dilation, pad, activation, batch_normalize, 0, xnor, 0, 0, 0, 0, NULL, 0, 0, train); - l.output_layer->batch = batch; - if (l.workspace_size < l.output_layer->workspace_size) l.workspace_size = l.output_layer->workspace_size; - - l.out_h = l.output_layer->out_h; - l.out_w = l.output_layer->out_w; - l.outputs = l.output_layer->outputs; - - assert(l.input_layer->outputs == l.self_layer->outputs); - assert(l.input_layer->outputs == l.output_layer->inputs); - - l.output = l.output_layer->output; - 
l.delta = l.output_layer->delta; - - l.forward = forward_crnn_layer; - l.backward = backward_crnn_layer; - l.update = update_crnn_layer; - -#ifdef GPU - l.forward_gpu = forward_crnn_layer_gpu; - l.backward_gpu = backward_crnn_layer_gpu; - l.update_gpu = update_crnn_layer_gpu; - l.state_gpu = cuda_make_array(l.state, l.batch*l.hidden*(l.steps + 1)); - l.output_gpu = l.output_layer->output_gpu; - l.delta_gpu = l.output_layer->delta_gpu; -#endif - - l.bflops = l.input_layer->bflops + l.self_layer->bflops + l.output_layer->bflops; - - return l; -} - -void resize_crnn_layer(layer *l, int w, int h) -{ - resize_convolutional_layer(l->input_layer, w, h); - if (l->workspace_size < l->input_layer->workspace_size) l->workspace_size = l->input_layer->workspace_size; - - resize_convolutional_layer(l->self_layer, w, h); - if (l->workspace_size < l->self_layer->workspace_size) l->workspace_size = l->self_layer->workspace_size; - - resize_convolutional_layer(l->output_layer, w, h); - if (l->workspace_size < l->output_layer->workspace_size) l->workspace_size = l->output_layer->workspace_size; - - l->output = l->output_layer->output; - l->delta = l->output_layer->delta; - - int hidden_filters = l->self_layer->c; - l->w = w; - l->h = h; - l->inputs = h * w * l->c; - l->hidden = h * w * hidden_filters; - - l->out_h = l->output_layer->out_h; - l->out_w = l->output_layer->out_w; - l->outputs = l->output_layer->outputs; - - assert(l->input_layer->inputs == l->inputs); - assert(l->self_layer->inputs == l->hidden); - assert(l->input_layer->outputs == l->self_layer->outputs); - assert(l->input_layer->outputs == l->output_layer->inputs); - - l->state = (float*)xrealloc(l->state, l->batch*l->hidden*(l->steps + 1)*sizeof(float)); - -#ifdef GPU - if (l->state_gpu) cudaFree(l->state_gpu); - l->state_gpu = cuda_make_array(l->state, l->batch*l->hidden*(l->steps + 1)); - - l->output_gpu = l->output_layer->output_gpu; - l->delta_gpu = l->output_layer->delta_gpu; -#endif -} - -void 
free_state_crnn(layer l) -{ - int i; - for (i = 0; i < l.outputs * l.batch; ++i) l.self_layer->output[i] = rand_uniform(-1, 1); - -#ifdef GPU - cuda_push_array(l.self_layer->output_gpu, l.self_layer->output, l.outputs * l.batch); -#endif // GPU -} - -void update_crnn_layer(layer l, int batch, float learning_rate, float momentum, float decay) -{ - update_convolutional_layer(*(l.input_layer), batch, learning_rate, momentum, decay); - update_convolutional_layer(*(l.self_layer), batch, learning_rate, momentum, decay); - update_convolutional_layer(*(l.output_layer), batch, learning_rate, momentum, decay); -} - -void forward_crnn_layer(layer l, network_state state) -{ - network_state s = {0}; - s.train = state.train; - s.workspace = state.workspace; - s.net = state.net; - //s.index = state.index; - int i; - layer input_layer = *(l.input_layer); - layer self_layer = *(l.self_layer); - layer output_layer = *(l.output_layer); - - if (state.train) { - fill_cpu(l.outputs * l.batch * l.steps, 0, output_layer.delta, 1); - fill_cpu(l.hidden * l.batch * l.steps, 0, self_layer.delta, 1); - fill_cpu(l.hidden * l.batch * l.steps, 0, input_layer.delta, 1); - fill_cpu(l.hidden * l.batch, 0, l.state, 1); - } - - for (i = 0; i < l.steps; ++i) { - s.input = state.input; - forward_convolutional_layer(input_layer, s); - - s.input = l.state; - forward_convolutional_layer(self_layer, s); - - float *old_state = l.state; - if(state.train) l.state += l.hidden*l.batch; - if(l.shortcut){ - copy_cpu(l.hidden * l.batch, old_state, 1, l.state, 1); - }else{ - fill_cpu(l.hidden * l.batch, 0, l.state, 1); - } - axpy_cpu(l.hidden * l.batch, 1, input_layer.output, 1, l.state, 1); - axpy_cpu(l.hidden * l.batch, 1, self_layer.output, 1, l.state, 1); - - s.input = l.state; - forward_convolutional_layer(output_layer, s); - - state.input += l.inputs*l.batch; - increment_layer(&input_layer, 1); - increment_layer(&self_layer, 1); - increment_layer(&output_layer, 1); - } -} - -void backward_crnn_layer(layer l, 
network_state state) -{ - network_state s = {0}; - s.train = state.train; - s.workspace = state.workspace; - s.net = state.net; - //s.index = state.index; - int i; - layer input_layer = *(l.input_layer); - layer self_layer = *(l.self_layer); - layer output_layer = *(l.output_layer); - - increment_layer(&input_layer, l.steps-1); - increment_layer(&self_layer, l.steps-1); - increment_layer(&output_layer, l.steps-1); - - l.state += l.hidden*l.batch*l.steps; - for (i = l.steps-1; i >= 0; --i) { - copy_cpu(l.hidden * l.batch, input_layer.output, 1, l.state, 1); - axpy_cpu(l.hidden * l.batch, 1, self_layer.output, 1, l.state, 1); - - s.input = l.state; - s.delta = self_layer.delta; - backward_convolutional_layer(output_layer, s); - - l.state -= l.hidden*l.batch; - /* - if(i > 0){ - copy_cpu(l.hidden * l.batch, input_layer.output - l.hidden*l.batch, 1, l.state, 1); - axpy_cpu(l.hidden * l.batch, 1, self_layer.output - l.hidden*l.batch, 1, l.state, 1); - }else{ - fill_cpu(l.hidden * l.batch, 0, l.state, 1); - } - */ - - s.input = l.state; - s.delta = self_layer.delta - l.hidden*l.batch; - if (i == 0) s.delta = 0; - backward_convolutional_layer(self_layer, s); - - copy_cpu(l.hidden*l.batch, self_layer.delta, 1, input_layer.delta, 1); - if (i > 0 && l.shortcut) axpy_cpu(l.hidden*l.batch, 1, self_layer.delta, 1, self_layer.delta - l.hidden*l.batch, 1); - s.input = state.input + i*l.inputs*l.batch; - if(state.delta) s.delta = state.delta + i*l.inputs*l.batch; - else s.delta = 0; - backward_convolutional_layer(input_layer, s); - - increment_layer(&input_layer, -1); - increment_layer(&self_layer, -1); - increment_layer(&output_layer, -1); - } -} - -#ifdef GPU - -void pull_crnn_layer(layer l) -{ - pull_convolutional_layer(*(l.input_layer)); - pull_convolutional_layer(*(l.self_layer)); - pull_convolutional_layer(*(l.output_layer)); -} - -void push_crnn_layer(layer l) -{ - push_convolutional_layer(*(l.input_layer)); - push_convolutional_layer(*(l.self_layer)); - 
push_convolutional_layer(*(l.output_layer)); -} - -void update_crnn_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay, float loss_scale) -{ - update_convolutional_layer_gpu(*(l.input_layer), batch, learning_rate, momentum, decay, loss_scale); - update_convolutional_layer_gpu(*(l.self_layer), batch, learning_rate, momentum, decay, loss_scale); - update_convolutional_layer_gpu(*(l.output_layer), batch, learning_rate, momentum, decay, loss_scale); -} - -void forward_crnn_layer_gpu(layer l, network_state state) -{ - network_state s = {0}; - s.train = state.train; - s.workspace = state.workspace; - s.net = state.net; - if(!state.train) s.index = state.index; // don't use TC for training (especially without cuda_convert_f32_to_f16() ) - int i; - layer input_layer = *(l.input_layer); - layer self_layer = *(l.self_layer); - layer output_layer = *(l.output_layer); - -/* -#ifdef CUDNN_HALF // slow and bad for training - if (!state.train && state.net.cudnn_half) { - s.index = state.index; - cuda_convert_f32_to_f16(input_layer.weights_gpu, input_layer.c*input_layer.n*input_layer.size*input_layer.size, input_layer.weights_gpu16); - cuda_convert_f32_to_f16(self_layer.weights_gpu, self_layer.c*self_layer.n*self_layer.size*self_layer.size, self_layer.weights_gpu16); - cuda_convert_f32_to_f16(output_layer.weights_gpu, output_layer.c*output_layer.n*output_layer.size*output_layer.size, output_layer.weights_gpu16); - } -#endif //CUDNN_HALF -*/ - - if (state.train) { - fill_ongpu(l.outputs * l.batch * l.steps, 0, output_layer.delta_gpu, 1); - fill_ongpu(l.hidden * l.batch * l.steps, 0, self_layer.delta_gpu, 1); - fill_ongpu(l.hidden * l.batch * l.steps, 0, input_layer.delta_gpu, 1); - fill_ongpu(l.hidden * l.batch, 0, l.state_gpu, 1); - } - - for (i = 0; i < l.steps; ++i) { - s.input = state.input; - forward_convolutional_layer_gpu(input_layer, s); - - s.input = l.state_gpu; - forward_convolutional_layer_gpu(self_layer, s); - - float *old_state = 
l.state_gpu; - if(state.train) l.state_gpu += l.hidden*l.batch; - if(l.shortcut){ - copy_ongpu(l.hidden * l.batch, old_state, 1, l.state_gpu, 1); - }else{ - fill_ongpu(l.hidden * l.batch, 0, l.state_gpu, 1); - } - axpy_ongpu(l.hidden * l.batch, 1, input_layer.output_gpu, 1, l.state_gpu, 1); - axpy_ongpu(l.hidden * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1); - - s.input = l.state_gpu; - forward_convolutional_layer_gpu(output_layer, s); - - state.input += l.inputs*l.batch; - increment_layer(&input_layer, 1); - increment_layer(&self_layer, 1); - increment_layer(&output_layer, 1); - } -} - -void backward_crnn_layer_gpu(layer l, network_state state) -{ - network_state s = {0}; - s.train = state.train; - s.workspace = state.workspace; - s.net = state.net; - //s.index = state.index; - int i; - layer input_layer = *(l.input_layer); - layer self_layer = *(l.self_layer); - layer output_layer = *(l.output_layer); - increment_layer(&input_layer, l.steps - 1); - increment_layer(&self_layer, l.steps - 1); - increment_layer(&output_layer, l.steps - 1); - float *init_state_gpu = l.state_gpu; - l.state_gpu += l.hidden*l.batch*l.steps; - for (i = l.steps-1; i >= 0; --i) { - //copy_ongpu(l.hidden * l.batch, input_layer.output_gpu, 1, l.state_gpu, 1); // commented in RNN - //axpy_ongpu(l.hidden * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1); // commented in RNN - - s.input = l.state_gpu; - s.delta = self_layer.delta_gpu; - backward_convolutional_layer_gpu(output_layer, s); - - l.state_gpu -= l.hidden*l.batch; - - copy_ongpu(l.hidden*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1); - - s.input = l.state_gpu; - s.delta = self_layer.delta_gpu - l.hidden*l.batch; - if (i == 0) s.delta = 0; - backward_convolutional_layer_gpu(self_layer, s); - - if (i > 0 && l.shortcut) axpy_ongpu(l.hidden*l.batch, 1, self_layer.delta_gpu, 1, self_layer.delta_gpu - l.hidden*l.batch, 1); - s.input = state.input + i*l.inputs*l.batch; - if(state.delta) s.delta = state.delta + 
i*l.inputs*l.batch; - else s.delta = 0; - backward_convolutional_layer_gpu(input_layer, s); - - if (state.net.try_fix_nan) { - fix_nan_and_inf(output_layer.delta_gpu, output_layer.inputs * output_layer.batch); - fix_nan_and_inf(self_layer.delta_gpu, self_layer.inputs * self_layer.batch); - fix_nan_and_inf(input_layer.delta_gpu, input_layer.inputs * input_layer.batch); - } - - increment_layer(&input_layer, -1); - increment_layer(&self_layer, -1); - increment_layer(&output_layer, -1); - } - fill_ongpu(l.hidden * l.batch, 0, init_state_gpu, 1); //clean l.state_gpu -} -#endif diff --git a/src/Detector/darknet/src/crnn_layer.h b/src/Detector/darknet/src/crnn_layer.h deleted file mode 100644 index b85df6ffa..000000000 --- a/src/Detector/darknet/src/crnn_layer.h +++ /dev/null @@ -1,32 +0,0 @@ - -#ifndef CRNN_LAYER_H -#define CRNN_LAYER_H - -#include "activations.h" -#include "layer.h" -#include "network.h" - -#ifdef __cplusplus -extern "C" { -#endif -layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int groups, int steps, int size, int stride, int dilation, int pad, ACTIVATION activation, int batch_normalize, int xnor, int train); -void resize_crnn_layer(layer *l, int w, int h); -void free_state_crnn(layer l); - -void forward_crnn_layer(layer l, network_state state); -void backward_crnn_layer(layer l, network_state state); -void update_crnn_layer(layer l, int batch, float learning_rate, float momentum, float decay); - -#ifdef GPU -void forward_crnn_layer_gpu(layer l, network_state state); -void backward_crnn_layer_gpu(layer l, network_state state); -void update_crnn_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay, float loss_scale); -void push_crnn_layer(layer l); -void pull_crnn_layer(layer l); -#endif - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/Detector/darknet/src/crop_layer.c b/src/Detector/darknet/src/crop_layer.c deleted file mode 100644 index 2d1fafc22..000000000 --- 
a/src/Detector/darknet/src/crop_layer.c +++ /dev/null @@ -1,103 +0,0 @@ -#include "utils.h" -#include "crop_layer.h" -#include "dark_cuda.h" -#include - -image get_crop_image(crop_layer l) -{ - int h = l.out_h; - int w = l.out_w; - int c = l.out_c; - return float_to_image(w,h,c,l.output); -} - -void backward_crop_layer(const crop_layer l, network_state state){} -void backward_crop_layer_gpu(const crop_layer l, network_state state){} - -crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure) -{ - fprintf(stderr, "Crop Layer: %d x %d -> %d x %d x %d image\n", h,w,crop_height,crop_width,c); - crop_layer l = { (LAYER_TYPE)0 }; - l.type = CROP; - l.batch = batch; - l.h = h; - l.w = w; - l.c = c; - l.scale = (float)crop_height / h; - l.flip = flip; - l.angle = angle; - l.saturation = saturation; - l.exposure = exposure; - l.out_w = crop_width; - l.out_h = crop_height; - l.out_c = c; - l.inputs = l.w * l.h * l.c; - l.outputs = l.out_w * l.out_h * l.out_c; - l.output = (float*)xcalloc(l.outputs * batch, sizeof(float)); - l.forward = forward_crop_layer; - l.backward = backward_crop_layer; - - #ifdef GPU - l.forward_gpu = forward_crop_layer_gpu; - l.backward_gpu = backward_crop_layer_gpu; - l.output_gpu = cuda_make_array(l.output, l.outputs*batch); - l.rand_gpu = cuda_make_array(0, l.batch*8); - #endif - return l; -} - -void resize_crop_layer(layer *l, int w, int h) -{ - l->w = w; - l->h = h; - - l->out_w = l->scale*w; - l->out_h = l->scale*h; - - l->inputs = l->w * l->h * l->c; - l->outputs = l->out_h * l->out_w * l->out_c; - - l->output = (float*)xrealloc(l->output, l->batch * l->outputs * sizeof(float)); - #ifdef GPU - cuda_free(l->output_gpu); - l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); - #endif -} - - -void forward_crop_layer(const crop_layer l, network_state state) -{ - int i,j,c,b,row,col; - int index; - int count = 0; - int flip = (l.flip && rand()%2); 
- int dh = rand()%(l.h - l.out_h + 1); - int dw = rand()%(l.w - l.out_w + 1); - float scale = 2; - float trans = -1; - if(l.noadjust){ - scale = 1; - trans = 0; - } - if(!state.train){ - flip = 0; - dh = (l.h - l.out_h)/2; - dw = (l.w - l.out_w)/2; - } - for(b = 0; b < l.batch; ++b){ - for(c = 0; c < l.c; ++c){ - for(i = 0; i < l.out_h; ++i){ - for(j = 0; j < l.out_w; ++j){ - if(flip){ - col = l.w - dw - j - 1; - }else{ - col = j + dw; - } - row = i + dh; - index = col+l.w*(row+l.h*(c + l.c*b)); - l.output[count++] = state.input[index]*scale + trans; - } - } - } - } -} diff --git a/src/Detector/darknet/src/crop_layer.h b/src/Detector/darknet/src/crop_layer.h deleted file mode 100644 index 319582441..000000000 --- a/src/Detector/darknet/src/crop_layer.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef CROP_LAYER_H -#define CROP_LAYER_H - -#include "image.h" -#include "layer.h" -#include "network.h" - -typedef layer crop_layer; - -#ifdef __cplusplus -extern "C" { -#endif -image get_crop_image(crop_layer l); -crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure); -void forward_crop_layer(const crop_layer l, network_state state); -void resize_crop_layer(layer *l, int w, int h); - -#ifdef GPU -void forward_crop_layer_gpu(crop_layer l, network_state state); -#endif - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/Detector/darknet/src/crop_layer_kernels.cu b/src/Detector/darknet/src/crop_layer_kernels.cu deleted file mode 100644 index 85783bcc3..000000000 --- a/src/Detector/darknet/src/crop_layer_kernels.cu +++ /dev/null @@ -1,222 +0,0 @@ -#include -#include -#include - -#include "crop_layer.h" -#include "utils.h" -#include "dark_cuda.h" -#include "image.h" - -__device__ float get_pixel_kernel(float *image, int w, int h, int x, int y, int c) -{ - if(x < 0 || x >= w || y < 0 || y >= h) return 0; - return image[x + w*(y + c*h)]; -} - -__device__ float3 rgb_to_hsv_kernel(float3 
rgb) -{ - float r = rgb.x; - float g = rgb.y; - float b = rgb.z; - - float h, s, v; - float max = (r > g) ? ( (r > b) ? r : b) : ( (g > b) ? g : b); - float min = (r < g) ? ( (r < b) ? r : b) : ( (g < b) ? g : b); - float delta = max - min; - v = max; - if(max == 0){ - s = 0; - h = -1; - }else{ - s = delta/max; - if(r == max){ - h = (g - b) / delta; - } else if (g == max) { - h = 2 + (b - r) / delta; - } else { - h = 4 + (r - g) / delta; - } - if (h < 0) h += 6; - } - return make_float3(h, s, v); -} - -__device__ float3 hsv_to_rgb_kernel(float3 hsv) -{ - float h = hsv.x; - float s = hsv.y; - float v = hsv.z; - - float r, g, b; - float f, p, q, t; - - if (s == 0) { - r = g = b = v; - } else { - int index = (int) floorf(h); - f = h - index; - p = v*(1-s); - q = v*(1-s*f); - t = v*(1-s*(1-f)); - if(index == 0){ - r = v; g = t; b = p; - } else if(index == 1){ - r = q; g = v; b = p; - } else if(index == 2){ - r = p; g = v; b = t; - } else if(index == 3){ - r = p; g = q; b = v; - } else if(index == 4){ - r = t; g = p; b = v; - } else { - r = v; g = p; b = q; - } - } - r = (r < 0) ? 0 : ((r > 1) ? 1 : r); - g = (g < 0) ? 0 : ((g > 1) ? 1 : g); - b = (b < 0) ? 0 : ((b > 1) ? 
1 : b); - return make_float3(r, g, b); -} - -__device__ float bilinear_interpolate_kernel(float *image, int w, int h, float x, float y, int c) -{ - int ix = (int) floorf(x); - int iy = (int) floorf(y); - - float dx = x - ix; - float dy = y - iy; - - float val = (1-dy) * (1-dx) * get_pixel_kernel(image, w, h, ix, iy, c) + - dy * (1-dx) * get_pixel_kernel(image, w, h, ix, iy+1, c) + - (1-dy) * dx * get_pixel_kernel(image, w, h, ix+1, iy, c) + - dy * dx * get_pixel_kernel(image, w, h, ix+1, iy+1, c); - return val; -} - -__global__ void levels_image_kernel(float *image, float *rand, int batch, int w, int h, int train, float saturation, float exposure, float translate, float scale, float shift) -{ - int size = batch * w * h; - int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if(id >= size) return; - int x = id % w; - id /= w; - int y = id % h; - id /= h; - float rshift = rand[0]; - float gshift = rand[1]; - float bshift = rand[2]; - float r0 = rand[8*id + 0]; - float r1 = rand[8*id + 1]; - float r2 = rand[8*id + 2]; - float r3 = rand[8*id + 3]; - - saturation = r0*(saturation - 1) + 1; - saturation = (r1 > .5) ? 1./saturation : saturation; - exposure = r2*(exposure - 1) + 1; - exposure = (r3 > .5) ? 
1./exposure : exposure; - - size_t offset = id * h * w * 3; - image += offset; - float r = image[x + w*(y + h*0)]; - float g = image[x + w*(y + h*1)]; - float b = image[x + w*(y + h*2)]; - float3 rgb = make_float3(r,g,b); - if(train){ - float3 hsv = rgb_to_hsv_kernel(rgb); - hsv.y *= saturation; - hsv.z *= exposure; - rgb = hsv_to_rgb_kernel(hsv); - } else { - shift = 0; - } - image[x + w*(y + h*0)] = rgb.x*scale + translate + (rshift - .5)*shift; - image[x + w*(y + h*1)] = rgb.y*scale + translate + (gshift - .5)*shift; - image[x + w*(y + h*2)] = rgb.z*scale + translate + (bshift - .5)*shift; -} - -__global__ void forward_crop_layer_kernel(float *input, float *rand, int size, int c, int h, int w, int crop_height, int crop_width, int train, int flip, float angle, float *output) -{ - int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if(id >= size) return; - - float cx = w/2.; - float cy = h/2.; - - int count = id; - int j = id % crop_width; - id /= crop_width; - int i = id % crop_height; - id /= crop_height; - int k = id % c; - id /= c; - int b = id; - - float r4 = rand[8*b + 4]; - float r5 = rand[8*b + 5]; - float r6 = rand[8*b + 6]; - float r7 = rand[8*b + 7]; - - float dw = (w - crop_width)*r4; - float dh = (h - crop_height)*r5; - flip = (flip && (r6 > .5)); - angle = 2*angle*r7 - angle; - if(!train){ - dw = (w - crop_width)/2.; - dh = (h - crop_height)/2.; - flip = 0; - angle = 0; - } - - input += w*h*c*b; - - float x = (flip) ? 
w - dw - j - 1 : j + dw; - float y = i + dh; - - float rx = cos(angle)*(x-cx) - sin(angle)*(y-cy) + cx; - float ry = sin(angle)*(x-cx) + cos(angle)*(y-cy) + cy; - - output[count] = bilinear_interpolate_kernel(input, w, h, rx, ry, k); -} - -extern "C" void forward_crop_layer_gpu(crop_layer layer, network_state state) -{ - cuda_random(layer.rand_gpu, layer.batch*8); - - float radians = layer.angle*3.14159265/180.; - - float scale = 2; - float translate = -1; - if(layer.noadjust){ - scale = 1; - translate = 0; - } - - int size = layer.batch * layer.w * layer.h; - - levels_image_kernel<<>>(state.input, layer.rand_gpu, layer.batch, layer.w, layer.h, state.train, layer.saturation, layer.exposure, translate, scale, layer.shift); - CHECK_CUDA(cudaPeekAtLastError()); - - size = layer.batch*layer.c*layer.out_w*layer.out_h; - - forward_crop_layer_kernel<<>>(state.input, layer.rand_gpu, size, layer.c, layer.h, layer.w, layer.out_h, layer.out_w, state.train, layer.flip, radians, layer.output_gpu); - CHECK_CUDA(cudaPeekAtLastError()); - -/* - cuda_pull_array(layer.output_gpu, layer.output, size); - image im = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 0*(size/layer.batch)); - image im2 = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 1*(size/layer.batch)); - image im3 = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 2*(size/layer.batch)); - - translate_image(im, -translate); - scale_image(im, 1/scale); - translate_image(im2, -translate); - scale_image(im2, 1/scale); - translate_image(im3, -translate); - scale_image(im3, 1/scale); - - show_image(im, "cropped"); - show_image(im2, "cropped2"); - show_image(im3, "cropped3"); - cvWaitKey(0); - */ -} diff --git a/src/Detector/darknet/src/dark_cuda.c b/src/Detector/darknet/src/dark_cuda.c deleted file mode 100644 index fec064738..000000000 --- a/src/Detector/darknet/src/dark_cuda.c +++ /dev/null @@ -1,526 +0,0 @@ -#ifdef __cplusplus -extern "C" { 
-#endif -int cuda_debug_sync = 0; -int gpu_index = 0; -#ifdef __cplusplus -} -#endif // __cplusplus - -#ifdef GPU - -#include "dark_cuda.h" -#include "utils.h" -#include "blas.h" -#include "assert.h" -#include -#include -#include -#include - -#pragma comment(lib, "cuda.lib") - - -#ifdef CUDNN -#ifndef USE_CMAKE_LIBS -#pragma comment(lib, "cudnn.lib") -#endif // USE_CMAKE_LIBS -#endif // CUDNN - -#if defined(CUDNN_HALF) && !defined(CUDNN) -#error "If you set CUDNN_HALF=1 then you must set CUDNN=1" -#endif - - -void cuda_set_device(int n) -{ - gpu_index = n; - cudaError_t status = cudaSetDevice(n); - if(status != cudaSuccess) CHECK_CUDA(status); -} - -int cuda_get_device() -{ - int n = 0; - cudaError_t status = cudaGetDevice(&n); - CHECK_CUDA(status); - return n; -} - -void *cuda_get_context() -{ - CUcontext pctx; - CUresult status = cuCtxGetCurrent(&pctx); - if(status != CUDA_SUCCESS) fprintf(stderr, " Error: cuCtxGetCurrent() is failed \n"); - return (void *)pctx; -} - -void check_error(cudaError_t status) -{ - cudaError_t status2 = cudaGetLastError(); - if (status != cudaSuccess) - { - const char *s = cudaGetErrorString(status); - char buffer[256]; - printf("\n CUDA Error: %s\n", s); - snprintf(buffer, 256, "CUDA Error: %s", s); -#ifdef WIN32 - getchar(); -#endif - error(buffer); - } - if (status2 != cudaSuccess) - { - const char *s = cudaGetErrorString(status2); - char buffer[256]; - printf("\n CUDA Error Prev: %s\n", s); - snprintf(buffer, 256, "CUDA Error Prev: %s", s); -#ifdef WIN32 - getchar(); -#endif - error(buffer); - } -} - -void check_error_extended(cudaError_t status, const char *file, int line, const char *date_time) -{ - if (status != cudaSuccess) { - printf("CUDA status Error: file: %s() : line: %d : build time: %s \n", file, line, date_time); - check_error(status); - } -#if defined(DEBUG) || defined(CUDA_DEBUG) - cuda_debug_sync = 1; -#endif - if (cuda_debug_sync) { - status = cudaDeviceSynchronize(); - if (status != cudaSuccess) - printf("CUDA 
status = cudaDeviceSynchronize() Error: file: %s() : line: %d : build time: %s \n", file, line, date_time); - } - check_error(status); -} - -dim3 cuda_gridsize(size_t n){ - size_t k = (n-1) / BLOCK + 1; - size_t x = k; - size_t y = 1; - if(x > 65535){ - x = ceil(sqrt(k)); - y = (n-1)/(x*BLOCK) + 1; - } - //dim3 d = { (unsigned int)x, (unsigned int)y, 1 }; - dim3 d; - d.x = x; - d.y = y; - d.z = 1; - //printf("%ld %ld %ld %ld\n", n, x, y, x*y*BLOCK); - return d; -} - -static cudaStream_t streamsArray[16]; // cudaStreamSynchronize( get_cuda_stream() ); -static int streamInit[16] = { 0 }; - -cudaStream_t get_cuda_stream() { - int i = cuda_get_device(); - if (!streamInit[i]) { - //printf("Create CUDA-stream \n"); - cudaError_t status = cudaStreamCreate(&streamsArray[i]); - //cudaError_t status = cudaStreamCreateWithFlags(&streamsArray[i], cudaStreamNonBlocking); - if (status != cudaSuccess) { - printf(" cudaStreamCreate error: %d \n", status); - const char *s = cudaGetErrorString(status); - printf("CUDA Error: %s\n", s); - status = cudaStreamCreateWithFlags(&streamsArray[i], cudaStreamDefault); - CHECK_CUDA(status); - } - streamInit[i] = 1; - } - return streamsArray[i]; -} - -static cudaStream_t streamsArray2[16]; // cudaStreamSynchronize( get_cuda_memcpy_stream() ); -static int streamInit2[16] = { 0 }; - -cudaStream_t get_cuda_memcpy_stream() { - int i = cuda_get_device(); - if (!streamInit2[i]) { - cudaError_t status = cudaStreamCreate(&streamsArray2[i]); - //cudaError_t status = cudaStreamCreateWithFlags(&streamsArray2[i], cudaStreamNonBlocking); - if (status != cudaSuccess) { - printf(" cudaStreamCreate-Memcpy error: %d \n", status); - const char *s = cudaGetErrorString(status); - printf("CUDA Error: %s\n", s); - status = cudaStreamCreateWithFlags(&streamsArray2[i], cudaStreamDefault); - CHECK_CUDA(status); - } - streamInit2[i] = 1; - } - return streamsArray2[i]; -} - - -#ifdef CUDNN -cudnnHandle_t cudnn_handle() -{ - static int init[16] = {0}; - static 
cudnnHandle_t handle[16]; - int i = cuda_get_device(); - if(!init[i]) { - cudnnCreate(&handle[i]); - init[i] = 1; - cudnnStatus_t status = cudnnSetStream(handle[i], get_cuda_stream()); - CHECK_CUDNN(status); - } - return handle[i]; -} - - -void cudnn_check_error(cudnnStatus_t status) -{ -#if defined(DEBUG) || defined(CUDA_DEBUG) - cudaDeviceSynchronize(); -#endif - if (cuda_debug_sync) { - cudaDeviceSynchronize(); - } - cudnnStatus_t status2 = CUDNN_STATUS_SUCCESS; -#ifdef CUDNN_ERRQUERY_RAWCODE - cudnnStatus_t status_tmp = cudnnQueryRuntimeError(cudnn_handle(), &status2, CUDNN_ERRQUERY_RAWCODE, NULL); -#endif - if (status != CUDNN_STATUS_SUCCESS) - { - const char *s = cudnnGetErrorString(status); - char buffer[256]; - printf("\n cuDNN Error: %s\n", s); - snprintf(buffer, 256, "cuDNN Error: %s", s); -#ifdef WIN32 - getchar(); -#endif - error(buffer); - } - if (status2 != CUDNN_STATUS_SUCCESS) - { - const char *s = cudnnGetErrorString(status2); - char buffer[256]; - printf("\n cuDNN Error Prev: %s\n", s); - snprintf(buffer, 256, "cuDNN Error Prev: %s", s); -#ifdef WIN32 - getchar(); -#endif - error(buffer); - } -} - -void cudnn_check_error_extended(cudnnStatus_t status, const char *file, int line, const char *date_time) -{ - if (status != CUDNN_STATUS_SUCCESS) { - printf("\n cuDNN status Error in: file: %s() : line: %d : build time: %s \n", file, line, date_time); - cudnn_check_error(status); - } -#if defined(DEBUG) || defined(CUDA_DEBUG) - cuda_debug_sync = 1; -#endif - if (cuda_debug_sync) { - cudaError_t status = cudaDeviceSynchronize(); - if (status != CUDNN_STATUS_SUCCESS) - printf("\n cudaError_t status = cudaDeviceSynchronize() Error in: file: %s() : line: %d : build time: %s \n", file, line, date_time); - } - cudnn_check_error(status); -} -#endif - -cublasHandle_t blas_handle() -{ - static int init[16] = {0}; - static cublasHandle_t handle[16]; - int i = cuda_get_device(); - if(!init[i]) { - cublasCreate(&handle[i]); - cublasStatus_t status = 
cublasSetStream(handle[i], get_cuda_stream()); - CHECK_CUDA((cudaError_t)status); - init[i] = 1; - } - return handle[i]; -} - -static float **pinned_ptr = NULL; -static size_t pinned_num_of_blocks = 0; -static size_t pinned_index = 0; -static size_t pinned_block_id = 0; -static const size_t pinned_block_size = (size_t)1024 * 1024 * 1024 * 1; // 1 GB block size -static pthread_mutex_t mutex_pinned = PTHREAD_MUTEX_INITIALIZER; - -// free CPU-pinned memory -void free_pinned_memory() -{ - if (pinned_ptr) { - int k; - for (k = 0; k < pinned_num_of_blocks; ++k) { - cuda_free_host(pinned_ptr[k]); - } - free(pinned_ptr); - pinned_ptr = NULL; - } -} - -// custom CPU-pinned memory allocation -void pre_allocate_pinned_memory(const size_t size) -{ - const size_t num_of_blocks = size / pinned_block_size + ((size % pinned_block_size) ? 1 : 0); - printf("pre_allocate... pinned_ptr = %p \n", pinned_ptr); - - pthread_mutex_lock(&mutex_pinned); - if (!pinned_ptr) { - pinned_ptr = (float **)calloc(num_of_blocks, sizeof(float *)); - if(!pinned_ptr) error("calloc failed in pre_allocate() \n"); - - printf("pre_allocate: size = %Iu MB, num_of_blocks = %Iu, block_size = %Iu MB \n", - size / (1024*1024), num_of_blocks, pinned_block_size / (1024 * 1024)); - - int k; - for (k = 0; k < num_of_blocks; ++k) { - cudaError_t status = cudaHostAlloc((void **)&pinned_ptr[k], pinned_block_size, cudaHostRegisterMapped); - if (status != cudaSuccess) fprintf(stderr, " Can't pre-allocate CUDA-pinned buffer on CPU-RAM \n"); - CHECK_CUDA(status); - if (!pinned_ptr[k]) error("cudaHostAlloc failed\n"); - else { - printf(" Allocated %d pinned block \n", pinned_block_size); - } - } - pinned_num_of_blocks = num_of_blocks; - } - pthread_mutex_unlock(&mutex_pinned); -} - -// simple - get pre-allocated pinned memory -float *cuda_make_array_pinned_preallocated(float *x, size_t n) -{ - pthread_mutex_lock(&mutex_pinned); - float *x_cpu = NULL; - const size_t memory_step = 512;// 4096; - const size_t size = 
sizeof(float)*n; - const size_t allocation_size = ((size / memory_step) + 1) * memory_step; - - if (pinned_ptr && pinned_block_id < pinned_num_of_blocks && (allocation_size < pinned_block_size/2)) - { - if ((allocation_size + pinned_index) > pinned_block_size) { - const float filled = (float)100 * pinned_index / pinned_block_size; - printf("\n Pinned block_id = %d, filled = %f %% \n", pinned_block_id, filled); - pinned_block_id++; - pinned_index = 0; - } - if ((allocation_size + pinned_index) < pinned_block_size && pinned_block_id < pinned_num_of_blocks) { - x_cpu = (float *)((char *)pinned_ptr[pinned_block_id] + pinned_index); - pinned_index += allocation_size; - } - else { - //printf("Pre-allocated pinned memory is over! \n"); - } - } - - if(!x_cpu) { - if (allocation_size > pinned_block_size / 2) { - printf("Try to allocate new pinned memory, size = %d MB \n", size / (1024 * 1024)); - cudaError_t status = cudaHostAlloc((void **)&x_cpu, size, cudaHostRegisterMapped); - if (status != cudaSuccess) fprintf(stderr, " Can't allocate CUDA-pinned memory on CPU-RAM (pre-allocated memory is over too) \n"); - CHECK_CUDA(status); - } - else { - printf("Try to allocate new pinned BLOCK, size = %d MB \n", size / (1024 * 1024)); - pinned_num_of_blocks++; - pinned_block_id = pinned_num_of_blocks - 1; - pinned_index = 0; - pinned_ptr = (float **)realloc(pinned_ptr, pinned_num_of_blocks * sizeof(float *)); - cudaError_t status = cudaHostAlloc((void **)&pinned_ptr[pinned_block_id], pinned_block_size, cudaHostRegisterMapped); - if (status != cudaSuccess) fprintf(stderr, " Can't pre-allocate CUDA-pinned buffer on CPU-RAM \n"); - CHECK_CUDA(status); - x_cpu = pinned_ptr[pinned_block_id]; - } - } - - if (x) { - cudaError_t status = cudaMemcpyAsync(x_cpu, x, size, cudaMemcpyDefault, get_cuda_stream()); - CHECK_CUDA(status); - } - - pthread_mutex_unlock(&mutex_pinned); - return x_cpu; -} - -float *cuda_make_array_pinned(float *x, size_t n) -{ - float *x_gpu; - size_t size = 
sizeof(float)*n; - //cudaError_t status = cudaMalloc((void **)&x_gpu, size); - cudaError_t status = cudaHostAlloc((void **)&x_gpu, size, cudaHostRegisterMapped); - if (status != cudaSuccess) fprintf(stderr, " Can't allocate CUDA-pinned memory on CPU-RAM \n"); - CHECK_CUDA(status); - if (x) { - status = cudaMemcpyAsync(x_gpu, x, size, cudaMemcpyDefault, get_cuda_stream()); - CHECK_CUDA(status); - } - if (!x_gpu) error("cudaHostAlloc failed\n"); - return x_gpu; -} - -float *cuda_make_array(float *x, size_t n) -{ - float *x_gpu; - size_t size = sizeof(float)*n; - cudaError_t status = cudaMalloc((void **)&x_gpu, size); - //cudaError_t status = cudaMallocManaged((void **)&x_gpu, size, cudaMemAttachGlobal); - //status = cudaMemAdvise(x_gpu, size, cudaMemAdviseSetPreferredLocation, cudaCpuDeviceId); - if (status != cudaSuccess) fprintf(stderr, " Try to set subdivisions=64 in your cfg-file. \n"); - CHECK_CUDA(status); - if(x){ - //status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); - status = cudaMemcpyAsync(x_gpu, x, size, cudaMemcpyDefault, get_cuda_stream()); - CHECK_CUDA(status); - } - if(!x_gpu) error("Cuda malloc failed\n"); - return x_gpu; -} - -void **cuda_make_array_pointers(void **x, size_t n) -{ - void **x_gpu; - size_t size = sizeof(void*) * n; - cudaError_t status = cudaMalloc((void **)&x_gpu, size); - if (status != cudaSuccess) fprintf(stderr, " Try to set subdivisions=64 in your cfg-file. 
\n"); - CHECK_CUDA(status); - if (x) { - status = cudaMemcpyAsync(x_gpu, x, size, cudaMemcpyDefault, get_cuda_stream()); - CHECK_CUDA(status); - } - if (!x_gpu) error("Cuda malloc failed\n"); - return x_gpu; -} - -void cuda_random(float *x_gpu, size_t n) -{ - static curandGenerator_t gen[16]; - static int init[16] = {0}; - int i = cuda_get_device(); - if(!init[i]){ - curandCreateGenerator(&gen[i], CURAND_RNG_PSEUDO_DEFAULT); - curandSetPseudoRandomGeneratorSeed(gen[i], time(0)); - init[i] = 1; - } - curandGenerateUniform(gen[i], x_gpu, n); - CHECK_CUDA(cudaPeekAtLastError()); -} - -float cuda_compare(float *x_gpu, float *x, size_t n, char *s) -{ - float* tmp = (float*)xcalloc(n, sizeof(float)); - cuda_pull_array(x_gpu, tmp, n); - //int i; - //for(i = 0; i < n; ++i) printf("%f %f\n", tmp[i], x[i]); - axpy_cpu(n, -1, x, 1, tmp, 1); - float err = dot_cpu(n, tmp, 1, tmp, 1); - printf("Error %s: %f\n", s, sqrt(err/n)); - free(tmp); - return err; -} - -int *cuda_make_int_array(size_t n) -{ - int *x_gpu; - size_t size = sizeof(int)*n; - cudaError_t status = cudaMalloc((void **)&x_gpu, size); - if(status != cudaSuccess) fprintf(stderr, " Try to set subdivisions=64 in your cfg-file. 
\n"); - CHECK_CUDA(status); - return x_gpu; -} - -int *cuda_make_int_array_new_api(int *x, size_t n) -{ - int *x_gpu; - size_t size = sizeof(int)*n; - cudaError_t status = cudaMalloc((void **)&x_gpu, size); - CHECK_CUDA(status); - if (x) { - //status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); - cudaError_t status = cudaMemcpyAsync(x_gpu, x, size, cudaMemcpyHostToDevice, get_cuda_stream()); - CHECK_CUDA(status); - } - if (!x_gpu) error("Cuda malloc failed\n"); - return x_gpu; -} - -void cuda_free(float *x_gpu) -{ - //cudaStreamSynchronize(get_cuda_stream()); - cudaError_t status = cudaFree(x_gpu); - CHECK_CUDA(status); -} - -void cuda_free_host(float *x_cpu) -{ - //cudaStreamSynchronize(get_cuda_stream()); - cudaError_t status = cudaFreeHost(x_cpu); - CHECK_CUDA(status); -} - -void cuda_push_array(float *x_gpu, float *x, size_t n) -{ - size_t size = sizeof(float)*n; - //cudaError_t status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); - cudaError_t status = cudaMemcpyAsync(x_gpu, x, size, cudaMemcpyHostToDevice, get_cuda_stream()); - CHECK_CUDA(status); -} - -void cuda_pull_array(float *x_gpu, float *x, size_t n) -{ - size_t size = sizeof(float)*n; - //cudaError_t status = cudaMemcpy(x, x_gpu, size, cudaMemcpyDeviceToHost); - cudaError_t status = cudaMemcpyAsync(x, x_gpu, size, cudaMemcpyDeviceToHost, get_cuda_stream()); - CHECK_CUDA(status); - cudaStreamSynchronize(get_cuda_stream()); -} - -void cuda_pull_array_async(float *x_gpu, float *x, size_t n) -{ - size_t size = sizeof(float)*n; - cudaError_t status = cudaMemcpyAsync(x, x_gpu, size, cudaMemcpyDefault, get_cuda_stream()); - check_error(status); - //cudaStreamSynchronize(get_cuda_stream()); -} - -int get_number_of_blocks(int array_size, int block_size) -{ - return array_size / block_size + ((array_size % block_size > 0) ? 
1 : 0); -} - -int get_gpu_compute_capability(int i, char *device_name) -{ - typedef struct cudaDeviceProp cudaDeviceProp; - cudaDeviceProp prop; - cudaError_t status = cudaGetDeviceProperties(&prop, i); - CHECK_CUDA(status); - if (device_name) strcpy(device_name, prop.name); - int cc = prop.major * 100 + prop.minor * 10; // __CUDA_ARCH__ format - return cc; -} - -void show_cuda_cudnn_info() -{ - int cuda_version = 0, cuda_driver_version = 0, device_count = 0; - CHECK_CUDA(cudaRuntimeGetVersion(&cuda_version)); - CHECK_CUDA(cudaDriverGetVersion(&cuda_driver_version)); - fprintf(stderr, " CUDA-version: %d (%d)", cuda_version, cuda_driver_version); - if(cuda_version > cuda_driver_version) fprintf(stderr, "\n Warning: CUDA-version is higher than Driver-version! \n"); -#ifdef CUDNN - fprintf(stderr, ", cuDNN: %d.%d.%d", CUDNN_MAJOR, CUDNN_MINOR, CUDNN_PATCHLEVEL); -#endif // CUDNN -#ifdef CUDNN_HALF - fprintf(stderr, ", CUDNN_HALF=1"); -#endif // CUDNN_HALF - CHECK_CUDA(cudaGetDeviceCount(&device_count)); - fprintf(stderr, ", GPU count: %d ", device_count); - fprintf(stderr, " \n"); -} - -#else // GPU -#include "darknet.h" -void cuda_set_device(int n) {} -#endif // GPU diff --git a/src/Detector/darknet/src/dark_cuda.h b/src/Detector/darknet/src/dark_cuda.h deleted file mode 100644 index 0e5f39f9a..000000000 --- a/src/Detector/darknet/src/dark_cuda.h +++ /dev/null @@ -1,100 +0,0 @@ -#ifndef DARKCUDA_H -#define DARKCUDA_H -#include "darknet.h" - -#ifdef __cplusplus -extern "C" { -#endif - - -extern int cuda_debug_sync; -extern int gpu_index; -#ifdef __cplusplus -} -#endif // __cplusplus - -#ifdef GPU - -#define BLOCK 512 -#define FULL_MASK 0xffffffff -#define WARP_SIZE 32 -#define BLOCK_TRANSPOSE32 256 - -#include -#include -#include -#include -#include -//#include - -#ifdef CUDNN -#include -#endif // CUDNN - -#ifndef __DATE__ -#define __DATE__ -#endif - -#ifndef __TIME__ -#define __TIME__ -#endif - -#ifndef __FUNCTION__ -#define __FUNCTION__ -#endif - -#ifndef __LINE__ 
-#define __LINE__ 0 -#endif - -#ifndef __FILE__ -#define __FILE__ -#endif - -#ifdef __cplusplus -extern "C" { -#endif // __cplusplus - void check_error(cudaError_t status); - void check_error_extended(cudaError_t status, const char *file, int line, const char *date_time); -#define CHECK_CUDA(X) check_error_extended(X, __FILE__ " : " __FUNCTION__, __LINE__, __DATE__ " - " __TIME__ ); - - cublasHandle_t blas_handle(); - void free_pinned_memory(); - void pre_allocate_pinned_memory(size_t size); - float *cuda_make_array_pinned_preallocated(float *x, size_t n); - float *cuda_make_array_pinned(float *x, size_t n); - float *cuda_make_array(float *x, size_t n); - void **cuda_make_array_pointers(void **x, size_t n); - int *cuda_make_int_array(size_t n); - int *cuda_make_int_array_new_api(int *x, size_t n); - void cuda_push_array(float *x_gpu, float *x, size_t n); - //LIB_API void cuda_pull_array(float *x_gpu, float *x, size_t n); - //LIB_API void cuda_set_device(int n); - int cuda_get_device(); - void cuda_free_host(float *x_cpu); - void cuda_free(float *x_gpu); - void cuda_random(float *x_gpu, size_t n); - float cuda_compare(float *x_gpu, float *x, size_t n, char *s); - dim3 cuda_gridsize(size_t n); - cudaStream_t get_cuda_stream(); - cudaStream_t get_cuda_memcpy_stream(); - int get_number_of_blocks(int array_size, int block_size); - int get_gpu_compute_capability(int i, char *device_name); - void show_cuda_cudnn_info(); - -#ifdef CUDNN -cudnnHandle_t cudnn_handle(); -enum {cudnn_fastest, cudnn_smallest, cudnn_specify}; - -void cudnn_check_error_extended(cudnnStatus_t status, const char *file, int line, const char *date_time); -#define CHECK_CUDNN(X) cudnn_check_error_extended(X, __FILE__ " : " __FUNCTION__, __LINE__, __DATE__ " - " __TIME__ ); -#endif - -#ifdef __cplusplus -} -#endif // __cplusplus - -#else // GPU -//LIB_API void cuda_set_device(int n); -#endif // GPU -#endif // DARKCUDA_H diff --git a/src/Detector/darknet/src/darknet.c 
b/src/Detector/darknet/src/darknet.c deleted file mode 100644 index 13ab75f3d..000000000 --- a/src/Detector/darknet/src/darknet.c +++ /dev/null @@ -1,559 +0,0 @@ -#include "darknet.h" -#include -#include -#include -#if defined(_MSC_VER) && defined(_DEBUG) -#include -#endif - -#include "parser.h" -#include "utils.h" -#include "dark_cuda.h" -#include "blas.h" -#include "connected_layer.h" - - -extern void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int top); -extern void run_voxel(int argc, char **argv); -extern void run_yolo(int argc, char **argv); -extern void run_detector(int argc, char **argv); -extern void run_coco(int argc, char **argv); -extern void run_writing(int argc, char **argv); -extern void run_captcha(int argc, char **argv); -extern void run_nightmare(int argc, char **argv); -extern void run_dice(int argc, char **argv); -extern void run_compare(int argc, char **argv); -extern void run_classifier(int argc, char **argv); -extern void run_char_rnn(int argc, char **argv); -extern void run_vid_rnn(int argc, char **argv); -extern void run_tag(int argc, char **argv); -extern void run_cifar(int argc, char **argv); -extern void run_go(int argc, char **argv); -extern void run_art(int argc, char **argv); -extern void run_super(int argc, char **argv); - -void average(int argc, char *argv[]) -{ - char *cfgfile = argv[2]; - char *outfile = argv[3]; - gpu_index = -1; - network net = parse_network_cfg(cfgfile); - network sum = parse_network_cfg(cfgfile); - - char *weightfile = argv[4]; - load_weights(&sum, weightfile); - - int i, j; - int n = argc - 5; - for(i = 0; i < n; ++i){ - weightfile = argv[i+5]; - load_weights(&net, weightfile); - for(j = 0; j < net.n; ++j){ - layer l = net.layers[j]; - layer out = sum.layers[j]; - if(l.type == CONVOLUTIONAL){ - int num = l.n*l.c*l.size*l.size; - axpy_cpu(l.n, 1, l.biases, 1, out.biases, 1); - axpy_cpu(num, 1, l.weights, 1, out.weights, 1); - if(l.batch_normalize){ - axpy_cpu(l.n, 1, 
l.scales, 1, out.scales, 1); - axpy_cpu(l.n, 1, l.rolling_mean, 1, out.rolling_mean, 1); - axpy_cpu(l.n, 1, l.rolling_variance, 1, out.rolling_variance, 1); - } - } - if(l.type == CONNECTED){ - axpy_cpu(l.outputs, 1, l.biases, 1, out.biases, 1); - axpy_cpu(l.outputs*l.inputs, 1, l.weights, 1, out.weights, 1); - } - } - } - n = n+1; - for(j = 0; j < net.n; ++j){ - layer l = sum.layers[j]; - if(l.type == CONVOLUTIONAL){ - int num = l.n*l.c*l.size*l.size; - scal_cpu(l.n, 1./n, l.biases, 1); - scal_cpu(num, 1./n, l.weights, 1); - if(l.batch_normalize){ - scal_cpu(l.n, 1./n, l.scales, 1); - scal_cpu(l.n, 1./n, l.rolling_mean, 1); - scal_cpu(l.n, 1./n, l.rolling_variance, 1); - } - } - if(l.type == CONNECTED){ - scal_cpu(l.outputs, 1./n, l.biases, 1); - scal_cpu(l.outputs*l.inputs, 1./n, l.weights, 1); - } - } - save_weights(sum, outfile); -} - -void speed(char *cfgfile, int tics) -{ - if (tics == 0) tics = 1000; - network net = parse_network_cfg(cfgfile); - set_batch_network(&net, 1); - int i; - time_t start = time(0); - image im = make_image(net.w, net.h, net.c); - for(i = 0; i < tics; ++i){ - network_predict(net, im.data); - } - double t = difftime(time(0), start); - printf("\n%d evals, %f Seconds\n", tics, t); - printf("Speed: %f sec/eval\n", t/tics); - printf("Speed: %f Hz\n", tics/t); -} - -void operations(char *cfgfile) -{ - gpu_index = -1; - network net = parse_network_cfg(cfgfile); - int i; - long ops = 0; - for(i = 0; i < net.n; ++i){ - layer l = net.layers[i]; - if(l.type == CONVOLUTIONAL){ - ops += 2l * l.n * l.size*l.size*l.c * l.out_h*l.out_w; - } else if(l.type == CONNECTED){ - ops += 2l * l.inputs * l.outputs; - } else if (l.type == RNN){ - ops += 2l * l.input_layer->inputs * l.input_layer->outputs; - ops += 2l * l.self_layer->inputs * l.self_layer->outputs; - ops += 2l * l.output_layer->inputs * l.output_layer->outputs; - } else if (l.type == GRU){ - ops += 2l * l.uz->inputs * l.uz->outputs; - ops += 2l * l.uh->inputs * l.uh->outputs; - ops += 2l * 
l.ur->inputs * l.ur->outputs; - ops += 2l * l.wz->inputs * l.wz->outputs; - ops += 2l * l.wh->inputs * l.wh->outputs; - ops += 2l * l.wr->inputs * l.wr->outputs; - } else if (l.type == LSTM){ - ops += 2l * l.uf->inputs * l.uf->outputs; - ops += 2l * l.ui->inputs * l.ui->outputs; - ops += 2l * l.ug->inputs * l.ug->outputs; - ops += 2l * l.uo->inputs * l.uo->outputs; - ops += 2l * l.wf->inputs * l.wf->outputs; - ops += 2l * l.wi->inputs * l.wi->outputs; - ops += 2l * l.wg->inputs * l.wg->outputs; - ops += 2l * l.wo->inputs * l.wo->outputs; - } - } - printf("Floating Point Operations: %ld\n", ops); - printf("Floating Point Operations: %.2f Bn\n", (float)ops/1000000000.); -} - -void oneoff(char *cfgfile, char *weightfile, char *outfile) -{ - gpu_index = -1; - network net = parse_network_cfg(cfgfile); - int oldn = net.layers[net.n - 2].n; - int c = net.layers[net.n - 2].c; - net.layers[net.n - 2].n = 9372; - net.layers[net.n - 2].biases += 5; - net.layers[net.n - 2].weights += 5*c; - if(weightfile){ - load_weights(&net, weightfile); - } - net.layers[net.n - 2].biases -= 5; - net.layers[net.n - 2].weights -= 5*c; - net.layers[net.n - 2].n = oldn; - printf("%d\n", oldn); - layer l = net.layers[net.n - 2]; - copy_cpu(l.n/3, l.biases, 1, l.biases + l.n/3, 1); - copy_cpu(l.n/3, l.biases, 1, l.biases + 2*l.n/3, 1); - copy_cpu(l.n/3*l.c, l.weights, 1, l.weights + l.n/3*l.c, 1); - copy_cpu(l.n/3*l.c, l.weights, 1, l.weights + 2*l.n/3*l.c, 1); - *net.seen = 0; - *net.cur_iteration = 0; - save_weights(net, outfile); -} - -void partial(char *cfgfile, char *weightfile, char *outfile, int max) -{ - gpu_index = -1; - network net = parse_network_cfg_custom(cfgfile, 1, 1); - if(weightfile){ - load_weights_upto(&net, weightfile, max); - } - *net.seen = 0; - *net.cur_iteration = 0; - save_weights_upto(net, outfile, max, 0); -} - -#include "convolutional_layer.h" -void rescale_net(char *cfgfile, char *weightfile, char *outfile) -{ - gpu_index = -1; - network net = 
parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - int i; - for(i = 0; i < net.n; ++i){ - layer l = net.layers[i]; - if(l.type == CONVOLUTIONAL){ - rescale_weights(l, 2, -.5); - break; - } - } - save_weights(net, outfile); -} - -void rgbgr_net(char *cfgfile, char *weightfile, char *outfile) -{ - gpu_index = -1; - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - int i; - for(i = 0; i < net.n; ++i){ - layer l = net.layers[i]; - if(l.type == CONVOLUTIONAL){ - rgbgr_weights(l); - break; - } - } - save_weights(net, outfile); -} - -void reset_normalize_net(char *cfgfile, char *weightfile, char *outfile) -{ - gpu_index = -1; - network net = parse_network_cfg(cfgfile); - if (weightfile) { - load_weights(&net, weightfile); - } - int i; - for (i = 0; i < net.n; ++i) { - layer l = net.layers[i]; - if (l.type == CONVOLUTIONAL && l.batch_normalize) { - denormalize_convolutional_layer(l); - } - if (l.type == CONNECTED && l.batch_normalize) { - denormalize_connected_layer(l); - } - if (l.type == GRU && l.batch_normalize) { - denormalize_connected_layer(*l.input_z_layer); - denormalize_connected_layer(*l.input_r_layer); - denormalize_connected_layer(*l.input_h_layer); - denormalize_connected_layer(*l.state_z_layer); - denormalize_connected_layer(*l.state_r_layer); - denormalize_connected_layer(*l.state_h_layer); - } - if (l.type == LSTM && l.batch_normalize) { - denormalize_connected_layer(*l.wf); - denormalize_connected_layer(*l.wi); - denormalize_connected_layer(*l.wg); - denormalize_connected_layer(*l.wo); - denormalize_connected_layer(*l.uf); - denormalize_connected_layer(*l.ui); - denormalize_connected_layer(*l.ug); - denormalize_connected_layer(*l.uo); - } - } - save_weights(net, outfile); -} - -layer normalize_layer(layer l, int n) -{ - int j; - l.batch_normalize=1; - l.scales = (float*)xcalloc(n, sizeof(float)); - for(j = 0; j < n; ++j){ - l.scales[j] = 1; - } - l.rolling_mean = 
(float*)xcalloc(n, sizeof(float)); - l.rolling_variance = (float*)xcalloc(n, sizeof(float)); - return l; -} - -void normalize_net(char *cfgfile, char *weightfile, char *outfile) -{ - gpu_index = -1; - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - int i; - for(i = 0; i < net.n; ++i){ - layer l = net.layers[i]; - if(l.type == CONVOLUTIONAL && !l.batch_normalize){ - net.layers[i] = normalize_layer(l, l.n); - } - if (l.type == CONNECTED && !l.batch_normalize) { - net.layers[i] = normalize_layer(l, l.outputs); - } - if (l.type == GRU && l.batch_normalize) { - *l.input_z_layer = normalize_layer(*l.input_z_layer, l.input_z_layer->outputs); - *l.input_r_layer = normalize_layer(*l.input_r_layer, l.input_r_layer->outputs); - *l.input_h_layer = normalize_layer(*l.input_h_layer, l.input_h_layer->outputs); - *l.state_z_layer = normalize_layer(*l.state_z_layer, l.state_z_layer->outputs); - *l.state_r_layer = normalize_layer(*l.state_r_layer, l.state_r_layer->outputs); - *l.state_h_layer = normalize_layer(*l.state_h_layer, l.state_h_layer->outputs); - net.layers[i].batch_normalize=1; - } - if (l.type == LSTM && l.batch_normalize) { - *l.wf = normalize_layer(*l.wf, l.wf->outputs); - *l.wi = normalize_layer(*l.wi, l.wi->outputs); - *l.wg = normalize_layer(*l.wg, l.wg->outputs); - *l.wo = normalize_layer(*l.wo, l.wo->outputs); - *l.uf = normalize_layer(*l.uf, l.uf->outputs); - *l.ui = normalize_layer(*l.ui, l.ui->outputs); - *l.ug = normalize_layer(*l.ug, l.ug->outputs); - *l.uo = normalize_layer(*l.uo, l.uo->outputs); - net.layers[i].batch_normalize=1; - } - } - save_weights(net, outfile); -} - -void statistics_net(char *cfgfile, char *weightfile) -{ - gpu_index = -1; - network net = parse_network_cfg(cfgfile); - if (weightfile) { - load_weights(&net, weightfile); - } - int i; - for (i = 0; i < net.n; ++i) { - layer l = net.layers[i]; - if (l.type == CONNECTED && l.batch_normalize) { - printf("Connected Layer %d\n", i); - 
statistics_connected_layer(l); - } - if (l.type == GRU && l.batch_normalize) { - printf("GRU Layer %d\n", i); - printf("Input Z\n"); - statistics_connected_layer(*l.input_z_layer); - printf("Input R\n"); - statistics_connected_layer(*l.input_r_layer); - printf("Input H\n"); - statistics_connected_layer(*l.input_h_layer); - printf("State Z\n"); - statistics_connected_layer(*l.state_z_layer); - printf("State R\n"); - statistics_connected_layer(*l.state_r_layer); - printf("State H\n"); - statistics_connected_layer(*l.state_h_layer); - } - if (l.type == LSTM && l.batch_normalize) { - printf("LSTM Layer %d\n", i); - printf("wf\n"); - statistics_connected_layer(*l.wf); - printf("wi\n"); - statistics_connected_layer(*l.wi); - printf("wg\n"); - statistics_connected_layer(*l.wg); - printf("wo\n"); - statistics_connected_layer(*l.wo); - printf("uf\n"); - statistics_connected_layer(*l.uf); - printf("ui\n"); - statistics_connected_layer(*l.ui); - printf("ug\n"); - statistics_connected_layer(*l.ug); - printf("uo\n"); - statistics_connected_layer(*l.uo); - } - printf("\n"); - } -} - -void denormalize_net(char *cfgfile, char *weightfile, char *outfile) -{ - gpu_index = -1; - network net = parse_network_cfg(cfgfile); - if (weightfile) { - load_weights(&net, weightfile); - } - int i; - for (i = 0; i < net.n; ++i) { - layer l = net.layers[i]; - if (l.type == CONVOLUTIONAL && l.batch_normalize) { - denormalize_convolutional_layer(l); - net.layers[i].batch_normalize=0; - } - if (l.type == CONNECTED && l.batch_normalize) { - denormalize_connected_layer(l); - net.layers[i].batch_normalize=0; - } - if (l.type == GRU && l.batch_normalize) { - denormalize_connected_layer(*l.input_z_layer); - denormalize_connected_layer(*l.input_r_layer); - denormalize_connected_layer(*l.input_h_layer); - denormalize_connected_layer(*l.state_z_layer); - denormalize_connected_layer(*l.state_r_layer); - denormalize_connected_layer(*l.state_h_layer); - l.input_z_layer->batch_normalize = 0; - 
l.input_r_layer->batch_normalize = 0; - l.input_h_layer->batch_normalize = 0; - l.state_z_layer->batch_normalize = 0; - l.state_r_layer->batch_normalize = 0; - l.state_h_layer->batch_normalize = 0; - net.layers[i].batch_normalize=0; - } - if (l.type == GRU && l.batch_normalize) { - denormalize_connected_layer(*l.wf); - denormalize_connected_layer(*l.wi); - denormalize_connected_layer(*l.wg); - denormalize_connected_layer(*l.wo); - denormalize_connected_layer(*l.uf); - denormalize_connected_layer(*l.ui); - denormalize_connected_layer(*l.ug); - denormalize_connected_layer(*l.uo); - l.wf->batch_normalize = 0; - l.wi->batch_normalize = 0; - l.wg->batch_normalize = 0; - l.wo->batch_normalize = 0; - l.uf->batch_normalize = 0; - l.ui->batch_normalize = 0; - l.ug->batch_normalize = 0; - l.uo->batch_normalize = 0; - net.layers[i].batch_normalize=0; - } - } - save_weights(net, outfile); -} - -void visualize(char *cfgfile, char *weightfile) -{ - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - visualize_network(net); -#ifdef OPENCV - wait_until_press_key_cv(); -#endif -} - -int main(int argc, char **argv) -{ -#ifdef _DEBUG - _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF); - printf(" _DEBUG is used \n"); -#endif - -#ifdef DEBUG - printf(" DEBUG=1 \n"); -#endif - - int i; - for (i = 0; i < argc; ++i) { - if (!argv[i]) continue; - strip_args(argv[i]); - } - - //test_resize("data/bad.jpg"); - //test_box(); - //test_convolutional_layer(); - if(argc < 2){ - fprintf(stderr, "usage: %s \n", argv[0]); - return 0; - } - gpu_index = find_int_arg(argc, argv, "-i", 0); - if(find_arg(argc, argv, "-nogpu")) { - gpu_index = -1; - printf("\n Currently Darknet doesn't support -nogpu flag. 
If you want to use CPU - please compile Darknet with GPU=0 in the Makefile, or compile darknet_no_gpu.sln on Windows.\n"); - exit(-1); - } - -#ifndef GPU - gpu_index = -1; - printf(" GPU isn't used \n"); - init_cpu(); -#else // GPU - if(gpu_index >= 0){ - cuda_set_device(gpu_index); - CHECK_CUDA(cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync)); - } - - show_cuda_cudnn_info(); - cuda_debug_sync = find_arg(argc, argv, "-cuda_debug_sync"); - -#ifdef CUDNN_HALF - printf(" CUDNN_HALF=1 \n"); -#endif // CUDNN_HALF - -#endif // GPU - - show_opencv_info(); - - if (0 == strcmp(argv[1], "average")){ - average(argc, argv); - } else if (0 == strcmp(argv[1], "yolo")){ - run_yolo(argc, argv); - } else if (0 == strcmp(argv[1], "voxel")){ - run_voxel(argc, argv); - } else if (0 == strcmp(argv[1], "super")){ - run_super(argc, argv); - } else if (0 == strcmp(argv[1], "detector")){ - run_detector(argc, argv); - } else if (0 == strcmp(argv[1], "detect")){ - float thresh = find_float_arg(argc, argv, "-thresh", .24); - int ext_output = find_arg(argc, argv, "-ext_output"); - char *filename = (argc > 4) ? 
argv[4]: 0; - test_detector("cfg/coco.data", argv[2], argv[3], filename, thresh, 0.5, 0, ext_output, 0, NULL, 0, 0); - } else if (0 == strcmp(argv[1], "cifar")){ - run_cifar(argc, argv); - } else if (0 == strcmp(argv[1], "go")){ - run_go(argc, argv); - } else if (0 == strcmp(argv[1], "rnn")){ - run_char_rnn(argc, argv); - } else if (0 == strcmp(argv[1], "vid")){ - run_vid_rnn(argc, argv); - } else if (0 == strcmp(argv[1], "coco")){ - run_coco(argc, argv); - } else if (0 == strcmp(argv[1], "classify")){ - predict_classifier("cfg/imagenet1k.data", argv[2], argv[3], argv[4], 5); - } else if (0 == strcmp(argv[1], "classifier")){ - run_classifier(argc, argv); - } else if (0 == strcmp(argv[1], "art")){ - run_art(argc, argv); - } else if (0 == strcmp(argv[1], "tag")){ - run_tag(argc, argv); - } else if (0 == strcmp(argv[1], "compare")){ - run_compare(argc, argv); - } else if (0 == strcmp(argv[1], "dice")){ - run_dice(argc, argv); - } else if (0 == strcmp(argv[1], "writing")){ - run_writing(argc, argv); - } else if (0 == strcmp(argv[1], "3d")){ - composite_3d(argv[2], argv[3], argv[4], (argc > 5) ? 
atof(argv[5]) : 0); - } else if (0 == strcmp(argv[1], "test")){ - test_resize(argv[2]); - } else if (0 == strcmp(argv[1], "captcha")){ - run_captcha(argc, argv); - } else if (0 == strcmp(argv[1], "nightmare")){ - run_nightmare(argc, argv); - } else if (0 == strcmp(argv[1], "rgbgr")){ - rgbgr_net(argv[2], argv[3], argv[4]); - } else if (0 == strcmp(argv[1], "reset")){ - reset_normalize_net(argv[2], argv[3], argv[4]); - } else if (0 == strcmp(argv[1], "denormalize")){ - denormalize_net(argv[2], argv[3], argv[4]); - } else if (0 == strcmp(argv[1], "statistics")){ - statistics_net(argv[2], argv[3]); - } else if (0 == strcmp(argv[1], "normalize")){ - normalize_net(argv[2], argv[3], argv[4]); - } else if (0 == strcmp(argv[1], "rescale")){ - rescale_net(argv[2], argv[3], argv[4]); - } else if (0 == strcmp(argv[1], "ops")){ - operations(argv[2]); - } else if (0 == strcmp(argv[1], "speed")){ - speed(argv[2], (argc > 3 && argv[3]) ? atoi(argv[3]) : 0); - } else if (0 == strcmp(argv[1], "oneoff")){ - oneoff(argv[2], argv[3], argv[4]); - } else if (0 == strcmp(argv[1], "partial")){ - partial(argv[2], argv[3], argv[4], atoi(argv[5])); - } else if (0 == strcmp(argv[1], "visualize")){ - visualize(argv[2], (argc > 3) ? 
argv[3] : 0); - } else if (0 == strcmp(argv[1], "imtest")){ - test_resize(argv[2]); - } else { - fprintf(stderr, "Not an option: %s\n", argv[1]); - } - return 0; -} diff --git a/src/Detector/darknet/src/darkunistd.h b/src/Detector/darknet/src/darkunistd.h deleted file mode 100644 index 6c06aef1c..000000000 --- a/src/Detector/darknet/src/darkunistd.h +++ /dev/null @@ -1,56 +0,0 @@ -#ifdef _WIN32 -#ifndef _UNISTD_H -#define _UNISTD_H 1 - -/* This file intended to serve as a drop-in replacement for -* unistd.h on Windows -* Please add functionality as needed -*/ - -#include -#include /* for _getcwd() and _chdir() */ -#include "getopt.h" -#include -#include /* for getpid() and the exec..() family */ -#include - -#define srandom srand -#define random rand - -/* Values for the second argument to access. -These may be OR'd together. */ -#define R_OK 4 /* Test for read permission. */ -#define W_OK 2 /* Test for write permission. */ -#define X_OK R_OK /* execute permission - unsupported in Windows, */ -#define F_OK 0 /* Test for existence. */ - -#define access _access -#define dup2 _dup2 -#define execve _execve -#define ftruncate _chsize -#define unlink _unlink -#define fileno _fileno -#define getcwd _getcwd -#define chdir _chdir -#define isatty _isatty -#define lseek _lseek -/* read, write, and close are NOT being #defined here, because while there are file handle specific versions for Windows, they probably don't work for sockets. You need to look at your app and consider whether to call e.g. closesocket(). 
*/ - -#define ssize_t int - -#define STDIN_FILENO 0 -#define STDOUT_FILENO 1 -#define STDERR_FILENO 2 -/* should be in some equivalent to */ -//typedef __int8 int8_t; -//typedef __int16 int16_t; -//typedef __int32 int32_t; -//typedef __int64 int64_t; -//typedef unsigned __int8 uint8_t; -//typedef unsigned __int16 uint16_t; -//typedef unsigned __int32 uint32_t; -//typedef unsigned __int64 uint64_t; -#endif /* _UNISTD_H */ -#else -#include -#endif /* _WIN32 */ diff --git a/src/Detector/darknet/src/data.c b/src/Detector/darknet/src/data.c deleted file mode 100644 index d33f21990..000000000 --- a/src/Detector/darknet/src/data.c +++ /dev/null @@ -1,2326 +0,0 @@ -#include "data.h" -#include "utils.h" -#include "image.h" -#include "dark_cuda.h" -#include "box.h" -#include "http_stream.h" - -#include -#include -#include - -extern int check_mistakes; - -#define NUMCHARS 37 - -pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; - -list *get_paths(char *filename) -{ - char *path; - FILE *file = fopen(filename, "r"); - if(!file) file_error(filename); - list *lines = make_list(); - while((path=fgetl(file))){ - list_insert(lines, path); - } - fclose(file); - return lines; -} - -/* -char **get_random_paths_indexes(char **paths, int n, int m, int *indexes) -{ - char **random_paths = calloc(n, sizeof(char*)); - int i; - pthread_mutex_lock(&mutex); - for(i = 0; i < n; ++i){ - int index = random_gen()%m; - indexes[i] = index; - random_paths[i] = paths[index]; - if(i == 0) printf("%s\n", paths[index]); - } - pthread_mutex_unlock(&mutex); - return random_paths; -} -*/ - -char **get_sequential_paths(char **paths, int n, int m, int mini_batch, int augment_speed, int contrastive) -{ - int speed = rand_int(1, augment_speed); - if (speed < 1) speed = 1; - char** sequentia_paths = (char**)xcalloc(n, sizeof(char*)); - int i; - pthread_mutex_lock(&mutex); - //printf("n = %d, mini_batch = %d \n", n, mini_batch); - unsigned int *start_time_indexes = (unsigned int *)xcalloc(mini_batch, 
sizeof(unsigned int)); - for (i = 0; i < mini_batch; ++i) { - if (contrastive && (i % 2) == 1) start_time_indexes[i] = start_time_indexes[i - 1]; - else start_time_indexes[i] = random_gen() % m; - - //printf(" start_time_indexes[i] = %u, ", start_time_indexes[i]); - } - - for (i = 0; i < n; ++i) { - do { - int time_line_index = i % mini_batch; - unsigned int index = start_time_indexes[time_line_index] % m; - start_time_indexes[time_line_index] += speed; - - //int index = random_gen() % m; - sequentia_paths[i] = paths[index]; - //printf(" index = %d, ", index); - //if(i == 0) printf("%s\n", paths[index]); - //printf(" index = %u - grp: %s \n", index, paths[index]); - if (strlen(sequentia_paths[i]) <= 4) printf(" Very small path to the image: %s \n", sequentia_paths[i]); - } while (strlen(sequentia_paths[i]) == 0); - } - free(start_time_indexes); - pthread_mutex_unlock(&mutex); - return sequentia_paths; -} - -char **get_random_paths_custom(char **paths, int n, int m, int contrastive) -{ - char** random_paths = (char**)xcalloc(n, sizeof(char*)); - int i; - pthread_mutex_lock(&mutex); - int old_index = 0; - //printf("n = %d \n", n); - for(i = 0; i < n; ++i){ - do { - int index = random_gen() % m; - if (contrastive && (i % 2 == 1)) index = old_index; - else old_index = index; - random_paths[i] = paths[index]; - //if(i == 0) printf("%s\n", paths[index]); - //printf("grp: %s\n", paths[index]); - if (strlen(random_paths[i]) <= 4) printf(" Very small path to the image: %s \n", random_paths[i]); - } while (strlen(random_paths[i]) == 0); - } - pthread_mutex_unlock(&mutex); - return random_paths; -} - -char **get_random_paths(char **paths, int n, int m) -{ - return get_random_paths_custom(paths, n, m, 0); -} - -char **find_replace_paths(char **paths, int n, char *find, char *replace) -{ - char** replace_paths = (char**)xcalloc(n, sizeof(char*)); - int i; - for(i = 0; i < n; ++i){ - char replaced[4096]; - find_replace(paths[i], find, replace, replaced); - replace_paths[i] = 
copy_string(replaced); - } - return replace_paths; -} - -matrix load_image_paths_gray(char **paths, int n, int w, int h) -{ - int i; - matrix X; - X.rows = n; - X.vals = (float**)xcalloc(X.rows, sizeof(float*)); - X.cols = 0; - - for(i = 0; i < n; ++i){ - image im = load_image(paths[i], w, h, 3); - - image gray = grayscale_image(im); - free_image(im); - im = gray; - - X.vals[i] = im.data; - X.cols = im.h*im.w*im.c; - } - return X; -} - -matrix load_image_paths(char **paths, int n, int w, int h) -{ - int i; - matrix X; - X.rows = n; - X.vals = (float**)xcalloc(X.rows, sizeof(float*)); - X.cols = 0; - - for(i = 0; i < n; ++i){ - image im = load_image_color(paths[i], w, h); - X.vals[i] = im.data; - X.cols = im.h*im.w*im.c; - } - return X; -} - -matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure, int dontuse_opencv, int contrastive) -{ - int i; - matrix X; - X.rows = n; - X.vals = (float**)xcalloc(X.rows, sizeof(float*)); - X.cols = 0; - - for(i = 0; i < n; ++i){ - int size = w > h ? w : h; - image im; - const int img_index = (contrastive) ? (i / 2) : i; - if(dontuse_opencv) im = load_image_stb_resize(paths[img_index], 0, 0, 3); - else im = load_image_color(paths[img_index], 0, 0); - - image crop = random_augment_image(im, angle, aspect, min, max, size); - int flip = use_flip ? 
random_gen() % 2 : 0; - if (flip) - flip_image(crop); - random_distort_image(crop, hue, saturation, exposure); - - image sized = resize_image(crop, w, h); - - //show_image(im, "orig"); - //show_image(sized, "sized"); - //show_image(sized, paths[img_index]); - //wait_until_press_key_cv(); - //printf("w = %d, h = %d \n", sized.w, sized.h); - - free_image(im); - free_image(crop); - X.vals[i] = sized.data; - X.cols = sized.h*sized.w*sized.c; - } - return X; -} - - -box_label *read_boxes(char *filename, int *n) -{ - box_label* boxes = (box_label*)xcalloc(1, sizeof(box_label)); - FILE *file = fopen(filename, "r"); - if (!file) { - printf("Can't open label file. (This can be normal only if you use MSCOCO): %s \n", filename); - //file_error(filename); - FILE* fw = fopen("bad.list", "a"); - fwrite(filename, sizeof(char), strlen(filename), fw); - char *new_line = "\n"; - fwrite(new_line, sizeof(char), strlen(new_line), fw); - fclose(fw); - if (check_mistakes) { - printf("\n Error in read_boxes() \n"); - getchar(); - } - - *n = 0; - return boxes; - } - const int max_obj_img = 4000;// 30000; - const int img_hash = (custom_hash(filename) % max_obj_img)*max_obj_img; - //printf(" img_hash = %d, filename = %s; ", img_hash, filename); - float x, y, h, w; - int id; - int count = 0; - while(fscanf(file, "%d %f %f %f %f", &id, &x, &y, &w, &h) == 5){ - boxes = (box_label*)xrealloc(boxes, (count + 1) * sizeof(box_label)); - boxes[count].track_id = count + img_hash; - //printf(" boxes[count].track_id = %d, count = %d \n", boxes[count].track_id, count); - boxes[count].id = id; - boxes[count].x = x; - boxes[count].y = y; - boxes[count].h = h; - boxes[count].w = w; - boxes[count].left = x - w/2; - boxes[count].right = x + w/2; - boxes[count].top = y - h/2; - boxes[count].bottom = y + h/2; - ++count; - } - fclose(file); - *n = count; - return boxes; -} - -void randomize_boxes(box_label *b, int n) -{ - int i; - for(i = 0; i < n; ++i){ - box_label swap = b[i]; - int index = random_gen()%n; - 
b[i] = b[index]; - b[index] = swap; - } -} - -void correct_boxes(box_label *boxes, int n, float dx, float dy, float sx, float sy, int flip) -{ - int i; - for(i = 0; i < n; ++i){ - if(boxes[i].x == 0 && boxes[i].y == 0) { - boxes[i].x = 999999; - boxes[i].y = 999999; - boxes[i].w = 999999; - boxes[i].h = 999999; - continue; - } - if ((boxes[i].x + boxes[i].w / 2) < 0 || (boxes[i].y + boxes[i].h / 2) < 0 || - (boxes[i].x - boxes[i].w / 2) > 1 || (boxes[i].y - boxes[i].h / 2) > 1) - { - boxes[i].x = 999999; - boxes[i].y = 999999; - boxes[i].w = 999999; - boxes[i].h = 999999; - continue; - } - boxes[i].left = boxes[i].left * sx - dx; - boxes[i].right = boxes[i].right * sx - dx; - boxes[i].top = boxes[i].top * sy - dy; - boxes[i].bottom = boxes[i].bottom* sy - dy; - - if(flip){ - float swap = boxes[i].left; - boxes[i].left = 1. - boxes[i].right; - boxes[i].right = 1. - swap; - } - - boxes[i].left = constrain(0, 1, boxes[i].left); - boxes[i].right = constrain(0, 1, boxes[i].right); - boxes[i].top = constrain(0, 1, boxes[i].top); - boxes[i].bottom = constrain(0, 1, boxes[i].bottom); - - boxes[i].x = (boxes[i].left+boxes[i].right)/2; - boxes[i].y = (boxes[i].top+boxes[i].bottom)/2; - boxes[i].w = (boxes[i].right - boxes[i].left); - boxes[i].h = (boxes[i].bottom - boxes[i].top); - - boxes[i].w = constrain(0, 1, boxes[i].w); - boxes[i].h = constrain(0, 1, boxes[i].h); - } -} - -void fill_truth_swag(char *path, float *truth, int classes, int flip, float dx, float dy, float sx, float sy) -{ - char labelpath[4096]; - replace_image_to_label(path, labelpath); - - int count = 0; - box_label *boxes = read_boxes(labelpath, &count); - randomize_boxes(boxes, count); - correct_boxes(boxes, count, dx, dy, sx, sy, flip); - float x,y,w,h; - int id; - int i; - - for (i = 0; i < count && i < 30; ++i) { - x = boxes[i].x; - y = boxes[i].y; - w = boxes[i].w; - h = boxes[i].h; - id = boxes[i].id; - - if (w < .0 || h < .0) continue; - - int index = (4+classes) * i; - - truth[index++] = x; - 
truth[index++] = y; - truth[index++] = w; - truth[index++] = h; - - if (id < classes) truth[index+id] = 1; - } - free(boxes); -} - -void fill_truth_region(char *path, float *truth, int classes, int num_boxes, int flip, float dx, float dy, float sx, float sy) -{ - char labelpath[4096]; - replace_image_to_label(path, labelpath); - - int count = 0; - box_label *boxes = read_boxes(labelpath, &count); - randomize_boxes(boxes, count); - correct_boxes(boxes, count, dx, dy, sx, sy, flip); - float x,y,w,h; - int id; - int i; - - for (i = 0; i < count; ++i) { - x = boxes[i].x; - y = boxes[i].y; - w = boxes[i].w; - h = boxes[i].h; - id = boxes[i].id; - - if (w < .001 || h < .001) continue; - - int col = (int)(x*num_boxes); - int row = (int)(y*num_boxes); - - x = x*num_boxes - col; - y = y*num_boxes - row; - - int index = (col+row*num_boxes)*(5+classes); - if (truth[index]) continue; - truth[index++] = 1; - - if (id < classes) truth[index+id] = 1; - index += classes; - - truth[index++] = x; - truth[index++] = y; - truth[index++] = w; - truth[index++] = h; - } - free(boxes); -} - -int fill_truth_detection(const char *path, int num_boxes, int truth_size, float *truth, int classes, int flip, float dx, float dy, float sx, float sy, - int net_w, int net_h) -{ - char labelpath[4096]; - replace_image_to_label(path, labelpath); - - int count = 0; - int i; - box_label *boxes = read_boxes(labelpath, &count); - int min_w_h = 0; - float lowest_w = 1.F / net_w; - float lowest_h = 1.F / net_h; - randomize_boxes(boxes, count); - correct_boxes(boxes, count, dx, dy, sx, sy, flip); - if (count > num_boxes) count = num_boxes; - float x, y, w, h; - int id; - int sub = 0; - - for (i = 0; i < count; ++i) { - x = boxes[i].x; - y = boxes[i].y; - w = boxes[i].w; - h = boxes[i].h; - id = boxes[i].id; - int track_id = boxes[i].track_id; - - // not detect small objects - //if ((w < 0.001F || h < 0.001F)) continue; - // if truth (box for object) is smaller than 1x1 pix - char buff[256]; - if (id >= 
classes) { - printf("\n Wrong annotation: class_id = %d. But class_id should be [from 0 to %d], file: %s \n", id, (classes-1), labelpath); - sprintf(buff, "echo %s \"Wrong annotation: class_id = %d. But class_id should be [from 0 to %d]\" >> bad_label.list", labelpath, id, (classes-1)); - system(buff); - if (check_mistakes) getchar(); - ++sub; - continue; - } - if ((w < lowest_w || h < lowest_h)) { - //sprintf(buff, "echo %s \"Very small object: w < lowest_w OR h < lowest_h\" >> bad_label.list", labelpath); - //system(buff); - ++sub; - continue; - } - if (x == 999999 || y == 999999) { - printf("\n Wrong annotation: x = 0, y = 0, < 0 or > 1, file: %s \n", labelpath); - sprintf(buff, "echo %s \"Wrong annotation: x = 0 or y = 0\" >> bad_label.list", labelpath); - system(buff); - ++sub; - if (check_mistakes) getchar(); - continue; - } - if (x <= 0 || x > 1 || y <= 0 || y > 1) { - printf("\n Wrong annotation: x = %f, y = %f, file: %s \n", x, y, labelpath); - sprintf(buff, "echo %s \"Wrong annotation: x = %f, y = %f\" >> bad_label.list", labelpath, x, y); - system(buff); - ++sub; - if (check_mistakes) getchar(); - continue; - } - if (w > 1) { - printf("\n Wrong annotation: w = %f, file: %s \n", w, labelpath); - sprintf(buff, "echo %s \"Wrong annotation: w = %f\" >> bad_label.list", labelpath, w); - system(buff); - w = 1; - if (check_mistakes) getchar(); - } - if (h > 1) { - printf("\n Wrong annotation: h = %f, file: %s \n", h, labelpath); - sprintf(buff, "echo %s \"Wrong annotation: h = %f\" >> bad_label.list", labelpath, h); - system(buff); - h = 1; - if (check_mistakes) getchar(); - } - if (x == 0) x += lowest_w; - if (y == 0) y += lowest_h; - - truth[(i-sub)*truth_size +0] = x; - truth[(i-sub)*truth_size +1] = y; - truth[(i-sub)*truth_size +2] = w; - truth[(i-sub)*truth_size +3] = h; - truth[(i-sub)*truth_size +4] = id; - truth[(i-sub)*truth_size +5] = track_id; - //float val = track_id; - //printf(" i = %d, sub = %d, truth_size = %d, track_id = %d, %f, %f\n", i, sub, 
truth_size, track_id, truth[(i - sub)*truth_size + 5], val); - - if (min_w_h == 0) min_w_h = w*net_w; - if (min_w_h > w*net_w) min_w_h = w*net_w; - if (min_w_h > h*net_h) min_w_h = h*net_h; - } - free(boxes); - return min_w_h; -} - - -void print_letters(float *pred, int n) -{ - int i; - for(i = 0; i < n; ++i){ - int index = max_index(pred+i*NUMCHARS, NUMCHARS); - printf("%c", int_to_alphanum(index)); - } - printf("\n"); -} - -void fill_truth_captcha(char *path, int n, float *truth) -{ - char *begin = strrchr(path, '/'); - ++begin; - int i; - for(i = 0; i < strlen(begin) && i < n && begin[i] != '.'; ++i){ - int index = alphanum_to_int(begin[i]); - if(index > 35) printf("Bad %c\n", begin[i]); - truth[i*NUMCHARS+index] = 1; - } - for(;i < n; ++i){ - truth[i*NUMCHARS + NUMCHARS-1] = 1; - } -} - -data load_data_captcha(char **paths, int n, int m, int k, int w, int h) -{ - if(m) paths = get_random_paths(paths, n, m); - data d = {0}; - d.shallow = 0; - d.X = load_image_paths(paths, n, w, h); - d.y = make_matrix(n, k*NUMCHARS); - int i; - for(i = 0; i < n; ++i){ - fill_truth_captcha(paths[i], k, d.y.vals[i]); - } - if(m) free(paths); - return d; -} - -data load_data_captcha_encode(char **paths, int n, int m, int w, int h) -{ - if(m) paths = get_random_paths(paths, n, m); - data d = {0}; - d.shallow = 0; - d.X = load_image_paths(paths, n, w, h); - d.X.cols = 17100; - d.y = d.X; - if(m) free(paths); - return d; -} - -void fill_truth(char *path, char **labels, int k, float *truth) -{ - int i; - memset(truth, 0, k*sizeof(float)); - int count = 0; - for(i = 0; i < k; ++i){ - if(strstr(path, labels[i])){ - truth[i] = 1; - ++count; - } - } - if (count != 1) { - printf("Too many or too few labels: %d, %s\n", count, path); - count = 0; - for (i = 0; i < k; ++i) { - if (strstr(path, labels[i])) { - printf("\t label %d: %s \n", count, labels[i]); - count++; - } - } - } -} - -void fill_truth_smooth(char *path, char **labels, int k, float *truth, float label_smooth_eps) -{ - int i; - 
memset(truth, 0, k * sizeof(float)); - int count = 0; - for (i = 0; i < k; ++i) { - if (strstr(path, labels[i])) { - truth[i] = (1 - label_smooth_eps); - ++count; - } - else { - truth[i] = label_smooth_eps / (k - 1); - } - } - if (count != 1) { - printf("Too many or too few labels: %d, %s\n", count, path); - count = 0; - for (i = 0; i < k; ++i) { - if (strstr(path, labels[i])) { - printf("\t label %d: %s \n", count, labels[i]); - count++; - } - } - } -} - -void fill_hierarchy(float *truth, int k, tree *hierarchy) -{ - int j; - for(j = 0; j < k; ++j){ - if(truth[j]){ - int parent = hierarchy->parent[j]; - while(parent >= 0){ - truth[parent] = 1; - parent = hierarchy->parent[parent]; - } - } - } - int i; - int count = 0; - for(j = 0; j < hierarchy->groups; ++j){ - //printf("%d\n", count); - int mask = 1; - for(i = 0; i < hierarchy->group_size[j]; ++i){ - if(truth[count + i]){ - mask = 0; - break; - } - } - if (mask) { - for(i = 0; i < hierarchy->group_size[j]; ++i){ - truth[count + i] = SECRET_NUM; - } - } - count += hierarchy->group_size[j]; - } -} - -int find_max(float *arr, int size) { - int i; - float max = 0; - int n = 0; - for (i = 0; i < size; ++i) { - if (arr[i] > max) { - max = arr[i]; - n = i; - } - } - return n; -} - -matrix load_labels_paths(char **paths, int n, char **labels, int k, tree *hierarchy, float label_smooth_eps, int contrastive) -{ - matrix y = make_matrix(n, k); - int i; - if (labels) { - // supervised learning - for (i = 0; i < n; ++i) { - const int img_index = (contrastive) ? (i / 2) : i; - fill_truth_smooth(paths[img_index], labels, k, y.vals[i], label_smooth_eps); - //printf(" n = %d, i = %d, img_index = %d, class_id = %d \n", n, i, img_index, find_max(y.vals[i], k)); - if (hierarchy) { - fill_hierarchy(y.vals[i], k, hierarchy); - } - } - } else { - // unsupervised learning - for (i = 0; i < n; ++i) { - const int img_index = (contrastive) ? 
(i / 2) : i; - const uintptr_t path_p = (uintptr_t)paths[img_index];// abs(random_gen()); - const int class_id = path_p % k; - int l; - for (l = 0; l < k; ++l) y.vals[i][l] = 0; - y.vals[i][class_id] = 1; - } - } - return y; -} - -matrix load_tags_paths(char **paths, int n, int k) -{ - matrix y = make_matrix(n, k); - int i; - int count = 0; - for(i = 0; i < n; ++i){ - char label[4096]; - find_replace(paths[i], "imgs", "labels", label); - find_replace(label, "_iconl.jpeg", ".txt", label); - FILE *file = fopen(label, "r"); - if(!file){ - find_replace(label, "labels", "labels2", label); - file = fopen(label, "r"); - if(!file) continue; - } - ++count; - int tag; - while(fscanf(file, "%d", &tag) == 1){ - if(tag < k){ - y.vals[i][tag] = 1; - } - } - fclose(file); - } - printf("%d/%d\n", count, n); - return y; -} - -char **get_labels_custom(char *filename, int *size) -{ - list *plist = get_paths(filename); - if(size) *size = plist->size; - char **labels = (char **)list_to_array(plist); - free_list(plist); - return labels; -} - -char **get_labels(char *filename) -{ - return get_labels_custom(filename, NULL); -} - -void free_data(data d) -{ - if(!d.shallow){ - free_matrix(d.X); - free_matrix(d.y); - }else{ - free(d.X.vals); - free(d.y.vals); - } -} - -data load_data_region(int n, char **paths, int m, int w, int h, int size, int classes, float jitter, float hue, float saturation, float exposure) -{ - char **random_paths = get_random_paths(paths, n, m); - int i; - data d = {0}; - d.shallow = 0; - - d.X.rows = n; - d.X.vals = (float**)xcalloc(d.X.rows, sizeof(float*)); - d.X.cols = h*w*3; - - - int k = size*size*(5+classes); - d.y = make_matrix(n, k); - for(i = 0; i < n; ++i){ - image orig = load_image_color(random_paths[i], 0, 0); - - int oh = orig.h; - int ow = orig.w; - - int dw = (ow*jitter); - int dh = (oh*jitter); - - int pleft = rand_uniform(-dw, dw); - int pright = rand_uniform(-dw, dw); - int ptop = rand_uniform(-dh, dh); - int pbot = rand_uniform(-dh, dh); - - int 
swidth = ow - pleft - pright; - int sheight = oh - ptop - pbot; - - float sx = (float)swidth / ow; - float sy = (float)sheight / oh; - - int flip = random_gen()%2; - image cropped = crop_image(orig, pleft, ptop, swidth, sheight); - - float dx = ((float)pleft/ow)/sx; - float dy = ((float)ptop /oh)/sy; - - image sized = resize_image(cropped, w, h); - if(flip) flip_image(sized); - random_distort_image(sized, hue, saturation, exposure); - d.X.vals[i] = sized.data; - - fill_truth_region(random_paths[i], d.y.vals[i], classes, size, flip, dx, dy, 1./sx, 1./sy); - - free_image(orig); - free_image(cropped); - } - free(random_paths); - return d; -} - -data load_data_compare(int n, char **paths, int m, int classes, int w, int h) -{ - if(m) paths = get_random_paths(paths, 2*n, m); - int i,j; - data d = {0}; - d.shallow = 0; - - d.X.rows = n; - d.X.vals = (float**)xcalloc(d.X.rows, sizeof(float*)); - d.X.cols = h*w*6; - - int k = 2*(classes); - d.y = make_matrix(n, k); - for(i = 0; i < n; ++i){ - image im1 = load_image_color(paths[i*2], w, h); - image im2 = load_image_color(paths[i*2+1], w, h); - - d.X.vals[i] = (float*)xcalloc(d.X.cols, sizeof(float)); - memcpy(d.X.vals[i], im1.data, h*w*3*sizeof(float)); - memcpy(d.X.vals[i] + h*w*3, im2.data, h*w*3*sizeof(float)); - - int id; - float iou; - - char imlabel1[4096]; - char imlabel2[4096]; - find_replace(paths[i*2], "imgs", "labels", imlabel1); - find_replace(imlabel1, "jpg", "txt", imlabel1); - FILE *fp1 = fopen(imlabel1, "r"); - - while(fscanf(fp1, "%d %f", &id, &iou) == 2){ - if (d.y.vals[i][2*id] < iou) d.y.vals[i][2*id] = iou; - } - - find_replace(paths[i*2+1], "imgs", "labels", imlabel2); - find_replace(imlabel2, "jpg", "txt", imlabel2); - FILE *fp2 = fopen(imlabel2, "r"); - - while(fscanf(fp2, "%d %f", &id, &iou) == 2){ - if (d.y.vals[i][2*id + 1] < iou) d.y.vals[i][2*id + 1] = iou; - } - - for (j = 0; j < classes; ++j){ - if (d.y.vals[i][2*j] > .5 && d.y.vals[i][2*j+1] < .5){ - d.y.vals[i][2*j] = 1; - d.y.vals[i][2*j+1] 
= 0; - } else if (d.y.vals[i][2*j] < .5 && d.y.vals[i][2*j+1] > .5){ - d.y.vals[i][2*j] = 0; - d.y.vals[i][2*j+1] = 1; - } else { - d.y.vals[i][2*j] = SECRET_NUM; - d.y.vals[i][2*j+1] = SECRET_NUM; - } - } - fclose(fp1); - fclose(fp2); - - free_image(im1); - free_image(im2); - } - if(m) free(paths); - return d; -} - -data load_data_swag(char **paths, int n, int classes, float jitter) -{ - int index = random_gen()%n; - char *random_path = paths[index]; - - image orig = load_image_color(random_path, 0, 0); - int h = orig.h; - int w = orig.w; - - data d = {0}; - d.shallow = 0; - d.w = w; - d.h = h; - - d.X.rows = 1; - d.X.vals = (float**)xcalloc(d.X.rows, sizeof(float*)); - d.X.cols = h*w*3; - - int k = (4+classes)*30; - d.y = make_matrix(1, k); - - int dw = w*jitter; - int dh = h*jitter; - - int pleft = rand_uniform(-dw, dw); - int pright = rand_uniform(-dw, dw); - int ptop = rand_uniform(-dh, dh); - int pbot = rand_uniform(-dh, dh); - - int swidth = w - pleft - pright; - int sheight = h - ptop - pbot; - - float sx = (float)swidth / w; - float sy = (float)sheight / h; - - int flip = random_gen()%2; - image cropped = crop_image(orig, pleft, ptop, swidth, sheight); - - float dx = ((float)pleft/w)/sx; - float dy = ((float)ptop /h)/sy; - - image sized = resize_image(cropped, w, h); - if(flip) flip_image(sized); - d.X.vals[0] = sized.data; - - fill_truth_swag(random_path, d.y.vals[0], classes, flip, dx, dy, 1./sx, 1./sy); - - free_image(orig); - free_image(cropped); - - return d; -} - -void blend_truth(float *new_truth, int boxes, int truth_size, float *old_truth) -{ - int count_new_truth = 0; - int t; - for (t = 0; t < boxes; ++t) { - float x = new_truth[t*truth_size]; - if (!x) break; - count_new_truth++; - - } - for (t = count_new_truth; t < boxes; ++t) { - float *new_truth_ptr = new_truth + t*truth_size; - float *old_truth_ptr = old_truth + (t - count_new_truth)*truth_size; - float x = old_truth_ptr[0]; - if (!x) break; - - new_truth_ptr[0] = old_truth_ptr[0]; - 
new_truth_ptr[1] = old_truth_ptr[1]; - new_truth_ptr[2] = old_truth_ptr[2]; - new_truth_ptr[3] = old_truth_ptr[3]; - new_truth_ptr[4] = old_truth_ptr[4]; - } - //printf("\n was %d bboxes, now %d bboxes \n", count_new_truth, t); -} - - -void blend_truth_mosaic(float *new_truth, int boxes, int truth_size, float *old_truth, int w, int h, float cut_x, float cut_y, int i_mixup, - int left_shift, int right_shift, int top_shift, int bot_shift, - int net_w, int net_h, int mosaic_bound) -{ - const float lowest_w = 1.F / net_w; - const float lowest_h = 1.F / net_h; - - int count_new_truth = 0; - int t; - for (t = 0; t < boxes; ++t) { - float x = new_truth[t*truth_size]; - if (!x) break; - count_new_truth++; - - } - int new_t = count_new_truth; - for (t = count_new_truth; t < boxes; ++t) { - float *new_truth_ptr = new_truth + new_t*truth_size; - new_truth_ptr[0] = 0; - float *old_truth_ptr = old_truth + (t - count_new_truth)*truth_size; - float x = old_truth_ptr[0]; - if (!x) break; - - float xb = old_truth_ptr[0]; - float yb = old_truth_ptr[1]; - float wb = old_truth_ptr[2]; - float hb = old_truth_ptr[3]; - - - - // shift 4 images - if (i_mixup == 0) { - xb = xb - (float)(w - cut_x - right_shift) / w; - yb = yb - (float)(h - cut_y - bot_shift) / h; - } - if (i_mixup == 1) { - xb = xb + (float)(cut_x - left_shift) / w; - yb = yb - (float)(h - cut_y - bot_shift) / h; - } - if (i_mixup == 2) { - xb = xb - (float)(w - cut_x - right_shift) / w; - yb = yb + (float)(cut_y - top_shift) / h; - } - if (i_mixup == 3) { - xb = xb + (float)(cut_x - left_shift) / w; - yb = yb + (float)(cut_y - top_shift) / h; - } - - int left = (xb - wb / 2)*w; - int right = (xb + wb / 2)*w; - int top = (yb - hb / 2)*h; - int bot = (yb + hb / 2)*h; - - if(mosaic_bound) - { - // fix out of Mosaic-bound - float left_bound = 0, right_bound = 0, top_bound = 0, bot_bound = 0; - if (i_mixup == 0) { - left_bound = 0; - right_bound = cut_x; - top_bound = 0; - bot_bound = cut_y; - } - if (i_mixup == 1) { - 
left_bound = cut_x; - right_bound = w; - top_bound = 0; - bot_bound = cut_y; - } - if (i_mixup == 2) { - left_bound = 0; - right_bound = cut_x; - top_bound = cut_y; - bot_bound = h; - } - if (i_mixup == 3) { - left_bound = cut_x; - right_bound = w; - top_bound = cut_y; - bot_bound = h; - } - - - if (left < left_bound) { - //printf(" i_mixup = %d, left = %d, left_bound = %f \n", i_mixup, left, left_bound); - left = left_bound; - } - if (right > right_bound) { - //printf(" i_mixup = %d, right = %d, right_bound = %f \n", i_mixup, right, right_bound); - right = right_bound; - } - if (top < top_bound) top = top_bound; - if (bot > bot_bound) bot = bot_bound; - - - xb = ((float)(right + left) / 2) / w; - wb = ((float)(right - left)) / w; - yb = ((float)(bot + top) / 2) / h; - hb = ((float)(bot - top)) / h; - } - else - { - // fix out of bound - if (left < 0) { - float diff = (float)left / w; - xb = xb - diff / 2; - wb = wb + diff; - } - - if (right > w) { - float diff = (float)(right - w) / w; - xb = xb - diff / 2; - wb = wb - diff; - } - - if (top < 0) { - float diff = (float)top / h; - yb = yb - diff / 2; - hb = hb + diff; - } - - if (bot > h) { - float diff = (float)(bot - h) / h; - yb = yb - diff / 2; - hb = hb - diff; - } - - left = (xb - wb / 2)*w; - right = (xb + wb / 2)*w; - top = (yb - hb / 2)*h; - bot = (yb + hb / 2)*h; - } - - - // leave only within the image - if(left >= 0 && right <= w && top >= 0 && bot <= h && - wb > 0 && wb < 1 && hb > 0 && hb < 1 && - xb > 0 && xb < 1 && yb > 0 && yb < 1 && - wb > lowest_w && hb > lowest_h) - { - new_truth_ptr[0] = xb; - new_truth_ptr[1] = yb; - new_truth_ptr[2] = wb; - new_truth_ptr[3] = hb; - new_truth_ptr[4] = old_truth_ptr[4]; - new_t++; - } - } - //printf("\n was %d bboxes, now %d bboxes \n", count_new_truth, t); -} - -#ifdef OPENCV - -#include "http_stream.h" - -data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int truth_size, int classes, int use_flip, int use_gaussian_noise, int 
use_blur, int use_mixup, - float jitter, float resize, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int letter_box, int mosaic_bound, int contrastive, int contrastive_jit_flip, int contrastive_color, int show_imgs) -{ - const int random_index = random_gen(); - c = c ? c : 3; - - if (use_mixup == 2 || use_mixup == 4) { - printf("\n cutmix=1 - isn't supported for Detector (use cutmix=1 only for Classifier) \n"); - if (check_mistakes) getchar(); - if(use_mixup == 2) use_mixup = 0; - else use_mixup = 3; - } - if (use_mixup == 3 && letter_box) { - //printf("\n Combination: letter_box=1 & mosaic=1 - isn't supported, use only 1 of these parameters \n"); - //if (check_mistakes) getchar(); - //exit(0); - } - if (random_gen() % 2 == 0) use_mixup = 0; - int i; - - int *cut_x = NULL, *cut_y = NULL; - if (use_mixup == 3) { - cut_x = (int*)calloc(n, sizeof(int)); - cut_y = (int*)calloc(n, sizeof(int)); - const float min_offset = 0.2; // 20% - for (i = 0; i < n; ++i) { - cut_x[i] = rand_int(w*min_offset, w*(1 - min_offset)); - cut_y[i] = rand_int(h*min_offset, h*(1 - min_offset)); - } - } - - data d = {0}; - d.shallow = 0; - - d.X.rows = n; - d.X.vals = (float**)xcalloc(d.X.rows, sizeof(float*)); - d.X.cols = h*w*c; - - float r1 = 0, r2 = 0, r3 = 0, r4 = 0, r_scale = 0; - float resize_r1 = 0, resize_r2 = 0; - float dhue = 0, dsat = 0, dexp = 0, flip = 0, blur = 0; - int augmentation_calculated = 0, gaussian_noise = 0; - - d.y = make_matrix(n, truth_size*boxes); - int i_mixup = 0; - for (i_mixup = 0; i_mixup <= use_mixup; i_mixup++) { - if (i_mixup) augmentation_calculated = 0; // recalculate augmentation for the 2nd sequence if(track==1) - - char **random_paths; - if (track) random_paths = get_sequential_paths(paths, n, m, mini_batch, augment_speed, contrastive); - else random_paths = get_random_paths_custom(paths, n, m, contrastive); - - for (i = 0; i < n; ++i) { - float *truth = (float*)xcalloc(truth_size * boxes, sizeof(float)); - 
const char *filename = random_paths[i]; - - int flag = (c >= 3); - mat_cv *src; - src = load_image_mat_cv(filename, flag); - if (src == NULL) { - printf("\n Error in load_data_detection() - OpenCV \n"); - fflush(stdout); - if (check_mistakes) { - getchar(); - } - continue; - } - - int oh = get_height_mat(src); - int ow = get_width_mat(src); - - int dw = (ow*jitter); - int dh = (oh*jitter); - - float resize_down = resize, resize_up = resize; - if (resize_down > 1.0) resize_down = 1 / resize_down; - int min_rdw = ow*(1 - (1 / resize_down)) / 2; // < 0 - int min_rdh = oh*(1 - (1 / resize_down)) / 2; // < 0 - - if (resize_up < 1.0) resize_up = 1 / resize_up; - int max_rdw = ow*(1 - (1 / resize_up)) / 2; // > 0 - int max_rdh = oh*(1 - (1 / resize_up)) / 2; // > 0 - //printf(" down = %f, up = %f \n", (1 - (1 / resize_down)) / 2, (1 - (1 / resize_up)) / 2); - - if (!augmentation_calculated || !track) - { - augmentation_calculated = 1; - resize_r1 = random_float(); - resize_r2 = random_float(); - - if (!contrastive || contrastive_jit_flip || i % 2 == 0) - { - r1 = random_float(); - r2 = random_float(); - r3 = random_float(); - r4 = random_float(); - - flip = use_flip ? 
random_gen() % 2 : 0; - } - - r_scale = random_float(); - - if (!contrastive || contrastive_color || i % 2 == 0) - { - dhue = rand_uniform_strong(-hue, hue); - dsat = rand_scale(saturation); - dexp = rand_scale(exposure); - } - - if (use_blur) { - int tmp_blur = rand_int(0, 2); // 0 - disable, 1 - blur background, 2 - blur the whole image - if (tmp_blur == 0) blur = 0; - else if (tmp_blur == 1) blur = 1; - else blur = use_blur; - } - - if (use_gaussian_noise && rand_int(0, 1) == 1) gaussian_noise = use_gaussian_noise; - else gaussian_noise = 0; - } - - int pleft = rand_precalc_random(-dw, dw, r1); - int pright = rand_precalc_random(-dw, dw, r2); - int ptop = rand_precalc_random(-dh, dh, r3); - int pbot = rand_precalc_random(-dh, dh, r4); - - if (resize < 1) { - // downsize only - pleft += rand_precalc_random(min_rdw, 0, resize_r1); - pright += rand_precalc_random(min_rdw, 0, resize_r2); - ptop += rand_precalc_random(min_rdh, 0, resize_r1); - pbot += rand_precalc_random(min_rdh, 0, resize_r2); - } - else { - pleft += rand_precalc_random(min_rdw, max_rdw, resize_r1); - pright += rand_precalc_random(min_rdw, max_rdw, resize_r2); - ptop += rand_precalc_random(min_rdh, max_rdh, resize_r1); - pbot += rand_precalc_random(min_rdh, max_rdh, resize_r2); - } - - //printf("\n pleft = %d, pright = %d, ptop = %d, pbot = %d, ow = %d, oh = %d \n", pleft, pright, ptop, pbot, ow, oh); - - //float scale = rand_precalc_random(.25, 2, r_scale); // unused currently - //printf(" letter_box = %d \n", letter_box); - - if (letter_box) - { - float img_ar = (float)ow / (float)oh; - float net_ar = (float)w / (float)h; - float result_ar = img_ar / net_ar; - //printf(" ow = %d, oh = %d, w = %d, h = %d, img_ar = %f, net_ar = %f, result_ar = %f \n", ow, oh, w, h, img_ar, net_ar, result_ar); - if (result_ar > 1) // sheight - should be increased - { - float oh_tmp = ow / net_ar; - float delta_h = (oh_tmp - oh)/2; - ptop = ptop - delta_h; - pbot = pbot - delta_h; - //printf(" result_ar = %f, oh_tmp = 
%f, delta_h = %d, ptop = %f, pbot = %f \n", result_ar, oh_tmp, delta_h, ptop, pbot); - } - else // swidth - should be increased - { - float ow_tmp = oh * net_ar; - float delta_w = (ow_tmp - ow)/2; - pleft = pleft - delta_w; - pright = pright - delta_w; - //printf(" result_ar = %f, ow_tmp = %f, delta_w = %d, pleft = %f, pright = %f \n", result_ar, ow_tmp, delta_w, pleft, pright); - } - - //printf("\n pleft = %d, pright = %d, ptop = %d, pbot = %d, ow = %d, oh = %d \n", pleft, pright, ptop, pbot, ow, oh); - } - - // move each 2nd image to the corner - so that most of it was visible - if (use_mixup == 3 && random_gen() % 2 == 0) { - if (flip) { - if (i_mixup == 0) pleft += pright, pright = 0, pbot += ptop, ptop = 0; - if (i_mixup == 1) pright += pleft, pleft = 0, pbot += ptop, ptop = 0; - if (i_mixup == 2) pleft += pright, pright = 0, ptop += pbot, pbot = 0; - if (i_mixup == 3) pright += pleft, pleft = 0, ptop += pbot, pbot = 0; - } - else { - if (i_mixup == 0) pright += pleft, pleft = 0, pbot += ptop, ptop = 0; - if (i_mixup == 1) pleft += pright, pright = 0, pbot += ptop, ptop = 0; - if (i_mixup == 2) pright += pleft, pleft = 0, ptop += pbot, pbot = 0; - if (i_mixup == 3) pleft += pright, pright = 0, ptop += pbot, pbot = 0; - } - } - - int swidth = ow - pleft - pright; - int sheight = oh - ptop - pbot; - - float sx = (float)swidth / ow; - float sy = (float)sheight / oh; - - float dx = ((float)pleft / ow) / sx; - float dy = ((float)ptop / oh) / sy; - - - int min_w_h = fill_truth_detection(filename, boxes, truth_size, truth, classes, flip, dx, dy, 1. / sx, 1. 
/ sy, w, h); - //for (int z = 0; z < boxes; ++z) if(truth[z*truth_size] > 0) printf(" track_id = %f \n", truth[z*truth_size + 5]); - //printf(" truth_size = %d \n", truth_size); - - if ((min_w_h / 8) < blur && blur > 1) blur = min_w_h / 8; // disable blur if one of the objects is too small - - image ai = image_data_augmentation(src, w, h, pleft, ptop, swidth, sheight, flip, dhue, dsat, dexp, - gaussian_noise, blur, boxes, truth_size, truth); - - if (use_mixup == 0) { - d.X.vals[i] = ai.data; - memcpy(d.y.vals[i], truth, truth_size * boxes * sizeof(float)); - } - else if (use_mixup == 1) { - if (i_mixup == 0) { - d.X.vals[i] = ai.data; - memcpy(d.y.vals[i], truth, truth_size * boxes * sizeof(float)); - } - else if (i_mixup == 1) { - image old_img = make_empty_image(w, h, c); - old_img.data = d.X.vals[i]; - //show_image(ai, "new"); - //show_image(old_img, "old"); - //wait_until_press_key_cv(); - blend_images_cv(ai, 0.5, old_img, 0.5); - blend_truth(d.y.vals[i], boxes, truth_size, truth); - free_image(old_img); - d.X.vals[i] = ai.data; - } - } - else if (use_mixup == 3) { - if (i_mixup == 0) { - image tmp_img = make_image(w, h, c); - d.X.vals[i] = tmp_img.data; - } - - if (flip) { - int tmp = pleft; - pleft = pright; - pright = tmp; - } - - const int left_shift = min_val_cmp(cut_x[i], max_val_cmp(0, (-pleft*w / ow))); - const int top_shift = min_val_cmp(cut_y[i], max_val_cmp(0, (-ptop*h / oh))); - - const int right_shift = min_val_cmp((w - cut_x[i]), max_val_cmp(0, (-pright*w / ow))); - const int bot_shift = min_val_cmp(h - cut_y[i], max_val_cmp(0, (-pbot*h / oh))); - - - int k, x, y; - for (k = 0; k < c; ++k) { - for (y = 0; y < h; ++y) { - int j = y*w + k*w*h; - if (i_mixup == 0 && y < cut_y[i]) { - int j_src = (w - cut_x[i] - right_shift) + (y + h - cut_y[i] - bot_shift)*w + k*w*h; - memcpy(&d.X.vals[i][j + 0], &ai.data[j_src], cut_x[i] * sizeof(float)); - } - if (i_mixup == 1 && y < cut_y[i]) { - int j_src = left_shift + (y + h - cut_y[i] - bot_shift)*w + k*w*h; - 
memcpy(&d.X.vals[i][j + cut_x[i]], &ai.data[j_src], (w-cut_x[i]) * sizeof(float)); - } - if (i_mixup == 2 && y >= cut_y[i]) { - int j_src = (w - cut_x[i] - right_shift) + (top_shift + y - cut_y[i])*w + k*w*h; - memcpy(&d.X.vals[i][j + 0], &ai.data[j_src], cut_x[i] * sizeof(float)); - } - if (i_mixup == 3 && y >= cut_y[i]) { - int j_src = left_shift + (top_shift + y - cut_y[i])*w + k*w*h; - memcpy(&d.X.vals[i][j + cut_x[i]], &ai.data[j_src], (w - cut_x[i]) * sizeof(float)); - } - } - } - - blend_truth_mosaic(d.y.vals[i], boxes, truth_size, truth, w, h, cut_x[i], cut_y[i], i_mixup, left_shift, right_shift, top_shift, bot_shift, w, h, mosaic_bound); - - free_image(ai); - ai.data = d.X.vals[i]; - } - - - if (show_imgs && i_mixup == use_mixup) // delete i_mixup - { - image tmp_ai = copy_image(ai); - char buff[1000]; - //sprintf(buff, "aug_%d_%d_%s_%d", random_index, i, basecfg((char*)filename), random_gen()); - sprintf(buff, "aug_%d_%d_%d", random_index, i, random_gen()); - int t; - for (t = 0; t < boxes; ++t) { - box b = float_to_box_stride(d.y.vals[i] + t*truth_size, 1); - if (!b.x) break; - int left = (b.x - b.w / 2.)*ai.w; - int right = (b.x + b.w / 2.)*ai.w; - int top = (b.y - b.h / 2.)*ai.h; - int bot = (b.y + b.h / 2.)*ai.h; - draw_box_width(tmp_ai, left, top, right, bot, 1, 150, 100, 50); // 3 channels RGB - } - - save_image(tmp_ai, buff); - if (show_imgs == 1) { - //char buff_src[1000]; - //sprintf(buff_src, "src_%d_%d_%s_%d", random_index, i, basecfg((char*)filename), random_gen()); - //show_image_mat(src, buff_src); - show_image(tmp_ai, buff); - wait_until_press_key_cv(); - } - printf("\nYou use flag -show_imgs, so will be saved aug_...jpg images. 
Click on window and press ESC button \n"); - free_image(tmp_ai); - } - - release_mat(&src); - free(truth); - } - if (random_paths) free(random_paths); - } - - - return d; -} -#else // OPENCV -void blend_images(image new_img, float alpha, image old_img, float beta) -{ - int data_size = new_img.w * new_img.h * new_img.c; - int i; - #pragma omp parallel for - for (i = 0; i < data_size; ++i) - new_img.data[i] = new_img.data[i] * alpha + old_img.data[i] * beta; -} - -data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int truth_size, int classes, int use_flip, int gaussian_noise, int use_blur, int use_mixup, - float jitter, float resize, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int letter_box, int mosaic_bound, int contrastive, int contrastive_jit_flip, int contrastive_color, int show_imgs) -{ - const int random_index = random_gen(); - c = c ? c : 3; - char **random_paths; - char **mixup_random_paths = NULL; - if(track) random_paths = get_sequential_paths(paths, n, m, mini_batch, augment_speed, contrastive); - else random_paths = get_random_paths_custom(paths, n, m, contrastive); - - //assert(use_mixup < 2); - if (use_mixup == 2) { - printf("\n cutmix=1 - isn't supported for Detector \n"); - exit(0); - } - if (use_mixup == 3 || use_mixup == 4) { - printf("\n mosaic=1 - compile Darknet with OpenCV for using mosaic=1 \n"); - exit(0); - } - int mixup = use_mixup ? 
random_gen() % 2 : 0; - //printf("\n mixup = %d \n", mixup); - if (mixup) { - if (track) mixup_random_paths = get_sequential_paths(paths, n, m, mini_batch, augment_speed, contrastive); - else mixup_random_paths = get_random_paths(paths, n, m); - } - - int i; - data d = { 0 }; - d.shallow = 0; - - d.X.rows = n; - d.X.vals = (float**)xcalloc(d.X.rows, sizeof(float*)); - d.X.cols = h*w*c; - - float r1 = 0, r2 = 0, r3 = 0, r4 = 0, r_scale; - float resize_r1 = 0, resize_r2 = 0; - float dhue = 0, dsat = 0, dexp = 0, flip = 0; - int augmentation_calculated = 0; - - d.y = make_matrix(n, truth_size * boxes); - int i_mixup = 0; - for (i_mixup = 0; i_mixup <= mixup; i_mixup++) { - if (i_mixup) augmentation_calculated = 0; - for (i = 0; i < n; ++i) { - float *truth = (float*)xcalloc(truth_size * boxes, sizeof(float)); - char *filename = (i_mixup) ? mixup_random_paths[i] : random_paths[i]; - - image orig = load_image(filename, 0, 0, c); - - int oh = orig.h; - int ow = orig.w; - - int dw = (ow*jitter); - int dh = (oh*jitter); - - float resize_down = resize, resize_up = resize; - if (resize_down > 1.0) resize_down = 1 / resize_down; - int min_rdw = ow*(1 - (1 / resize_down)) / 2; - int min_rdh = oh*(1 - (1 / resize_down)) / 2; - - if (resize_up < 1.0) resize_up = 1 / resize_up; - int max_rdw = ow*(1 - (1 / resize_up)) / 2; - int max_rdh = oh*(1 - (1 / resize_up)) / 2; - - if (!augmentation_calculated || !track) - { - augmentation_calculated = 1; - resize_r1 = random_float(); - resize_r2 = random_float(); - - if (!contrastive || contrastive_jit_flip || i % 2 == 0) - { - r1 = random_float(); - r2 = random_float(); - r3 = random_float(); - r4 = random_float(); - - flip = use_flip ? 
random_gen() % 2 : 0; - } - - r_scale = random_float(); - - if (!contrastive || contrastive_color || i % 2 == 0) - { - dhue = rand_uniform_strong(-hue, hue); - dsat = rand_scale(saturation); - dexp = rand_scale(exposure); - } - } - - int pleft = rand_precalc_random(-dw, dw, r1); - int pright = rand_precalc_random(-dw, dw, r2); - int ptop = rand_precalc_random(-dh, dh, r3); - int pbot = rand_precalc_random(-dh, dh, r4); - - if (resize < 1) { - // downsize only - pleft += rand_precalc_random(min_rdw, 0, resize_r1); - pright += rand_precalc_random(min_rdw, 0, resize_r2); - ptop += rand_precalc_random(min_rdh, 0, resize_r1); - pbot += rand_precalc_random(min_rdh, 0, resize_r2); - } - else { - pleft += rand_precalc_random(min_rdw, max_rdw, resize_r1); - pright += rand_precalc_random(min_rdw, max_rdw, resize_r2); - ptop += rand_precalc_random(min_rdh, max_rdh, resize_r1); - pbot += rand_precalc_random(min_rdh, max_rdh, resize_r2); - } - - if (letter_box) - { - float img_ar = (float)ow / (float)oh; - float net_ar = (float)w / (float)h; - float result_ar = img_ar / net_ar; - //printf(" ow = %d, oh = %d, w = %d, h = %d, img_ar = %f, net_ar = %f, result_ar = %f \n", ow, oh, w, h, img_ar, net_ar, result_ar); - if (result_ar > 1) // sheight - should be increased - { - float oh_tmp = ow / net_ar; - float delta_h = (oh_tmp - oh) / 2; - ptop = ptop - delta_h; - pbot = pbot - delta_h; - //printf(" result_ar = %f, oh_tmp = %f, delta_h = %d, ptop = %f, pbot = %f \n", result_ar, oh_tmp, delta_h, ptop, pbot); - } - else // swidth - should be increased - { - float ow_tmp = oh * net_ar; - float delta_w = (ow_tmp - ow) / 2; - pleft = pleft - delta_w; - pright = pright - delta_w; - //printf(" result_ar = %f, ow_tmp = %f, delta_w = %d, pleft = %f, pright = %f \n", result_ar, ow_tmp, delta_w, pleft, pright); - } - } - - int swidth = ow - pleft - pright; - int sheight = oh - ptop - pbot; - - float sx = (float)swidth / ow; - float sy = (float)sheight / oh; - - image cropped = crop_image(orig, 
pleft, ptop, swidth, sheight); - - float dx = ((float)pleft / ow) / sx; - float dy = ((float)ptop / oh) / sy; - - image sized = resize_image(cropped, w, h); - if (flip) flip_image(sized); - distort_image(sized, dhue, dsat, dexp); - //random_distort_image(sized, hue, saturation, exposure); - - fill_truth_detection(filename, boxes, truth_size, truth, classes, flip, dx, dy, 1. / sx, 1. / sy, w, h); - - if (i_mixup) { - image old_img = sized; - old_img.data = d.X.vals[i]; - //show_image(sized, "new"); - //show_image(old_img, "old"); - //wait_until_press_key_cv(); - blend_images(sized, 0.5, old_img, 0.5); - blend_truth(truth, boxes, truth_size, d.y.vals[i]); - free_image(old_img); - } - - d.X.vals[i] = sized.data; - memcpy(d.y.vals[i], truth, truth_size * boxes * sizeof(float)); - - if (show_imgs)// && i_mixup) - { - char buff[1000]; - sprintf(buff, "aug_%d_%d_%s_%d", random_index, i, basecfg(filename), random_gen()); - - int t; - for (t = 0; t < boxes; ++t) { - box b = float_to_box_stride(d.y.vals[i] + t*truth_size, 1); - if (!b.x) break; - int left = (b.x - b.w / 2.)*sized.w; - int right = (b.x + b.w / 2.)*sized.w; - int top = (b.y - b.h / 2.)*sized.h; - int bot = (b.y + b.h / 2.)*sized.h; - draw_box_width(sized, left, top, right, bot, 1, 150, 100, 50); // 3 channels RGB - } - - save_image(sized, buff); - if (show_imgs == 1) { - show_image(sized, buff); - wait_until_press_key_cv(); - } - printf("\nYou use flag -show_imgs, so will be saved aug_...jpg images. 
Press Enter: \n"); - //getchar(); - } - - free_image(orig); - free_image(cropped); - free(truth); - } - } - free(random_paths); - if (mixup_random_paths) free(mixup_random_paths); - return d; -} -#endif // OPENCV - -void *load_thread(void *ptr) -{ - //srand(time(0)); - //printf("Loading data: %d\n", random_gen()); - load_args a = *(struct load_args*)ptr; - if(a.exposure == 0) a.exposure = 1; - if(a.saturation == 0) a.saturation = 1; - if(a.aspect == 0) a.aspect = 1; - - if (a.type == OLD_CLASSIFICATION_DATA){ - *a.d = load_data_old(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h); - } else if (a.type == CLASSIFICATION_DATA){ - *a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.hierarchy, a.flip, a.min, a.max, a.w, a.h, a.angle, a.aspect, a.hue, a.saturation, a.exposure, a.mixup, a.blur, a.show_imgs, a.label_smooth_eps, a.dontuse_opencv, a.contrastive); - } else if (a.type == SUPER_DATA){ - *a.d = load_data_super(a.paths, a.n, a.m, a.w, a.h, a.scale); - } else if (a.type == WRITING_DATA){ - *a.d = load_data_writing(a.paths, a.n, a.m, a.w, a.h, a.out_w, a.out_h); - } else if (a.type == REGION_DATA){ - *a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure); - } else if (a.type == DETECTION_DATA){ - *a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.c, a.num_boxes, a.truth_size, a.classes, a.flip, a.gaussian_noise, a.blur, a.mixup, a.jitter, a.resize, - a.hue, a.saturation, a.exposure, a.mini_batch, a.track, a.augment_speed, a.letter_box, a.mosaic_bound, a.contrastive, a.contrastive_jit_flip, a.contrastive_color, a.show_imgs); - } else if (a.type == SWAG_DATA){ - *a.d = load_data_swag(a.paths, a.n, a.classes, a.jitter); - } else if (a.type == COMPARE_DATA){ - *a.d = load_data_compare(a.n, a.paths, a.m, a.classes, a.w, a.h); - } else if (a.type == IMAGE_DATA){ - *(a.im) = load_image(a.path, 0, 0, a.c); - *(a.resized) = resize_image(*(a.im), a.w, a.h); - }else if (a.type == 
LETTERBOX_DATA) { - *(a.im) = load_image(a.path, 0, 0, a.c); - *(a.resized) = letterbox_image(*(a.im), a.w, a.h); - } else if (a.type == TAG_DATA){ - *a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.flip, a.min, a.max, a.w, a.h, a.angle, a.aspect, a.hue, a.saturation, a.exposure); - } - free(ptr); - return 0; -} - -pthread_t load_data_in_thread(load_args args) -{ - pthread_t thread; - struct load_args* ptr = (load_args*)xcalloc(1, sizeof(struct load_args)); - *ptr = args; - if(pthread_create(&thread, 0, load_thread, ptr)) error("Thread creation failed"); - return thread; -} - -static const int thread_wait_ms = 5; -static volatile int flag_exit; -static volatile int * run_load_data = NULL; -static load_args * args_swap = NULL; -static pthread_t* threads = NULL; - -pthread_mutex_t mtx_load_data = PTHREAD_MUTEX_INITIALIZER; - -void *run_thread_loop(void *ptr) -{ - const int i = *(int *)ptr; - - while (!custom_atomic_load_int(&flag_exit)) { - while (!custom_atomic_load_int(&run_load_data[i])) { - if (custom_atomic_load_int(&flag_exit)) { - free(ptr); - return 0; - } - this_thread_sleep_for(thread_wait_ms); - } - - pthread_mutex_lock(&mtx_load_data); - load_args *args_local = (load_args *)xcalloc(1, sizeof(load_args)); - *args_local = args_swap[i]; - pthread_mutex_unlock(&mtx_load_data); - - load_thread(args_local); - - custom_atomic_store_int(&run_load_data[i], 0); - } - free(ptr); - return 0; -} - -void *load_threads(void *ptr) -{ - //srand(time(0)); - int i; - load_args args = *(load_args *)ptr; - if (args.threads == 0) args.threads = 1; - data *out = args.d; - int total = args.n; - free(ptr); - data* buffers = (data*)xcalloc(args.threads, sizeof(data)); - if (!threads) { - threads = (pthread_t*)xcalloc(args.threads, sizeof(pthread_t)); - run_load_data = (volatile int *)xcalloc(args.threads, sizeof(int)); - args_swap = (load_args *)xcalloc(args.threads, sizeof(load_args)); - fprintf(stderr, " Create %d permanent cpu-threads \n", args.threads); - - for (i = 0; i < 
args.threads; ++i) { - int* ptr = (int*)xcalloc(1, sizeof(int)); - *ptr = i; - if (pthread_create(&threads[i], 0, run_thread_loop, ptr)) error("Thread creation failed"); - } - } - - for (i = 0; i < args.threads; ++i) { - args.d = buffers + i; - args.n = (i + 1) * total / args.threads - i * total / args.threads; - - pthread_mutex_lock(&mtx_load_data); - args_swap[i] = args; - pthread_mutex_unlock(&mtx_load_data); - - custom_atomic_store_int(&run_load_data[i], 1); // run thread - } - for (i = 0; i < args.threads; ++i) { - while (custom_atomic_load_int(&run_load_data[i])) this_thread_sleep_for(thread_wait_ms); // join - } - - /* - pthread_t* threads = (pthread_t*)xcalloc(args.threads, sizeof(pthread_t)); - for(i = 0; i < args.threads; ++i){ - args.d = buffers + i; - args.n = (i+1) * total/args.threads - i * total/args.threads; - threads[i] = load_data_in_thread(args); - } - for(i = 0; i < args.threads; ++i){ - pthread_join(threads[i], 0); - } - */ - - *out = concat_datas(buffers, args.threads); - out->shallow = 0; - for(i = 0; i < args.threads; ++i){ - buffers[i].shallow = 1; - free_data(buffers[i]); - } - free(buffers); - //free(threads); - return 0; -} - -void free_load_threads(void *ptr) -{ - load_args args = *(load_args *)ptr; - if (args.threads == 0) args.threads = 1; - int i; - if (threads) { - custom_atomic_store_int(&flag_exit, 1); - for (i = 0; i < args.threads; ++i) { - pthread_join(threads[i], 0); - } - free((void*)run_load_data); - free(args_swap); - free(threads); - threads = NULL; - custom_atomic_store_int(&flag_exit, 0); - } -} - -pthread_t load_data(load_args args) -{ - pthread_t thread; - struct load_args* ptr = (load_args*)xcalloc(1, sizeof(struct load_args)); - *ptr = args; - if(pthread_create(&thread, 0, load_threads, ptr)) error("Thread creation failed"); - return thread; -} - -data load_data_writing(char **paths, int n, int m, int w, int h, int out_w, int out_h) -{ - if(m) paths = get_random_paths(paths, n, m); - char **replace_paths = 
find_replace_paths(paths, n, ".png", "-label.png"); - data d = {0}; - d.shallow = 0; - d.X = load_image_paths(paths, n, w, h); - d.y = load_image_paths_gray(replace_paths, n, out_w, out_h); - if(m) free(paths); - int i; - for(i = 0; i < n; ++i) free(replace_paths[i]); - free(replace_paths); - return d; -} - -data load_data_old(char **paths, int n, int m, char **labels, int k, int w, int h) -{ - if(m) paths = get_random_paths(paths, n, m); - data d = {0}; - d.shallow = 0; - d.X = load_image_paths(paths, n, w, h); - d.y = load_labels_paths(paths, n, labels, k, 0, 0, 0); - if(m) free(paths); - return d; -} - -/* - data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure) - { - data d = {0}; - d.indexes = calloc(n, sizeof(int)); - if(m) paths = get_random_paths_indexes(paths, n, m, d.indexes); - d.shallow = 0; - d.X = load_image_augment_paths(paths, n, flip, min, max, size, angle, aspect, hue, saturation, exposure); - d.y = load_labels_paths(paths, n, labels, k); - if(m) free(paths); - return d; - } - */ - -data load_data_super(char **paths, int n, int m, int w, int h, int scale) -{ - if(m) paths = get_random_paths(paths, n, m); - data d = {0}; - d.shallow = 0; - - int i; - d.X.rows = n; - d.X.vals = (float**)xcalloc(n, sizeof(float*)); - d.X.cols = w*h*3; - - d.y.rows = n; - d.y.vals = (float**)xcalloc(n, sizeof(float*)); - d.y.cols = w*scale * h*scale * 3; - - for(i = 0; i < n; ++i){ - image im = load_image_color(paths[i], 0, 0); - image crop = random_crop_image(im, w*scale, h*scale); - int flip = random_gen()%2; - if (flip) flip_image(crop); - image resize = resize_image(crop, w, h); - d.X.vals[i] = resize.data; - d.y.vals[i] = crop.data; - free_image(im); - } - - if(m) free(paths); - return d; -} - -data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int use_flip, int min, int max, int w, int h, float angle, - float 
aspect, float hue, float saturation, float exposure, int use_mixup, int use_blur, int show_imgs, float label_smooth_eps, int dontuse_opencv, int contrastive) -{ - char **paths_stored = paths; - if(m) paths = get_random_paths(paths, n, m); - data d = {0}; - d.shallow = 0; - d.X = load_image_augment_paths(paths, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure, dontuse_opencv, contrastive); - d.y = load_labels_paths(paths, n, labels, k, hierarchy, label_smooth_eps, contrastive); - - if (use_mixup && rand_int(0, 1)) { - char **paths_mix = get_random_paths(paths_stored, n, m); - data d2 = { 0 }; - d2.shallow = 0; - d2.X = load_image_augment_paths(paths_mix, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure, dontuse_opencv, contrastive); - d2.y = load_labels_paths(paths_mix, n, labels, k, hierarchy, label_smooth_eps, contrastive); - free(paths_mix); - - data d3 = { 0 }; - d3.shallow = 0; - data d4 = { 0 }; - d4.shallow = 0; - if (use_mixup >= 3) { - char **paths_mix3 = get_random_paths(paths_stored, n, m); - d3.X = load_image_augment_paths(paths_mix3, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure, dontuse_opencv, contrastive); - d3.y = load_labels_paths(paths_mix3, n, labels, k, hierarchy, label_smooth_eps, contrastive); - free(paths_mix3); - - char **paths_mix4 = get_random_paths(paths_stored, n, m); - d4.X = load_image_augment_paths(paths_mix4, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure, dontuse_opencv, contrastive); - d4.y = load_labels_paths(paths_mix4, n, labels, k, hierarchy, label_smooth_eps, contrastive); - free(paths_mix4); - } - - - // mix - int i, j; - for (i = 0; i < d2.X.rows; ++i) { - - int mixup = use_mixup; - if (use_mixup == 4) mixup = rand_int(2, 3); // alternate CutMix and Mosaic - - // MixUp ----------------------------------- - if (mixup == 1) { - // mix images - for (j = 0; j < d2.X.cols; ++j) { - d.X.vals[i][j] = (d.X.vals[i][j] + d2.X.vals[i][j]) / 2.0f; - 
} - - // mix labels - for (j = 0; j < d2.y.cols; ++j) { - d.y.vals[i][j] = (d.y.vals[i][j] + d2.y.vals[i][j]) / 2.0f; - } - } - // CutMix ----------------------------------- - else if (mixup == 2) { - const float min = 0.3; // 0.3*0.3 = 9% - const float max = 0.8; // 0.8*0.8 = 64% - const int cut_w = rand_int(w*min, w*max); - const int cut_h = rand_int(h*min, h*max); - const int cut_x = rand_int(0, w - cut_w - 1); - const int cut_y = rand_int(0, h - cut_h - 1); - const int left = cut_x; - const int right = cut_x + cut_w; - const int top = cut_y; - const int bot = cut_y + cut_h; - - assert(cut_x >= 0 && cut_x <= w); - assert(cut_y >= 0 && cut_y <= h); - assert(cut_w >= 0 && cut_w <= w); - assert(cut_h >= 0 && cut_h <= h); - - assert(right >= 0 && right <= w); - assert(bot >= 0 && bot <= h); - - assert(top <= bot); - assert(left <= right); - - const float alpha = (float)(cut_w*cut_h) / (float)(w*h); - const float beta = 1 - alpha; - - int c, x, y; - for (c = 0; c < 3; ++c) { - for (y = top; y < bot; ++y) { - for (x = left; x < right; ++x) { - int j = x + y*w + c*w*h; - d.X.vals[i][j] = d2.X.vals[i][j]; - } - } - } - - //printf("\n alpha = %f, beta = %f \n", alpha, beta); - // mix labels - for (j = 0; j < d.y.cols; ++j) { - d.y.vals[i][j] = d.y.vals[i][j] * beta + d2.y.vals[i][j] * alpha; - } - } - // Mosaic ----------------------------------- - else if (mixup == 3) - { - const float min_offset = 0.2; // 20% - const int cut_x = rand_int(w*min_offset, w*(1 - min_offset)); - const int cut_y = rand_int(h*min_offset, h*(1 - min_offset)); - - float s1 = (float)(cut_x * cut_y) / (w*h); - float s2 = (float)((w - cut_x) * cut_y) / (w*h); - float s3 = (float)(cut_x * (h - cut_y)) / (w*h); - float s4 = (float)((w - cut_x) * (h - cut_y)) / (w*h); - - int c, x, y; - for (c = 0; c < 3; ++c) { - for (y = 0; y < h; ++y) { - for (x = 0; x < w; ++x) { - int j = x + y*w + c*w*h; - if (x < cut_x && y < cut_y) d.X.vals[i][j] = d.X.vals[i][j]; - if (x >= cut_x && y < cut_y) d.X.vals[i][j] 
= d2.X.vals[i][j]; - if (x < cut_x && y >= cut_y) d.X.vals[i][j] = d3.X.vals[i][j]; - if (x >= cut_x && y >= cut_y) d.X.vals[i][j] = d4.X.vals[i][j]; - } - } - } - - for (j = 0; j < d.y.cols; ++j) { - const float max_s = 1;// max_val_cmp(s1, max_val_cmp(s2, max_val_cmp(s3, s4))); - - d.y.vals[i][j] = d.y.vals[i][j] * s1 / max_s + d2.y.vals[i][j] * s2 / max_s + d3.y.vals[i][j] * s3 / max_s + d4.y.vals[i][j] * s4 / max_s; - } - } - } - - free_data(d2); - - if (use_mixup >= 3) { - free_data(d3); - free_data(d4); - } - } - -#ifdef OPENCV - if (use_blur) { - int i; - for (i = 0; i < d.X.rows; ++i) { - if (random_gen() % 4 == 0) { - image im = make_empty_image(w, h, 3); - im.data = d.X.vals[i]; - int ksize = use_blur; - if (use_blur == 1) ksize = 15; - image blurred = blur_image(im, ksize); - free_image(im); - d.X.vals[i] = blurred.data; - //if (i == 0) { - // show_image(im, "Not blurred"); - // show_image(blurred, "blurred"); - // wait_until_press_key_cv(); - //} - } - } - } -#endif // OPENCV - - if (show_imgs) { - int i, j; - for (i = 0; i < d.X.rows; ++i) { - image im = make_empty_image(w, h, 3); - im.data = d.X.vals[i]; - char buff[1000]; - sprintf(buff, "aug_%d_%s_%d", i, basecfg((char*)paths[i]), random_gen()); - save_image(im, buff); - - char buff_string[1000]; - sprintf(buff_string, "\n Classes: "); - for (j = 0; j < d.y.cols; ++j) { - if (d.y.vals[i][j] > 0) { - char buff_tmp[100]; - sprintf(buff_tmp, " %d (%f), ", j, d.y.vals[i][j]); - strcat(buff_string, buff_tmp); - } - } - printf("%s \n", buff_string); - - if (show_imgs == 1) { - show_image(im, buff); - wait_until_press_key_cv(); - } - } - printf("\nYou use flag -show_imgs, so will be saved aug_...jpg images. 
Click on window and press ESC button \n"); - } - - if (m) free(paths); - - return d; -} - -data load_data_tag(char **paths, int n, int m, int k, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure) -{ - if(m) paths = get_random_paths(paths, n, m); - data d = {0}; - d.w = w; - d.h = h; - d.shallow = 0; - d.X = load_image_augment_paths(paths, n, use_flip, min, max, w, h, angle, aspect, hue, saturation, exposure, 0, 0); - d.y = load_tags_paths(paths, n, k); - if(m) free(paths); - return d; -} - -matrix concat_matrix(matrix m1, matrix m2) -{ - int i, count = 0; - matrix m; - m.cols = m1.cols; - m.rows = m1.rows+m2.rows; - m.vals = (float**)xcalloc(m1.rows + m2.rows, sizeof(float*)); - for(i = 0; i < m1.rows; ++i){ - m.vals[count++] = m1.vals[i]; - } - for(i = 0; i < m2.rows; ++i){ - m.vals[count++] = m2.vals[i]; - } - return m; -} - -data concat_data(data d1, data d2) -{ - data d = {0}; - d.shallow = 1; - d.X = concat_matrix(d1.X, d2.X); - d.y = concat_matrix(d1.y, d2.y); - return d; -} - -data concat_datas(data *d, int n) -{ - int i; - data out = {0}; - for(i = 0; i < n; ++i){ - data newdata = concat_data(d[i], out); - free_data(out); - out = newdata; - } - return out; -} - -data load_categorical_data_csv(char *filename, int target, int k) -{ - data d = {0}; - d.shallow = 0; - matrix X = csv_to_matrix(filename); - float *truth_1d = pop_column(&X, target); - float **truth = one_hot_encode(truth_1d, X.rows, k); - matrix y; - y.rows = X.rows; - y.cols = k; - y.vals = truth; - d.X = X; - d.y = y; - free(truth_1d); - return d; -} - -data load_cifar10_data(char *filename) -{ - data d = {0}; - d.shallow = 0; - long i,j; - matrix X = make_matrix(10000, 3072); - matrix y = make_matrix(10000, 10); - d.X = X; - d.y = y; - - FILE *fp = fopen(filename, "rb"); - if(!fp) file_error(filename); - for(i = 0; i < 10000; ++i){ - unsigned char bytes[3073]; - fread(bytes, 1, 3073, fp); - int class_id = bytes[0]; - 
y.vals[i][class_id] = 1; - for(j = 0; j < X.cols; ++j){ - X.vals[i][j] = (double)bytes[j+1]; - } - } - //translate_data_rows(d, -128); - scale_data_rows(d, 1./255); - //normalize_data_rows(d); - fclose(fp); - return d; -} - -void get_random_batch(data d, int n, float *X, float *y) -{ - int j; - for(j = 0; j < n; ++j){ - int index = random_gen()%d.X.rows; - memcpy(X+j*d.X.cols, d.X.vals[index], d.X.cols*sizeof(float)); - memcpy(y+j*d.y.cols, d.y.vals[index], d.y.cols*sizeof(float)); - } -} - -void get_next_batch(data d, int n, int offset, float *X, float *y) -{ - int j; - for(j = 0; j < n; ++j){ - int index = offset + j; - memcpy(X+j*d.X.cols, d.X.vals[index], d.X.cols*sizeof(float)); - memcpy(y+j*d.y.cols, d.y.vals[index], d.y.cols*sizeof(float)); - } -} - -void smooth_data(data d) -{ - int i, j; - float scale = 1. / d.y.cols; - float eps = .1; - for(i = 0; i < d.y.rows; ++i){ - for(j = 0; j < d.y.cols; ++j){ - d.y.vals[i][j] = eps * scale + (1-eps) * d.y.vals[i][j]; - } - } -} - -data load_all_cifar10() -{ - data d = {0}; - d.shallow = 0; - int i,j,b; - matrix X = make_matrix(50000, 3072); - matrix y = make_matrix(50000, 10); - d.X = X; - d.y = y; - - - for(b = 0; b < 5; ++b){ - char buff[256]; - sprintf(buff, "data/cifar/cifar-10-batches-bin/data_batch_%d.bin", b+1); - FILE *fp = fopen(buff, "rb"); - if(!fp) file_error(buff); - for(i = 0; i < 10000; ++i){ - unsigned char bytes[3073]; - fread(bytes, 1, 3073, fp); - int class_id = bytes[0]; - y.vals[i+b*10000][class_id] = 1; - for(j = 0; j < X.cols; ++j){ - X.vals[i+b*10000][j] = (double)bytes[j+1]; - } - } - fclose(fp); - } - //normalize_data_rows(d); - //translate_data_rows(d, -128); - scale_data_rows(d, 1./255); - smooth_data(d); - return d; -} - -data load_go(char *filename) -{ - FILE *fp = fopen(filename, "rb"); - matrix X = make_matrix(3363059, 361); - matrix y = make_matrix(3363059, 361); - int row, col; - - if(!fp) file_error(filename); - char *label; - int count = 0; - while((label = fgetl(fp))){ - int i; 
- if(count == X.rows){ - X = resize_matrix(X, count*2); - y = resize_matrix(y, count*2); - } - sscanf(label, "%d %d", &row, &col); - char *board = fgetl(fp); - - int index = row*19 + col; - y.vals[count][index] = 1; - - for(i = 0; i < 19*19; ++i){ - float val = 0; - if(board[i] == '1') val = 1; - else if(board[i] == '2') val = -1; - X.vals[count][i] = val; - } - ++count; - free(label); - free(board); - } - X = resize_matrix(X, count); - y = resize_matrix(y, count); - - data d = {0}; - d.shallow = 0; - d.X = X; - d.y = y; - - - fclose(fp); - - return d; -} - - -void randomize_data(data d) -{ - int i; - for(i = d.X.rows-1; i > 0; --i){ - int index = random_gen()%i; - float *swap = d.X.vals[index]; - d.X.vals[index] = d.X.vals[i]; - d.X.vals[i] = swap; - - swap = d.y.vals[index]; - d.y.vals[index] = d.y.vals[i]; - d.y.vals[i] = swap; - } -} - -void scale_data_rows(data d, float s) -{ - int i; - for(i = 0; i < d.X.rows; ++i){ - scale_array(d.X.vals[i], d.X.cols, s); - } -} - -void translate_data_rows(data d, float s) -{ - int i; - for(i = 0; i < d.X.rows; ++i){ - translate_array(d.X.vals[i], d.X.cols, s); - } -} - -void normalize_data_rows(data d) -{ - int i; - for(i = 0; i < d.X.rows; ++i){ - normalize_array(d.X.vals[i], d.X.cols); - } -} - -data get_data_part(data d, int part, int total) -{ - data p = {0}; - p.shallow = 1; - p.X.rows = d.X.rows * (part + 1) / total - d.X.rows * part / total; - p.y.rows = d.y.rows * (part + 1) / total - d.y.rows * part / total; - p.X.cols = d.X.cols; - p.y.cols = d.y.cols; - p.X.vals = d.X.vals + d.X.rows * part / total; - p.y.vals = d.y.vals + d.y.rows * part / total; - return p; -} - -data get_random_data(data d, int num) -{ - data r = {0}; - r.shallow = 1; - - r.X.rows = num; - r.y.rows = num; - - r.X.cols = d.X.cols; - r.y.cols = d.y.cols; - - r.X.vals = (float**)xcalloc(num, sizeof(float*)); - r.y.vals = (float**)xcalloc(num, sizeof(float*)); - - int i; - for(i = 0; i < num; ++i){ - int index = random_gen()%d.X.rows; - 
r.X.vals[i] = d.X.vals[index]; - r.y.vals[i] = d.y.vals[index]; - } - return r; -} - -data *split_data(data d, int part, int total) -{ - data* split = (data*)xcalloc(2, sizeof(data)); - int i; - int start = part*d.X.rows/total; - int end = (part+1)*d.X.rows/total; - data train ={0}; - data test ={0}; - train.shallow = test.shallow = 1; - - test.X.rows = test.y.rows = end-start; - train.X.rows = train.y.rows = d.X.rows - (end-start); - train.X.cols = test.X.cols = d.X.cols; - train.y.cols = test.y.cols = d.y.cols; - - train.X.vals = (float**)xcalloc(train.X.rows, sizeof(float*)); - test.X.vals = (float**)xcalloc(test.X.rows, sizeof(float*)); - train.y.vals = (float**)xcalloc(train.y.rows, sizeof(float*)); - test.y.vals = (float**)xcalloc(test.y.rows, sizeof(float*)); - - for(i = 0; i < start; ++i){ - train.X.vals[i] = d.X.vals[i]; - train.y.vals[i] = d.y.vals[i]; - } - for(i = start; i < end; ++i){ - test.X.vals[i-start] = d.X.vals[i]; - test.y.vals[i-start] = d.y.vals[i]; - } - for(i = end; i < d.X.rows; ++i){ - train.X.vals[i-(end-start)] = d.X.vals[i]; - train.y.vals[i-(end-start)] = d.y.vals[i]; - } - split[0] = train; - split[1] = test; - return split; -} diff --git a/src/Detector/darknet/src/data.h b/src/Detector/darknet/src/data.h deleted file mode 100644 index ecbd0188e..000000000 --- a/src/Detector/darknet/src/data.h +++ /dev/null @@ -1,125 +0,0 @@ -#ifndef DATA_H -#define DATA_H -#include - -#include "darknet.h" -#include "darknet.h" -#include "matrix.h" -#include "list.h" -#include "image.h" -#ifdef __cplusplus -extern "C" { -#endif -#include "tree.h" - -static inline float distance_from_edge(int x, int max) -{ - int dx = (max/2) - x; - if (dx < 0) dx = -dx; - dx = (max/2) + 1 - dx; - dx *= 2; - float dist = (float)dx/max; - if (dist > 1) dist = 1; - return dist; -} - -//typedef struct{ -// int w, h; -// matrix X; -// matrix y; -// int shallow; -// int *num_boxes; -// box **boxes; -//} data; - -//typedef enum { -// CLASSIFICATION_DATA, DETECTION_DATA, 
CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, LETTERBOX_DATA, COMPARE_DATA, WRITING_DATA, SWAG_DATA, TAG_DATA, OLD_CLASSIFICATION_DATA, STUDY_DATA, DET_DATA, SUPER_DATA -//} data_type; -/* -typedef struct load_args{ - int threads; - char **paths; - char *path; - int n; - int m; - char **labels; - int h; - int w; - int c; // color depth - int out_w; - int out_h; - int nh; - int nw; - int num_boxes; - int min, max, size; - int classes; - int background; - int scale; - int small_object; - float jitter; - int flip; - float angle; - float aspect; - float saturation; - float exposure; - float hue; - data *d; - image *im; - image *resized; - data_type type; - tree *hierarchy; -} load_args; - -typedef struct{ - int id; - float x,y,w,h; - float left, right, top, bottom; -} box_label; - -void free_data(data d); - -pthread_t load_data(load_args args); - -pthread_t load_data_in_thread(load_args args); -*/ -void print_letters(float *pred, int n); -data load_data_captcha(char **paths, int n, int m, int k, int w, int h); -data load_data_captcha_encode(char **paths, int n, int m, int w, int h); -data load_data_old(char **paths, int n, int m, char **labels, int k, int w, int h); -data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int truth_size, int classes, int use_flip, int gaussian_noise, int use_blur, int use_mixup, - float jitter, float resize, float hue, float saturation, float exposure, int mini_batch, int track, int augment_speed, int letter_box, int mosaic_bound, int contrastive, int contrastive_jit_flip, int contrastive_color, int show_imgs); -data load_data_tag(char **paths, int n, int m, int k, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure); -matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure, int dontuse_opencv, int contrastive); -data load_data_super(char 
**paths, int n, int m, int w, int h, int scale); -data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int use_flip, int min, int max, int w, int h, float angle, - float aspect, float hue, float saturation, float exposure, int use_mixup, int use_blur, int show_imgs, float label_smooth_eps, int dontuse_opencv, int contrastive); -data load_go(char *filename); - -box_label *read_boxes(char *filename, int *n); -data load_cifar10_data(char *filename); -data load_all_cifar10(); - -data load_data_writing(char **paths, int n, int m, int w, int h, int out_w, int out_h); - -list *get_paths(char *filename); -char **get_labels(char *filename); -char **get_labels_custom(char *filename, int *size); -void get_random_batch(data d, int n, float *X, float *y); -data get_data_part(data d, int part, int total); -data get_random_data(data d, int num); -void get_next_batch(data d, int n, int offset, float *X, float *y); -data load_categorical_data_csv(char *filename, int target, int k); -void normalize_data_rows(data d); -void scale_data_rows(data d, float s); -void translate_data_rows(data d, float s); -void randomize_data(data d); -data *split_data(data d, int part, int total); -data concat_data(data d1, data d2); -data concat_datas(data *d, int n); -void fill_truth(char *path, char **labels, int k, float *truth); -void fill_truth_smooth(char *path, char **labels, int k, float *truth, float label_smooth_eps); -#ifdef __cplusplus -} - -#endif -#endif diff --git a/src/Detector/darknet/src/deconvolutional_kernels.cu b/src/Detector/darknet/src/deconvolutional_kernels.cu deleted file mode 100644 index 6af65ebbd..000000000 --- a/src/Detector/darknet/src/deconvolutional_kernels.cu +++ /dev/null @@ -1,106 +0,0 @@ -#include -#include -#include - -#include "convolutional_layer.h" -#include "deconvolutional_layer.h" -#include "gemm.h" -#include "blas.h" -#include "im2col.h" -#include "col2im.h" -#include "utils.h" -#include "dark_cuda.h" - -extern "C" void 
forward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state) -{ - int i; - int out_h = deconvolutional_out_height(layer); - int out_w = deconvolutional_out_width(layer); - int size = out_h*out_w; - - int m = layer.size*layer.size*layer.n; - int n = layer.h*layer.w; - int k = layer.c; - - fill_ongpu(layer.outputs*layer.batch, 0, layer.output_gpu, 1); - - for(i = 0; i < layer.batch; ++i){ - float *a = layer.weights_gpu; - float *b = state.input + i*layer.c*layer.h*layer.w; - float *c = layer.col_image_gpu; - - gemm_ongpu(1,0,m,n,k,1,a,m,b,n,0,c,n); - - col2im_ongpu(c, layer.n, out_h, out_w, layer.size, layer.stride, 0, layer.output_gpu+i*layer.n*size); - } - add_bias_gpu(layer.output_gpu, layer.biases_gpu, layer.batch, layer.n, size); - activate_array(layer.output_gpu, layer.batch*layer.n*size, layer.activation); -} - -extern "C" void backward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state) -{ - float alpha = 1./layer.batch; - int out_h = deconvolutional_out_height(layer); - int out_w = deconvolutional_out_width(layer); - int size = out_h*out_w; - int i; - - gradient_array(layer.output_gpu, size*layer.n*layer.batch, layer.activation, layer.delta_gpu); - backward_bias(layer.bias_updates_gpu, layer.delta, layer.batch, layer.n, size); - - if(state.delta) memset(state.delta, 0, layer.batch*layer.h*layer.w*layer.c*sizeof(float)); - - for(i = 0; i < layer.batch; ++i){ - int m = layer.c; - int n = layer.size*layer.size*layer.n; - int k = layer.h*layer.w; - - float *a = state.input + i*m*n; - float *b = layer.col_image_gpu; - float *c = layer.weight_updates_gpu; - - im2col_ongpu(layer.delta_gpu + i*layer.n*size, layer.n, out_h, out_w, - layer.size, layer.stride, 0, b); - gemm_ongpu(0,1,m,n,k,alpha,a,k,b,k,1,c,n); - - if(state.delta){ - int m = layer.c; - int n = layer.h*layer.w; - int k = layer.size*layer.size*layer.n; - - float *a = layer.weights_gpu; - float *b = layer.col_image_gpu; - float *c = state.delta + i*n*m; - - 
gemm(0,0,m,n,k,1,a,k,b,n,1,c,n); - } - } -} - -extern "C" void pull_deconvolutional_layer(deconvolutional_layer layer) -{ - cuda_pull_array(layer.weights_gpu, layer.weights, layer.c*layer.n*layer.size*layer.size); - cuda_pull_array(layer.biases_gpu, layer.biases, layer.n); - cuda_pull_array(layer.weight_updates_gpu, layer.weight_updates, layer.c*layer.n*layer.size*layer.size); - cuda_pull_array(layer.bias_updates_gpu, layer.bias_updates, layer.n); -} - -extern "C" void push_deconvolutional_layer(deconvolutional_layer layer) -{ - cuda_push_array(layer.weights_gpu, layer.weights, layer.c*layer.n*layer.size*layer.size); - cuda_push_array(layer.biases_gpu, layer.biases, layer.n); - cuda_push_array(layer.weight_updates_gpu, layer.weight_updates, layer.c*layer.n*layer.size*layer.size); - cuda_push_array(layer.bias_updates_gpu, layer.bias_updates, layer.n); -} - -extern "C" void update_deconvolutional_layer_gpu(deconvolutional_layer layer, int skip, float learning_rate, float momentum, float decay) -{ - int size = layer.size*layer.size*layer.c*layer.n; - - axpy_ongpu(layer.n, learning_rate, layer.bias_updates_gpu, 1, layer.biases_gpu, 1); - scal_ongpu(layer.n, momentum, layer.bias_updates_gpu, 1); - - axpy_ongpu(size, -decay, layer.weights_gpu, 1, layer.weight_updates_gpu, 1); - axpy_ongpu(size, learning_rate, layer.weight_updates_gpu, 1, layer.weights_gpu, 1); - scal_ongpu(size, momentum, layer.weight_updates_gpu, 1); -} diff --git a/src/Detector/darknet/src/deconvolutional_layer.c b/src/Detector/darknet/src/deconvolutional_layer.c deleted file mode 100644 index 4f4e4cc22..000000000 --- a/src/Detector/darknet/src/deconvolutional_layer.c +++ /dev/null @@ -1,203 +0,0 @@ -#include "deconvolutional_layer.h" -#include "convolutional_layer.h" -#include "utils.h" -#include "im2col.h" -#include "col2im.h" -#include "blas.h" -#include "gemm.h" -#include -#include - -int deconvolutional_out_height(deconvolutional_layer l) -{ - int h = l.stride*(l.h - 1) + l.size; - return h; -} - 
-int deconvolutional_out_width(deconvolutional_layer l) -{ - int w = l.stride*(l.w - 1) + l.size; - return w; -} - -int deconvolutional_out_size(deconvolutional_layer l) -{ - return deconvolutional_out_height(l) * deconvolutional_out_width(l); -} - -image get_deconvolutional_image(deconvolutional_layer l) -{ - int h,w,c; - h = deconvolutional_out_height(l); - w = deconvolutional_out_width(l); - c = l.n; - return float_to_image(w,h,c,l.output); -} - -image get_deconvolutional_delta(deconvolutional_layer l) -{ - int h,w,c; - h = deconvolutional_out_height(l); - w = deconvolutional_out_width(l); - c = l.n; - return float_to_image(w,h,c,l.delta); -} - -deconvolutional_layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation) -{ - int i; - deconvolutional_layer l = { (LAYER_TYPE)0 }; - l.type = DECONVOLUTIONAL; - - l.h = h; - l.w = w; - l.c = c; - l.n = n; - l.batch = batch; - l.stride = stride; - l.size = size; - - l.weights = (float*)xcalloc(c * n * size * size, sizeof(float)); - l.weight_updates = (float*)xcalloc(c * n * size * size, sizeof(float)); - - l.biases = (float*)xcalloc(n, sizeof(float)); - l.bias_updates = (float*)xcalloc(n, sizeof(float)); - float scale = 1./sqrt(size*size*c); - for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_normal(); - for(i = 0; i < n; ++i){ - l.biases[i] = scale; - } - int out_h = deconvolutional_out_height(l); - int out_w = deconvolutional_out_width(l); - - l.out_h = out_h; - l.out_w = out_w; - l.out_c = n; - l.outputs = l.out_w * l.out_h * l.out_c; - l.inputs = l.w * l.h * l.c; - - l.col_image = (float*)xcalloc(h * w * size * size * n, sizeof(float)); - l.output = (float*)xcalloc(l.batch * out_h * out_w * n, sizeof(float)); - l.delta = (float*)xcalloc(l.batch * out_h * out_w * n, sizeof(float)); - - l.forward = forward_deconvolutional_layer; - l.backward = backward_deconvolutional_layer; - l.update = update_deconvolutional_layer; - - #ifdef GPU - l.weights_gpu = 
cuda_make_array(l.weights, c*n*size*size); - l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size); - - l.biases_gpu = cuda_make_array(l.biases, n); - l.bias_updates_gpu = cuda_make_array(l.bias_updates, n); - - l.col_image_gpu = cuda_make_array(l.col_image, h*w*size*size*n); - l.delta_gpu = cuda_make_array(l.delta, l.batch*out_h*out_w*n); - l.output_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n); - #endif - - l.activation = activation; - - fprintf(stderr, "Deconvolutional Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n); - - return l; -} - -void resize_deconvolutional_layer(deconvolutional_layer *l, int h, int w) -{ - l->h = h; - l->w = w; - int out_h = deconvolutional_out_height(*l); - int out_w = deconvolutional_out_width(*l); - - l->col_image = (float*)xrealloc(l->col_image, - out_h*out_w*l->size*l->size*l->c*sizeof(float)); - l->output = (float*)xrealloc(l->output, - l->batch*out_h * out_w * l->n*sizeof(float)); - l->delta = (float*)xrealloc(l->delta, - l->batch*out_h * out_w * l->n*sizeof(float)); - #ifdef GPU - cuda_free(l->col_image_gpu); - cuda_free(l->delta_gpu); - cuda_free(l->output_gpu); - - l->col_image_gpu = cuda_make_array(l->col_image, out_h*out_w*l->size*l->size*l->c); - l->delta_gpu = cuda_make_array(l->delta, l->batch*out_h*out_w*l->n); - l->output_gpu = cuda_make_array(l->output, l->batch*out_h*out_w*l->n); - #endif -} - -void forward_deconvolutional_layer(const deconvolutional_layer l, network_state state) -{ - int i; - int out_h = deconvolutional_out_height(l); - int out_w = deconvolutional_out_width(l); - int size = out_h*out_w; - - int m = l.size*l.size*l.n; - int n = l.h*l.w; - int k = l.c; - - fill_cpu(l.outputs*l.batch, 0, l.output, 1); - - for(i = 0; i < l.batch; ++i){ - float *a = l.weights; - float *b = state.input + i*l.c*l.h*l.w; - float *c = l.col_image; - - gemm(1,0,m,n,k,1,a,m,b,n,0,c,n); - - col2im_cpu(c, l.n, out_h, out_w, l.size, l.stride, 0, 
l.output+i*l.n*size); - } - add_bias(l.output, l.biases, l.batch, l.n, size); - activate_array(l.output, l.batch*l.n*size, l.activation); -} - -void backward_deconvolutional_layer(deconvolutional_layer l, network_state state) -{ - float alpha = 1./l.batch; - int out_h = deconvolutional_out_height(l); - int out_w = deconvolutional_out_width(l); - int size = out_h*out_w; - int i; - - gradient_array(l.output, size*l.n*l.batch, l.activation, l.delta); - backward_bias(l.bias_updates, l.delta, l.batch, l.n, size); - - for(i = 0; i < l.batch; ++i){ - int m = l.c; - int n = l.size*l.size*l.n; - int k = l.h*l.w; - - float *a = state.input + i*m*n; - float *b = l.col_image; - float *c = l.weight_updates; - - im2col_cpu(l.delta + i*l.n*size, l.n, out_h, out_w, - l.size, l.stride, 0, b); - gemm(0,1,m,n,k,alpha,a,k,b,k,1,c,n); - - if(state.delta){ - int m = l.c; - int n = l.h*l.w; - int k = l.size*l.size*l.n; - - float *a = l.weights; - float *b = l.col_image; - float *c = state.delta + i*n*m; - - gemm(0,0,m,n,k,1,a,k,b,n,1,c,n); - } - } -} - -void update_deconvolutional_layer(deconvolutional_layer l, int skip, float learning_rate, float momentum, float decay) -{ - int size = l.size*l.size*l.c*l.n; - axpy_cpu(l.n, learning_rate, l.bias_updates, 1, l.biases, 1); - scal_cpu(l.n, momentum, l.bias_updates, 1); - - axpy_cpu(size, -decay, l.weights, 1, l.weight_updates, 1); - axpy_cpu(size, learning_rate, l.weight_updates, 1, l.weights, 1); - scal_cpu(size, momentum, l.weight_updates, 1); -} diff --git a/src/Detector/darknet/src/deconvolutional_layer.h b/src/Detector/darknet/src/deconvolutional_layer.h deleted file mode 100644 index bb15a4296..000000000 --- a/src/Detector/darknet/src/deconvolutional_layer.h +++ /dev/null @@ -1,40 +0,0 @@ -#ifndef DECONVOLUTIONAL_LAYER_H -#define DECONVOLUTIONAL_LAYER_H - -#include "dark_cuda.h" -#include "image.h" -#include "activations.h" -#include "layer.h" -#include "network.h" - -typedef layer deconvolutional_layer; - -#ifdef __cplusplus -extern 
"C" { -#endif -#ifdef GPU -void forward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state); -void backward_deconvolutional_layer_gpu(deconvolutional_layer layer, network_state state); -void update_deconvolutional_layer_gpu(deconvolutional_layer layer, int skip, float learning_rate, float momentum, float decay); -void push_deconvolutional_layer(deconvolutional_layer layer); -void pull_deconvolutional_layer(deconvolutional_layer layer); -#endif - -deconvolutional_layer make_deconvolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, ACTIVATION activation); -void resize_deconvolutional_layer(deconvolutional_layer *layer, int h, int w); -void forward_deconvolutional_layer(const deconvolutional_layer layer, network_state state); -void update_deconvolutional_layer(deconvolutional_layer layer, int skip, float learning_rate, float momentum, float decay); -void backward_deconvolutional_layer(deconvolutional_layer layer, network_state state); - -image get_deconvolutional_image(deconvolutional_layer layer); -image get_deconvolutional_delta(deconvolutional_layer layer); -image get_deconvolutional_filter(deconvolutional_layer layer, int i); - -int deconvolutional_out_height(deconvolutional_layer layer); -int deconvolutional_out_width(deconvolutional_layer layer); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/Detector/darknet/src/demo.c b/src/Detector/darknet/src/demo.c deleted file mode 100644 index 08e06f196..000000000 --- a/src/Detector/darknet/src/demo.c +++ /dev/null @@ -1,429 +0,0 @@ -#include "network.h" -#include "detection_layer.h" -#include "region_layer.h" -#include "cost_layer.h" -#include "utils.h" -#include "parser.h" -#include "box.h" -#include "image.h" -#include "demo.h" -#include "darknet.h" -#ifdef WIN32 -#include -#include "gettimeofday.h" -#else -#include -#endif - -#ifdef OPENCV - -#include "http_stream.h" - -static char **demo_names; -static image **demo_alphabet; -static int demo_classes; - 
-static int nboxes = 0; -static detection *dets = NULL; - -static network net; -static image in_s ; -static image det_s; - -static cap_cv *cap; -static float fps = 0; -static float demo_thresh = 0; -static int demo_ext_output = 0; -static long long int frame_id = 0; -static int demo_json_port = -1; - - -static int avg_frames; -static int demo_index = 0; -static mat_cv** cv_images; - -mat_cv* in_img; -mat_cv* det_img; -mat_cv* show_img; - -static volatile int flag_exit; -static int letter_box = 0; - -static const int thread_wait_ms = 1; -static volatile int run_fetch_in_thread = 0; -static volatile int run_detect_in_thread = 0; - - -void *fetch_in_thread(void *ptr) -{ - while (!custom_atomic_load_int(&flag_exit)) { - while (!custom_atomic_load_int(&run_fetch_in_thread)) { - if (custom_atomic_load_int(&flag_exit)) return 0; - this_thread_yield(); - } - int dont_close_stream = 0; // set 1 if your IP-camera periodically turns off and turns on video-stream - if (letter_box) - in_s = get_image_from_stream_letterbox(cap, net.w, net.h, net.c, &in_img, dont_close_stream); - else - in_s = get_image_from_stream_resize(cap, net.w, net.h, net.c, &in_img, dont_close_stream); - if (!in_s.data) { - printf("Stream closed.\n"); - custom_atomic_store_int(&flag_exit, 1); - custom_atomic_store_int(&run_fetch_in_thread, 0); - //exit(EXIT_FAILURE); - return 0; - } - //in_s = resize_image(in, net.w, net.h); - - custom_atomic_store_int(&run_fetch_in_thread, 0); - } - return 0; -} - -void *fetch_in_thread_sync(void *ptr) -{ - custom_atomic_store_int(&run_fetch_in_thread, 1); - while (custom_atomic_load_int(&run_fetch_in_thread)) this_thread_sleep_for(thread_wait_ms); - return 0; -} - -void *detect_in_thread(void *ptr) -{ - while (!custom_atomic_load_int(&flag_exit)) { - while (!custom_atomic_load_int(&run_detect_in_thread)) { - if (custom_atomic_load_int(&flag_exit)) return 0; - this_thread_yield(); - } - - layer l = net.layers[net.n - 1]; - float *X = det_s.data; - float *prediction = 
network_predict(net, X); - - cv_images[demo_index] = det_img; - det_img = cv_images[(demo_index + avg_frames / 2 + 1) % avg_frames]; - demo_index = (demo_index + 1) % avg_frames; - - if (letter_box) - dets = get_network_boxes(&net, get_width_mat(in_img), get_height_mat(in_img), demo_thresh, demo_thresh, 0, 1, &nboxes, 1); // letter box - else - dets = get_network_boxes(&net, net.w, net.h, demo_thresh, demo_thresh, 0, 1, &nboxes, 0); // resized - - //const float nms = .45; - //if (nms) { - // if (l.nms_kind == DEFAULT_NMS) do_nms_sort(dets, nboxes, l.classes, nms); - // else diounms_sort(dets, nboxes, l.classes, nms, l.nms_kind, l.beta_nms); - //} - - custom_atomic_store_int(&run_detect_in_thread, 0); - } - - return 0; -} - -void *detect_in_thread_sync(void *ptr) -{ - custom_atomic_store_int(&run_detect_in_thread, 1); - while (custom_atomic_load_int(&run_detect_in_thread)) this_thread_sleep_for(thread_wait_ms); - return 0; -} - -double get_wall_time() -{ - struct timeval walltime; - if (gettimeofday(&walltime, NULL)) { - return 0; - } - return (double)walltime.tv_sec + (double)walltime.tv_usec * .000001; -} - -void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int cam_index, const char *filename, char **names, int classes, int avgframes, - int frame_skip, char *prefix, char *out_filename, int mjpeg_port, int dontdraw_bbox, int json_port, int dont_show, int ext_output, int letter_box_in, int time_limit_sec, char *http_post_host, - int benchmark, int benchmark_layers) -{ - if (avgframes < 1) avgframes = 1; - avg_frames = avgframes; - letter_box = letter_box_in; - in_img = det_img = show_img = NULL; - //skip = frame_skip; - image **alphabet = load_alphabet(); - int delay = frame_skip; - demo_names = names; - demo_alphabet = alphabet; - demo_classes = classes; - demo_thresh = thresh; - demo_ext_output = ext_output; - demo_json_port = json_port; - printf("Demo\n"); - net = parse_network_cfg_custom(cfgfile, 1, 1); // set batch=1 - if(weightfile){ 
- load_weights(&net, weightfile); - } - if (net.letter_box) letter_box = 1; - net.benchmark_layers = benchmark_layers; - fuse_conv_batchnorm(net); - calculate_binary_weights(net); - srand(2222222); - - if(filename){ - printf("video file: %s\n", filename); - cap = get_capture_video_stream(filename); - }else{ - printf("Webcam index: %d\n", cam_index); - cap = get_capture_webcam(cam_index); - } - - if (!cap) { -#ifdef WIN32 - printf("Check that you have copied file opencv_ffmpeg340_64.dll to the same directory where is darknet.exe \n"); -#endif - error("Couldn't connect to webcam.\n"); - } - - layer l = net.layers[net.n-1]; - int j; - - cv_images = (mat_cv**)xcalloc(avg_frames, sizeof(mat_cv)); - - int i; - for (i = 0; i < net.n; ++i) { - layer lc = net.layers[i]; - if (lc.type == YOLO) { - lc.mean_alpha = 1.0 / avg_frames; - l = lc; - } - } - - if (l.classes != demo_classes) { - printf("\n Parameters don't match: in cfg-file classes=%d, in data-file classes=%d \n", l.classes, demo_classes); - getchar(); - exit(0); - } - - flag_exit = 0; - - custom_thread_t fetch_thread = NULL; - custom_thread_t detect_thread = NULL; - if (custom_create_thread(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed"); - if (custom_create_thread(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed"); - - fetch_in_thread_sync(0); //fetch_in_thread(0); - det_img = in_img; - det_s = in_s; - - fetch_in_thread_sync(0); //fetch_in_thread(0); - detect_in_thread_sync(0); //fetch_in_thread(0); - det_img = in_img; - det_s = in_s; - - for (j = 0; j < avg_frames / 2; ++j) { - free_detections(dets, nboxes); - fetch_in_thread_sync(0); //fetch_in_thread(0); - detect_in_thread_sync(0); //fetch_in_thread(0); - det_img = in_img; - det_s = in_s; - } - - int count = 0; - if(!prefix && !dont_show){ - int full_screen = 0; - create_window_cv("Demo", full_screen, 1352, 1013); - } - - - write_cv* output_video_writer = NULL; - if (out_filename && !flag_exit) - { - int src_fps = 
25; - src_fps = get_stream_fps_cpp_cv(cap); - output_video_writer = - create_video_writer(out_filename, 'D', 'I', 'V', 'X', src_fps, get_width_mat(det_img), get_height_mat(det_img), 1); - - //'H', '2', '6', '4' - //'D', 'I', 'V', 'X' - //'M', 'J', 'P', 'G' - //'M', 'P', '4', 'V' - //'M', 'P', '4', '2' - //'X', 'V', 'I', 'D' - //'W', 'M', 'V', '2' - } - - int send_http_post_once = 0; - const double start_time_lim = get_time_point(); - double before = get_time_point(); - double start_time = get_time_point(); - float avg_fps = 0; - int frame_counter = 0; - int global_frame_counter = 0; - - while(1){ - ++count; - { - const float nms = .45; // 0.4F - int local_nboxes = nboxes; - detection *local_dets = dets; - this_thread_yield(); - - if (!benchmark) custom_atomic_store_int(&run_fetch_in_thread, 1); // if (custom_create_thread(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed"); - custom_atomic_store_int(&run_detect_in_thread, 1); // if (custom_create_thread(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed"); - - //if (nms) do_nms_obj(local_dets, local_nboxes, l.classes, nms); // bad results - if (nms) { - if (l.nms_kind == DEFAULT_NMS) do_nms_sort(local_dets, local_nboxes, l.classes, nms); - else diounms_sort(local_dets, local_nboxes, l.classes, nms, l.nms_kind, l.beta_nms); - } - - if (l.embedding_size) set_track_id(local_dets, local_nboxes, demo_thresh, l.sim_thresh, l.track_ciou_norm, l.track_history_size, l.dets_for_track, l.dets_for_show); - - //printf("\033[2J"); - //printf("\033[1;1H"); - //printf("\nFPS:%.1f\n", fps); - printf("Objects:\n\n"); - - ++frame_id; - if (demo_json_port > 0) { - int timeout = 400000; - send_json(local_dets, local_nboxes, l.classes, demo_names, frame_id, demo_json_port, timeout); - } - - //char *http_post_server = "webhook.site/898bbd9b-0ddd-49cf-b81d-1f56be98d870"; - if (http_post_host && !send_http_post_once) { - int timeout = 3; // 3 seconds - int http_post_port = 80; // 443 https, 80 http - 
if (send_http_post_request(http_post_host, http_post_port, filename, - local_dets, nboxes, classes, names, frame_id, ext_output, timeout)) - { - if (time_limit_sec > 0) send_http_post_once = 1; - } - } - - if (!benchmark && !dontdraw_bbox) draw_detections_cv_v3(show_img, local_dets, local_nboxes, demo_thresh, demo_names, demo_alphabet, demo_classes, demo_ext_output); - free_detections(local_dets, local_nboxes); - - printf("\nFPS:%.1f \t AVG_FPS:%.1f\n", fps, avg_fps); - - if(!prefix){ - if (!dont_show) { - const int each_frame = max_val_cmp(1, avg_fps / 60); - if(global_frame_counter % each_frame == 0) show_image_mat(show_img, "Demo"); - int c = wait_key_cv(1); - if (c == 10) { - if (frame_skip == 0) frame_skip = 60; - else if (frame_skip == 4) frame_skip = 0; - else if (frame_skip == 60) frame_skip = 4; - else frame_skip = 0; - } - else if (c == 27 || c == 1048603) // ESC - exit (OpenCV 2.x / 3.x) - { - flag_exit = 1; - } - } - }else{ - char buff[256]; - sprintf(buff, "%s_%08d.jpg", prefix, count); - if(show_img) save_cv_jpg(show_img, buff); - } - - // if you run it with param -mjpeg_port 8090 then open URL in your web-browser: http://localhost:8090 - if (mjpeg_port > 0 && show_img) { - int port = mjpeg_port; - int timeout = 400000; - int jpeg_quality = 40; // 1 - 100 - send_mjpeg(show_img, port, timeout, jpeg_quality); - } - - // save video file - if (output_video_writer && show_img) { - write_frame_cv(output_video_writer, show_img); - printf("\n cvWriteFrame \n"); - } - - while (custom_atomic_load_int(&run_detect_in_thread)) { - if(avg_fps > 180) this_thread_yield(); - else this_thread_sleep_for(thread_wait_ms); // custom_join(detect_thread, 0); - } - if (!benchmark) { - while (custom_atomic_load_int(&run_fetch_in_thread)) { - if (avg_fps > 180) this_thread_yield(); - else this_thread_sleep_for(thread_wait_ms); // custom_join(fetch_thread, 0); - } - free_image(det_s); - } - - if (time_limit_sec > 0 && (get_time_point() - start_time_lim)/1000000 > time_limit_sec) 
{ - printf(" start_time_lim = %f, get_time_point() = %f, time spent = %f \n", start_time_lim, get_time_point(), get_time_point() - start_time_lim); - break; - } - - if (flag_exit == 1) break; - - if(delay == 0){ - if(!benchmark) release_mat(&show_img); - show_img = det_img; - } - det_img = in_img; - det_s = in_s; - } - --delay; - if(delay < 0){ - delay = frame_skip; - - //double after = get_wall_time(); - //float curr = 1./(after - before); - double after = get_time_point(); // more accurate time measurements - float curr = 1000000. / (after - before); - fps = fps*0.9 + curr*0.1; - before = after; - - float spent_time = (get_time_point() - start_time) / 1000000; - frame_counter++; - global_frame_counter++; - if (spent_time >= 3.0f) { - //printf(" spent_time = %f \n", spent_time); - avg_fps = frame_counter / spent_time; - frame_counter = 0; - start_time = get_time_point(); - } - } - } - printf("input video stream closed. \n"); - if (output_video_writer) { - release_video_writer(&output_video_writer); - printf("output_video_writer closed. 
\n"); - } - - this_thread_sleep_for(thread_wait_ms); - - custom_join(detect_thread, 0); - custom_join(fetch_thread, 0); - - // free memory - free_image(in_s); - free_detections(dets, nboxes); - - demo_index = (avg_frames + demo_index - 1) % avg_frames; - for (j = 0; j < avg_frames; ++j) { - release_mat(&cv_images[j]); - } - free(cv_images); - - free_ptrs((void **)names, net.layers[net.n - 1].classes); - - const int nsize = 8; - for (j = 0; j < nsize; ++j) { - for (i = 32; i < 127; ++i) { - free_image(alphabet[j][i]); - } - free(alphabet[j]); - } - free(alphabet); - free_network(net); - //cudaProfilerStop(); -} -#else -void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int cam_index, const char *filename, char **names, int classes, int avgframes, - int frame_skip, char *prefix, char *out_filename, int mjpeg_port, int dontdraw_bbox, int json_port, int dont_show, int ext_output, int letter_box_in, int time_limit_sec, char *http_post_host, - int benchmark, int benchmark_layers) -{ - fprintf(stderr, "Demo needs OpenCV for webcam images.\n"); -} -#endif diff --git a/src/Detector/darknet/src/demo.h b/src/Detector/darknet/src/demo.h deleted file mode 100644 index 380b72fed..000000000 --- a/src/Detector/darknet/src/demo.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef DEMO_H -#define DEMO_H - -#include "image.h" -#ifdef __cplusplus -extern "C" { -#endif -void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int cam_index, const char *filename, char **names, int classes, int avgframes, - int frame_skip, char *prefix, char *out_filename, int mjpeg_port, int dontdraw_bbox, int json_port, int dont_show, int ext_output, int letter_box_in, int time_limit_sec, char *http_post_host, int benchmark, int benchmark_layers); -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/Detector/darknet/src/detection_layer.c b/src/Detector/darknet/src/detection_layer.c deleted file mode 100644 index 3c6528a9b..000000000 --- 
a/src/Detector/darknet/src/detection_layer.c +++ /dev/null @@ -1,315 +0,0 @@ -#include "detection_layer.h" -#include "activations.h" -#include "softmax_layer.h" -#include "blas.h" -#include "box.h" -#include "dark_cuda.h" -#include "utils.h" -#include -#include -#include -#include - -detection_layer make_detection_layer(int batch, int inputs, int n, int side, int classes, int coords, int rescore) -{ - detection_layer l = { (LAYER_TYPE)0 }; - l.type = DETECTION; - - l.n = n; - l.batch = batch; - l.inputs = inputs; - l.classes = classes; - l.coords = coords; - l.rescore = rescore; - l.side = side; - l.w = side; - l.h = side; - assert(side*side*((1 + l.coords)*l.n + l.classes) == inputs); - l.cost = (float*)xcalloc(1, sizeof(float)); - l.outputs = l.inputs; - l.truths = l.side*l.side*(1+l.coords+l.classes); - l.output = (float*)xcalloc(batch * l.outputs, sizeof(float)); - l.delta = (float*)xcalloc(batch * l.outputs, sizeof(float)); - - l.forward = forward_detection_layer; - l.backward = backward_detection_layer; -#ifdef GPU - l.forward_gpu = forward_detection_layer_gpu; - l.backward_gpu = backward_detection_layer_gpu; - l.output_gpu = cuda_make_array(l.output, batch*l.outputs); - l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs); -#endif - - fprintf(stderr, "Detection Layer\n"); - srand(time(0)); - - return l; -} - -void forward_detection_layer(const detection_layer l, network_state state) -{ - int locations = l.side*l.side; - int i,j; - memcpy(l.output, state.input, l.outputs*l.batch*sizeof(float)); - //if(l.reorg) reorg(l.output, l.w*l.h, size*l.n, l.batch, 1); - int b; - if (l.softmax){ - for(b = 0; b < l.batch; ++b){ - int index = b*l.inputs; - for (i = 0; i < locations; ++i) { - int offset = i*l.classes; - softmax(l.output + index + offset, l.classes, 1, - l.output + index + offset, 1); - } - } - } - if(state.train){ - float avg_iou = 0; - float avg_cat = 0; - float avg_allcat = 0; - float avg_obj = 0; - float avg_anyobj = 0; - int count = 0; - *(l.cost) = 
0; - int size = l.inputs * l.batch; - memset(l.delta, 0, size * sizeof(float)); - for (b = 0; b < l.batch; ++b){ - int index = b*l.inputs; - for (i = 0; i < locations; ++i) { - int truth_index = (b*locations + i)*(1+l.coords+l.classes); - int is_obj = state.truth[truth_index]; - for (j = 0; j < l.n; ++j) { - int p_index = index + locations*l.classes + i*l.n + j; - l.delta[p_index] = l.noobject_scale*(0 - l.output[p_index]); - *(l.cost) += l.noobject_scale*pow(l.output[p_index], 2); - avg_anyobj += l.output[p_index]; - } - - int best_index = -1; - float best_iou = 0; - float best_rmse = 20; - - if (!is_obj){ - continue; - } - - int class_index = index + i*l.classes; - for(j = 0; j < l.classes; ++j) { - l.delta[class_index+j] = l.class_scale * (state.truth[truth_index+1+j] - l.output[class_index+j]); - *(l.cost) += l.class_scale * pow(state.truth[truth_index+1+j] - l.output[class_index+j], 2); - if(state.truth[truth_index + 1 + j]) avg_cat += l.output[class_index+j]; - avg_allcat += l.output[class_index+j]; - } - - box truth = float_to_box(state.truth + truth_index + 1 + l.classes); - truth.x /= l.side; - truth.y /= l.side; - - for(j = 0; j < l.n; ++j){ - int box_index = index + locations*(l.classes + l.n) + (i*l.n + j) * l.coords; - box out = float_to_box(l.output + box_index); - out.x /= l.side; - out.y /= l.side; - - if (l.sqrt){ - out.w = out.w*out.w; - out.h = out.h*out.h; - } - - float iou = box_iou(out, truth); - //iou = 0; - float rmse = box_rmse(out, truth); - if(best_iou > 0 || iou > 0){ - if(iou > best_iou){ - best_iou = iou; - best_index = j; - } - }else{ - if(rmse < best_rmse){ - best_rmse = rmse; - best_index = j; - } - } - } - - if(l.forced){ - if(truth.w*truth.h < .1){ - best_index = 1; - }else{ - best_index = 0; - } - } - if(l.random && *(state.net.seen) < 64000){ - best_index = rand()%l.n; - } - - int box_index = index + locations*(l.classes + l.n) + (i*l.n + best_index) * l.coords; - int tbox_index = truth_index + 1 + l.classes; - - box out = 
float_to_box(l.output + box_index); - out.x /= l.side; - out.y /= l.side; - if (l.sqrt) { - out.w = out.w*out.w; - out.h = out.h*out.h; - } - float iou = box_iou(out, truth); - - //printf("%d,", best_index); - int p_index = index + locations*l.classes + i*l.n + best_index; - *(l.cost) -= l.noobject_scale * pow(l.output[p_index], 2); - *(l.cost) += l.object_scale * pow(1-l.output[p_index], 2); - avg_obj += l.output[p_index]; - l.delta[p_index] = l.object_scale * (1.-l.output[p_index]); - - if(l.rescore){ - l.delta[p_index] = l.object_scale * (iou - l.output[p_index]); - } - - l.delta[box_index+0] = l.coord_scale*(state.truth[tbox_index + 0] - l.output[box_index + 0]); - l.delta[box_index+1] = l.coord_scale*(state.truth[tbox_index + 1] - l.output[box_index + 1]); - l.delta[box_index+2] = l.coord_scale*(state.truth[tbox_index + 2] - l.output[box_index + 2]); - l.delta[box_index+3] = l.coord_scale*(state.truth[tbox_index + 3] - l.output[box_index + 3]); - if(l.sqrt){ - l.delta[box_index+2] = l.coord_scale*(sqrt(state.truth[tbox_index + 2]) - l.output[box_index + 2]); - l.delta[box_index+3] = l.coord_scale*(sqrt(state.truth[tbox_index + 3]) - l.output[box_index + 3]); - } - - *(l.cost) += pow(1-iou, 2); - avg_iou += iou; - ++count; - } - } - - if(0){ - float* costs = (float*)xcalloc(l.batch * locations * l.n, sizeof(float)); - for (b = 0; b < l.batch; ++b) { - int index = b*l.inputs; - for (i = 0; i < locations; ++i) { - for (j = 0; j < l.n; ++j) { - int p_index = index + locations*l.classes + i*l.n + j; - costs[b*locations*l.n + i*l.n + j] = l.delta[p_index]*l.delta[p_index]; - } - } - } - int indexes[100]; - top_k(costs, l.batch*locations*l.n, 100, indexes); - float cutoff = costs[indexes[99]]; - for (b = 0; b < l.batch; ++b) { - int index = b*l.inputs; - for (i = 0; i < locations; ++i) { - for (j = 0; j < l.n; ++j) { - int p_index = index + locations*l.classes + i*l.n + j; - if (l.delta[p_index]*l.delta[p_index] < cutoff) l.delta[p_index] = 0; - } - } - } - 
free(costs); - } - - - *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); - - - printf("Detection Avg IOU: %f, Pos Cat: %f, All Cat: %f, Pos Obj: %f, Any Obj: %f, count: %d\n", avg_iou/count, avg_cat/count, avg_allcat/(count*l.classes), avg_obj/count, avg_anyobj/(l.batch*locations*l.n), count); - //if(l.reorg) reorg(l.delta, l.w*l.h, size*l.n, l.batch, 0); - } -} - -void backward_detection_layer(const detection_layer l, network_state state) -{ - axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, state.delta, 1); -} - -void get_detection_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness) -{ - int i,j,n; - float *predictions = l.output; - //int per_cell = 5*num+classes; - for (i = 0; i < l.side*l.side; ++i){ - int row = i / l.side; - int col = i % l.side; - for(n = 0; n < l.n; ++n){ - int index = i*l.n + n; - int p_index = l.side*l.side*l.classes + i*l.n + n; - float scale = predictions[p_index]; - int box_index = l.side*l.side*(l.classes + l.n) + (i*l.n + n)*4; - boxes[index].x = (predictions[box_index + 0] + col) / l.side * w; - boxes[index].y = (predictions[box_index + 1] + row) / l.side * h; - boxes[index].w = pow(predictions[box_index + 2], (l.sqrt?2:1)) * w; - boxes[index].h = pow(predictions[box_index + 3], (l.sqrt?2:1)) * h; - for(j = 0; j < l.classes; ++j){ - int class_index = i*l.classes; - float prob = scale*predictions[class_index+j]; - probs[index][j] = (prob > thresh) ? 
prob : 0; - } - if(only_objectness){ - probs[index][0] = scale; - } - } - } -} - -#ifdef GPU - -void forward_detection_layer_gpu(const detection_layer l, network_state state) -{ - if(!state.train){ - copy_ongpu(l.batch*l.inputs, state.input, 1, l.output_gpu, 1); - return; - } - - float* in_cpu = (float*)xcalloc(l.batch * l.inputs, sizeof(float)); - float *truth_cpu = 0; - if(state.truth){ - int num_truth = l.batch*l.side*l.side*(1+l.coords+l.classes); - truth_cpu = (float*)xcalloc(num_truth, sizeof(float)); - cuda_pull_array(state.truth, truth_cpu, num_truth); - } - cuda_pull_array(state.input, in_cpu, l.batch*l.inputs); - network_state cpu_state = state; - cpu_state.train = state.train; - cpu_state.truth = truth_cpu; - cpu_state.input = in_cpu; - forward_detection_layer(l, cpu_state); - cuda_push_array(l.output_gpu, l.output, l.batch*l.outputs); - cuda_push_array(l.delta_gpu, l.delta, l.batch*l.inputs); - free(cpu_state.input); - if(cpu_state.truth) free(cpu_state.truth); -} - -void backward_detection_layer_gpu(detection_layer l, network_state state) -{ - axpy_ongpu(l.batch*l.inputs, 1, l.delta_gpu, 1, state.delta, 1); - //copy_ongpu(l.batch*l.inputs, l.delta_gpu, 1, state.delta, 1); -} -#endif - -void get_detection_detections(layer l, int w, int h, float thresh, detection *dets) -{ - int i, j, n; - float *predictions = l.output; - //int per_cell = 5*num+classes; - for (i = 0; i < l.side*l.side; ++i) { - int row = i / l.side; - int col = i % l.side; - for (n = 0; n < l.n; ++n) { - int index = i*l.n + n; - int p_index = l.side*l.side*l.classes + i*l.n + n; - float scale = predictions[p_index]; - int box_index = l.side*l.side*(l.classes + l.n) + (i*l.n + n) * 4; - box b; - b.x = (predictions[box_index + 0] + col) / l.side * w; - b.y = (predictions[box_index + 1] + row) / l.side * h; - b.w = pow(predictions[box_index + 2], (l.sqrt ? 2 : 1)) * w; - b.h = pow(predictions[box_index + 3], (l.sqrt ? 
2 : 1)) * h; - dets[index].bbox = b; - dets[index].objectness = scale; - for (j = 0; j < l.classes; ++j) { - int class_index = i*l.classes; - float prob = scale*predictions[class_index + j]; - dets[index].prob[j] = (prob > thresh) ? prob : 0; - } - } - } -} diff --git a/src/Detector/darknet/src/detection_layer.h b/src/Detector/darknet/src/detection_layer.h deleted file mode 100644 index f97bc39a8..000000000 --- a/src/Detector/darknet/src/detection_layer.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef DETECTION_LAYER_H -#define DETECTION_LAYER_H - -#include "layer.h" -#include "network.h" - -typedef layer detection_layer; - -#ifdef __cplusplus -extern "C" { -#endif -detection_layer make_detection_layer(int batch, int inputs, int n, int size, int classes, int coords, int rescore); -void forward_detection_layer(const detection_layer l, network_state state); -void backward_detection_layer(const detection_layer l, network_state state); -void get_detection_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness); -void get_detection_detections(layer l, int w, int h, float thresh, detection *dets); - -#ifdef GPU -void forward_detection_layer_gpu(const detection_layer l, network_state state); -void backward_detection_layer_gpu(detection_layer l, network_state state); -#endif - -#ifdef __cplusplus -} -#endif -#endif diff --git a/src/Detector/darknet/src/detector.c b/src/Detector/darknet/src/detector.c deleted file mode 100644 index d9aeed3b9..000000000 --- a/src/Detector/darknet/src/detector.c +++ /dev/null @@ -1,2040 +0,0 @@ -#include -#include "darknet.h" -#include "network.h" -#include "region_layer.h" -#include "cost_layer.h" -#include "utils.h" -#include "parser.h" -#include "box.h" -#include "demo.h" -#include "option_list.h" - -#ifndef __COMPAR_FN_T -#define __COMPAR_FN_T -typedef int (*__compar_fn_t)(const void*, const void*); -#ifdef __USE_GNU -typedef __compar_fn_t comparison_fn_t; -#endif -#endif - -#include "http_stream.h" - -int 
check_mistakes = 0; - -static int coco_ids[] = { 1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90 }; - -void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int dont_show, int calc_map, int mjpeg_port, int show_imgs, int benchmark_layers, char* chart_path) -{ - list *options = read_data_cfg(datacfg); - char *train_images = option_find_str(options, "train", "data/train.txt"); - char *valid_images = option_find_str(options, "valid", train_images); - char *backup_directory = option_find_str(options, "backup", "/backup/"); - - network net_map; - if (calc_map) { - FILE* valid_file = fopen(valid_images, "r"); - if (!valid_file) { - printf("\n Error: There is no %s file for mAP calculation!\n Don't use -map flag.\n Or set valid=%s in your %s file. \n", valid_images, train_images, datacfg); - getchar(); - exit(-1); - } - else fclose(valid_file); - - cuda_set_device(gpus[0]); - printf(" Prepare additional network for mAP calculation...\n"); - net_map = parse_network_cfg_custom(cfgfile, 1, 1); - net_map.benchmark_layers = benchmark_layers; - const int net_classes = net_map.layers[net_map.n - 1].classes; - - int k; // free memory unnecessary arrays - for (k = 0; k < net_map.n - 1; ++k) free_layer_custom(net_map.layers[k], 1); - - char *name_list = option_find_str(options, "names", "data/names.list"); - int names_size = 0; - char **names = get_labels_custom(name_list, &names_size); - if (net_classes != names_size) { - printf("\n Error: in the file %s number of names %d that isn't equal to classes=%d in the file %s \n", - name_list, names_size, net_classes, cfgfile); - if (net_classes > names_size) getchar(); - } - free_ptrs((void**)names, net_map.layers[net_map.n - 1].classes); - } - - srand(time(0)); - char *base = basecfg(cfgfile); - printf("%s\n", 
base); - float avg_loss = -1; - float avg_contrastive_acc = 0; - network* nets = (network*)xcalloc(ngpus, sizeof(network)); - - srand(time(0)); - int seed = rand(); - int k; - for (k = 0; k < ngpus; ++k) { - srand(seed); -#ifdef GPU - cuda_set_device(gpus[k]); -#endif - nets[k] = parse_network_cfg(cfgfile); - nets[k].benchmark_layers = benchmark_layers; - if (weightfile) { - load_weights(&nets[k], weightfile); - } - if (clear) { - *nets[k].seen = 0; - *nets[k].cur_iteration = 0; - } - nets[k].learning_rate *= ngpus; - } - srand(time(0)); - network net = nets[0]; - - const int actual_batch_size = net.batch * net.subdivisions; - if (actual_batch_size == 1) { - printf("\n Error: You set incorrect value batch=1 for Training! You should set batch=64 subdivision=64 \n"); - getchar(); - } - else if (actual_batch_size < 8) { - printf("\n Warning: You set batch=%d lower than 64! It is recommended to set batch=64 subdivision=64 \n", actual_batch_size); - } - - int imgs = net.batch * net.subdivisions * ngpus; - printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); - data train, buffer; - - layer l = net.layers[net.n - 1]; - for (k = 0; k < net.n; ++k) { - layer lk = net.layers[k]; - if (lk.type == YOLO || lk.type == GAUSSIAN_YOLO || lk.type == REGION) { - l = lk; - printf(" Detection layer: %d - type = %d \n", k, l.type); - } - } - - int classes = l.classes; - - list *plist = get_paths(train_images); - int train_images_num = plist->size; - char **paths = (char **)list_to_array(plist); - - const int init_w = net.w; - const int init_h = net.h; - const int init_b = net.batch; - int iter_save, iter_save_last, iter_map; - iter_save = get_current_iteration(net); - iter_save_last = get_current_iteration(net); - iter_map = get_current_iteration(net); - float mean_average_precision = -1; - float best_map = mean_average_precision; - - load_args args = { 0 }; - args.w = net.w; - args.h = net.h; - args.c = net.c; - args.paths = paths; - 
args.n = imgs; - args.m = plist->size; - args.classes = classes; - args.flip = net.flip; - args.jitter = l.jitter; - args.resize = l.resize; - args.num_boxes = l.max_boxes; - args.truth_size = l.truth_size; - net.num_boxes = args.num_boxes; - net.train_images_num = train_images_num; - args.d = &buffer; - args.type = DETECTION_DATA; - args.threads = 64; // 16 or 64 - - args.angle = net.angle; - args.gaussian_noise = net.gaussian_noise; - args.blur = net.blur; - args.mixup = net.mixup; - args.exposure = net.exposure; - args.saturation = net.saturation; - args.hue = net.hue; - args.letter_box = net.letter_box; - args.mosaic_bound = net.mosaic_bound; - args.contrastive = net.contrastive; - args.contrastive_jit_flip = net.contrastive_jit_flip; - args.contrastive_color = net.contrastive_color; - if (dont_show && show_imgs) show_imgs = 2; - args.show_imgs = show_imgs; - -#ifdef OPENCV - //int num_threads = get_num_threads(); - //if(num_threads > 2) args.threads = get_num_threads() - 2; - args.threads = 6 * ngpus; // 3 for - Amazon EC2 Tesla V100: p3.2xlarge (8 logical cores) - p3.16xlarge - //args.threads = 12 * ngpus; // Ryzen 7 2700X (16 logical cores) - mat_cv* img = NULL; - float max_img_loss = net.max_chart_loss; - int number_of_lines = 100; - int img_size = 1000; - char windows_name[100]; - sprintf(windows_name, "chart_%s.png", base); - img = draw_train_chart(windows_name, max_img_loss, net.max_batches, number_of_lines, img_size, dont_show, chart_path); -#endif //OPENCV - if (net.contrastive && args.threads > net.batch/2) args.threads = net.batch / 2; - if (net.track) { - args.track = net.track; - args.augment_speed = net.augment_speed; - if (net.sequential_subdivisions) args.threads = net.sequential_subdivisions * ngpus; - else args.threads = net.subdivisions * ngpus; - args.mini_batch = net.batch / net.time_steps; - printf("\n Tracking! 
batch = %d, subdiv = %d, time_steps = %d, mini_batch = %d \n", net.batch, net.subdivisions, net.time_steps, args.mini_batch); - } - //printf(" imgs = %d \n", imgs); - - pthread_t load_thread = load_data(args); - - int count = 0; - double time_remaining, avg_time = -1, alpha_time = 0.01; - - //while(i*imgs < N*120){ - while (get_current_iteration(net) < net.max_batches) { - if (l.random && count++ % 10 == 0) { - float rand_coef = 1.4; - if (l.random != 1.0) rand_coef = l.random; - printf("Resizing, random_coef = %.2f \n", rand_coef); - float random_val = rand_scale(rand_coef); // *x or /x - int dim_w = roundl(random_val*init_w / net.resize_step + 1) * net.resize_step; - int dim_h = roundl(random_val*init_h / net.resize_step + 1) * net.resize_step; - if (random_val < 1 && (dim_w > init_w || dim_h > init_h)) dim_w = init_w, dim_h = init_h; - - int max_dim_w = roundl(rand_coef*init_w / net.resize_step + 1) * net.resize_step; - int max_dim_h = roundl(rand_coef*init_h / net.resize_step + 1) * net.resize_step; - - // at the beginning (check if enough memory) and at the end (calc rolling mean/variance) - if (avg_loss < 0 || get_current_iteration(net) > net.max_batches - 100) { - dim_w = max_dim_w; - dim_h = max_dim_h; - } - - if (dim_w < net.resize_step) dim_w = net.resize_step; - if (dim_h < net.resize_step) dim_h = net.resize_step; - int dim_b = (init_b * max_dim_w * max_dim_h) / (dim_w * dim_h); - int new_dim_b = (int)(dim_b * 0.8); - if (new_dim_b > init_b) dim_b = new_dim_b; - - args.w = dim_w; - args.h = dim_h; - - int k; - if (net.dynamic_minibatch) { - for (k = 0; k < ngpus; ++k) { - (*nets[k].seen) = init_b * net.subdivisions * get_current_iteration(net); // remove this line, when you will save to weights-file both: seen & cur_iteration - nets[k].batch = dim_b; - int j; - for (j = 0; j < nets[k].n; ++j) - nets[k].layers[j].batch = dim_b; - } - net.batch = dim_b; - imgs = net.batch * net.subdivisions * ngpus; - args.n = imgs; - printf("\n %d x %d (batch = %d) \n", 
dim_w, dim_h, net.batch); - } - else - printf("\n %d x %d \n", dim_w, dim_h); - - pthread_join(load_thread, 0); - train = buffer; - free_data(train); - load_thread = load_data(args); - - for (k = 0; k < ngpus; ++k) { - resize_network(nets + k, dim_w, dim_h); - } - net = nets[0]; - } - double time = what_time_is_it_now(); - pthread_join(load_thread, 0); - train = buffer; - if (net.track) { - net.sequential_subdivisions = get_current_seq_subdivisions(net); - args.threads = net.sequential_subdivisions * ngpus; - printf(" sequential_subdivisions = %d, sequence = %d \n", net.sequential_subdivisions, get_sequence_value(net)); - } - load_thread = load_data(args); - //wait_key_cv(500); - - /* - int k; - for(k = 0; k < l.max_boxes; ++k){ - box b = float_to_box(train.y.vals[10] + 1 + k*5); - if(!b.x) break; - printf("loaded: %f %f %f %f\n", b.x, b.y, b.w, b.h); - } - image im = float_to_image(448, 448, 3, train.X.vals[10]); - int k; - for(k = 0; k < l.max_boxes; ++k){ - box b = float_to_box(train.y.vals[10] + 1 + k*5); - printf("%d %d %d %d\n", truth.x, truth.y, truth.w, truth.h); - draw_bbox(im, b, 8, 1,0,0); - } - save_image(im, "truth11"); - */ - - const double load_time = (what_time_is_it_now() - time); - printf("Loaded: %lf seconds", load_time); - if (load_time > 0.1 && avg_loss > 0) printf(" - performance bottleneck on CPU or Disk HDD/SSD"); - printf("\n"); - - time = what_time_is_it_now(); - float loss = 0; -#ifdef GPU - if (ngpus == 1) { - int wait_key = (dont_show) ? 
0 : 1; - loss = train_network_waitkey(net, train, wait_key); - } - else { - loss = train_networks(nets, ngpus, train, 4); - } -#else - loss = train_network(net, train); -#endif - if (avg_loss < 0 || avg_loss != avg_loss) avg_loss = loss; // if(-inf or nan) - avg_loss = avg_loss*.9 + loss*.1; - - const int iteration = get_current_iteration(net); - //i = get_current_batch(net); - - int calc_map_for_each = 4 * train_images_num / (net.batch * net.subdivisions); // calculate mAP for each 4 Epochs - calc_map_for_each = fmax(calc_map_for_each, 100); - int next_map_calc = iter_map + calc_map_for_each; - next_map_calc = fmax(next_map_calc, net.burn_in); - //next_map_calc = fmax(next_map_calc, 400); - if (calc_map) { - printf("\n (next mAP calculation at %d iterations) ", next_map_calc); - if (mean_average_precision > 0) printf("\n Last accuracy mAP@0.5 = %2.2f %%, best = %2.2f %% ", mean_average_precision * 100, best_map * 100); - } - - if (net.cudnn_half) { - if (iteration < net.burn_in * 3) fprintf(stderr, "\n Tensor Cores are disabled until the first %d iterations are reached.\n", 3 * net.burn_in); - else fprintf(stderr, "\n Tensor Cores are used.\n"); - fflush(stderr); - } - printf("\n %d: %f, %f avg loss, %f rate, %lf seconds, %d images, %f hours left\n", iteration, loss, avg_loss, get_current_rate(net), (what_time_is_it_now() - time), iteration*imgs, avg_time); - fflush(stdout); - - int draw_precision = 0; - if (calc_map && (iteration >= next_map_calc || iteration == net.max_batches)) { - if (l.random) { - printf("Resizing to initial size: %d x %d ", init_w, init_h); - args.w = init_w; - args.h = init_h; - int k; - if (net.dynamic_minibatch) { - for (k = 0; k < ngpus; ++k) { - for (k = 0; k < ngpus; ++k) { - nets[k].batch = init_b; - int j; - for (j = 0; j < nets[k].n; ++j) - nets[k].layers[j].batch = init_b; - } - } - net.batch = init_b; - imgs = init_b * net.subdivisions * ngpus; - args.n = imgs; - printf("\n %d x %d (batch = %d) \n", init_w, init_h, init_b); - } - 
pthread_join(load_thread, 0); - free_data(train); - train = buffer; - load_thread = load_data(args); - for (k = 0; k < ngpus; ++k) { - resize_network(nets + k, init_w, init_h); - } - net = nets[0]; - } - - copy_weights_net(net, &net_map); - - // combine Training and Validation networks - //network net_combined = combine_train_valid_networks(net, net_map); - - iter_map = iteration; - mean_average_precision = validate_detector_map(datacfg, cfgfile, weightfile, 0.25, 0.5, 0, net.letter_box, &net_map);// &net_combined); - printf("\n mean_average_precision (mAP@0.5) = %f \n", mean_average_precision); - if (mean_average_precision > best_map) { - best_map = mean_average_precision; - printf("New best mAP!\n"); - char buff[256]; - sprintf(buff, "%s/%s_best.weights", backup_directory, base); - save_weights(net, buff); - } - - draw_precision = 1; - } - time_remaining = ((net.max_batches - iteration) / ngpus)*(what_time_is_it_now() - time + load_time) / 60 / 60; - // set initial value, even if resume training from 10000 iteration - if (avg_time < 0) avg_time = time_remaining; - else avg_time = alpha_time * time_remaining + (1 - alpha_time) * avg_time; -#ifdef OPENCV - if (net.contrastive) { - float cur_con_acc = -1; - for (k = 0; k < net.n; ++k) - if (net.layers[k].type == CONTRASTIVE) cur_con_acc = *net.layers[k].loss; - if (cur_con_acc >= 0) avg_contrastive_acc = avg_contrastive_acc*0.99 + cur_con_acc * 0.01; - printf(" avg_contrastive_acc = %f \n", avg_contrastive_acc); - } - draw_train_loss(windows_name, img, img_size, avg_loss, max_img_loss, iteration, net.max_batches, mean_average_precision, draw_precision, "mAP%", avg_contrastive_acc / 100, dont_show, mjpeg_port, avg_time); -#endif // OPENCV - - //if (i % 1000 == 0 || (i < 1000 && i % 100 == 0)) { - //if (i % 100 == 0) { - if (iteration >= (iter_save + 10000) || iteration % 10000 == 0) { - iter_save = iteration; -#ifdef GPU - if (ngpus != 1) sync_nets(nets, ngpus, 0); -#endif - char buff[256]; - sprintf(buff, 
"%s/%s_%d.weights", backup_directory, base, iteration); - save_weights(net, buff); - } - - if (iteration >= (iter_save_last + 100) || (iteration % 100 == 0 && iteration > 1)) { - iter_save_last = iteration; -#ifdef GPU - if (ngpus != 1) sync_nets(nets, ngpus, 0); -#endif - char buff[256]; - sprintf(buff, "%s/%s_last.weights", backup_directory, base); - save_weights(net, buff); - - if (net.ema_alpha && is_ema_initialized(net)) { - sprintf(buff, "%s/%s_ema.weights", backup_directory, base); - save_weights_upto(net, buff, net.n, 1); - printf(" EMA weights are saved to the file: %s \n", buff); - } - } - free_data(train); - } -#ifdef GPU - if (ngpus != 1) sync_nets(nets, ngpus, 0); -#endif - char buff[256]; - sprintf(buff, "%s/%s_final.weights", backup_directory, base); - save_weights(net, buff); - printf("If you want to train from the beginning, then use flag in the end of training command: -clear \n"); - -#ifdef OPENCV - release_mat(&img); - destroy_all_windows_cv(); -#endif - - // free memory - pthread_join(load_thread, 0); - free_data(buffer); - - free_load_threads(&args); - - free(base); - free(paths); - free_list_contents(plist); - free_list(plist); - - free_list_contents_kvp(options); - free_list(options); - - for (k = 0; k < ngpus; ++k) free_network(nets[k]); - free(nets); - //free_network(net); - - if (calc_map) { - net_map.n = 0; - free_network(net_map); - } -} - - -static int get_coco_image_id(char *filename) -{ - char *p = strrchr(filename, '/'); - char *c = strrchr(filename, '_'); - if (c) p = c; - return atoi(p + 1); -} - -static void print_cocos(FILE *fp, char *image_path, detection *dets, int num_boxes, int classes, int w, int h) -{ - int i, j; - //int image_id = get_coco_image_id(image_path); - char *p = basecfg(image_path); - int image_id = atoi(p); - for (i = 0; i < num_boxes; ++i) { - float xmin = dets[i].bbox.x - dets[i].bbox.w / 2.; - float xmax = dets[i].bbox.x + dets[i].bbox.w / 2.; - float ymin = dets[i].bbox.y - dets[i].bbox.h / 2.; - float 
ymax = dets[i].bbox.y + dets[i].bbox.h / 2.; - - if (xmin < 0) xmin = 0; - if (ymin < 0) ymin = 0; - if (xmax > w) xmax = w; - if (ymax > h) ymax = h; - - float bx = xmin; - float by = ymin; - float bw = xmax - xmin; - float bh = ymax - ymin; - - for (j = 0; j < classes; ++j) { - if (dets[i].prob[j] > 0) { - char buff[1024]; - sprintf(buff, "{\"image_id\":%d, \"category_id\":%d, \"bbox\":[%f, %f, %f, %f], \"score\":%f},\n", image_id, coco_ids[j], bx, by, bw, bh, dets[i].prob[j]); - fprintf(fp, buff); - //printf("%s", buff); - } - } - } -} - -void print_detector_detections(FILE **fps, char *id, detection *dets, int total, int classes, int w, int h) -{ - int i, j; - for (i = 0; i < total; ++i) { - float xmin = dets[i].bbox.x - dets[i].bbox.w / 2. + 1; - float xmax = dets[i].bbox.x + dets[i].bbox.w / 2. + 1; - float ymin = dets[i].bbox.y - dets[i].bbox.h / 2. + 1; - float ymax = dets[i].bbox.y + dets[i].bbox.h / 2. + 1; - - if (xmin < 1) xmin = 1; - if (ymin < 1) ymin = 1; - if (xmax > w) xmax = w; - if (ymax > h) ymax = h; - - for (j = 0; j < classes; ++j) { - if (dets[i].prob[j]) fprintf(fps[j], "%s %f %f %f %f %f\n", id, dets[i].prob[j], - xmin, ymin, xmax, ymax); - } - } -} - -void print_imagenet_detections(FILE *fp, int id, detection *dets, int total, int classes, int w, int h) -{ - int i, j; - for (i = 0; i < total; ++i) { - float xmin = dets[i].bbox.x - dets[i].bbox.w / 2.; - float xmax = dets[i].bbox.x + dets[i].bbox.w / 2.; - float ymin = dets[i].bbox.y - dets[i].bbox.h / 2.; - float ymax = dets[i].bbox.y + dets[i].bbox.h / 2.; - - if (xmin < 0) xmin = 0; - if (ymin < 0) ymin = 0; - if (xmax > w) xmax = w; - if (ymax > h) ymax = h; - - for (j = 0; j < classes; ++j) { - int myclass = j; - if (dets[i].prob[myclass] > 0) fprintf(fp, "%d %d %f %f %f %f %f\n", id, j + 1, dets[i].prob[myclass], - xmin, ymin, xmax, ymax); - } - } -} - -static void print_kitti_detections(FILE **fps, char *id, detection *dets, int total, int classes, int w, int h, char *outfile, char 
*prefix) -{ - char *kitti_ids[] = { "car", "pedestrian", "cyclist" }; - FILE *fpd = 0; - char buffd[1024]; - snprintf(buffd, 1024, "%s/%s/data/%s.txt", prefix, outfile, id); - - fpd = fopen(buffd, "w"); - int i, j; - for (i = 0; i < total; ++i) - { - float xmin = dets[i].bbox.x - dets[i].bbox.w / 2.; - float xmax = dets[i].bbox.x + dets[i].bbox.w / 2.; - float ymin = dets[i].bbox.y - dets[i].bbox.h / 2.; - float ymax = dets[i].bbox.y + dets[i].bbox.h / 2.; - - if (xmin < 0) xmin = 0; - if (ymin < 0) ymin = 0; - if (xmax > w) xmax = w; - if (ymax > h) ymax = h; - - for (j = 0; j < classes; ++j) - { - //if (dets[i].prob[j]) fprintf(fpd, "%s 0 0 0 %f %f %f %f -1 -1 -1 -1 0 0 0 %f\n", kitti_ids[j], xmin, ymin, xmax, ymax, dets[i].prob[j]); - if (dets[i].prob[j]) fprintf(fpd, "%s -1 -1 -10 %f %f %f %f -1 -1 -1 -1000 -1000 -1000 -10 %f\n", kitti_ids[j], xmin, ymin, xmax, ymax, dets[i].prob[j]); - } - } - fclose(fpd); -} - -static void eliminate_bdd(char *buf, char *a) -{ - int n = 0; - int i, k; - for (i = 0; buf[i] != '\0'; i++) - { - if (buf[i] == a[n]) - { - k = i; - while (buf[i] == a[n]) - { - if (a[++n] == '\0') - { - for (k; buf[k + n] != '\0'; k++) - { - buf[k] = buf[k + n]; - } - buf[k] = '\0'; - break; - } - i++; - } - n = 0; i--; - } - } -} - -static void get_bdd_image_id(char *filename) -{ - char *p = strrchr(filename, '/'); - eliminate_bdd(p, ".jpg"); - eliminate_bdd(p, "/"); - strcpy(filename, p); -} - -static void print_bdd_detections(FILE *fp, char *image_path, detection *dets, int num_boxes, int classes, int w, int h) -{ - char *bdd_ids[] = { "bike" , "bus" , "car" , "motor" ,"person", "rider", "traffic light", "traffic sign", "train", "truck" }; - get_bdd_image_id(image_path); - int i, j; - - for (i = 0; i < num_boxes; ++i) - { - float xmin = dets[i].bbox.x - dets[i].bbox.w / 2.; - float xmax = dets[i].bbox.x + dets[i].bbox.w / 2.; - float ymin = dets[i].bbox.y - dets[i].bbox.h / 2.; - float ymax = dets[i].bbox.y + dets[i].bbox.h / 2.; - - if (xmin < 0) 
xmin = 0; - if (ymin < 0) ymin = 0; - if (xmax > w) xmax = w; - if (ymax > h) ymax = h; - - float bx1 = xmin; - float by1 = ymin; - float bx2 = xmax; - float by2 = ymax; - - for (j = 0; j < classes; ++j) - { - if (dets[i].prob[j]) - { - fprintf(fp, "\t{\n\t\t\"name\":\"%s\",\n\t\t\"category\":\"%s\",\n\t\t\"bbox\":[%f, %f, %f, %f],\n\t\t\"score\":%f\n\t},\n", image_path, bdd_ids[j], bx1, by1, bx2, by2, dets[i].prob[j]); - } - } - } -} - -void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *outfile) -{ - int j; - list *options = read_data_cfg(datacfg); - char *valid_images = option_find_str(options, "valid", "data/train.list"); - char *name_list = option_find_str(options, "names", "data/names.list"); - char *prefix = option_find_str(options, "results", "results"); - char **names = get_labels(name_list); - char *mapf = option_find_str(options, "map", 0); - int *map = 0; - if (mapf) map = read_map(mapf); - - network net = parse_network_cfg_custom(cfgfile, 1, 1); // set batch=1 - if (weightfile) { - load_weights(&net, weightfile); - } - //set_batch_network(&net, 1); - fuse_conv_batchnorm(net); - calculate_binary_weights(net); - fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); - srand(time(0)); - - list *plist = get_paths(valid_images); - char **paths = (char **)list_to_array(plist); - - layer l = net.layers[net.n - 1]; - int k; - for (k = 0; k < net.n; ++k) { - layer lk = net.layers[k]; - if (lk.type == YOLO || lk.type == GAUSSIAN_YOLO || lk.type == REGION) { - l = lk; - printf(" Detection layer: %d - type = %d \n", k, l.type); - } - } - int classes = l.classes; - - char buff[1024]; - char *type = option_find_str(options, "eval", "voc"); - FILE *fp = 0; - FILE **fps = 0; - int coco = 0; - int imagenet = 0; - int bdd = 0; - int kitti = 0; - - if (0 == strcmp(type, "coco")) { - if (!outfile) outfile = "coco_results"; - snprintf(buff, 1024, "%s/%s.json", prefix, outfile); - fp = 
fopen(buff, "w"); - fprintf(fp, "[\n"); - coco = 1; - } - else if (0 == strcmp(type, "bdd")) { - if (!outfile) outfile = "bdd_results"; - snprintf(buff, 1024, "%s/%s.json", prefix, outfile); - fp = fopen(buff, "w"); - fprintf(fp, "[\n"); - bdd = 1; - } - else if (0 == strcmp(type, "kitti")) { - char buff2[1024]; - if (!outfile) outfile = "kitti_results"; - printf("%s\n", outfile); - snprintf(buff, 1024, "%s/%s", prefix, outfile); - int mkd = make_directory(buff, 0777); - snprintf(buff2, 1024, "%s/%s/data", prefix, outfile); - int mkd2 = make_directory(buff2, 0777); - kitti = 1; - } - else if (0 == strcmp(type, "imagenet")) { - if (!outfile) outfile = "imagenet-detection"; - snprintf(buff, 1024, "%s/%s.txt", prefix, outfile); - fp = fopen(buff, "w"); - imagenet = 1; - classes = 200; - } - else { - if (!outfile) outfile = "comp4_det_test_"; - fps = (FILE**) xcalloc(classes, sizeof(FILE *)); - for (j = 0; j < classes; ++j) { - snprintf(buff, 1024, "%s/%s%s.txt", prefix, outfile, names[j]); - fps[j] = fopen(buff, "w"); - } - } - - - int m = plist->size; - int i = 0; - int t; - - float thresh = .001; - float nms = .6; - - int nthreads = 4; - if (m < 4) nthreads = m; - image* val = (image*)xcalloc(nthreads, sizeof(image)); - image* val_resized = (image*)xcalloc(nthreads, sizeof(image)); - image* buf = (image*)xcalloc(nthreads, sizeof(image)); - image* buf_resized = (image*)xcalloc(nthreads, sizeof(image)); - pthread_t* thr = (pthread_t*)xcalloc(nthreads, sizeof(pthread_t)); - - load_args args = { 0 }; - args.w = net.w; - args.h = net.h; - args.c = net.c; - args.type = IMAGE_DATA; - const int letter_box = net.letter_box; - if (letter_box) args.type = LETTERBOX_DATA; - - for (t = 0; t < nthreads; ++t) { - args.path = paths[i + t]; - args.im = &buf[t]; - args.resized = &buf_resized[t]; - thr[t] = load_data_in_thread(args); - } - time_t start = time(0); - for (i = nthreads; i < m + nthreads; i += nthreads) { - fprintf(stderr, "%d\n", i); - for (t = 0; t < nthreads && i + t - 
nthreads < m; ++t) { - pthread_join(thr[t], 0); - val[t] = buf[t]; - val_resized[t] = buf_resized[t]; - } - for (t = 0; t < nthreads && i + t < m; ++t) { - args.path = paths[i + t]; - args.im = &buf[t]; - args.resized = &buf_resized[t]; - thr[t] = load_data_in_thread(args); - } - for (t = 0; t < nthreads && i + t - nthreads < m; ++t) { - char *path = paths[i + t - nthreads]; - char *id = basecfg(path); - float *X = val_resized[t].data; - network_predict(net, X); - int w = val[t].w; - int h = val[t].h; - int nboxes = 0; - detection *dets = get_network_boxes(&net, w, h, thresh, .5, map, 0, &nboxes, letter_box); - if (nms) { - if (l.nms_kind == DEFAULT_NMS) do_nms_sort(dets, nboxes, l.classes, nms); - else diounms_sort(dets, nboxes, l.classes, nms, l.nms_kind, l.beta_nms); - } - - if (coco) { - print_cocos(fp, path, dets, nboxes, classes, w, h); - } - else if (imagenet) { - print_imagenet_detections(fp, i + t - nthreads + 1, dets, nboxes, classes, w, h); - } - else if (bdd) { - print_bdd_detections(fp, path, dets, nboxes, classes, w, h); - } - else if (kitti) { - print_kitti_detections(fps, id, dets, nboxes, classes, w, h, outfile, prefix); - } - else { - print_detector_detections(fps, id, dets, nboxes, classes, w, h); - } - - free_detections(dets, nboxes); - free(id); - free_image(val[t]); - free_image(val_resized[t]); - } - } - if (fps) { - for (j = 0; j < classes; ++j) { - fclose(fps[j]); - } - free(fps); - } - if (coco) { -#ifdef WIN32 - fseek(fp, -3, SEEK_CUR); -#else - fseek(fp, -2, SEEK_CUR); -#endif - fprintf(fp, "\n]\n"); - } - - if (bdd) { -#ifdef WIN32 - fseek(fp, -3, SEEK_CUR); -#else - fseek(fp, -2, SEEK_CUR); -#endif - fprintf(fp, "\n]\n"); - fclose(fp); - } - - if (fp) fclose(fp); - - if (val) free(val); - if (val_resized) free(val_resized); - if (thr) free(thr); - if (buf) free(buf); - if (buf_resized) free(buf_resized); - - fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)time(0) - start); -} - -void validate_detector_recall(char 
*datacfg, char *cfgfile, char *weightfile) -{ - network net = parse_network_cfg_custom(cfgfile, 1, 1); // set batch=1 - if (weightfile) { - load_weights(&net, weightfile); - } - //set_batch_network(&net, 1); - fuse_conv_batchnorm(net); - srand(time(0)); - - //list *plist = get_paths("data/coco_val_5k.list"); - list *options = read_data_cfg(datacfg); - char *valid_images = option_find_str(options, "valid", "data/train.txt"); - list *plist = get_paths(valid_images); - char **paths = (char **)list_to_array(plist); - - //layer l = net.layers[net.n - 1]; - - int j, k; - - int m = plist->size; - int i = 0; - - float thresh = .001; - float iou_thresh = .5; - float nms = .4; - - int total = 0; - int correct = 0; - int proposals = 0; - float avg_iou = 0; - - for (i = 0; i < m; ++i) { - char *path = paths[i]; - image orig = load_image(path, 0, 0, net.c); - image sized = resize_image(orig, net.w, net.h); - char *id = basecfg(path); - network_predict(net, sized.data); - int nboxes = 0; - int letterbox = 0; - detection *dets = get_network_boxes(&net, sized.w, sized.h, thresh, .5, 0, 1, &nboxes, letterbox); - if (nms) do_nms_obj(dets, nboxes, 1, nms); - - char labelpath[4096]; - replace_image_to_label(path, labelpath); - - int num_labels = 0; - box_label *truth = read_boxes(labelpath, &num_labels); - for (k = 0; k < nboxes; ++k) { - if (dets[k].objectness > thresh) { - ++proposals; - } - } - for (j = 0; j < num_labels; ++j) { - ++total; - box t = { truth[j].x, truth[j].y, truth[j].w, truth[j].h }; - float best_iou = 0; - for (k = 0; k < nboxes; ++k) { - float iou = box_iou(dets[k].bbox, t); - if (dets[k].objectness > thresh && iou > best_iou) { - best_iou = iou; - } - } - avg_iou += best_iou; - if (best_iou > iou_thresh) { - ++correct; - } - } - //fprintf(stderr, " %s - %s - ", paths[i], labelpath); - fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals / (i + 1), avg_iou * 100 / total, 100.*correct / total); - free(id); 
- free_image(orig); - free_image(sized); - } -} - -typedef struct { - box b; - float p; - int class_id; - int image_index; - int truth_flag; - int unique_truth_index; -} box_prob; - -int detections_comparator(const void *pa, const void *pb) -{ - box_prob a = *(const box_prob *)pa; - box_prob b = *(const box_prob *)pb; - float diff = a.p - b.p; - if (diff < 0) return 1; - else if (diff > 0) return -1; - return 0; -} - -float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, float thresh_calc_avg_iou, const float iou_thresh, const int map_points, int letter_box, network *existing_net) -{ - int j; - list *options = read_data_cfg(datacfg); - char *valid_images = option_find_str(options, "valid", "data/train.txt"); - char *difficult_valid_images = option_find_str(options, "difficult", NULL); - char *name_list = option_find_str(options, "names", "data/names.list"); - int names_size = 0; - char **names = get_labels_custom(name_list, &names_size); //get_labels(name_list); - //char *mapf = option_find_str(options, "map", 0); - //int *map = 0; - //if (mapf) map = read_map(mapf); - FILE* reinforcement_fd = NULL; - - network net; - //int initial_batch; - if (existing_net) { - char *train_images = option_find_str(options, "train", "data/train.txt"); - valid_images = option_find_str(options, "valid", train_images); - net = *existing_net; - remember_network_recurrent_state(*existing_net); - free_network_recurrent_state(*existing_net); - } - else { - net = parse_network_cfg_custom(cfgfile, 1, 1); // set batch=1 - if (weightfile) { - load_weights(&net, weightfile); - } - //set_batch_network(&net, 1); - fuse_conv_batchnorm(net); - calculate_binary_weights(net); - } - if (net.layers[net.n - 1].classes != names_size) { - printf("\n Error: in the file %s number of names %d that isn't equal to classes=%d in the file %s \n", - name_list, names_size, net.layers[net.n - 1].classes, cfgfile); - getchar(); - } - srand(time(0)); - printf("\n calculation mAP (mean average 
precision)...\n"); - - list *plist = get_paths(valid_images); - char **paths = (char **)list_to_array(plist); - - char **paths_dif = NULL; - if (difficult_valid_images) { - list *plist_dif = get_paths(difficult_valid_images); - paths_dif = (char **)list_to_array(plist_dif); - } - - - layer l = net.layers[net.n - 1]; - int k; - for (k = 0; k < net.n; ++k) { - layer lk = net.layers[k]; - if (lk.type == YOLO || lk.type == GAUSSIAN_YOLO || lk.type == REGION) { - l = lk; - printf(" Detection layer: %d - type = %d \n", k, l.type); - } - } - int classes = l.classes; - - int m = plist->size; - int i = 0; - int t; - - const float thresh = .005; - const float nms = .45; - //const float iou_thresh = 0.5; - - int nthreads = 4; - if (m < 4) nthreads = m; - image* val = (image*)xcalloc(nthreads, sizeof(image)); - image* val_resized = (image*)xcalloc(nthreads, sizeof(image)); - image* buf = (image*)xcalloc(nthreads, sizeof(image)); - image* buf_resized = (image*)xcalloc(nthreads, sizeof(image)); - pthread_t* thr = (pthread_t*)xcalloc(nthreads, sizeof(pthread_t)); - - load_args args = { 0 }; - args.w = net.w; - args.h = net.h; - args.c = net.c; - letter_box = net.letter_box; - if (letter_box) args.type = LETTERBOX_DATA; - else args.type = IMAGE_DATA; - - //const float thresh_calc_avg_iou = 0.24; - float avg_iou = 0; - int tp_for_thresh = 0; - int fp_for_thresh = 0; - - box_prob* detections = (box_prob*)xcalloc(1, sizeof(box_prob)); - int detections_count = 0; - int unique_truth_count = 0; - - int* truth_classes_count = (int*)xcalloc(classes, sizeof(int)); - - // For multi-class precision and recall computation - float *avg_iou_per_class = (float*)xcalloc(classes, sizeof(float)); - int *tp_for_thresh_per_class = (int*)xcalloc(classes, sizeof(int)); - int *fp_for_thresh_per_class = (int*)xcalloc(classes, sizeof(int)); - - for (t = 0; t < nthreads; ++t) { - args.path = paths[i + t]; - args.im = &buf[t]; - args.resized = &buf_resized[t]; - thr[t] = load_data_in_thread(args); - } - 
time_t start = time(0); - for (i = nthreads; i < m + nthreads; i += nthreads) { - fprintf(stderr, "\r%d", i); - for (t = 0; t < nthreads && (i + t - nthreads) < m; ++t) { - pthread_join(thr[t], 0); - val[t] = buf[t]; - val_resized[t] = buf_resized[t]; - } - for (t = 0; t < nthreads && (i + t) < m; ++t) { - args.path = paths[i + t]; - args.im = &buf[t]; - args.resized = &buf_resized[t]; - thr[t] = load_data_in_thread(args); - } - for (t = 0; t < nthreads && i + t - nthreads < m; ++t) { - const int image_index = i + t - nthreads; - char *path = paths[image_index]; - char *id = basecfg(path); - float *X = val_resized[t].data; - network_predict(net, X); - - int nboxes = 0; - float hier_thresh = 0; - detection *dets; - if (args.type == LETTERBOX_DATA) { - dets = get_network_boxes(&net, val[t].w, val[t].h, thresh, hier_thresh, 0, 1, &nboxes, letter_box); - } - else { - dets = get_network_boxes(&net, 1, 1, thresh, hier_thresh, 0, 0, &nboxes, letter_box); - } - //detection *dets = get_network_boxes(&net, val[t].w, val[t].h, thresh, hier_thresh, 0, 1, &nboxes, letter_box); // for letter_box=1 - if (nms) { - if (l.nms_kind == DEFAULT_NMS) do_nms_sort(dets, nboxes, l.classes, nms); - else diounms_sort(dets, nboxes, l.classes, nms, l.nms_kind, l.beta_nms); - } - - //if (l.embedding_size) set_track_id(dets, nboxes, thresh, l.sim_thresh, l.track_ciou_norm, l.track_history_size, l.dets_for_track, l.dets_for_show); - - char labelpath[4096]; - replace_image_to_label(path, labelpath); - int num_labels = 0; - box_label *truth = read_boxes(labelpath, &num_labels); - int j; - for (j = 0; j < num_labels; ++j) { - truth_classes_count[truth[j].id]++; - } - - // difficult - box_label *truth_dif = NULL; - int num_labels_dif = 0; - if (paths_dif) - { - char *path_dif = paths_dif[image_index]; - - char labelpath_dif[4096]; - replace_image_to_label(path_dif, labelpath_dif); - - truth_dif = read_boxes(labelpath_dif, &num_labels_dif); - } - - const int checkpoint_detections_count = 
detections_count; - - int i; - for (i = 0; i < nboxes; ++i) { - - int class_id; - for (class_id = 0; class_id < classes; ++class_id) { - float prob = dets[i].prob[class_id]; - if (prob > 0) { - detections_count++; - detections = (box_prob*)xrealloc(detections, detections_count * sizeof(box_prob)); - detections[detections_count - 1].b = dets[i].bbox; - detections[detections_count - 1].p = prob; - detections[detections_count - 1].image_index = image_index; - detections[detections_count - 1].class_id = class_id; - detections[detections_count - 1].truth_flag = 0; - detections[detections_count - 1].unique_truth_index = -1; - - int truth_index = -1; - float max_iou = 0; - for (j = 0; j < num_labels; ++j) - { - box t = { truth[j].x, truth[j].y, truth[j].w, truth[j].h }; - //printf(" IoU = %f, prob = %f, class_id = %d, truth[j].id = %d \n", - // box_iou(dets[i].bbox, t), prob, class_id, truth[j].id); - float current_iou = box_iou(dets[i].bbox, t); - if (current_iou > iou_thresh && class_id == truth[j].id) { - if (current_iou > max_iou) { - max_iou = current_iou; - truth_index = unique_truth_count + j; - } - } - } - - // best IoU - if (truth_index > -1) { - detections[detections_count - 1].truth_flag = 1; - detections[detections_count - 1].unique_truth_index = truth_index; - } - else { - // if object is difficult then remove detection - for (j = 0; j < num_labels_dif; ++j) { - box t = { truth_dif[j].x, truth_dif[j].y, truth_dif[j].w, truth_dif[j].h }; - float current_iou = box_iou(dets[i].bbox, t); - if (current_iou > iou_thresh && class_id == truth_dif[j].id) { - --detections_count; - break; - } - } - } - - // calc avg IoU, true-positives, false-positives for required Threshold - if (prob > thresh_calc_avg_iou) { - int z, found = 0; - for (z = checkpoint_detections_count; z < detections_count - 1; ++z) { - if (detections[z].unique_truth_index == truth_index) { - found = 1; break; - } - } - - if (truth_index > -1 && found == 0) { - avg_iou += max_iou; - ++tp_for_thresh; - 
avg_iou_per_class[class_id] += max_iou; - tp_for_thresh_per_class[class_id]++; - } - else{ - fp_for_thresh++; - fp_for_thresh_per_class[class_id]++; - } - } - } - } - } - - unique_truth_count += num_labels; - - //static int previous_errors = 0; - //int total_errors = fp_for_thresh + (unique_truth_count - tp_for_thresh); - //int errors_in_this_image = total_errors - previous_errors; - //previous_errors = total_errors; - //if(reinforcement_fd == NULL) reinforcement_fd = fopen("reinforcement.txt", "wb"); - //char buff[1000]; - //sprintf(buff, "%s\n", path); - //if(errors_in_this_image > 0) fwrite(buff, sizeof(char), strlen(buff), reinforcement_fd); - - free_detections(dets, nboxes); - free(id); - free_image(val[t]); - free_image(val_resized[t]); - } - } - - //for (t = 0; t < nthreads; ++t) { - // pthread_join(thr[t], 0); - //} - - if ((tp_for_thresh + fp_for_thresh) > 0) - avg_iou = avg_iou / (tp_for_thresh + fp_for_thresh); - - int class_id; - for(class_id = 0; class_id < classes; class_id++){ - if ((tp_for_thresh_per_class[class_id] + fp_for_thresh_per_class[class_id]) > 0) - avg_iou_per_class[class_id] = avg_iou_per_class[class_id] / (tp_for_thresh_per_class[class_id] + fp_for_thresh_per_class[class_id]); - } - - // SORT(detections) - qsort(detections, detections_count, sizeof(box_prob), detections_comparator); - - typedef struct { - double precision; - double recall; - int tp, fp, fn; - } pr_t; - - // for PR-curve - pr_t** pr = (pr_t**)xcalloc(classes, sizeof(pr_t*)); - for (i = 0; i < classes; ++i) { - pr[i] = (pr_t*)xcalloc(detections_count, sizeof(pr_t)); - } - printf("\n detections_count = %d, unique_truth_count = %d \n", detections_count, unique_truth_count); - - - int* detection_per_class_count = (int*)xcalloc(classes, sizeof(int)); - for (j = 0; j < detections_count; ++j) { - detection_per_class_count[detections[j].class_id]++; - } - - int* truth_flags = (int*)xcalloc(unique_truth_count, sizeof(int)); - - int rank; - for (rank = 0; rank < detections_count; 
++rank) { - if (rank % 100 == 0) - printf(" rank = %d of ranks = %d \r", rank, detections_count); - - if (rank > 0) { - int class_id; - for (class_id = 0; class_id < classes; ++class_id) { - pr[class_id][rank].tp = pr[class_id][rank - 1].tp; - pr[class_id][rank].fp = pr[class_id][rank - 1].fp; - } - } - - box_prob d = detections[rank]; - // if (detected && isn't detected before) - if (d.truth_flag == 1) { - if (truth_flags[d.unique_truth_index] == 0) - { - truth_flags[d.unique_truth_index] = 1; - pr[d.class_id][rank].tp++; // true-positive - } else - pr[d.class_id][rank].fp++; - } - else { - pr[d.class_id][rank].fp++; // false-positive - } - - for (i = 0; i < classes; ++i) - { - const int tp = pr[i][rank].tp; - const int fp = pr[i][rank].fp; - const int fn = truth_classes_count[i] - tp; // false-negative = objects - true-positive - pr[i][rank].fn = fn; - - if ((tp + fp) > 0) pr[i][rank].precision = (double)tp / (double)(tp + fp); - else pr[i][rank].precision = 0; - - if ((tp + fn) > 0) pr[i][rank].recall = (double)tp / (double)(tp + fn); - else pr[i][rank].recall = 0; - - if (rank == (detections_count - 1) && detection_per_class_count[i] != (tp + fp)) { // check for last rank - printf(" class_id: %d - detections = %d, tp+fp = %d, tp = %d, fp = %d \n", i, detection_per_class_count[i], tp+fp, tp, fp); - } - } - } - - free(truth_flags); - - - double mean_average_precision = 0; - - for (i = 0; i < classes; ++i) { - double avg_precision = 0; - - // MS COCO - uses 101-Recall-points on PR-chart. - // PascalVOC2007 - uses 11-Recall-points on PR-chart. - // PascalVOC2010-2012 - uses Area-Under-Curve on PR-chart. - // ImageNet - uses Area-Under-Curve on PR-chart. 
- - // correct mAP calculation: ImageNet, PascalVOC 2010-2012 - if (map_points == 0) - { - double last_recall = pr[i][detections_count - 1].recall; - double last_precision = pr[i][detections_count - 1].precision; - for (rank = detections_count - 2; rank >= 0; --rank) - { - double delta_recall = last_recall - pr[i][rank].recall; - last_recall = pr[i][rank].recall; - - if (pr[i][rank].precision > last_precision) { - last_precision = pr[i][rank].precision; - } - - avg_precision += delta_recall * last_precision; - } - //add remaining area of PR curve when recall isn't 0 at rank-1 - double delta_recall = last_recall - 0; - avg_precision += delta_recall * last_precision; - } - // MSCOCO - 101 Recall-points, PascalVOC - 11 Recall-points - else - { - int point; - for (point = 0; point < map_points; ++point) { - double cur_recall = point * 1.0 / (map_points-1); - double cur_precision = 0; - for (rank = 0; rank < detections_count; ++rank) - { - if (pr[i][rank].recall >= cur_recall) { // > or >= - if (pr[i][rank].precision > cur_precision) { - cur_precision = pr[i][rank].precision; - } - } - } - //printf("class_id = %d, point = %d, cur_recall = %.4f, cur_precision = %.4f \n", i, point, cur_recall, cur_precision); - - avg_precision += cur_precision; - } - avg_precision = avg_precision / map_points; - } - - printf("class_id = %d, name = %s, ap = %2.2f%% \t (TP = %d, FP = %d) \n", - i, names[i], avg_precision * 100, tp_for_thresh_per_class[i], fp_for_thresh_per_class[i]); - - float class_precision = (float)tp_for_thresh_per_class[i] / ((float)tp_for_thresh_per_class[i] + (float)fp_for_thresh_per_class[i]); - float class_recall = (float)tp_for_thresh_per_class[i] / ((float)tp_for_thresh_per_class[i] + (float)(truth_classes_count[i] - tp_for_thresh_per_class[i])); - //printf("Precision = %1.2f, Recall = %1.2f, avg IOU = %2.2f%% \n\n", class_precision, class_recall, avg_iou_per_class[i]); - - mean_average_precision += avg_precision; - } - - const float cur_precision = 
(float)tp_for_thresh / ((float)tp_for_thresh + (float)fp_for_thresh); - const float cur_recall = (float)tp_for_thresh / ((float)tp_for_thresh + (float)(unique_truth_count - tp_for_thresh)); - const float f1_score = 2.F * cur_precision * cur_recall / (cur_precision + cur_recall); - printf("\n for conf_thresh = %1.2f, precision = %1.2f, recall = %1.2f, F1-score = %1.2f \n", - thresh_calc_avg_iou, cur_precision, cur_recall, f1_score); - - printf(" for conf_thresh = %0.2f, TP = %d, FP = %d, FN = %d, average IoU = %2.2f %% \n", - thresh_calc_avg_iou, tp_for_thresh, fp_for_thresh, unique_truth_count - tp_for_thresh, avg_iou * 100); - - mean_average_precision = mean_average_precision / classes; - printf("\n IoU threshold = %2.0f %%, ", iou_thresh * 100); - if (map_points) printf("used %d Recall-points \n", map_points); - else printf("used Area-Under-Curve for each unique Recall \n"); - - printf(" mean average precision (mAP@%0.2f) = %f, or %2.2f %% \n", iou_thresh, mean_average_precision, mean_average_precision * 100); - - for (i = 0; i < classes; ++i) { - free(pr[i]); - } - free(pr); - free(detections); - free(truth_classes_count); - free(detection_per_class_count); - - free(avg_iou_per_class); - free(tp_for_thresh_per_class); - free(fp_for_thresh_per_class); - - fprintf(stderr, "Total Detection Time: %d Seconds\n", (int)(time(0) - start)); - printf("\nSet -points flag:\n"); - printf(" `-points 101` for MS COCO \n"); - printf(" `-points 11` for PascalVOC 2007 (uncomment `difficult` in voc.data) \n"); - printf(" `-points 0` (AUC) for ImageNet, PascalVOC 2010-2012, your custom dataset\n"); - if (reinforcement_fd != NULL) fclose(reinforcement_fd); - - // free memory - free_ptrs((void**)names, net.layers[net.n - 1].classes); - free_list_contents_kvp(options); - free_list(options); - - if (existing_net) { - //set_batch_network(&net, initial_batch); - //free_network_recurrent_state(*existing_net); - restore_network_recurrent_state(*existing_net); - 
//randomize_network_recurrent_state(*existing_net); - } - else { - free_network(net); - } - if (val) free(val); - if (val_resized) free(val_resized); - if (thr) free(thr); - if (buf) free(buf); - if (buf_resized) free(buf_resized); - - return mean_average_precision; -} - -typedef struct { - float w, h; -} anchors_t; - -int anchors_comparator(const void *pa, const void *pb) -{ - anchors_t a = *(const anchors_t *)pa; - anchors_t b = *(const anchors_t *)pb; - float diff = b.w*b.h - a.w*a.h; - if (diff < 0) return 1; - else if (diff > 0) return -1; - return 0; -} - -int anchors_data_comparator(const float **pa, const float **pb) -{ - float *a = (float *)*pa; - float *b = (float *)*pb; - float diff = b[0] * b[1] - a[0] * a[1]; - if (diff < 0) return 1; - else if (diff > 0) return -1; - return 0; -} - - -void calc_anchors(char *datacfg, int num_of_clusters, int width, int height, int show) -{ - printf("\n num_of_clusters = %d, width = %d, height = %d \n", num_of_clusters, width, height); - if (width < 0 || height < 0) { - printf("Usage: darknet detector calc_anchors data/voc.data -num_of_clusters 9 -width 416 -height 416 \n"); - printf("Error: set width and height \n"); - return; - } - - //float pointsdata[] = { 1,1, 2,2, 6,6, 5,5, 10,10 }; - float* rel_width_height_array = (float*)xcalloc(1000, sizeof(float)); - - - list *options = read_data_cfg(datacfg); - char *train_images = option_find_str(options, "train", "data/train.list"); - list *plist = get_paths(train_images); - int number_of_images = plist->size; - char **paths = (char **)list_to_array(plist); - - int classes = option_find_int(options, "classes", 1); - int* counter_per_class = (int*)xcalloc(classes, sizeof(int)); - - srand(time(0)); - int number_of_boxes = 0; - printf(" read labels from %d images \n", number_of_images); - - int i, j; - for (i = 0; i < number_of_images; ++i) { - char *path = paths[i]; - char labelpath[4096]; - replace_image_to_label(path, labelpath); - - int num_labels = 0; - box_label *truth 
= read_boxes(labelpath, &num_labels); - //printf(" new path: %s \n", labelpath); - char *buff = (char*)xcalloc(6144, sizeof(char)); - for (j = 0; j < num_labels; ++j) - { - if (truth[j].x > 1 || truth[j].x <= 0 || truth[j].y > 1 || truth[j].y <= 0 || - truth[j].w > 1 || truth[j].w <= 0 || truth[j].h > 1 || truth[j].h <= 0) - { - printf("\n\nWrong label: %s - j = %d, x = %f, y = %f, width = %f, height = %f \n", - labelpath, j, truth[j].x, truth[j].y, truth[j].w, truth[j].h); - sprintf(buff, "echo \"Wrong label: %s - j = %d, x = %f, y = %f, width = %f, height = %f\" >> bad_label.list", - labelpath, j, truth[j].x, truth[j].y, truth[j].w, truth[j].h); - system(buff); - if (check_mistakes) getchar(); - } - if (truth[j].id >= classes) { - classes = truth[j].id + 1; - counter_per_class = (int*)xrealloc(counter_per_class, classes * sizeof(int)); - } - counter_per_class[truth[j].id]++; - - number_of_boxes++; - rel_width_height_array = (float*)xrealloc(rel_width_height_array, 2 * number_of_boxes * sizeof(float)); - - rel_width_height_array[number_of_boxes * 2 - 2] = truth[j].w * width; - rel_width_height_array[number_of_boxes * 2 - 1] = truth[j].h * height; - printf("\r loaded \t image: %d \t box: %d", i + 1, number_of_boxes); - } - free(buff); - } - printf("\n all loaded. 
\n"); - printf("\n calculating k-means++ ..."); - - matrix boxes_data; - model anchors_data; - boxes_data = make_matrix(number_of_boxes, 2); - - printf("\n"); - for (i = 0; i < number_of_boxes; ++i) { - boxes_data.vals[i][0] = rel_width_height_array[i * 2]; - boxes_data.vals[i][1] = rel_width_height_array[i * 2 + 1]; - //if (w > 410 || h > 410) printf("i:%d, w = %f, h = %f \n", i, w, h); - } - - // Is used: distance(box, centroid) = 1 - IoU(box, centroid) - - // K-means - anchors_data = do_kmeans(boxes_data, num_of_clusters); - - qsort((void*)anchors_data.centers.vals, num_of_clusters, 2 * sizeof(float), (__compar_fn_t)anchors_data_comparator); - - //gen_anchors.py = 1.19, 1.99, 2.79, 4.60, 4.53, 8.92, 8.06, 5.29, 10.32, 10.66 - //float orig_anch[] = { 1.19, 1.99, 2.79, 4.60, 4.53, 8.92, 8.06, 5.29, 10.32, 10.66 }; - - printf("\n"); - float avg_iou = 0; - for (i = 0; i < number_of_boxes; ++i) { - float box_w = rel_width_height_array[i * 2]; //points->data.fl[i * 2]; - float box_h = rel_width_height_array[i * 2 + 1]; //points->data.fl[i * 2 + 1]; - //int cluster_idx = labels->data.i[i]; - int cluster_idx = 0; - float min_dist = FLT_MAX; - float best_iou = 0; - for (j = 0; j < num_of_clusters; ++j) { - float anchor_w = anchors_data.centers.vals[j][0]; // centers->data.fl[j * 2]; - float anchor_h = anchors_data.centers.vals[j][1]; // centers->data.fl[j * 2 + 1]; - float min_w = (box_w < anchor_w) ? box_w : anchor_w; - float min_h = (box_h < anchor_h) ? 
box_h : anchor_h; - float box_intersect = min_w*min_h; - float box_union = box_w*box_h + anchor_w*anchor_h - box_intersect; - float iou = box_intersect / box_union; - float distance = 1 - iou; - if (distance < min_dist) { - min_dist = distance; - cluster_idx = j; - best_iou = iou; - } - } - - float anchor_w = anchors_data.centers.vals[cluster_idx][0]; //centers->data.fl[cluster_idx * 2]; - float anchor_h = anchors_data.centers.vals[cluster_idx][1]; //centers->data.fl[cluster_idx * 2 + 1]; - if (best_iou > 1 || best_iou < 0) { // || box_w > width || box_h > height) { - printf(" Wrong label: i = %d, box_w = %f, box_h = %f, anchor_w = %f, anchor_h = %f, iou = %f \n", - i, box_w, box_h, anchor_w, anchor_h, best_iou); - } - else avg_iou += best_iou; - } - - char buff[1024]; - FILE* fwc = fopen("counters_per_class.txt", "wb"); - if (fwc) { - sprintf(buff, "counters_per_class = "); - printf("\n%s", buff); - fwrite(buff, sizeof(char), strlen(buff), fwc); - for (i = 0; i < classes; ++i) { - sprintf(buff, "%d", counter_per_class[i]); - printf("%s", buff); - fwrite(buff, sizeof(char), strlen(buff), fwc); - if (i < classes - 1) { - fwrite(", ", sizeof(char), 2, fwc); - printf(", "); - } - } - printf("\n"); - fclose(fwc); - } - else { - printf(" Error: file counters_per_class.txt can't be open \n"); - } - - avg_iou = 100 * avg_iou / number_of_boxes; - printf("\n avg IoU = %2.2f %% \n", avg_iou); - - - FILE* fw = fopen("anchors.txt", "wb"); - if (fw) { - printf("\nSaving anchors to the file: anchors.txt \n"); - printf("anchors = "); - for (i = 0; i < num_of_clusters; ++i) { - float anchor_w = anchors_data.centers.vals[i][0]; //centers->data.fl[i * 2]; - float anchor_h = anchors_data.centers.vals[i][1]; //centers->data.fl[i * 2 + 1]; - if (width > 32) sprintf(buff, "%3.0f,%3.0f", anchor_w, anchor_h); - else sprintf(buff, "%2.4f,%2.4f", anchor_w, anchor_h); - printf("%s", buff); - fwrite(buff, sizeof(char), strlen(buff), fw); - if (i + 1 < num_of_clusters) { - fwrite(", ", 
sizeof(char), 2, fw); - printf(", "); - } - } - printf("\n"); - fclose(fw); - } - else { - printf(" Error: file anchors.txt can't be open \n"); - } - - if (show) { -#ifdef OPENCV - show_acnhors(number_of_boxes, num_of_clusters, rel_width_height_array, anchors_data, width, height); -#endif // OPENCV - } - free(rel_width_height_array); - free(counter_per_class); - - getchar(); -} - - -void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, - float hier_thresh, int dont_show, int ext_output, int save_labels, char *outfile, int letter_box, int benchmark_layers) -{ - list *options = read_data_cfg(datacfg); - char *name_list = option_find_str(options, "names", "data/names.list"); - int names_size = 0; - char **names = get_labels_custom(name_list, &names_size); //get_labels(name_list); - - image **alphabet = load_alphabet(); - network net = parse_network_cfg_custom(cfgfile, 1, 1); // set batch=1 - if (weightfile) { - load_weights(&net, weightfile); - } - if (net.letter_box) letter_box = 1; - net.benchmark_layers = benchmark_layers; - fuse_conv_batchnorm(net); - calculate_binary_weights(net); - if (net.layers[net.n - 1].classes != names_size) { - printf("\n Error: in the file %s number of names %d that isn't equal to classes=%d in the file %s \n", - name_list, names_size, net.layers[net.n - 1].classes, cfgfile); - if (net.layers[net.n - 1].classes > names_size) getchar(); - } - srand(2222222); - char buff[256]; - char *input = buff; - char *json_buf = NULL; - int json_image_id = 0; - FILE* json_file = NULL; - if (outfile) { - json_file = fopen(outfile, "wb"); - if(!json_file) { - error("fopen failed"); - } - char *tmp = "[\n"; - fwrite(tmp, sizeof(char), strlen(tmp), json_file); - } - int j; - float nms = .45; // 0.4F - while (1) { - if (filename) { - strncpy(input, filename, 256); - if (strlen(input) > 0) - if (input[strlen(input) - 1] == 0x0d) input[strlen(input) - 1] = 0; - } - else { - printf("Enter Image Path: "); - 
fflush(stdout); - input = fgets(input, 256, stdin); - if (!input) break; - strtok(input, "\n"); - } - //image im; - //image sized = load_image_resize(input, net.w, net.h, net.c, &im); - image im = load_image(input, 0, 0, net.c); - image sized; - if(letter_box) sized = letterbox_image(im, net.w, net.h); - else sized = resize_image(im, net.w, net.h); - - layer l = net.layers[net.n - 1]; - int k; - for (k = 0; k < net.n; ++k) { - layer lk = net.layers[k]; - if (lk.type == YOLO || lk.type == GAUSSIAN_YOLO || lk.type == REGION) { - l = lk; - printf(" Detection layer: %d - type = %d \n", k, l.type); - } - } - - //box *boxes = calloc(l.w*l.h*l.n, sizeof(box)); - //float **probs = calloc(l.w*l.h*l.n, sizeof(float*)); - //for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = (float*)xcalloc(l.classes, sizeof(float)); - - float *X = sized.data; - - //time= what_time_is_it_now(); - double time = get_time_point(); - network_predict(net, X); - //network_predict_image(&net, im); letterbox = 1; - printf("%s: Predicted in %lf milli-seconds.\n", input, ((double)get_time_point() - time) / 1000); - //printf("%s: Predicted in %f seconds.\n", input, (what_time_is_it_now()-time)); - - int nboxes = 0; - detection *dets = get_network_boxes(&net, im.w, im.h, thresh, hier_thresh, 0, 1, &nboxes, letter_box); - if (nms) { - if (l.nms_kind == DEFAULT_NMS) do_nms_sort(dets, nboxes, l.classes, nms); - else diounms_sort(dets, nboxes, l.classes, nms, l.nms_kind, l.beta_nms); - } - draw_detections_v3(im, dets, nboxes, thresh, names, alphabet, l.classes, ext_output); - save_image(im, "predictions"); - if (!dont_show) { - show_image(im, "predictions"); - } - - if (json_file) { - if (json_buf) { - char *tmp = ", \n"; - fwrite(tmp, sizeof(char), strlen(tmp), json_file); - } - ++json_image_id; - json_buf = detection_to_json(dets, nboxes, l.classes, names, json_image_id, input); - - fwrite(json_buf, sizeof(char), strlen(json_buf), json_file); - free(json_buf); - } - - // pseudo labeling concept - fast.ai - if 
(save_labels) - { - char labelpath[4096]; - replace_image_to_label(input, labelpath); - - FILE* fw = fopen(labelpath, "wb"); - int i; - for (i = 0; i < nboxes; ++i) { - char buff[1024]; - int class_id = -1; - float prob = 0; - for (j = 0; j < l.classes; ++j) { - if (dets[i].prob[j] > thresh && dets[i].prob[j] > prob) { - prob = dets[i].prob[j]; - class_id = j; - } - } - if (class_id >= 0) { - sprintf(buff, "%d %2.4f %2.4f %2.4f %2.4f\n", class_id, dets[i].bbox.x, dets[i].bbox.y, dets[i].bbox.w, dets[i].bbox.h); - fwrite(buff, sizeof(char), strlen(buff), fw); - } - } - fclose(fw); - } - - free_detections(dets, nboxes); - free_image(im); - free_image(sized); - - if (!dont_show) { - wait_until_press_key_cv(); - destroy_all_windows_cv(); - } - - if (filename) break; - } - - if (json_file) { - char *tmp = "\n]"; - fwrite(tmp, sizeof(char), strlen(tmp), json_file); - fclose(json_file); - } - - // free memory - free_ptrs((void**)names, net.layers[net.n - 1].classes); - free_list_contents_kvp(options); - free_list(options); - - int i; - const int nsize = 8; - for (j = 0; j < nsize; ++j) { - for (i = 32; i < 127; ++i) { - free_image(alphabet[j][i]); - } - free(alphabet[j]); - } - free(alphabet); - - free_network(net); -} - -#if defined(OPENCV) && defined(GPU) - -// adversarial attack dnn -void draw_object(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, int dont_show, int it_num, - int letter_box, int benchmark_layers) -{ - list *options = read_data_cfg(datacfg); - char *name_list = option_find_str(options, "names", "data/names.list"); - int names_size = 0; - char **names = get_labels_custom(name_list, &names_size); //get_labels(name_list); - - image **alphabet = load_alphabet(); - network net = parse_network_cfg(cfgfile);// parse_network_cfg_custom(cfgfile, 1, 1); // set batch=1 - net.adversarial = 1; - set_batch_network(&net, 1); - if (weightfile) { - load_weights(&net, weightfile); - } - net.benchmark_layers = benchmark_layers; - 
//fuse_conv_batchnorm(net); - //calculate_binary_weights(net); - if (net.layers[net.n - 1].classes != names_size) { - printf("\n Error: in the file %s number of names %d that isn't equal to classes=%d in the file %s \n", - name_list, names_size, net.layers[net.n - 1].classes, cfgfile); - if (net.layers[net.n - 1].classes > names_size) getchar(); - } - - srand(2222222); - char buff[256]; - char *input = buff; - - int j; - float nms = .45; // 0.4F - while (1) { - if (filename) { - strncpy(input, filename, 256); - if (strlen(input) > 0) - if (input[strlen(input) - 1] == 0x0d) input[strlen(input) - 1] = 0; - } - else { - printf("Enter Image Path: "); - fflush(stdout); - input = fgets(input, 256, stdin); - if (!input) break; - strtok(input, "\n"); - } - //image im; - //image sized = load_image_resize(input, net.w, net.h, net.c, &im); - image im = load_image(input, 0, 0, net.c); - image sized; - if (letter_box) sized = letterbox_image(im, net.w, net.h); - else sized = resize_image(im, net.w, net.h); - - image src_sized = copy_image(sized); - - layer l = net.layers[net.n - 1]; - int k; - for (k = 0; k < net.n; ++k) { - layer lk = net.layers[k]; - if (lk.type == YOLO || lk.type == GAUSSIAN_YOLO || lk.type == REGION) { - l = lk; - printf(" Detection layer: %d - type = %d \n", k, l.type); - } - } - - net.num_boxes = l.max_boxes; - int num_truth = l.truths; - float *truth_cpu = (float *)xcalloc(num_truth, sizeof(float)); - - int *it_num_set = (int *)xcalloc(1, sizeof(int)); - float *lr_set = (float *)xcalloc(1, sizeof(float)); - int *boxonly = (int *)xcalloc(1, sizeof(int)); - - cv_draw_object(sized, truth_cpu, net.num_boxes, num_truth, it_num_set, lr_set, boxonly, l.classes, names); - - net.learning_rate = *lr_set; - it_num = *it_num_set; - - float *X = sized.data; - - mat_cv* img = NULL; - float max_img_loss = 5; - int number_of_lines = 100; - int img_size = 1000; - char windows_name[100]; - char *base = basecfg(cfgfile); - sprintf(windows_name, "chart_%s.png", base); - img 
= draw_train_chart(windows_name, max_img_loss, it_num, number_of_lines, img_size, dont_show, NULL); - - int iteration; - for (iteration = 0; iteration < it_num; ++iteration) - { - forward_backward_network_gpu(net, X, truth_cpu); - - float avg_loss = get_network_cost(net); - draw_train_loss(windows_name, img, img_size, avg_loss, max_img_loss, iteration, it_num, 0, 0, "mAP%", 0, dont_show, 0, 0); - - float inv_loss = 1.0 / max_val_cmp(0.01, avg_loss); - //net.learning_rate = *lr_set * inv_loss; - - if (*boxonly) { - int dw = truth_cpu[2] * sized.w, dh = truth_cpu[3] * sized.h; - int dx = truth_cpu[0] * sized.w - dw / 2, dy = truth_cpu[1] * sized.h - dh / 2; - image crop = crop_image(sized, dx, dy, dw, dh); - copy_image_inplace(src_sized, sized); - embed_image(crop, sized, dx, dy); - } - - show_image_cv(sized, "image_optimization"); - wait_key_cv(20); - } - - net.train = 0; - quantize_image(sized); - network_predict(net, X); - - save_image_png(sized, "drawn"); - //sized = load_image("drawn.png", 0, 0, net.c); - - int nboxes = 0; - detection *dets = get_network_boxes(&net, sized.w, sized.h, thresh, 0, 0, 1, &nboxes, letter_box); - if (nms) { - if (l.nms_kind == DEFAULT_NMS) do_nms_sort(dets, nboxes, l.classes, nms); - else diounms_sort(dets, nboxes, l.classes, nms, l.nms_kind, l.beta_nms); - } - draw_detections_v3(sized, dets, nboxes, thresh, names, alphabet, l.classes, 1); - save_image(sized, "pre_predictions"); - if (!dont_show) { - show_image(sized, "pre_predictions"); - } - - free_detections(dets, nboxes); - free_image(im); - free_image(sized); - free_image(src_sized); - - if (!dont_show) { - wait_until_press_key_cv(); - destroy_all_windows_cv(); - } - - free(lr_set); - free(it_num_set); - - if (filename) break; - } - - // free memory - free_ptrs((void**)names, net.layers[net.n - 1].classes); - free_list_contents_kvp(options); - free_list(options); - - int i; - const int nsize = 8; - for (j = 0; j < nsize; ++j) { - for (i = 32; i < 127; ++i) { - 
free_image(alphabet[j][i]); - } - free(alphabet[j]); - } - free(alphabet); - - free_network(net); -} -#else // defined(OPENCV) && defined(GPU) -void draw_object(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, int dont_show, int it_num, - int letter_box, int benchmark_layers) -{ - printf(" ./darknet detector draw ... can't be used without OpenCV and CUDA! \n"); - getchar(); -} -#endif // defined(OPENCV) && defined(GPU) - -void run_detector(int argc, char **argv) -{ - int dont_show = find_arg(argc, argv, "-dont_show"); - int benchmark = find_arg(argc, argv, "-benchmark"); - int benchmark_layers = find_arg(argc, argv, "-benchmark_layers"); - //if (benchmark_layers) benchmark = 1; - if (benchmark) dont_show = 1; - int show = find_arg(argc, argv, "-show"); - int letter_box = find_arg(argc, argv, "-letter_box"); - int calc_map = find_arg(argc, argv, "-map"); - int map_points = find_int_arg(argc, argv, "-points", 0); - check_mistakes = find_arg(argc, argv, "-check_mistakes"); - int show_imgs = find_arg(argc, argv, "-show_imgs"); - int mjpeg_port = find_int_arg(argc, argv, "-mjpeg_port", -1); - int avgframes = find_int_arg(argc, argv, "-avgframes", 3); - int dontdraw_bbox = find_arg(argc, argv, "-dontdraw_bbox"); - int json_port = find_int_arg(argc, argv, "-json_port", -1); - char *http_post_host = find_char_arg(argc, argv, "-http_post_host", 0); - int time_limit_sec = find_int_arg(argc, argv, "-time_limit_sec", 0); - char *out_filename = find_char_arg(argc, argv, "-out_filename", 0); - char *outfile = find_char_arg(argc, argv, "-out", 0); - char *prefix = find_char_arg(argc, argv, "-prefix", 0); - float thresh = find_float_arg(argc, argv, "-thresh", .25); // 0.24 - float iou_thresh = find_float_arg(argc, argv, "-iou_thresh", .5); // 0.5 for mAP - float hier_thresh = find_float_arg(argc, argv, "-hier", .5); - int cam_index = find_int_arg(argc, argv, "-c", 0); - int frame_skip = find_int_arg(argc, argv, "-s", 0); - int num_of_clusters = 
find_int_arg(argc, argv, "-num_of_clusters", 5); - int width = find_int_arg(argc, argv, "-width", -1); - int height = find_int_arg(argc, argv, "-height", -1); - // extended output in test mode (output of rect bound coords) - // and for recall mode (extended output table-like format with results for best_class fit) - int ext_output = find_arg(argc, argv, "-ext_output"); - int save_labels = find_arg(argc, argv, "-save_labels"); - char* chart_path = find_char_arg(argc, argv, "-chart", 0); - if (argc < 4) { - fprintf(stderr, "usage: %s %s [train/test/valid/demo/map] [data] [cfg] [weights (optional)]\n", argv[0], argv[1]); - return; - } - char *gpu_list = find_char_arg(argc, argv, "-gpus", 0); - int *gpus = 0; - int gpu = 0; - int ngpus = 0; - if (gpu_list) { - printf("%s\n", gpu_list); - int len = (int)strlen(gpu_list); - ngpus = 1; - int i; - for (i = 0; i < len; ++i) { - if (gpu_list[i] == ',') ++ngpus; - } - gpus = (int*)xcalloc(ngpus, sizeof(int)); - for (i = 0; i < ngpus; ++i) { - gpus[i] = atoi(gpu_list); - gpu_list = strchr(gpu_list, ',') + 1; - } - } - else { - gpu = gpu_index; - gpus = &gpu; - ngpus = 1; - } - - int clear = find_arg(argc, argv, "-clear"); - - char *datacfg = argv[3]; - char *cfg = argv[4]; - char *weights = (argc > 5) ? argv[5] : 0; - if (weights) - if (strlen(weights) > 0) - if (weights[strlen(weights) - 1] == 0x0d) weights[strlen(weights) - 1] = 0; - char *filename = (argc > 6) ? 
argv[6] : 0; - if (0 == strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh, dont_show, ext_output, save_labels, outfile, letter_box, benchmark_layers); - else if (0 == strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear, dont_show, calc_map, mjpeg_port, show_imgs, benchmark_layers, chart_path); - else if (0 == strcmp(argv[2], "valid")) validate_detector(datacfg, cfg, weights, outfile); - else if (0 == strcmp(argv[2], "recall")) validate_detector_recall(datacfg, cfg, weights); - else if (0 == strcmp(argv[2], "map")) validate_detector_map(datacfg, cfg, weights, thresh, iou_thresh, map_points, letter_box, NULL); - else if (0 == strcmp(argv[2], "calc_anchors")) calc_anchors(datacfg, num_of_clusters, width, height, show); - else if (0 == strcmp(argv[2], "draw")) { - int it_num = 100; - draw_object(datacfg, cfg, weights, filename, thresh, dont_show, it_num, letter_box, benchmark_layers); - } - else if (0 == strcmp(argv[2], "demo")) { - list *options = read_data_cfg(datacfg); - int classes = option_find_int(options, "classes", 20); - char *name_list = option_find_str(options, "names", "data/names.list"); - char **names = get_labels(name_list); - if (filename) - if (strlen(filename) > 0) - if (filename[strlen(filename) - 1] == 0x0d) filename[strlen(filename) - 1] = 0; - demo(cfg, weights, thresh, hier_thresh, cam_index, filename, names, classes, avgframes, frame_skip, prefix, out_filename, - mjpeg_port, dontdraw_bbox, json_port, dont_show, ext_output, letter_box, time_limit_sec, http_post_host, benchmark, benchmark_layers); - - free_list_contents_kvp(options); - free_list(options); - } - else printf(" There isn't such command: %s", argv[2]); - - if (gpus && gpu_list && ngpus > 1) free(gpus); -} diff --git a/src/Detector/darknet/src/dice.c b/src/Detector/darknet/src/dice.c deleted file mode 100644 index 8a0393a8b..000000000 --- a/src/Detector/darknet/src/dice.c +++ /dev/null @@ -1,117 +0,0 @@ -#include 
"network.h" -#include "utils.h" -#include "parser.h" - -char *dice_labels[] = {"face1","face2","face3","face4","face5","face6"}; - -void train_dice(char *cfgfile, char *weightfile) -{ - srand(time(0)); - float avg_loss = -1; - char *base = basecfg(cfgfile); - char* backup_directory = "backup/"; - printf("%s\n", base); - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); - int imgs = 1024; - int i = *net.seen/imgs; - char **labels = dice_labels; - list *plist = get_paths("data/dice/dice.train.list"); - char **paths = (char **)list_to_array(plist); - printf("%d\n", plist->size); - clock_t time; - while(1){ - ++i; - time=clock(); - data train = load_data_old(paths, imgs, plist->size, labels, 6, net.w, net.h); - printf("Loaded: %lf seconds\n", sec(clock()-time)); - - time=clock(); - float loss = train_network(net, train); - if(avg_loss == -1) avg_loss = loss; - avg_loss = avg_loss*.9 + loss*.1; - printf("%d: %f, %f avg, %lf seconds, %ld images\n", i, loss, avg_loss, sec(clock()-time), *net.seen); - free_data(train); - if((i % 100) == 0) net.learning_rate *= .1; - if(i%100==0){ - char buff[256]; - sprintf(buff, "%s/%s_%d.weights",backup_directory,base, i); - save_weights(net, buff); - } - } -} - -void validate_dice(char *filename, char *weightfile) -{ - network net = parse_network_cfg(filename); - if(weightfile){ - load_weights(&net, weightfile); - } - srand(time(0)); - - char **labels = dice_labels; - list *plist = get_paths("data/dice/dice.val.list"); - - char **paths = (char **)list_to_array(plist); - int m = plist->size; - free_list(plist); - - data val = load_data_old(paths, m, 0, labels, 6, net.w, net.h); - float *acc = network_accuracies(net, val, 2); - printf("Validation Accuracy: %f, %d images\n", acc[0], m); - free_data(val); -} - -void test_dice(char *cfgfile, char *weightfile, char *filename) -{ - network net = 
parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - set_batch_network(&net, 1); - srand(2222222); - int i = 0; - char **names = dice_labels; - char buff[256]; - char *input = buff; - int indexes[6]; - while(1){ - if(filename){ - strncpy(input, filename, 256); - }else{ - printf("Enter Image Path: "); - fflush(stdout); - input = fgets(input, 256, stdin); - if(!input) return; - strtok(input, "\n"); - } - image im = load_image_color(input, net.w, net.h); - float *X = im.data; - float *predictions = network_predict(net, X); - top_predictions(net, 6, indexes); - for(i = 0; i < 6; ++i){ - int index = indexes[i]; - printf("%s: %f\n", names[index], predictions[index]); - } - free_image(im); - if (filename) break; - } -} - -void run_dice(int argc, char **argv) -{ - if(argc < 4){ - fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); - return; - } - - char *cfg = argv[3]; - char *weights = (argc > 4) ? argv[4] : 0; - char *filename = (argc > 5) ? 
argv[5]: 0; - if(0==strcmp(argv[2], "test")) test_dice(cfg, weights, filename); - else if(0==strcmp(argv[2], "train")) train_dice(cfg, weights); - else if(0==strcmp(argv[2], "valid")) validate_dice(cfg, weights); -} diff --git a/src/Detector/darknet/src/dropout_layer.c b/src/Detector/darknet/src/dropout_layer.c deleted file mode 100644 index 3cc73ed24..000000000 --- a/src/Detector/darknet/src/dropout_layer.c +++ /dev/null @@ -1,88 +0,0 @@ -#include "dropout_layer.h" -#include "utils.h" -#include "dark_cuda.h" -#include -#include - -dropout_layer make_dropout_layer(int batch, int inputs, float probability, int dropblock, float dropblock_size_rel, int dropblock_size_abs, int w, int h, int c) -{ - dropout_layer l = { (LAYER_TYPE)0 }; - l.type = DROPOUT; - l.probability = probability; - l.dropblock = dropblock; - l.dropblock_size_rel = dropblock_size_rel; - l.dropblock_size_abs = dropblock_size_abs; - if (l.dropblock) { - l.out_w = l.w = w; - l.out_h = l.h = h; - l.out_c = l.c = c; - - if (l.w <= 0 || l.h <= 0 || l.c <= 0) { - printf(" Error: DropBlock - there must be positive values for: l.w=%d, l.h=%d, l.c=%d \n", l.w, l.h, l.c); - exit(0); - } - } - l.inputs = inputs; - l.outputs = inputs; - l.batch = batch; - l.rand = (float*)xcalloc(inputs * batch, sizeof(float)); - l.scale = 1./(1.0 - probability); - l.forward = forward_dropout_layer; - l.backward = backward_dropout_layer; -#ifdef GPU - l.forward_gpu = forward_dropout_layer_gpu; - l.backward_gpu = backward_dropout_layer_gpu; - l.rand_gpu = cuda_make_array(l.rand, inputs*batch); - if (l.dropblock) { - l.drop_blocks_scale = cuda_make_array_pinned(l.rand, l.batch); - l.drop_blocks_scale_gpu = cuda_make_array(l.rand, l.batch); - } -#endif - if (l.dropblock) { - if(l.dropblock_size_abs) fprintf(stderr, "dropblock p = %.3f l.dropblock_size_abs = %d %4d -> %4d\n", probability, l.dropblock_size_abs, inputs, inputs); - else fprintf(stderr, "dropblock p = %.3f l.dropblock_size_rel = %.2f %4d -> %4d\n", probability, 
l.dropblock_size_rel, inputs, inputs); - } - else fprintf(stderr, "dropout p = %.3f %4d -> %4d\n", probability, inputs, inputs); - return l; -} - -void resize_dropout_layer(dropout_layer *l, int inputs) -{ - l->inputs = l->outputs = inputs; - l->rand = (float*)xrealloc(l->rand, l->inputs * l->batch * sizeof(float)); -#ifdef GPU - cuda_free(l->rand_gpu); - l->rand_gpu = cuda_make_array(l->rand, l->inputs*l->batch); - - if (l->dropblock) { - cudaFreeHost(l->drop_blocks_scale); - l->drop_blocks_scale = cuda_make_array_pinned(l->rand, l->batch); - - cuda_free(l->drop_blocks_scale_gpu); - l->drop_blocks_scale_gpu = cuda_make_array(l->rand, l->batch); - } -#endif -} - -void forward_dropout_layer(dropout_layer l, network_state state) -{ - int i; - if (!state.train) return; - for(i = 0; i < l.batch * l.inputs; ++i){ - float r = rand_uniform(0, 1); - l.rand[i] = r; - if(r < l.probability) state.input[i] = 0; - else state.input[i] *= l.scale; - } -} - -void backward_dropout_layer(dropout_layer l, network_state state) -{ - int i; - if(!state.delta) return; - for(i = 0; i < l.batch * l.inputs; ++i){ - float r = l.rand[i]; - if(r < l.probability) state.delta[i] = 0; - else state.delta[i] *= l.scale; - } -} diff --git a/src/Detector/darknet/src/dropout_layer.h b/src/Detector/darknet/src/dropout_layer.h deleted file mode 100644 index fa02300a7..000000000 --- a/src/Detector/darknet/src/dropout_layer.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef DROPOUT_LAYER_H -#define DROPOUT_LAYER_H - -#include "layer.h" -#include "network.h" - -typedef layer dropout_layer; - -#ifdef __cplusplus -extern "C" { -#endif -dropout_layer make_dropout_layer(int batch, int inputs, float probability, int dropblock, float dropblock_size_rel, int dropblock_size_abs, int w, int h, int c); - -void forward_dropout_layer(dropout_layer l, network_state state); -void backward_dropout_layer(dropout_layer l, network_state state); -void resize_dropout_layer(dropout_layer *l, int inputs); - -#ifdef GPU -void 
forward_dropout_layer_gpu(dropout_layer l, network_state state); -void backward_dropout_layer_gpu(dropout_layer l, network_state state); - -#endif -#ifdef __cplusplus -} -#endif -#endif diff --git a/src/Detector/darknet/src/dropout_layer_kernels.cu b/src/Detector/darknet/src/dropout_layer_kernels.cu deleted file mode 100644 index 05cde5949..000000000 --- a/src/Detector/darknet/src/dropout_layer_kernels.cu +++ /dev/null @@ -1,311 +0,0 @@ -#include -#include -#include -#include - -#include "dropout_layer.h" -#include "dark_cuda.h" -#include "utils.h" -#include "blas.h" - -#include "image_opencv.h" -#include "image.h" - - -__global__ void dropblock_fast_kernel(float *rand, float prob, int w, int h, int spatial, int filters, int batch, int block_size, float *drop_blocks_scale, float *output) -{ - const int threads = BLOCK; - const int id = threadIdx.x; - const int f = blockIdx.x % filters; - const int b = blockIdx.x / filters; - - __shared__ int prob_block; - __shared__ int index_block; - - if (id == 0) { - prob_block = 1.0 * 1000000; - index_block = -1; - } - __syncthreads(); - - int i; - for (i = id; i < spatial; i += threads) { - int index = b*spatial*f + f*spatial + i; - - if (rand[index] < prob) { - //Chose with the lowest rand[i] - int new_val = rand[index] * 1000000; - rand[index] = 1; - int old_val = atomicMin(&prob_block, new_val); - if (new_val < old_val) { - index_block = i; - //if (b == 0) printf("\n rand[i] = %f, prob = %f, b = %d, f = %d, i = %d, index_block = %d \n", rand[i], prob, b, f, i, index_block); - } - } - - } - __syncthreads(); - if (index_block == -1) return; - - - int b_x = index_block % w; - int b_y = index_block / w; - - if (b_x > (w - block_size)) b_x = b_x - (w - block_size); - if (b_y > (h - block_size)) b_y = b_y - (h - block_size); - - b_x = max(0, min(b_x, w - block_size)); - b_y = max(0, min(b_y, h - block_size)); - - int block_square_size = block_size * block_size; - - for (i = id; i < block_square_size; i += threads) - { - int i_x = 
i % block_size; - int i_y = i / block_size; - - int x = b_x + i_x; - int y = b_y + i_y; - - if (x >= 0 && x < w && y >= 0 && y < h) { - int new_index = b*filters*spatial + f*spatial + y*w + x; - - output[new_index] = 0; - rand[new_index] = 0; - } - } - - //if (id == 0 && b == 0) printf(" f = %d, b = %d \n", f, b); - - if (id == 0 && drop_blocks_scale) { - atomicAdd(&drop_blocks_scale[b], block_square_size); - //if(b == 0) printf("\n index_block = %d \n", index_block); - } - -} - -__global__ void set_scales_dropblock_kernel(float *drop_blocks_scale, int block_size_w, int block_size_h, int outputs, int batch) -{ - const int index = blockIdx.x*blockDim.x + threadIdx.x; - if (index >= batch) return; - - //printf(" drop_blocks_scale[index] = %f \n", drop_blocks_scale[index]); - const float prob = drop_blocks_scale[index] / (float)outputs; - const float scale = 1.0f / (1.0f - prob); - drop_blocks_scale[index] = scale; -} - -__global__ void scale_dropblock_kernel(float *output, int size, int outputs, float *drop_blocks_scale) -{ - const int index = blockIdx.x*blockDim.x + threadIdx.x; - if (index >= size) return; - - const int b = index / outputs; - output[index] *= drop_blocks_scale[b]; -} - - -__global__ void backward_dropblock_kernel(float *pass, float *delta, int size) -{ - const int index = blockIdx.x*blockDim.x + threadIdx.x; - if (index >= size) return; - - if (pass[index] == 0) delta[index] = 0; -} - - -__global__ void yoloswag420blazeit360noscope(float *input, int size, float *rand, float prob, float scale) -{ - int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if(id < size) input[id] = (rand[id] < prob) ? 
0 : input[id]*scale; -} - - -void forward_dropout_layer_gpu(dropout_layer l, network_state state) -{ - if (!state.train) return; - int iteration_num = get_current_iteration(state.net); // (*state.net.seen) / (state.net.batch*state.net.subdivisions); - //if (iteration_num < state.net.burn_in) return; - - // We gradually increase the block size and the probability of dropout - during the first half of the training - float multiplier = 1.0; - if(iteration_num < (state.net.max_batches*0.85)) - multiplier = (iteration_num / (float)(state.net.max_batches*0.85)); - - // dropblock - if (l.dropblock) { - //l.probability = 1 / keep_prob - //const int max_blocks_per_channel = 10; - const float cur_prob = l.probability * multiplier; - const float cur_scale = 1.f / (1.f - cur_prob); - - int block_width = l.dropblock_size_abs *multiplier; - int block_height = l.dropblock_size_abs *multiplier; - - if (l.dropblock_size_rel) { - block_width = l.dropblock_size_rel * l.w * multiplier; - block_height = l.dropblock_size_rel * l.h * multiplier; - } - - block_width = max_val_cmp(1, block_width); - block_height = max_val_cmp(1, block_height); - - block_width = min_val_cmp(l.w, block_width); - block_height = min_val_cmp(l.h, block_height); - - const int block_size = min_val_cmp(block_width, block_height); - const float block_prob = cur_prob / (block_size*block_size); - assert(block_size <= l.w && block_size <= l.h); - - const int size = l.inputs*l.batch; - cuda_random(l.rand_gpu, size); - - fill_ongpu(l.batch, 0, l.drop_blocks_scale_gpu, 1); - - //fill_ongpu(l.outputs * l.batch, 1, state.input, 1); // remove!!! 
- - int num_blocks = l.batch * l.c; - dropblock_fast_kernel << > > (l.rand_gpu, block_prob, l.w, l.h, l.w*l.h, l.c, l.batch, block_size, l.drop_blocks_scale_gpu, state.input); - CHECK_CUDA(cudaPeekAtLastError()); - - num_blocks = get_number_of_blocks(l.batch, BLOCK); - set_scales_dropblock_kernel << > > (l.drop_blocks_scale_gpu, block_size, block_size, l.outputs, l.batch); - CHECK_CUDA(cudaPeekAtLastError()); - - /* - { - cuda_pull_array(l.drop_blocks_scale_gpu, l.drop_blocks_scale, l.batch); - - float avg_scale = 0; - - for (int b = 0; b < l.batch; ++b) { - const float scale = l.drop_blocks_scale[b]; - avg_scale += scale; - printf(" %d x %d - block_size = %d, block_size*block_size = %d , ", l.w, l.h, block_size, block_size*block_size); - printf(" , l.drop_blocks_scale[b] = %f, scale = %f \t cur_prob = %f, cur_scale = %f \n", - l.drop_blocks_scale[b], scale, cur_prob, cur_scale); - } - avg_scale = avg_scale / l.batch; - printf(" avg_scale = %f \n", avg_scale); - - float *output = (float *)calloc(l.outputs * l.batch, sizeof(float)); - cuda_pull_array(state.input, output, l.outputs * l.batch); - - printf(" l.w = %d, l.h = %d, l.c = %d \n", l.w, l.h, l.c); - - image img = float_to_image(l.w, l.h, l.c, output); - img = collapse_image_layers(img, 1); - //normalize_image(img); - - show_image(img, "dropout - forward"); - wait_key_cv(0); - //free_image(img); - //free(output); - } - */ - - num_blocks = get_number_of_blocks(l.outputs * l.batch, BLOCK); - scale_dropblock_kernel << > > (state.input, l.outputs * l.batch, l.outputs, l.drop_blocks_scale_gpu); - CHECK_CUDA(cudaPeekAtLastError()); - - } - // dropout - else { - int size = l.inputs*l.batch; - cuda_random(l.rand_gpu, size); - /* - int i; - for(i = 0; i < size; ++i){ - layer.rand[i] = rand_uniform(); - } - cuda_push_array(layer.rand_gpu, layer.rand, size); - */ - - yoloswag420blazeit360noscope << > > (state.input, size, l.rand_gpu, l.probability, l.scale); - CHECK_CUDA(cudaPeekAtLastError()); - } -} - -void 
backward_dropout_layer_gpu(dropout_layer l, network_state state) -{ - if(!state.delta) return; - //int iteration_num = get_current_iteration(state.net); //(*state.net.seen) / (state.net.batch*state.net.subdivisions); - //if (iteration_num < state.net.burn_in) return; - - const int size = l.inputs*l.batch; - - // dropblock - if (l.dropblock) { - int iteration_num = get_current_iteration(state.net); //(*state.net.seen) / (state.net.batch*state.net.subdivisions); - float multiplier = 1.0; - if (iteration_num < (state.net.max_batches*0.85)) - multiplier = (iteration_num / (float)(state.net.max_batches*0.85)); - - const float cur_prob = l.probability * multiplier; - const float cur_scale = 1.f / (1.f - cur_prob); - - int block_width = l.dropblock_size_abs * multiplier; - int block_height = l.dropblock_size_abs * multiplier; - - if (l.dropblock_size_rel) { - block_width = l.dropblock_size_rel * l.w * multiplier; - block_height = l.dropblock_size_rel * l.h * multiplier; - } - - block_width = max_val_cmp(1, block_width); - block_height = max_val_cmp(1, block_height); - - block_width = min_val_cmp(l.w, block_width); - block_height = min_val_cmp(l.h, block_height); - - const int block_size = min_val_cmp(block_width, block_height); - const float block_prob = cur_prob / (block_size*block_size); - - //fill_ongpu(l.outputs * l.batch, 1, state.delta, 1); // remove!!! 
- - int num_blocks = get_number_of_blocks(l.outputs * l.batch, BLOCK); - backward_dropblock_kernel << > >(l.rand_gpu, state.delta, l.outputs * l.batch); - CHECK_CUDA(cudaPeekAtLastError()); - - scale_dropblock_kernel << > > (state.delta, l.outputs * l.batch, l.outputs, l.drop_blocks_scale_gpu); - CHECK_CUDA(cudaPeekAtLastError()); - - /* - { - cuda_pull_array(l.drop_blocks_scale_gpu, l.drop_blocks_scale, l.batch); - - float avg_scale = 0; - - for (int b = 0; b < l.batch; ++b) { - const float scale = l.drop_blocks_scale[b]; - avg_scale += scale; - printf(" %d x %d - block_size = %d, block_size*block_size = %d , ", l.w, l.h, block_size, block_size*block_size); - printf(" , l.drop_blocks_scale[b] = %f, scale = %f \t cur_prob = %f, cur_scale = %f \n", - l.drop_blocks_scale[b], scale, cur_prob, cur_scale); - } - avg_scale = avg_scale / l.batch; - printf(" avg_scale = %f \n", avg_scale); - - float *output = (float *)calloc(l.outputs * l.batch, sizeof(float)); - cuda_pull_array(state.delta, output, l.outputs * l.batch); - - printf(" l.w = %d, l.h = %d, l.c = %d \n", l.w, l.h, l.c); - - image img = float_to_image(l.w, l.h, l.c, output); - img = collapse_image_layers(img, 1); - //normalize_image(img); - - show_image(img, "dropout - delta"); - wait_key_cv(0); - //free_image(img); - //free(output); - } - */ - - } - // dropout - else { - yoloswag420blazeit360noscope << > > (state.delta, size, l.rand_gpu, l.probability, l.scale); - CHECK_CUDA(cudaPeekAtLastError()); - } -} diff --git a/src/Detector/darknet/src/gaussian_yolo_layer.c b/src/Detector/darknet/src/gaussian_yolo_layer.c deleted file mode 100644 index bfef69916..000000000 --- a/src/Detector/darknet/src/gaussian_yolo_layer.c +++ /dev/null @@ -1,898 +0,0 @@ -// Gaussian YOLOv3 implementation -// Author: Jiwoong Choi -// ICCV 2019 Paper: http://openaccess.thecvf.com/content_ICCV_2019/html/Choi_Gaussian_YOLOv3_An_Accurate_and_Fast_Object_Detector_Using_Localization_ICCV_2019_paper.html -// arxiv.org: 
https://arxiv.org/abs/1904.04620v2 -// source code: https://github.com/jwchoi384/Gaussian_YOLOv3 - -#include "gaussian_yolo_layer.h" -#include "activations.h" -#include "blas.h" -#include "box.h" -#include "dark_cuda.h" -#include "utils.h" - -#include -#include -#include -#include - -#ifndef M_PI -#define M_PI 3.141592 -#endif - -extern int check_mistakes; - -layer make_gaussian_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes, int max_boxes) -{ - int i; - layer l = { (LAYER_TYPE)0 }; - l.type = GAUSSIAN_YOLO; - - l.n = n; - l.total = total; - l.batch = batch; - l.h = h; - l.w = w; - l.c = n*(classes + 8 + 1); - l.out_w = l.w; - l.out_h = l.h; - l.out_c = l.c; - l.classes = classes; - l.cost = (float*)calloc(1, sizeof(float)); - l.biases = (float*)calloc(total*2, sizeof(float)); - if(mask) l.mask = mask; - else{ - l.mask = (int*)calloc(n, sizeof(int)); - for(i = 0; i < n; ++i){ - l.mask[i] = i; - } - } - l.bias_updates = (float*)calloc(n*2, sizeof(float)); - l.outputs = h*w*n*(classes + 8 + 1); - l.inputs = l.outputs; - l.max_boxes = max_boxes; - l.truths = l.max_boxes*(4 + 1); - l.delta = (float*)calloc(batch*l.outputs, sizeof(float)); - l.output = (float*)calloc(batch*l.outputs, sizeof(float)); - for(i = 0; i < total*2; ++i){ - l.biases[i] = .5; - } - - l.forward = forward_gaussian_yolo_layer; - l.backward = backward_gaussian_yolo_layer; -#ifdef GPU - l.forward_gpu = forward_gaussian_yolo_layer_gpu; - l.backward_gpu = backward_gaussian_yolo_layer_gpu; - l.output_gpu = cuda_make_array(l.output, batch*l.outputs); - l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs); - - - free(l.output); - if (cudaSuccess == cudaHostAlloc(&l.output, batch*l.outputs * sizeof(float), cudaHostRegisterMapped)) l.output_pinned = 1; - else { - cudaGetLastError(); // reset CUDA-error - l.output = (float*)calloc(batch * l.outputs, sizeof(float)); - } - - free(l.delta); - if (cudaSuccess == cudaHostAlloc(&l.delta, batch*l.outputs * sizeof(float), 
cudaHostRegisterMapped)) l.delta_pinned = 1; - else { - cudaGetLastError(); // reset CUDA-error - l.delta = (float*)calloc(batch * l.outputs, sizeof(float)); - } - -#endif - - //fprintf(stderr, "Gaussian_yolo\n"); - srand(time(0)); - - return l; -} - -void resize_gaussian_yolo_layer(layer *l, int w, int h) -{ - l->w = w; - l->h = h; - - l->outputs = h*w*l->n*(l->classes + 8 + 1); - l->inputs = l->outputs; - - //l->output = (float *)realloc(l->output, l->batch*l->outputs * sizeof(float)); - //l->delta = (float *)realloc(l->delta, l->batch*l->outputs * sizeof(float)); - - if (!l->output_pinned) l->output = (float*)realloc(l->output, l->batch*l->outputs * sizeof(float)); - if (!l->delta_pinned) l->delta = (float*)realloc(l->delta, l->batch*l->outputs * sizeof(float)); - -#ifdef GPU - - if (l->output_pinned) { - CHECK_CUDA(cudaFreeHost(l->output)); - if (cudaSuccess != cudaHostAlloc(&l->output, l->batch*l->outputs * sizeof(float), cudaHostRegisterMapped)) { - cudaGetLastError(); // reset CUDA-error - l->output = (float*)calloc(l->batch * l->outputs, sizeof(float)); - l->output_pinned = 0; - } - } - - if (l->delta_pinned) { - CHECK_CUDA(cudaFreeHost(l->delta)); - if (cudaSuccess != cudaHostAlloc(&l->delta, l->batch*l->outputs * sizeof(float), cudaHostRegisterMapped)) { - cudaGetLastError(); // reset CUDA-error - l->delta = (float*)calloc(l->batch * l->outputs, sizeof(float)); - l->delta_pinned = 0; - } - } - - - cuda_free(l->delta_gpu); - cuda_free(l->output_gpu); - - l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); - l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); -#endif -} - -box get_gaussian_yolo_box(float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, int stride, YOLO_POINT yolo_point) -{ - box b; - - b.w = exp(x[index + 4 * stride]) * biases[2 * n] / w; - b.h = exp(x[index + 6 * stride]) * biases[2 * n + 1] / h; - b.x = (i + x[index + 0 * stride]) / lw; - b.y = (j + x[index + 2 * stride]) / lh; - - 
if (yolo_point == YOLO_CENTER) { - } - else if (yolo_point == YOLO_LEFT_TOP) { - b.x = (i + x[index + 0 * stride]) / lw + b.w / 2; - b.y = (j + x[index + 2 * stride]) / lh + b.h / 2; - } - else if (yolo_point == YOLO_RIGHT_BOTTOM) { - b.x = (i + x[index + 0 * stride]) / lw - b.w / 2; - b.y = (j + x[index + 2 * stride]) / lh - b.h / 2; - } - - return b; -} - -static inline float fix_nan_inf(float val) -{ - if (isnan(val) || isinf(val)) val = 0; - return val; -} - -static inline float clip_value(float val, const float max_val) -{ - if (val > max_val) val = max_val; - else if (val < -max_val) val = -max_val; - return val; -} - -float delta_gaussian_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, - float scale, int stride, float iou_normalizer, IOU_LOSS iou_loss, float uc_normalizer, int accumulate, YOLO_POINT yolo_point, float max_delta) -{ - box pred = get_gaussian_yolo_box(x, biases, n, index, i, j, lw, lh, w, h, stride, yolo_point); - - float iou; - ious all_ious = { 0 }; - all_ious.iou = box_iou(pred, truth); - all_ious.giou = box_giou(pred, truth); - all_ious.diou = box_diou(pred, truth); - all_ious.ciou = box_ciou(pred, truth); - if (pred.w == 0) { pred.w = 1.0; } - if (pred.h == 0) { pred.h = 1.0; } - - float sigma_const = 0.3; - float epsi = pow(10,-9); - - float dx, dy, dw, dh; - - iou = all_ious.iou; - - float tx, ty, tw, th; - - tx = (truth.x*lw - i); - ty = (truth.y*lh - j); - tw = log(truth.w*w / biases[2 * n]); - th = log(truth.h*h / biases[2 * n + 1]); - - if (yolo_point == YOLO_CENTER) { - } - else if (yolo_point == YOLO_LEFT_TOP) { - tx = ((truth.x - truth.w / 2)*lw - i); - ty = ((truth.y - truth.h / 2)*lh - j); - } - else if (yolo_point == YOLO_RIGHT_BOTTOM) { - tx = ((truth.x + truth.w / 2)*lw - i); - ty = ((truth.y + truth.h / 2)*lh - j); - } - - dx = (tx - x[index + 0 * stride]); - dy = (ty - x[index + 2 * stride]); - dw = (tw - x[index + 4 * stride]); - dh = (th - x[index + 
6 * stride]); - - // Gaussian - float in_exp_x = dx / x[index+1*stride]; - float in_exp_x_2 = pow(in_exp_x, 2); - float normal_dist_x = exp(in_exp_x_2*(-1./2.))/(sqrt(M_PI * 2.0)*(x[index+1*stride]+sigma_const)); - - float in_exp_y = dy / x[index+3*stride]; - float in_exp_y_2 = pow(in_exp_y, 2); - float normal_dist_y = exp(in_exp_y_2*(-1./2.))/(sqrt(M_PI * 2.0)*(x[index+3*stride]+sigma_const)); - - float in_exp_w = dw / x[index+5*stride]; - float in_exp_w_2 = pow(in_exp_w, 2); - float normal_dist_w = exp(in_exp_w_2*(-1./2.))/(sqrt(M_PI * 2.0)*(x[index+5*stride]+sigma_const)); - - float in_exp_h = dh / x[index+7*stride]; - float in_exp_h_2 = pow(in_exp_h, 2); - float normal_dist_h = exp(in_exp_h_2*(-1./2.))/(sqrt(M_PI * 2.0)*(x[index+7*stride]+sigma_const)); - - float temp_x = (1./2.) * 1./(normal_dist_x+epsi) * normal_dist_x * scale; - float temp_y = (1./2.) * 1./(normal_dist_y+epsi) * normal_dist_y * scale; - float temp_w = (1./2.) * 1./(normal_dist_w+epsi) * normal_dist_w * scale; - float temp_h = (1./2.) * 1./(normal_dist_h+epsi) * normal_dist_h * scale; - - if (!accumulate) { - delta[index + 0 * stride] = 0; - delta[index + 1 * stride] = 0; - delta[index + 2 * stride] = 0; - delta[index + 3 * stride] = 0; - delta[index + 4 * stride] = 0; - delta[index + 5 * stride] = 0; - delta[index + 6 * stride] = 0; - delta[index + 7 * stride] = 0; - } - - float delta_x = temp_x * in_exp_x * (1. / x[index + 1 * stride]); - float delta_y = temp_y * in_exp_y * (1. / x[index + 3 * stride]); - float delta_w = temp_w * in_exp_w * (1. / x[index + 5 * stride]); - float delta_h = temp_h * in_exp_h * (1. / x[index + 7 * stride]); - - float delta_ux = temp_x * (in_exp_x_2 / x[index + 1 * stride] - 1. / (x[index + 1 * stride] + sigma_const)); - float delta_uy = temp_y * (in_exp_y_2 / x[index + 3 * stride] - 1. / (x[index + 3 * stride] + sigma_const)); - float delta_uw = temp_w * (in_exp_w_2 / x[index + 5 * stride] - 1. 
/ (x[index + 5 * stride] + sigma_const)); - float delta_uh = temp_h * (in_exp_h_2 / x[index + 7 * stride] - 1. / (x[index + 7 * stride] + sigma_const)); - - if (iou_loss != MSE) { - // GIoU - iou = all_ious.giou; - - // https://github.com/generalized-iou/g-darknet - // https://arxiv.org/abs/1902.09630v2 - // https://giou.stanford.edu/ - // https://arxiv.org/abs/1911.08287v1 - // https://github.com/Zzh-tju/DIoU-darknet - all_ious.dx_iou = dx_box_iou(pred, truth, iou_loss); - - float dx, dy, dw, dh; - - dx = all_ious.dx_iou.dt; - dy = all_ious.dx_iou.db; - dw = all_ious.dx_iou.dl; - dh = all_ious.dx_iou.dr; - - if (yolo_point == YOLO_CENTER) { - } - else if (yolo_point == YOLO_LEFT_TOP) { - dx = dx - dw/2; - dy = dy - dh/2; - } - else if (yolo_point == YOLO_RIGHT_BOTTOM) { - dx = dx + dw / 2; - dy = dy + dh / 2; - } - - // jacobian^t (transpose) - //float dx = (all_ious.dx_iou.dl + all_ious.dx_iou.dr); - //float dy = (all_ious.dx_iou.dt + all_ious.dx_iou.db); - //float dw = ((-0.5 * all_ious.dx_iou.dl) + (0.5 * all_ious.dx_iou.dr)); - //float dh = ((-0.5 * all_ious.dx_iou.dt) + (0.5 * all_ious.dx_iou.db)); - - // predict exponential, apply gradient of e^delta_t ONLY for w,h - dw *= exp(x[index + 4 * stride]); - dh *= exp(x[index + 6 * stride]); - - delta_x = dx; - delta_y = dy; - delta_w = dw; - delta_h = dh; - } - - // normalize iou weight, for GIoU - delta_x *= iou_normalizer; - delta_y *= iou_normalizer; - delta_w *= iou_normalizer; - delta_h *= iou_normalizer; - - // normalize Uncertainty weight - delta_ux *= uc_normalizer; - delta_uy *= uc_normalizer; - delta_uw *= uc_normalizer; - delta_uh *= uc_normalizer; - - delta_x = fix_nan_inf(delta_x); - delta_y = fix_nan_inf(delta_y); - delta_w = fix_nan_inf(delta_w); - delta_h = fix_nan_inf(delta_h); - - delta_ux = fix_nan_inf(delta_ux); - delta_uy = fix_nan_inf(delta_uy); - delta_uw = fix_nan_inf(delta_uw); - delta_uh = fix_nan_inf(delta_uh); - - if (max_delta != FLT_MAX) { - delta_x = clip_value(delta_x, max_delta); 
- delta_y = clip_value(delta_y, max_delta); - delta_w = clip_value(delta_w, max_delta); - delta_h = clip_value(delta_h, max_delta); - - delta_ux = clip_value(delta_ux, max_delta); - delta_uy = clip_value(delta_uy, max_delta); - delta_uw = clip_value(delta_uw, max_delta); - delta_uh = clip_value(delta_uh, max_delta); - } - - delta[index + 0 * stride] += delta_x; - delta[index + 2 * stride] += delta_y; - delta[index + 4 * stride] += delta_w; - delta[index + 6 * stride] += delta_h; - - delta[index + 1 * stride] += delta_ux; - delta[index + 3 * stride] += delta_uy; - delta[index + 5 * stride] += delta_uw; - delta[index + 7 * stride] += delta_uh; - return iou; -} - -void averages_gaussian_yolo_deltas(int class_index, int box_index, int stride, int classes, float *delta) -{ - - int classes_in_one_box = 0; - int c; - for (c = 0; c < classes; ++c) { - if (delta[class_index + stride*c] > 0) classes_in_one_box++; - } - - if (classes_in_one_box > 0) { - delta[box_index + 0 * stride] /= classes_in_one_box; - delta[box_index + 1 * stride] /= classes_in_one_box; - delta[box_index + 2 * stride] /= classes_in_one_box; - delta[box_index + 3 * stride] /= classes_in_one_box; - delta[box_index + 4 * stride] /= classes_in_one_box; - delta[box_index + 5 * stride] /= classes_in_one_box; - delta[box_index + 6 * stride] /= classes_in_one_box; - delta[box_index + 7 * stride] /= classes_in_one_box; - } -} - -void delta_gaussian_yolo_class(float *output, float *delta, int index, int class_id, int classes, int stride, float *avg_cat, float label_smooth_eps, float *classes_multipliers, float cls_normalizer) -{ - int n; - if (delta[index]){ - float y_true = 1; - if (label_smooth_eps) y_true = y_true * (1 - label_smooth_eps) + 0.5*label_smooth_eps; - delta[index + stride*class_id] = y_true - output[index + stride*class_id]; - //delta[index + stride*class_id] = 1 - output[index + stride*class_id]; - - if (classes_multipliers) delta[index + stride*class_id] *= classes_multipliers[class_id]; - 
if(avg_cat) *avg_cat += output[index + stride*class_id]; - return; - } - for(n = 0; n < classes; ++n){ - float y_true = ((n == class_id) ? 1 : 0); - if (label_smooth_eps) y_true = y_true * (1 - label_smooth_eps) + 0.5*label_smooth_eps; - delta[index + stride*n] = y_true - output[index + stride*n]; - - if (classes_multipliers && n == class_id) delta[index + stride*class_id] *= classes_multipliers[class_id] * cls_normalizer; - if(n == class_id && avg_cat) *avg_cat += output[index + stride*n]; - } -} - -int compare_gaussian_yolo_class(float *output, int classes, int class_index, int stride, float objectness, int class_id, float conf_thresh) -{ - int j; - for (j = 0; j < classes; ++j) { - //float prob = objectness * output[class_index + stride*j]; - float prob = output[class_index + stride*j]; - if (prob > conf_thresh) { - return 1; - } - } - return 0; -} - -static int entry_gaussian_index(layer l, int batch, int location, int entry) -{ - int n = location / (l.w*l.h); - int loc = location % (l.w*l.h); - return batch*l.outputs + n*l.w*l.h*(8+l.classes+1) + entry*l.w*l.h + loc; -} - -void forward_gaussian_yolo_layer(const layer l, network_state state) -{ - int i,j,b,t,n; - memcpy(l.output, state.input, l.outputs*l.batch*sizeof(float)); - -#ifndef GPU - for (b = 0; b < l.batch; ++b){ - for(n = 0; n < l.n; ++n){ - // x : mu, sigma - int index = entry_gaussian_index(l, b, n*l.w*l.h, 0); - activate_array(l.output + index, 2*l.w*l.h, LOGISTIC); - scal_add_cpu(l.w*l.h, l.scale_x_y, -0.5*(l.scale_x_y - 1), l.output + index, 1); // scale x - // y : mu, sigma - index = entry_gaussian_index(l, b, n*l.w*l.h, 2); - activate_array(l.output + index, 2*l.w*l.h, LOGISTIC); - scal_add_cpu(l.w*l.h, l.scale_x_y, -0.5*(l.scale_x_y - 1), l.output + index, 1); // scale y - // w : sigma - index = entry_gaussian_index(l, b, n*l.w*l.h, 5); - activate_array(l.output + index, l.w*l.h, LOGISTIC); - // h : sigma - index = entry_gaussian_index(l, b, n*l.w*l.h, 7); - activate_array(l.output + index, 
l.w*l.h, LOGISTIC); - // objectness & class - index = entry_gaussian_index(l, b, n*l.w*l.h, 8); - activate_array(l.output + index, (1+l.classes)*l.w*l.h, LOGISTIC); - } - } -#endif - - memset(l.delta, 0, l.outputs * l.batch * sizeof(float)); - if (!state.train) return; - float avg_iou = 0; - float recall = 0; - float recall75 = 0; - float avg_cat = 0; - float avg_obj = 0; - float avg_anyobj = 0; - int count = 0; - int class_count = 0; - *(l.cost) = 0; - for (b = 0; b < l.batch; ++b) { - for (j = 0; j < l.h; ++j) { - for (i = 0; i < l.w; ++i) { - for (n = 0; n < l.n; ++n) { - const int class_index = entry_gaussian_index(l, b, n*l.w*l.h + j*l.w + i, 9); - const int obj_index = entry_gaussian_index(l, b, n*l.w*l.h + j*l.w + i, 8); - const int box_index = entry_gaussian_index(l, b, n*l.w*l.h + j*l.w + i, 0); - const int stride = l.w*l.h; - box pred = get_gaussian_yolo_box(l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.w*l.h, l.yolo_point); - float best_match_iou = 0; - int best_match_t = 0; - float best_iou = 0; - int best_t = 0; - for(t = 0; t < l.max_boxes; ++t){ - box truth = float_to_box_stride(state.truth + t*(4 + 1) + b*l.truths, 1); - int class_id = state.truth[t*(4 + 1) + b*l.truths + 4]; - if (class_id >= l.classes) { - printf("\n Warning: in txt-labels class_id=%d >= classes=%d in cfg-file. 
In txt-labels class_id should be [from 0 to %d] \n", class_id, l.classes, l.classes - 1); - printf(" truth.x = %f, truth.y = %f, truth.w = %f, truth.h = %f, class_id = %d \n", truth.x, truth.y, truth.w, truth.h, class_id); - if (check_mistakes) getchar(); - continue; // if label contains class_id more than number of classes in the cfg-file - } - if(!truth.x) break; - - - float objectness = l.output[obj_index]; - int class_id_match = compare_gaussian_yolo_class(l.output, l.classes, class_index, l.w*l.h, objectness, class_id, 0.25f); - - float iou = box_iou(pred, truth); - if (iou > best_match_iou && class_id_match == 1) { - best_match_iou = iou; - best_match_t = t; - } - if (iou > best_iou) { - best_iou = iou; - best_t = t; - } - } - - avg_anyobj += l.output[obj_index]; - l.delta[obj_index] = l.obj_normalizer * (0 - l.output[obj_index]); - if (best_match_iou > l.ignore_thresh) { - const float iou_multiplier = best_match_iou*best_match_iou;// (best_match_iou - l.ignore_thresh) / (1.0 - l.ignore_thresh); - if (l.objectness_smooth) { - l.delta[obj_index] = l.obj_normalizer * (iou_multiplier - l.output[obj_index]); - - int class_id = state.truth[best_match_t*(4 + 1) + b*l.truths + 4]; - if (l.map) class_id = l.map[class_id]; - delta_gaussian_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, 0, l.label_smooth_eps, l.classes_multipliers, l.cls_normalizer); - } - else l.delta[obj_index] = 0; - } - else if (state.net.adversarial) { - float scale = pred.w * pred.h; - if (scale > 0) scale = sqrt(scale); - l.delta[obj_index] = scale * l.obj_normalizer * (0 - l.output[obj_index]); - int cl_id; - for (cl_id = 0; cl_id < l.classes; ++cl_id) { - if (l.output[class_index + stride*cl_id] * l.output[obj_index] > 0.25) - l.delta[class_index + stride*cl_id] = scale * (0 - l.output[class_index + stride*cl_id]); - } - } - if (best_iou > l.truth_thresh) { - const float iou_multiplier = best_iou*best_iou;// (best_iou - l.truth_thresh) / (1.0 - l.truth_thresh); - if 
(l.objectness_smooth) l.delta[obj_index] = l.obj_normalizer * (iou_multiplier - l.output[obj_index]); - else l.delta[obj_index] = l.obj_normalizer * (1 - l.output[obj_index]); - //l.delta[obj_index] = l.obj_normalizer * (1 - l.output[obj_index]); - - int class_id = state.truth[best_t*(4 + 1) + b*l.truths + 4]; - if (l.map) class_id = l.map[class_id]; - delta_gaussian_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, 0, l.label_smooth_eps, l.classes_multipliers, l.cls_normalizer); - const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f; - if (l.objectness_smooth) l.delta[class_index + stride*class_id] = class_multiplier * (iou_multiplier - l.output[class_index + stride*class_id]); - box truth = float_to_box_stride(state.truth + best_t*(4 + 1) + b*l.truths, 1); - delta_gaussian_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, l.uc_normalizer, 1, l.yolo_point, l.max_delta); - } - } - } - } - for(t = 0; t < l.max_boxes; ++t){ - box truth = float_to_box_stride(state.truth + t*(4 + 1) + b*l.truths, 1); - - if(!truth.x) break; - float best_iou = 0; - int best_n = 0; - i = (truth.x * l.w); - j = (truth.y * l.h); - - if (l.yolo_point == YOLO_CENTER) { - } - else if (l.yolo_point == YOLO_LEFT_TOP) { - i = min_val_cmp(l.w-1, max_val_cmp(0, ((truth.x - truth.w / 2) * l.w))); - j = min_val_cmp(l.h-1, max_val_cmp(0, ((truth.y - truth.h / 2) * l.h))); - } - else if (l.yolo_point == YOLO_RIGHT_BOTTOM) { - i = min_val_cmp(l.w-1, max_val_cmp(0, ((truth.x + truth.w / 2) * l.w))); - j = min_val_cmp(l.h-1, max_val_cmp(0, ((truth.y + truth.h / 2) * l.h))); - } - - box truth_shift = truth; - truth_shift.x = truth_shift.y = 0; - for(n = 0; n < l.total; ++n){ - box pred = {0}; - pred.w = l.biases[2*n]/ state.net.w; - pred.h = l.biases[2*n+1]/ state.net.h; - float iou = box_iou(pred, 
truth_shift); - if (iou > best_iou){ - best_iou = iou; - best_n = n; - } - } - - int mask_n = int_index(l.mask, best_n, l.n); - if(mask_n >= 0){ - int class_id = state.truth[t*(4 + 1) + b*l.truths + 4]; - if (l.map) class_id = l.map[class_id]; - - int box_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0); - const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f; - float iou = delta_gaussian_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, l.uc_normalizer, 1, l.yolo_point, l.max_delta); - - int obj_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 8); - avg_obj += l.output[obj_index]; - l.delta[obj_index] = class_multiplier * l.obj_normalizer * (1 - l.output[obj_index]); - - int class_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 9); - delta_gaussian_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, &avg_cat, l.label_smooth_eps, l.classes_multipliers, l.cls_normalizer); - - ++count; - ++class_count; - if(iou > .5) recall += 1; - if(iou > .75) recall75 += 1; - avg_iou += iou; - } - - - // iou_thresh - for (n = 0; n < l.total; ++n) { - int mask_n = int_index(l.mask, n, l.n); - if (mask_n >= 0 && n != best_n && l.iou_thresh < 1.0f) { - box pred = { 0 }; - pred.w = l.biases[2 * n] / state.net.w; - pred.h = l.biases[2 * n + 1] / state.net.h; - float iou = box_iou_kind(pred, truth_shift, l.iou_thresh_kind); // IOU, GIOU, MSE, DIOU, CIOU - // iou, n - - if (iou > l.iou_thresh) { - int class_id = state.truth[t*(4 + 1) + b*l.truths + 4]; - if (l.map) class_id = l.map[class_id]; - - int box_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0); - const float class_multiplier = (l.classes_multipliers) ? 
l.classes_multipliers[class_id] : 1.0f; - float iou = delta_gaussian_yolo_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w*truth.h), l.w*l.h, l.iou_normalizer * class_multiplier, l.iou_loss, l.uc_normalizer, 1, l.yolo_point, l.max_delta); - - int obj_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 8); - avg_obj += l.output[obj_index]; - l.delta[obj_index] = class_multiplier * l.obj_normalizer * (1 - l.output[obj_index]); - - int class_index = entry_gaussian_index(l, b, mask_n*l.w*l.h + j*l.w + i, 9); - delta_gaussian_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, &avg_cat, l.label_smooth_eps, l.classes_multipliers, l.cls_normalizer); - - ++count; - ++class_count; - if (iou > .5) recall += 1; - if (iou > .75) recall75 += 1; - avg_iou += iou; - } - } - } - } - - // averages the deltas obtained by the function: delta_yolo_box()_accumulate - for (j = 0; j < l.h; ++j) { - for (i = 0; i < l.w; ++i) { - for (n = 0; n < l.n; ++n) { - int box_index = entry_gaussian_index(l, b, n*l.w*l.h + j*l.w + i, 0); - int class_index = entry_gaussian_index(l, b, n*l.w*l.h + j*l.w + i, 9); - const int stride = l.w*l.h; - - averages_gaussian_yolo_deltas(class_index, box_index, stride, l.classes, l.delta); - } - } - } - } - - - // calculate: Classification-loss, IoU-loss and Uncertainty-loss - const int stride = l.w*l.h; - float* classification_lost = (float *)calloc(l.batch * l.outputs, sizeof(float)); - memcpy(classification_lost, l.delta, l.batch * l.outputs * sizeof(float)); - - - for (b = 0; b < l.batch; ++b) { - for (j = 0; j < l.h; ++j) { - for (i = 0; i < l.w; ++i) { - for (n = 0; n < l.n; ++n) { - int box_index = entry_gaussian_index(l, b, n*l.w*l.h + j*l.w + i, 0); - - classification_lost[box_index + 0 * stride] = 0; - classification_lost[box_index + 1 * stride] = 0; - classification_lost[box_index + 2 * stride] = 0; - classification_lost[box_index + 3 * stride] = 0; - 
classification_lost[box_index + 4 * stride] = 0; - classification_lost[box_index + 5 * stride] = 0; - classification_lost[box_index + 6 * stride] = 0; - classification_lost[box_index + 7 * stride] = 0; - } - } - } - } - float class_loss = pow(mag_array(classification_lost, l.outputs * l.batch), 2); - free(classification_lost); - - - float* except_uncertainty_lost = (float *)calloc(l.batch * l.outputs, sizeof(float)); - memcpy(except_uncertainty_lost, l.delta, l.batch * l.outputs * sizeof(float)); - for (b = 0; b < l.batch; ++b) { - for (j = 0; j < l.h; ++j) { - for (i = 0; i < l.w; ++i) { - for (n = 0; n < l.n; ++n) { - int box_index = entry_gaussian_index(l, b, n*l.w*l.h + j*l.w + i, 0); - except_uncertainty_lost[box_index + 4 * stride] = 0; - except_uncertainty_lost[box_index + 5 * stride] = 0; - except_uncertainty_lost[box_index + 6 * stride] = 0; - except_uncertainty_lost[box_index + 7 * stride] = 0; - } - } - } - } - float except_uc_loss = pow(mag_array(except_uncertainty_lost, l.outputs * l.batch), 2); - free(except_uncertainty_lost); - - *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); - - float loss = pow(mag_array(l.delta, l.outputs * l.batch), 2); - float uc_loss = loss - except_uc_loss; - float iou_loss = except_uc_loss - class_loss; - - loss /= l.batch; - class_loss /= l.batch; - uc_loss /= l.batch; - iou_loss /= l.batch; - - fprintf(stderr, "Region %d Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, .5R: %f, .75R: %f, count: %d, class_loss = %.2f, iou_loss = %.2f, uc_loss = %.2f, total_loss = %.2f \n", - state.index, avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, recall75/count, count, - class_loss, iou_loss, uc_loss, loss); -} - -void backward_gaussian_yolo_layer(const layer l, network_state state) -{ - axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, state.delta, 1); -} - -void correct_gaussian_yolo_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative, int letter) -{ - int 
i; - int new_w=0; - int new_h=0; - if (letter) { - if (((float)netw / w) < ((float)neth / h)) { - new_w = netw; - new_h = (h * netw) / w; - } - else { - new_h = neth; - new_w = (w * neth) / h; - } - } - else { - new_w = netw; - new_h = neth; - } - /* - if (((float)netw/w) < ((float)neth/h)) { - new_w = netw; - new_h = (h * netw)/w; - } else { - new_h = neth; - new_w = (w * neth)/h; - } - */ - for (i = 0; i < n; ++i){ - box b = dets[i].bbox; - b.x = (b.x - (netw - new_w)/2./netw) / ((float)new_w/netw); - b.y = (b.y - (neth - new_h)/2./neth) / ((float)new_h/neth); - b.w *= (float)netw/new_w; - b.h *= (float)neth/new_h; - if(!relative){ - b.x *= w; - b.w *= w; - b.y *= h; - b.h *= h; - } - dets[i].bbox = b; - } -} - -int gaussian_yolo_num_detections(layer l, float thresh) -{ - int i, n; - int count = 0; - for (i = 0; i < l.w*l.h; ++i){ - for(n = 0; n < l.n; ++n){ - int obj_index = entry_gaussian_index(l, 0, n*l.w*l.h + i, 8); - if(l.output[obj_index] > thresh){ - ++count; - } - } - } - return count; -} - -/* -void avg_flipped_gaussian_yolo(layer l) -{ - int i,j,n,z; - float *flip = l.output + l.outputs; - for (j = 0; j < l.h; ++j) { - for (i = 0; i < l.w/2; ++i) { - for (n = 0; n < l.n; ++n) { - for(z = 0; z < l.classes + 8 + 1; ++z){ - int i1 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + i; - int i2 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + (l.w - i - 1); - float swap = flip[i1]; - flip[i1] = flip[i2]; - flip[i2] = swap; - if(z == 0){ - flip[i1] = -flip[i1]; - flip[i2] = -flip[i2]; - } - } - } - } - } - for(i = 0; i < l.outputs; ++i){ - l.output[i] = (l.output[i] + flip[i])/2.; - } -} -*/ - -int get_gaussian_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, int relative, detection *dets, int letter) -{ - int i,j,n; - float *predictions = l.output; - //if (l.batch == 2) avg_flipped_gaussian_yolo(l); - int count = 0; - for (i = 0; i < l.w*l.h; ++i){ - int row = i / l.w; - int col = i % l.w; - for(n = 0; n < l.n; ++n){ - int obj_index = 
entry_gaussian_index(l, 0, n*l.w*l.h + i, 8); - float objectness = predictions[obj_index]; - if (objectness <= thresh) continue; // incorrect behavior for Nan values - - if (objectness > thresh) { - int box_index = entry_gaussian_index(l, 0, n*l.w*l.h + i, 0); - dets[count].bbox = get_gaussian_yolo_box(predictions, l.biases, l.mask[n], box_index, col, row, l.w, l.h, netw, neth, l.w*l.h, l.yolo_point); - dets[count].objectness = objectness; - dets[count].classes = l.classes; - - dets[count].uc[0] = predictions[entry_gaussian_index(l, 0, n*l.w*l.h + i, 1)]; // tx uncertainty - dets[count].uc[1] = predictions[entry_gaussian_index(l, 0, n*l.w*l.h + i, 3)]; // ty uncertainty - dets[count].uc[2] = predictions[entry_gaussian_index(l, 0, n*l.w*l.h + i, 5)]; // tw uncertainty - dets[count].uc[3] = predictions[entry_gaussian_index(l, 0, n*l.w*l.h + i, 7)]; // th uncertainty - - dets[count].points = l.yolo_point; - //if (l.yolo_point != YOLO_CENTER) dets[count].objectness = objectness = 0; - - for (j = 0; j < l.classes; ++j) { - int class_index = entry_gaussian_index(l, 0, n*l.w*l.h + i, 9 + j); - float uc_aver = (dets[count].uc[0] + dets[count].uc[1] + dets[count].uc[2] + dets[count].uc[3]) / 4.0; - float prob = objectness*predictions[class_index] * (1.0 - uc_aver); - dets[count].prob[j] = (prob > thresh) ? 
prob : 0; - } - ++count; - } - } - } - correct_gaussian_yolo_boxes(dets, count, w, h, netw, neth, relative, letter); - return count; -} - -#ifdef GPU - -void forward_gaussian_yolo_layer_gpu(const layer l, network_state state) -{ - copy_ongpu(l.batch*l.inputs, state.input, 1, l.output_gpu, 1); - int b, n; - for (b = 0; b < l.batch; ++b) - { - for(n = 0; n < l.n; ++n) - { - // x : mu, sigma - int index = entry_gaussian_index(l, b, n*l.w*l.h, 0); - activate_array_ongpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC); - scal_add_ongpu(l.w*l.h, l.scale_x_y, -0.5*(l.scale_x_y - 1), l.output_gpu + index, 1); // scale x - // y : mu, sigma - index = entry_gaussian_index(l, b, n*l.w*l.h, 2); - activate_array_ongpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC); - scal_add_ongpu(l.w*l.h, l.scale_x_y, -0.5*(l.scale_x_y - 1), l.output_gpu + index, 1); // scale y - // w : sigma - index = entry_gaussian_index(l, b, n*l.w*l.h, 5); - activate_array_ongpu(l.output_gpu + index, l.w*l.h, LOGISTIC); - // h : sigma - index = entry_gaussian_index(l, b, n*l.w*l.h, 7); - activate_array_ongpu(l.output_gpu + index, l.w*l.h, LOGISTIC); - // objectness & class - index = entry_gaussian_index(l, b, n*l.w*l.h, 8); - activate_array_ongpu(l.output_gpu + index, (1+l.classes)*l.w*l.h, LOGISTIC); - } - } - - if (!state.train || l.onlyforward) { - //cuda_pull_array(l.output_gpu, l.output, l.batch*l.outputs); - cuda_pull_array_async(l.output_gpu, l.output, l.batch*l.outputs); - CHECK_CUDA(cudaPeekAtLastError()); - return; - } - - float *in_cpu = (float *)calloc(l.batch*l.inputs, sizeof(float)); - cuda_pull_array(l.output_gpu, l.output, l.batch*l.outputs); - memcpy(in_cpu, l.output, l.batch*l.outputs * sizeof(float)); - float *truth_cpu = 0; - if (state.truth) { - int num_truth = l.batch*l.truths; - truth_cpu = (float *)calloc(num_truth, sizeof(float)); - cuda_pull_array(state.truth, truth_cpu, num_truth); - } - network_state cpu_state = state; - cpu_state.net = state.net; - cpu_state.index = state.index; - 
cpu_state.train = state.train; - cpu_state.truth = truth_cpu; - cpu_state.input = in_cpu; - forward_gaussian_yolo_layer(l, cpu_state); - //forward_yolo_layer(l, state); - cuda_push_array(l.delta_gpu, l.delta, l.batch*l.outputs); - free(in_cpu); - if (cpu_state.truth) free(cpu_state.truth); -} - -void backward_gaussian_yolo_layer_gpu(const layer l, network_state state) -{ - axpy_ongpu(l.batch*l.inputs, l.delta_normalizer, l.delta_gpu, 1, state.delta, 1); -} -#endif diff --git a/src/Detector/darknet/src/gaussian_yolo_layer.h b/src/Detector/darknet/src/gaussian_yolo_layer.h deleted file mode 100644 index 9080881dc..000000000 --- a/src/Detector/darknet/src/gaussian_yolo_layer.h +++ /dev/null @@ -1,22 +0,0 @@ -//Gaussian YOLOv3 implementation -#ifndef GAUSSIAN_YOLO_LAYER_H -#define GAUSSIAN_YOLO_LAYER_H - -#include "darknet.h" -#include "layer.h" -#include "network.h" - -layer make_gaussian_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes, int max_boxes); -void forward_gaussian_yolo_layer(const layer l, network_state state); -void backward_gaussian_yolo_layer(const layer l, network_state state); -void resize_gaussian_yolo_layer(layer *l, int w, int h); -int gaussian_yolo_num_detections(layer l, float thresh); -int get_gaussian_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, int relative, detection *dets, int letter); -void correct_gaussian_yolo_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative, int letter); - -#ifdef GPU -void forward_gaussian_yolo_layer_gpu(const layer l, network_state state); -void backward_gaussian_yolo_layer_gpu(layer l, network_state state); -#endif - -#endif diff --git a/src/Detector/darknet/src/gemm.c b/src/Detector/darknet/src/gemm.c deleted file mode 100644 index 519751c06..000000000 --- a/src/Detector/darknet/src/gemm.c +++ /dev/null @@ -1,2865 +0,0 @@ -#include "gemm.h" -#include "utils.h" -#include "im2col.h" -#include "dark_cuda.h" -#include -#include 
-#include -#include -#include -#include -#ifdef _WIN32 -#include -#endif -#if defined(_OPENMP) -#include -#endif - -#define TILE_M 4 // 4 ops -#define TILE_N 16 // AVX2 = 2 ops * 8 floats -#define TILE_K 16 // loop -#ifdef __cplusplus -#define PUT_IN_REGISTER -#else -#define PUT_IN_REGISTER register -#endif - -void gemm_bin(int M, int N, int K, float ALPHA, - char *A, int lda, - float *B, int ldb, - float *C, int ldc) -{ - int i,j,k; - for(i = 0; i < M; ++i){ - for(k = 0; k < K; ++k){ - char A_PART = A[i*lda+k]; - if(A_PART){ - for(j = 0; j < N; ++j){ - C[i*ldc+j] += B[k*ldb+j]; - } - } else { - for(j = 0; j < N; ++j){ - C[i*ldc+j] -= B[k*ldb+j]; - } - } - } - } -} - -float *random_matrix(int rows, int cols) -{ - int i; - float* m = (float*)xcalloc(rows * cols, sizeof(float)); - for(i = 0; i < rows*cols; ++i){ - m[i] = (float)rand()/RAND_MAX; - } - return m; -} - -void time_random_matrix(int TA, int TB, int m, int k, int n) -{ - float *a; - if(!TA) a = random_matrix(m,k); - else a = random_matrix(k,m); - int lda = (!TA)?k:m; - float *b; - if(!TB) b = random_matrix(k,n); - else b = random_matrix(n,k); - int ldb = (!TB)?n:k; - - float *c = random_matrix(m,n); - int i; - clock_t start = clock(), end; - for(i = 0; i<10; ++i){ - gemm_cpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c,n); - } - end = clock(); - printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf ms\n",m,k,k,n, TA, TB, (float)(end-start)/CLOCKS_PER_SEC); - free(a); - free(b); - free(c); -} - - -void gemm(int TA, int TB, int M, int N, int K, float ALPHA, - float *A, int lda, - float *B, int ldb, - float BETA, - float *C, int ldc) -{ - gemm_cpu( TA, TB, M, N, K, ALPHA,A,lda, B, ldb,BETA,C,ldc); -} - - -//-------------------------------------------- -// XNOR bitwise GEMM for binary neural network -//-------------------------------------------- - - -static inline unsigned char xnor(unsigned char a, unsigned char b) { - //return a == b; - return !(a^b); -} - -// INT-32 -static inline uint32_t get_bit_int32(uint32_t 
const*const src, size_t index) { - size_t src_i = index / 32; - int src_shift = index % 32; - unsigned char val = (src[src_i] & (1 << src_shift)) > 0; - return val; -} - -static inline uint32_t xnor_int32(uint32_t a, uint32_t b) { - return ~(a^b); -} - -static inline uint64_t xnor_int64(uint64_t a, uint64_t b) { - return ~(a^b); -} - - -static inline uint32_t fill_bit_int32(char src) { - if (src == 0) return 0x00000000; - else return 0xFFFFFFFF; -} - -static inline uint64_t fill_bit_int64(char src) { - if (src == 0) return 0x0000000000000000; - else return 0xFFFFFFFFFFFFFFFF; -} - -void binary_int32_printf(uint32_t src) { - int i; - for (i = 0; i < 32; ++i) { - if (src & 1) printf("1"); - else printf("0"); - src = src >> 1; - } - printf("\n"); -} - -void binary_int64_printf(uint64_t src) { - int i; - for (i = 0; i < 64; ++i) { - if (src & 1) printf("1"); - else printf("0"); - src = src >> 1; - } - printf("\n"); -} - -/* -void gemm_nn_custom_bin_mean(int M, int N, int K, float ALPHA_UNUSED, - unsigned char *A, int lda, - unsigned char *B, int ldb, - float *C, int ldc, float *mean_arr) -{ - int *count_arr = xcalloc(M*N, sizeof(int)); - - int i, j, k; - for (i = 0; i < M; ++i) { // l.n - filters [16 - 55 - 1024] - for (k = 0; k < K; ++k) { // l.size*l.size*l.c - one filter size [27 - 9216] - char a_bit = get_bit(A, i*lda + k); - - for (j = 0; j < N; ++j) { // out_h*out_w - one channel output size [169 - 173056] - char b_bit = get_bit(B, k*ldb + j); - count_arr[i*ldc + j] += xnor(a_bit, b_bit); - } - } - } - - for (i = 0; i < M; ++i) { - float mean_val = mean_arr[i]; - for (j = 0; j < N; ++j) { - C[i*ldc + j] = (2 * count_arr[i*ldc + j] - K) * mean_val; - } - } - free(count_arr); -} -*/ - -/* -void gemm_nn_custom_bin_mean_transposed(int M, int N, int K, float ALPHA_UNUSED, - unsigned char *A, int lda, - unsigned char *B, int ldb, - float *C, int ldc, float *mean_arr) -{ - int *count_arr = xcalloc(M*N, sizeof(int)); - - int i, j, k; - for (i = 0; i < M; ++i) { // l.n - 
filters [16 - 55 - 1024] - for (j = 0; j < N; ++j) { // out_h*out_w - one channel output size [169 - 173056] - for (k = 0; k < K; ++k) { // l.size*l.size*l.c - one filter size [27 - 9216] - char a_bit = get_bit(A, i*lda + k); - char b_bit = get_bit(B, j*ldb + k); - count_arr[i*ldc + j] += xnor(a_bit, b_bit); - } - } - } - - for (i = 0; i < M; ++i) { - float mean_val = mean_arr[i]; - for (j = 0; j < N; ++j) { - C[i*ldc + j] = (2 * count_arr[i*ldc + j] - K) * mean_val; - } - } - free(count_arr); -} -*/ - -/* -void gemm_nn_custom_bin_mean(int M, int N, int K, float ALPHA_UNUSED, - unsigned char *A, int lda, - unsigned char *B, int ldb, - float *C, int ldc, float *mean_arr) -{ - int *count_arr = xcalloc(M*N, sizeof(int)); - - int i; - -#pragma omp parallel for - for (i = 0; i < M; ++i) { // l.n - filters [16 - 55 - 1024] - int j, k, h; - for (k = 0; k < K; ++k) { // l.size*l.size*l.c - one filter size [27 - 9216] - const char a_bit = get_bit(A, i*lda + k); - uint64_t a_bit64 = fill_bit_int64(a_bit); - int k_ldb = k*ldb; - - for (j = 0; j < N; j += 64) { // out_h*out_w - one channel output size [169 - 173056] - if ((N - j > 64) && (k_ldb % 8 == 0)) { - uint64_t b_bit64 = *((uint64_t *)(B + (k_ldb + j) / 8)); - uint64_t c_bit64 = xnor_int64(a_bit64, b_bit64); - //printf("\n %d \n",__builtin_popcountll(c_bit64)); // gcc - printf("\n %d \n", __popcnt64(c_bit64)); // msvs - - int h; - for (h = 0; h < 64; ++h) - if ((c_bit64 >> h) & 1) count_arr[i*ldc + j + h] += 1; - - //binary_int64_printf(a_bit64); - //binary_int64_printf(b_bit64); - //binary_int64_printf(c_bit64); - } - else { - for (; j < N; ++j) { // out_h*out_w - one channel output size [169 - 173056] - char b_bit = get_bit(B, k_ldb + j); - if (xnor(a_bit, b_bit)) count_arr[i*ldc + j] += 1; - } - } - - } - } - } - - if (mean_arr) { - //int K_2 = K / 2; - for (i = 0; i < M; ++i) { - float mean_val = mean_arr[i]; - //float mean_val2 = 2 * mean_val; - for (j = 0; j < N; ++j) { - C[i*ldc + j] = (2 * count_arr[i*ldc + j] - 
K) * mean_val; - //C[i*ldc + j] = (count_arr[i*ldc + j] - K_2) *mean_val2; - } - } - } - else { - for (i = 0; i < M; ++i) { - for (j = 0; j < N; ++j) { - C[i*ldc + j] = count_arr[i*ldc + j] - K / 2; - } - } - } - - free(count_arr); - - //getchar(); -} -*/ - - -/* -void gemm_nn_custom_bin_mean_transposed(int M, int N, int K, float ALPHA_UNUSED, - unsigned char *A, int lda, - unsigned char *B, int ldb, - float *C, int ldc, float *mean_arr) -{ - int i; - -#pragma omp parallel for - for (i = 0; i < M; ++i) { // l.n - filters [16 - 55 - 1024] - int j, k, h; - float mean_val = mean_arr[i]; - - for (j = 0; j < N; ++j) { // out_h*out_w - one channel output size [169 - 173056] - int count = 0; - - for (k = 0; k < K; k += 64) { // l.size*l.size*l.c - one filter size [27 - 9216] - uint64_t a_bit64 = *((uint64_t *)(A + (i*lda + k) / 8)); - uint64_t b_bit64 = *((uint64_t *)(B + (j*ldb + k) / 8)); - uint64_t c_bit64 = xnor_int64(a_bit64, b_bit64); - -#ifdef WIN32 - int tmp_count = __popcnt64(c_bit64); -#else - int tmp_count = __builtin_popcountll(c_bit64); -#endif - - if (K - k < 64) tmp_count = tmp_count - (64 - (K - k)); // remove extra bits - count += tmp_count; - //binary_int64_printf(c_bit64); - //printf(", count = %d \n\n", tmp_count); - } - - C[i*ldc + j] = (2 * count - K) * mean_val; - } - } -} -*/ - -//---------------------------- - -// is not used -/* -void transpose_32x32_bits_my(uint32_t *A, uint32_t *B, int lda, int ldb) -{ - unsigned int x, y; - for (y = 0; y < 32; ++y) { - for (x = 0; x < 32; ++x) { - if (A[y * lda] & ((uint32_t)1 << x)) B[x * ldb] |= (uint32_t)1 << y; - } - } -} -*/ - -#ifndef GPU -uint8_t reverse_8_bit(uint8_t a) { - return ((a * 0x0802LU & 0x22110LU) | (a * 0x8020LU & 0x88440LU)) * 0x10101LU >> 16; -} - -uint32_t reverse_32_bit(uint32_t a) -{ - // unsigned int __rbit(unsigned int val) // for ARM //__asm__("rbit %0, %1\n" : "=r"(output) : "r"(input)); - return (reverse_8_bit(a >> 24) << 0) | - (reverse_8_bit(a >> 16) << 8) | - (reverse_8_bit(a 
>> 8) << 16) | - (reverse_8_bit(a >> 0) << 24); -} - -#define swap(a0, a1, j, m) t = (a0 ^ (a1 >>j)) & m; a0 = a0 ^ t; a1 = a1 ^ (t << j); - -void transpose32_optimized(uint32_t A[32]) { - int j, k; - unsigned m, t; - - //m = 0x0000FFFF; - //for (j = 16; j != 0; j = j >> 1, m = m ^ (m << j)) { - // for (k = 0; k < 32; k = (k + j + 1) & ~j) { - // t = (A[k] ^ (A[k + j] >> j)) & m; - // A[k] = A[k] ^ t; - // A[k + j] = A[k + j] ^ (t << j); - // } - //} - - j = 16; - m = 0x0000FFFF; - for (k = 0; k < 32; k = (k + j + 1) & ~j) { swap(A[k], A[k + j], j, m); } - - j = 8; - m = 0x00ff00ff; - for (k = 0; k < 32; k = (k + j + 1) & ~j) { swap(A[k], A[k + j], j, m); } - - j = 4; - m = 0x0f0f0f0f; - for (k = 0; k < 32; k = (k + j + 1) & ~j) { swap(A[k], A[k + j], j, m); } - - j = 2; - m = 0x33333333; - for (k = 0; k < 32; k = (k + j + 1) & ~j) { swap(A[k], A[k + j], j, m); } - - j = 1; - m = 0x55555555; - for (k = 0; k < 32; k = (k + j + 1) & ~j) { swap(A[k], A[k + j], j, m); } - - // reverse Y - for (j = 0; j < 16; ++j) { - uint32_t tmp = A[j]; - A[j] = reverse_32_bit(A[31 - j]); - A[31 - j] = reverse_32_bit(tmp); - } -} - -void transpose_32x32_bits_reversed_diagonale(uint32_t *A, uint32_t *B, int m, int n) -{ - unsigned A_tmp[32]; - int i; - #pragma unroll - for (i = 0; i < 32; ++i) A_tmp[i] = A[i * m]; - transpose32_optimized(A_tmp); - #pragma unroll - for (i = 0; i < 32; ++i) B[i*n] = A_tmp[i]; -} - - -void transpose_8x8_bits_my(unsigned char *A, unsigned char *B, int lda, int ldb) -{ - unsigned x, y; - for (y = 0; y < 8; ++y) { - for (x = 0; x < 8; ++x) { - if (A[y * lda] & (1 << x)) B[x * ldb] |= 1 << y; - } - } -} - -unsigned char reverse_byte_1(char a) -{ - return ((a & 0x1) << 7) | ((a & 0x2) << 5) | - ((a & 0x4) << 3) | ((a & 0x8) << 1) | - ((a & 0x10) >> 1) | ((a & 0x20) >> 3) | - ((a & 0x40) >> 5) | ((a & 0x80) >> 7); -} - -unsigned char reverse_byte(unsigned char a) -{ - return ((a * 0x0802LU & 0x22110LU) | (a * 0x8020LU & 0x88440LU)) * 0x10101LU >> 16; -} - 
-static unsigned char lookup[16] = { - 0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe, - 0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf, }; - -unsigned char reverse_byte_3(unsigned char n) { - // Reverse the top and bottom nibble then swap them. - return (lookup[n & 0b1111] << 4) | lookup[n >> 4]; -} - - -void transpose8rS32_reversed_diagonale(unsigned char* A, unsigned char* B, int m, int n) -{ - unsigned x, y, t; - - x = y = 0; - // Load the array and pack it into x and y. - //x = (A[0] << 24) | (A[m] << 16) | (A[2 * m] << 8) | A[3 * m]; - //y = (A[4 * m] << 24) | (A[5 * m] << 16) | (A[6 * m] << 8) | A[7 * m]; - - t = (x ^ (x >> 7)) & 0x00AA00AA; x = x ^ t ^ (t << 7); - t = (y ^ (y >> 7)) & 0x00AA00AA; y = y ^ t ^ (t << 7); - - t = (x ^ (x >> 14)) & 0x0000CCCC; x = x ^ t ^ (t << 14); - t = (y ^ (y >> 14)) & 0x0000CCCC; y = y ^ t ^ (t << 14); - - t = (x & 0xF0F0F0F0) | ((y >> 4) & 0x0F0F0F0F); - y = ((x << 4) & 0xF0F0F0F0) | (y & 0x0F0F0F0F); - x = t; - - B[7 * n] = reverse_byte(x >> 24); B[6 * n] = reverse_byte(x >> 16); B[5 * n] = reverse_byte(x >> 8); B[4 * n] = reverse_byte(x); - B[3 * n] = reverse_byte(y >> 24); B[2 * n] = reverse_byte(y >> 16); B[1 * n] = reverse_byte(y >> 8); B[0 * n] = reverse_byte(y); -} - -/* -// transpose by 8-bit -void transpose_bin(char *A, char *B, const int n, const int m, - const int lda, const int ldb, const int block_size) -{ - //printf("\n n = %d, ldb = %d \t\t m = %d, lda = %d \n", n, ldb, m, lda); - int i; - #pragma omp parallel for - for (i = 0; i < n; i += 8) { - int j; - for (j = 0; j < m; j += 8) { - int a_index = i*lda + j; - int b_index = j*ldb + i; - //transpose_8x8_bits_my(&A[a_index/8], &B[b_index/8], lda/8, ldb/8); - transpose8rS32_reversed_diagonale(&A[a_index / 8], &B[b_index / 8], lda / 8, ldb / 8); - } - for (; j < m; ++j) { - if (get_bit(A, i*lda + j)) set_bit(B, j*ldb + i); - } - } -} -*/ - -#endif - -// transpose by 32-bit -void transpose_bin(uint32_t *A, uint32_t *B, const int n, const int m, - const int lda, const int ldb, 
const int block_size) -{ - //printf("\n n = %d (n mod 32 = %d), m = %d (m mod 32 = %d) \n", n, n % 32, m, m % 32); - //printf("\n lda = %d (lda mod 32 = %d), ldb = %d (ldb mod 32 = %d) \n", lda, lda % 32, ldb, ldb % 32); - int i; - #pragma omp parallel for - for (i = 0; i < n; i += 32) { - int j; - for (j = 0; j < m; j += 32) { - int a_index = i*lda + j; - int b_index = j*ldb + i; - transpose_32x32_bits_reversed_diagonale(&A[a_index / 32], &B[b_index / 32], lda / 32, ldb / 32); - //transpose_32x32_bits_my(&A[a_index/32], &B[b_index/32], lda/32, ldb/32); - } - for (; j < m; ++j) { - if (get_bit((const unsigned char* const)A, i * lda + j)) set_bit((unsigned char* const)B, j * ldb + i); - } - } -} - -static inline int popcnt_32(uint32_t val32) { -#ifdef WIN32 // Windows MSVS - int tmp_count = __popcnt(val32); -#else // Linux GCC - int tmp_count = __builtin_popcount(val32); -#endif - return tmp_count; -} -//---------------------------- - -#if (defined(__AVX__) && defined(__x86_64__)) || (defined(_WIN64) && !defined(__MINGW32__)) - -#if (defined(_WIN64) && !defined(__MINGW64__)) -#include -#include -#include -#include - -#if defined(_MSC_VER) && _MSC_VER <= 1900 -static inline __int32 _mm256_extract_epi64(__m256i a, const int index) { - return a.m256i_i64[index]; -} - -static inline __int32 _mm256_extract_epi32(__m256i a, const int index) { - return a.m256i_i32[index]; -} -#endif - -static inline float _dn_castu32_f32(uint32_t a) { - return *((float *)&a); -} - -static inline float _mm256_extract_float32(__m256 a, const int index) { - return a.m256_f32[index]; -} - -#else // Linux GCC/Clang -#include -#include -#include -#include -#include - -static inline float _dn_castu32_f32(uint32_t a) { - return *((float *)&a); -} - -static inline float _mm256_extract_float32(__m256 a, const int index) { - switch(index) { - case 0: - return _dn_castu32_f32(_mm256_extract_epi32(_mm256_castps_si256(a), 0)); - case 1: - return 
_dn_castu32_f32(_mm256_extract_epi32(_mm256_castps_si256(a), 1)); - case 2: - return _dn_castu32_f32(_mm256_extract_epi32(_mm256_castps_si256(a), 2)); - case 3: - return _dn_castu32_f32(_mm256_extract_epi32(_mm256_castps_si256(a), 3)); - case 4: - return _dn_castu32_f32(_mm256_extract_epi32(_mm256_castps_si256(a), 4)); - case 5: - return _dn_castu32_f32(_mm256_extract_epi32(_mm256_castps_si256(a), 5)); - case 6: - return _dn_castu32_f32(_mm256_extract_epi32(_mm256_castps_si256(a), 6)); - case 7: - return _dn_castu32_f32(_mm256_extract_epi32(_mm256_castps_si256(a), 7)); - default: - return _dn_castu32_f32(_mm256_extract_epi32(_mm256_castps_si256(a), 0)); - } -} - -void asm_cpuid(uint32_t* abcd, uint32_t eax) -{ - uint32_t ebx = 0, edx = 0, ecx = 0; - - // EBX is saved to EDI and later restored - __asm__("movl %%ebx, %%edi;" - "cpuid;" - "xchgl %%ebx, %%edi;" - : "=D"(ebx), - "+a"(eax), "+c"(ecx), "=d"(edx)); - - abcd[0] = eax; - abcd[1] = ebx; - abcd[2] = ecx; - abcd[3] = edx; -} -#endif - - - -#ifdef _WIN32 -// Windows -#define cpuid(info, x) __cpuidex(info, x, 0) -#else -// GCC Intrinsics -void cpuid(int info[4], int InfoType) { - __cpuid_count(InfoType, 0, info[0], info[1], info[2], info[3]); -} -#endif - - -// Misc. 
-static int HW_MMX, HW_x64, HW_RDRAND, HW_BMI1, HW_BMI2, HW_ADX, HW_PREFETCHWT1; -static int HW_ABM; // Advanced Bit Manipulation - -// SIMD: 128-bit -static int HW_SSE, HW_SSE2, HW_SSE3, HW_SSSE3, HW_SSE41, HW_SSE42, HW_SSE4a, HW_AES, HW_SHA; - -// SIMD: 256-bit -static int HW_AVX, HW_XOP, HW_FMA3, HW_FMA4, HW_AVX2; - -// SIMD: 512-bit -static int HW_AVX512F; // AVX512 Foundation -static int HW_AVX512CD; // AVX512 Conflict Detection -static int HW_AVX512PF; // AVX512 Prefetch -static int HW_AVX512ER; // AVX512 Exponential + Reciprocal -static int HW_AVX512VL; // AVX512 Vector Length Extensions -static int HW_AVX512BW; // AVX512 Byte + Word -static int HW_AVX512DQ; // AVX512 Doubleword + Quadword -static int HW_AVX512IFMA; // AVX512 Integer 52-bit Fused Multiply-Add -static int HW_AVX512VBMI; // AVX512 Vector Byte Manipulation Instructions - -// https://stackoverflow.com/questions/6121792/how-to-check-if-a-cpu-supports-the-sse3-instruction-set -void check_cpu_features(void) { - int info[4]; - cpuid(info, 0); - int nIds = info[0]; - - cpuid(info, 0x80000000); - unsigned nExIds = info[0]; - - // Detect Features - if (nIds >= 0x00000001) { - cpuid(info, 0x00000001); - HW_MMX = (info[3] & ((uint32_t)1 << 23)) != 0; - HW_SSE = (info[3] & ((uint32_t)1 << 25)) != 0; - HW_SSE2 = (info[3] & ((uint32_t)1 << 26)) != 0; - HW_SSE3 = (info[2] & ((uint32_t)1 << 0)) != 0; - - HW_SSSE3 = (info[2] & ((uint32_t)1 << 9)) != 0; - HW_SSE41 = (info[2] & ((uint32_t)1 << 19)) != 0; - HW_SSE42 = (info[2] & ((uint32_t)1 << 20)) != 0; - HW_AES = (info[2] & ((uint32_t)1 << 25)) != 0; - - HW_AVX = (info[2] & ((uint32_t)1 << 28)) != 0; - HW_FMA3 = (info[2] & ((uint32_t)1 << 12)) != 0; - - HW_RDRAND = (info[2] & ((uint32_t)1 << 30)) != 0; - } - if (nIds >= 0x00000007) { - cpuid(info, 0x00000007); - HW_AVX2 = (info[1] & ((uint32_t)1 << 5)) != 0; - - HW_BMI1 = (info[1] & ((uint32_t)1 << 3)) != 0; - HW_BMI2 = (info[1] & ((uint32_t)1 << 8)) != 0; - HW_ADX = (info[1] & ((uint32_t)1 << 19)) != 0; - 
HW_SHA = (info[1] & ((uint32_t)1 << 29)) != 0; - HW_PREFETCHWT1 = (info[2] & ((uint32_t)1 << 0)) != 0; - - HW_AVX512F = (info[1] & ((uint32_t)1 << 16)) != 0; - HW_AVX512CD = (info[1] & ((uint32_t)1 << 28)) != 0; - HW_AVX512PF = (info[1] & ((uint32_t)1 << 26)) != 0; - HW_AVX512ER = (info[1] & ((uint32_t)1 << 27)) != 0; - HW_AVX512VL = (info[1] & ((uint32_t)1 << 31)) != 0; - HW_AVX512BW = (info[1] & ((uint32_t)1 << 30)) != 0; - HW_AVX512DQ = (info[1] & ((uint32_t)1 << 17)) != 0; - HW_AVX512IFMA = (info[1] & ((uint32_t)1 << 21)) != 0; - HW_AVX512VBMI = (info[2] & ((uint32_t)1 << 1)) != 0; - } - if (nExIds >= 0x80000001) { - cpuid(info, 0x80000001); - HW_x64 = (info[3] & ((uint32_t)1 << 29)) != 0; - HW_ABM = (info[2] & ((uint32_t)1 << 5)) != 0; - HW_SSE4a = (info[2] & ((uint32_t)1 << 6)) != 0; - HW_FMA4 = (info[2] & ((uint32_t)1 << 16)) != 0; - HW_XOP = (info[2] & ((uint32_t)1 << 11)) != 0; - } -} - -int is_avx() { - static int result = -1; - if (result == -1) { - check_cpu_features(); - result = HW_AVX; - if (result == 1) printf(" Used AVX \n"); - else printf(" Not used AVX \n"); - } - return result; -} - -int is_fma_avx2() { - static int result = -1; - if (result == -1) { - check_cpu_features(); - result = HW_FMA3 && HW_AVX2; - if (result == 1) printf(" Used FMA & AVX2 \n"); - else printf(" Not used FMA & AVX2 \n"); - } - return result; -} - -// https://software.intel.com/sites/landingpage/IntrinsicsGuide -void gemm_nn(int M, int N, int K, float ALPHA, - float *A, int lda, - float *B, int ldb, - float *C, int ldc) -{ - int i, j, k; - if (is_avx() == 1) { // AVX - for (i = 0; i < M; ++i) { - for (k = 0; k < K; ++k) { - float A_PART = ALPHA*A[i*lda + k]; - __m256 a256, b256, c256, result256; // AVX - a256 = _mm256_set1_ps(A_PART); - for (j = 0; j < N - 8; j += 8) { - b256 = _mm256_loadu_ps(&B[k*ldb + j]); - c256 = _mm256_loadu_ps(&C[i*ldc + j]); - // FMA - Intel Haswell (2013), AMD Piledriver (2012) - //result256 = _mm256_fmadd_ps(a256, b256, c256); - result256 = 
_mm256_mul_ps(a256, b256); - result256 = _mm256_add_ps(result256, c256); - _mm256_storeu_ps(&C[i*ldc + j], result256); - } - - int prev_end = (N % 8 == 0) ? (N - 8) : (N / 8) * 8; - for (j = prev_end; j < N; ++j) - C[i*ldc + j] += A_PART*B[k*ldb + j]; - } - } - } - else { - for (i = 0; i < M; ++i) { - for (k = 0; k < K; ++k) { - PUT_IN_REGISTER float A_PART = ALPHA * A[i * lda + k]; - for (j = 0; j < N; ++j) { - C[i*ldc + j] += A_PART*B[k*ldb + j]; - } - /* // SSE - __m128 a128, b128, c128, result128; // SSE - a128 = _mm_set1_ps(A_PART); - for (j = 0; j < N - 4; j += 4) { - b128 = _mm_loadu_ps(&B[k*ldb + j]); - c128 = _mm_loadu_ps(&C[i*ldc + j]); - //result128 = _mm_fmadd_ps(a128, b128, c128); - result128 = _mm_mul_ps(a128, b128); - result128 = _mm_add_ps(result128, c128); - _mm_storeu_ps(&C[i*ldc + j], result128); - } - - int prev_end = (N % 4 == 0) ? (N - 4) : (N / 4) * 4; - for (j = prev_end; j < N; ++j){ - C[i*ldc + j] += A_PART*B[k*ldb + j]; - } - */ - } - } - } -} - - - -void gemm_nn_fast(int M, int N, int K, float ALPHA, - float *A, int lda, - float *B, int ldb, - float *C, int ldc) -{ - int i; - - #pragma omp parallel for - for (i = 0; i < (M / TILE_M)*TILE_M; i += TILE_M) - { - int j, k; - int i_d, k_d; - - for (k = 0; k < (K / TILE_K)*TILE_K; k += TILE_K) - { - for (j = 0; j < (N / TILE_N)*TILE_N; j += TILE_N) - { - // L1 - 6 bits tag [11:6] - cache size 32 KB, conflict for each 4 KB - // L2 - 9 bits tag [14:6] - cache size 256 KB, conflict for each 32 KB - // L3 - 13 bits tag [18:6] - cache size 8 MB, conflict for each 512 KB - - __m256 result256; - __m256 a256_0, b256_0; // AVX - __m256 a256_1, b256_1; // AVX - __m256 a256_2;// , b256_2; // AVX - __m256 a256_3;// , b256_3; // AVX - __m256 c256_0, c256_1, c256_2, c256_3; - __m256 c256_4, c256_5, c256_6, c256_7; - - c256_0 = _mm256_loadu_ps(&C[(0 + i)*ldc + (0 + j)]); - c256_1 = _mm256_loadu_ps(&C[(1 + i)*ldc + (0 + j)]); - c256_2 = _mm256_loadu_ps(&C[(0 + i)*ldc + (8 + j)]); - c256_3 = 
_mm256_loadu_ps(&C[(1 + i)*ldc + (8 + j)]); - - c256_4 = _mm256_loadu_ps(&C[(2 + i)*ldc + (0 + j)]); - c256_5 = _mm256_loadu_ps(&C[(3 + i)*ldc + (0 + j)]); - c256_6 = _mm256_loadu_ps(&C[(2 + i)*ldc + (8 + j)]); - c256_7 = _mm256_loadu_ps(&C[(3 + i)*ldc + (8 + j)]); - - - for (k_d = 0; k_d < (TILE_K); ++k_d) - { - a256_0 = _mm256_set1_ps(ALPHA*A[(0 + i)*lda + (k_d + k)]); - a256_1 = _mm256_set1_ps(ALPHA*A[(1 + i)*lda + (k_d + k)]); - - a256_2 = _mm256_set1_ps(ALPHA*A[(2 + i)*lda + (k_d + k)]); - a256_3 = _mm256_set1_ps(ALPHA*A[(3 + i)*lda + (k_d + k)]); - - - b256_0 = _mm256_loadu_ps(&B[(k_d + k)*ldb + (0 + j)]); - b256_1 = _mm256_loadu_ps(&B[(k_d + k)*ldb + (8 + j)]); - - // FMA - Intel Haswell (2013), AMD Piledriver (2012) - //c256_0 = _mm256_fmadd_ps(a256_0, b256_0, c256_0); - //c256_1 = _mm256_fmadd_ps(a256_1, b256_0, c256_1); - //c256_2 = _mm256_fmadd_ps(a256_0, b256_1, c256_2); - //c256_3 = _mm256_fmadd_ps(a256_1, b256_1, c256_3); - - //c256_4 = _mm256_fmadd_ps(a256_2, b256_0, c256_4); - //c256_5 = _mm256_fmadd_ps(a256_3, b256_0, c256_5); - //c256_6 = _mm256_fmadd_ps(a256_2, b256_1, c256_6); - //c256_7 = _mm256_fmadd_ps(a256_3, b256_1, c256_7); - - result256 = _mm256_mul_ps(a256_0, b256_0); - c256_0 = _mm256_add_ps(result256, c256_0); - - result256 = _mm256_mul_ps(a256_1, b256_0); - c256_1 = _mm256_add_ps(result256, c256_1); - - result256 = _mm256_mul_ps(a256_0, b256_1); - c256_2 = _mm256_add_ps(result256, c256_2); - - result256 = _mm256_mul_ps(a256_1, b256_1); - c256_3 = _mm256_add_ps(result256, c256_3); - - - result256 = _mm256_mul_ps(a256_2, b256_0); - c256_4 = _mm256_add_ps(result256, c256_4); - - result256 = _mm256_mul_ps(a256_3, b256_0); - c256_5 = _mm256_add_ps(result256, c256_5); - - result256 = _mm256_mul_ps(a256_2, b256_1); - c256_6 = _mm256_add_ps(result256, c256_6); - - result256 = _mm256_mul_ps(a256_3, b256_1); - c256_7 = _mm256_add_ps(result256, c256_7); - } - _mm256_storeu_ps(&C[(0 + i)*ldc + (0 + j)], c256_0); - _mm256_storeu_ps(&C[(1 + i)*ldc 
+ (0 + j)], c256_1); - _mm256_storeu_ps(&C[(0 + i)*ldc + (8 + j)], c256_2); - _mm256_storeu_ps(&C[(1 + i)*ldc + (8 + j)], c256_3); - - _mm256_storeu_ps(&C[(2 + i)*ldc + (0 + j)], c256_4); - _mm256_storeu_ps(&C[(3 + i)*ldc + (0 + j)], c256_5); - _mm256_storeu_ps(&C[(2 + i)*ldc + (8 + j)], c256_6); - _mm256_storeu_ps(&C[(3 + i)*ldc + (8 + j)], c256_7); - } - - for (j = (N / TILE_N)*TILE_N; j < N; ++j) { - for (i_d = i; i_d < (i + TILE_M); ++i_d) - { - for (k_d = k; k_d < (k + TILE_K); ++k_d) - { - PUT_IN_REGISTER float A_PART = ALPHA*A[i_d*lda + k_d]; - C[i_d*ldc + j] += A_PART*B[k_d*ldb + j]; - } - } - } - } - - for (k = (K / TILE_K)*TILE_K; k < K; ++k) - { - for (i_d = i; i_d < (i + TILE_M); ++i_d) - { - PUT_IN_REGISTER float A_PART = ALPHA*A[i_d*lda + k]; - for (j = 0; j < N; ++j) { - C[i_d*ldc + j] += A_PART*B[k*ldb + j]; - } - } - } - } - - for (i = (M / TILE_M)*TILE_M; i < M; ++i) { - int j, k; - for (k = 0; k < K; ++k) { - PUT_IN_REGISTER float A_PART = ALPHA*A[i*lda + k]; - for (j = 0; j < N; ++j) { - C[i*ldc + j] += A_PART*B[k*ldb + j]; - } - } - } -} - - - -void gemm_nn_bin_32bit_packed(int M, int N, int K, float ALPHA, - uint32_t *A, int lda, - uint32_t *B, int ldb, - float *C, int ldc, float *mean_arr) -{ - int i; - #pragma omp parallel for - for (i = 0; i < M; ++i) { // l.n - int j, s; - float mean_val = mean_arr[i]; - //printf(" l.mean_arr[i] = %d \n ", l.mean_arr[i]); - for (s = 0; s < K; ++s) // l.size*l.size*l.c/32 or (l.size*l.size*l.c) - { - PUT_IN_REGISTER uint32_t A_PART = A[i*lda + s]; - __m256i a256 = _mm256_set1_epi32(A_PART); - - for (j = 0; j < N - 8; j += 8) - { - __m256i b256 = *((__m256i*)&B[s*ldb + j]); - __m256i xor256 = _mm256_xor_si256(a256, b256); // xnor = xor(a,b) - __m256i all_1 = _mm256_set1_epi8((char)255); - __m256i xnor256 = _mm256_andnot_si256(xor256, all_1); // xnor = not(xor(a,b)) - - // waiting for - CPUID Flags: AVX512VPOPCNTDQ: __m512i _mm512_popcnt_epi32(__m512i a) - __m256 count = _mm256_setr_ps( - 
popcnt_32(_mm256_extract_epi32(xnor256, 0)), - popcnt_32(_mm256_extract_epi32(xnor256, 1)), - popcnt_32(_mm256_extract_epi32(xnor256, 2)), - popcnt_32(_mm256_extract_epi32(xnor256, 3)), - popcnt_32(_mm256_extract_epi32(xnor256, 4)), - popcnt_32(_mm256_extract_epi32(xnor256, 5)), - popcnt_32(_mm256_extract_epi32(xnor256, 6)), - popcnt_32(_mm256_extract_epi32(xnor256, 7))); - - __m256 val2 = _mm256_set1_ps(2); - count = _mm256_mul_ps(count, val2); // count * 2 - - __m256 val32 = _mm256_set1_ps(32); - count = _mm256_sub_ps(count, val32); // count - 32 - - __m256 mean256 = _mm256_set1_ps(mean_val); - count = _mm256_mul_ps(count, mean256); // count * mean_val - - __m256 c256 = *((__m256*)&C[i*ldc + j]); - count = _mm256_add_ps(count, c256); // c = c + count - *((__m256*)&C[i*ldc + j]) = count; - } - - for (; j < N; ++j) // out_h*out_w; - { - PUT_IN_REGISTER uint32_t B_PART = B[s*ldb + j]; - uint32_t xnor_result = ~(A_PART ^ B_PART); - int32_t count = popcnt_32(xnor_result); // must be Signed int - - C[i*ldc + j] += (2 * count - 32) * mean_val; - } - } - } -} - -void convolution_2d_old(int w, int h, int ksize, int n, int c, int pad, int stride, - float *weights, float *input, float *output) -{ - //const int out_h = (h + 2 * pad - ksize) / stride + 1; // output_height=input_height for stride=1 and pad=1 - //const int out_w = (w + 2 * pad - ksize) / stride + 1; // output_width=input_width for stride=1 and pad=1 - - int fil; - // filter index - #pragma omp parallel for // "omp parallel for" - automatic parallelization of loop by using OpenMP - for (fil = 0; fil < n; ++fil) { - //int i, f, j; - int chan, y, x, f_y, f_x; - // channel index - for (chan = 0; chan < c; ++chan) - // input - y - for (y = 0; y < h; ++y) - // input - x - for (x = 0; x < w; ++x) - { - int const output_index = fil*w*h + y*w + x; - int const weights_pre_index = fil*c*ksize*ksize + chan*ksize*ksize; - int const input_pre_index = chan*w*h; - float sum = 0; - - // filter - y - for (f_y = 0; f_y < ksize; 
++f_y) - { - int input_y = y + f_y - pad; - // filter - x - for (f_x = 0; f_x < ksize; ++f_x) - { - int input_x = x + f_x - pad; - if (input_y < 0 || input_x < 0 || input_y >= h || input_x >= w) continue; - - int input_index = input_pre_index + input_y*w + input_x; - int weights_index = weights_pre_index + f_y*ksize + f_x; - - sum += input[input_index] * weights[weights_index]; - } - } - // l.output[filters][width][height] += - // state.input[channels][width][height] * - // l.weights[filters][channels][filter_width][filter_height]; - output[output_index] += sum; - } - } -} - -void convolution_2d(int w, int h, int ksize, int n, int c, int pad, int stride, - float *weights, float *input, float *output, float *mean) -{ - //const int out_h = (h + 2 * pad - ksize) / stride + 1; // output_height=input_height for stride=1 and pad=1 - //const int out_w = (w + 2 * pad - ksize) / stride + 1; // output_width=input_width for stride=1 and pad=1 - int i; - -#if defined(_OPENMP) - static int max_num_threads = 0; - if (max_num_threads == 0) { - max_num_threads = omp_get_max_threads(); - //omp_set_num_threads( max_num_threads / 2); - } -#endif - - //convolution_2d_old(w, h, ksize, n, c, pad, stride, weights, input, output); - - __m256i all256_sing1 = _mm256_set_epi32(0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000); - for (i = 0; i < ksize*ksize*n*c; i+=8) { - *((__m256*)&weights[i]) = _mm256_and_ps(*((__m256*)&weights[i]), _mm256_castsi256_ps(all256_sing1)); - } - - //for (i = 0; i < w*h*c; i += 8) { - //(*(__m256*)&input[i]) = _mm256_and_ps(*((__m256*)&input[i]), _mm256_castsi256_ps(all256_sing1)); - //} - - - //__m256i all256_last_zero = _mm256_set1_epi32(0xFFFFFFFF); - //all256_last_zero.m256i_i32[7] = 0; - __m256i all256_last_zero = - _mm256_set_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0); - - __m256i idx256 = _mm256_set_epi32(0, 7, 6, 5, 4, 3, 2, 1); - //__m256 all256_sing1 = 
_mm256_set1_ps(0x80000000); - __m256 all256_one = _mm256_set1_ps(1); - __m256i all256i_one = _mm256_set1_epi32(1); - - ///__m256i src256 = _mm256_loadu_si256((__m256i *)(&src[i])); - ///__m256i result256 = _mm256_and_si256(src256, all256_sing1); // check sign in 8 x 32-bit floats - - int fil; - // filter index - #pragma omp parallel for // "omp parallel for" - automatic parallelization of loop by using OpenMP - for (fil = 0; fil < n; ++fil) { - int chan, y, x, f_y, f_x; - float cur_mean = fabs(mean[fil]); - __m256 mean256 = _mm256_set1_ps(cur_mean); - // channel index - //for (chan = 0; chan < c; ++chan) - // input - y - for (y = 0; y < h; ++y) - // input - x - for (x = 0; x < w-8; x+=8) - { - int const output_index = fil*w*h + y*w + x; - float sum = 0; - __m256 sum256 = _mm256_set1_ps(0); - - for (chan = 0; chan < c; ++chan) { - int const weights_pre_index = fil*c*ksize*ksize + chan*ksize*ksize; - int const input_pre_index = chan*w*h; - - - // filter - y - for (f_y = 0; f_y < ksize; ++f_y) - { - int input_y = y + f_y - pad; - //__m256 in = *((__m256*)&input[input_pre_index + input_y*w]); - if (input_y < 0 || input_y >= h) continue; - //__m256 in = _mm256_loadu_ps(&input[input_pre_index + input_y*w + x - pad]); - - // filter - x - for (f_x = 0; f_x < ksize; ++f_x) - { - int input_x = x + f_x - pad; - //if (input_y < 0 || input_x < 0 || input_y >= h || input_x >= w) continue; - - int input_index = input_pre_index + input_y*w + input_x; - int weights_index = weights_pre_index + f_y*ksize + f_x; - //if (input_y < 0 || input_y >= h) continue; - - //sum += input[input_index] * weights[weights_index]; - - __m256 in = *((__m256*)&input[input_index]); - __m256 w = _mm256_set1_ps(weights[weights_index]); - //__m256 w_sign = _mm256_and_ps(w, _mm256_castsi256_ps(all256_sing1)); // check sign in 8 x 32-bit floats - __m256 xor256 = _mm256_xor_ps(w, in); - //printf("\n xor256_1 = %f, xor256_2 = %f \n", xor256.m256_f32[0], xor256.m256_f32[1]); - //printf("\n in = %f, w = %f, 
xor256 = %f \n", in.m256_f32[0], w_sign.m256_f32[0], xor256.m256_f32[0]); - - //__m256 pn1 = _mm256_and_ps(_mm256_castsi256_ps(all256i_one), xor256); - - - //sum256 = xor256; - sum256 = _mm256_add_ps(xor256, sum256); - //printf("\n --- \n"); - //printf("\n 0 = %f, 1 = %f, 2 = %f, 3 = %f, 4 = %f, 5 = %f, 6 = %f, 7 = %f \n", in.m256_f32[0], in.m256_f32[1], in.m256_f32[2], in.m256_f32[3], in.m256_f32[4], in.m256_f32[5], in.m256_f32[6], in.m256_f32[7]); - - if (f_x < ksize-1) { - //in = _mm256_permutevar8x32_ps(in, idx256); - //in = _mm256_and_ps(in, _mm256_castsi256_ps(all256_last_zero)); - } - } - } - } - // l.output[filters][width][height] += - // state.input[channels][width][height] * - // l.weights[filters][channels][filter_width][filter_height]; - //output[output_index] += sum; - - sum256 = _mm256_mul_ps(sum256, mean256); - //printf("\n cur_mean = %f, sum256 = %f, sum256 = %f, in = %f \n", - // cur_mean, sum256.m256_f32[0], sum256.m256_f32[1], input[input_pre_index]); - - //__m256 out = *((__m256*)&output[output_index]); - //out = _mm256_add_ps(out, sum256); - //(*(__m256*)&output[output_index]) = out; - *((__m256*)&output[output_index]) = sum256; - - //_mm256_storeu_ps(&C[i*ldc + j], result256); - } - } -} - - - -// http://graphics.stanford.edu/~seander/bithacks.html -// https://stackoverflow.com/questions/17354971/fast-counting-the-number-of-set-bits-in-m128i-register -// https://arxiv.org/pdf/1611.07612.pdf - -static inline int popcnt128(__m128i n) { - const __m128i n_hi = _mm_unpackhi_epi64(n, n); -#if defined(_MSC_VER) - return __popcnt64(_mm_cvtsi128_si64(n)) + __popcnt64(_mm_cvtsi128_si64(n_hi)); -#elif defined(__APPLE__) && defined(__clang__) - return _mm_popcnt_u64(_mm_cvtsi128_si64(n)) + _mm_popcnt_u64(_mm_cvtsi128_si64(n_hi)); -#else - return __popcntq(_mm_cvtsi128_si64(n)) + __popcntq(_mm_cvtsi128_si64(n_hi)); -#endif -} - -static inline int popcnt256(__m256i n) { - return popcnt128(_mm256_extractf128_si256(n, 0)) + 
popcnt128(_mm256_extractf128_si256(n, 1)); -} - -static inline __m256i count256(__m256i v) { - __m256i lookup = - _mm256_setr_epi8(0, 1, 1, 2, 1, 2, 2, 3, 1, 2, - 2, 3, 2, 3, 3, 4, 0, 1, 1, 2, 1, 2, 2, 3, - 1, 2, 2, 3, 2, 3, 3, 4); - - __m256i low_mask = _mm256_set1_epi8(0x0f); - - __m256i lo = _mm256_and_si256(v, low_mask); - __m256i hi = _mm256_and_si256(_mm256_srli_epi32(v, 4), low_mask); - __m256i popcnt1 = _mm256_shuffle_epi8(lookup, lo); - __m256i popcnt2 = _mm256_shuffle_epi8(lookup, hi); - __m256i total = _mm256_add_epi8(popcnt1, popcnt2); - - return _mm256_sad_epu8(total, _mm256_setzero_si256()); -} - -static inline int popcnt256_custom(__m256i n) { - __m256i val = count256(n); - - //return val.m256i_i64[0] + - //val.m256i_i64[1] + - //val.m256i_i64[2] + - //val.m256i_i64[3]; - return _mm256_extract_epi64(val, 0) - + _mm256_extract_epi64(val, 1) - + _mm256_extract_epi64(val, 2) - + _mm256_extract_epi64(val, 3); -} - -static inline void xnor_avx2_popcnt(__m256i a_bit256, __m256i b_bit256, __m256i *count_sum) { - __m256i c_bit256 = _mm256_set1_epi8((char)255); - - __m256i xor256 = _mm256_xor_si256(a_bit256, b_bit256); // xnor = not(xor(a,b)) - c_bit256 = _mm256_andnot_si256(xor256, c_bit256); // can be optimized - we can do other NOT for wegihts once and do not do this NOT - - *count_sum = _mm256_add_epi64(count256(c_bit256), *count_sum); // 1st part - popcnt Mula's algorithm -} - -// 2nd part - popcnt Mula's algorithm -static inline int get_count_mula(__m256i count_sum) { - return _mm256_extract_epi64(count_sum, 0) - + _mm256_extract_epi64(count_sum, 1) - + _mm256_extract_epi64(count_sum, 2) - + _mm256_extract_epi64(count_sum, 3); -} - -// 5x times faster than gemm()-float32 -// further optimizations: do mean-mult only for the last layer -void gemm_nn_custom_bin_mean_transposed(int M, int N, int K, float ALPHA_UNUSED, - unsigned char *A, int lda, - unsigned char *B, int ldb, - float *C, int ldc, float *mean_arr) -{ - int i; - -#if defined(_OPENMP) - static 
int max_num_threads = 0; - if (max_num_threads == 0) { - max_num_threads = omp_get_max_threads(); - //omp_set_num_threads(max_num_threads / 2); - } -#endif - - //#pragma omp parallel for - //for (i = 0; i < M; ++i) - #pragma omp parallel for - for (i = 0; i < (M/2)*2; i += 2) - { // l.n - filters [16 - 55 - 1024] - float mean_val_0 = mean_arr[i + 0]; - float mean_val_1 = mean_arr[i + 1]; - int j, k; - //__m256i all_1 = _mm256_set1_epi8(255); - - //for (j = 0; j < N; ++j) - for (j = 0; j < (N/2)*2; j += 2) - { // out_h*out_w - one channel output size [169 - 173056] - //int count = 0; - const int bit_step = 256; - __m256i count_sum_0 = _mm256_set1_epi8(0); - __m256i count_sum_1 = _mm256_set1_epi8(0); - __m256i count_sum_2 = _mm256_set1_epi8(0); - __m256i count_sum_3 = _mm256_set1_epi8(0); - - for (k = 0; k < K; k += bit_step) { // l.size*l.size*l.c - one filter size [27 - 9216] - - __m256i a_bit256_0 = _mm256_loadu_si256((__m256i *)(A + ((i + 0)*lda + k) / 8)); - __m256i b_bit256_0 = _mm256_loadu_si256((__m256i *)(B + ((j + 0)*ldb + k) / 8)); - - __m256i a_bit256_1 = _mm256_loadu_si256((__m256i *)(A + ((i + 1)*lda + k) / 8)); - __m256i b_bit256_1 = _mm256_loadu_si256((__m256i *)(B + ((j + 1)*ldb + k) / 8)); - - - xnor_avx2_popcnt(a_bit256_0, b_bit256_0, &count_sum_0); - xnor_avx2_popcnt(a_bit256_0, b_bit256_1, &count_sum_1); - - xnor_avx2_popcnt(a_bit256_1, b_bit256_0, &count_sum_2); - xnor_avx2_popcnt(a_bit256_1, b_bit256_1, &count_sum_3); - - //count += popcnt256(c_bit256); - //binary_int64_printf(c_bit64); - //printf(", count = %d \n\n", tmp_count); - } - - int count_0 = get_count_mula(count_sum_0); - int count_1 = get_count_mula(count_sum_1); - int count_2 = get_count_mula(count_sum_2); - int count_3 = get_count_mula(count_sum_3); - - const int f1 = (K % bit_step == 0) ? 
0 : (bit_step - (K % bit_step)); - count_0 = count_0 - f1; // remove extra bits (from empty space for align only) - count_1 = count_1 - f1; - count_2 = count_2 - f1; - count_3 = count_3 - f1; - C[i*ldc + (j + 0)] = (2 * count_0 - K) * mean_val_0; - C[i*ldc + (j + 1)] = (2 * count_1 - K) * mean_val_0; - C[(i + 1)*ldc + (j + 0)] = (2 * count_2 - K) * mean_val_1; - C[(i + 1)*ldc + (j + 1)] = (2 * count_3 - K) * mean_val_1; - } - - int i_d; - for (i_d = 0; i_d < 2; ++i_d) - { - float mean_val = mean_arr[i + i_d]; - for (j = (N / 2) * 2; j < N; j += 1) - { // out_h*out_w - one channel output size [169 - 173056] - const int bit_step = 256; - __m256i count_sum = _mm256_set1_epi8(0); - - for (k = 0; k < K; k += bit_step) { // l.size*l.size*l.c - one filter size [27 - 9216] - __m256i a_bit256_0 = _mm256_loadu_si256((__m256i *)(A + ((i + i_d + 0)*lda + k) / 8)); - __m256i b_bit256_0 = _mm256_loadu_si256((__m256i *)(B + ((j + 0)*ldb + k) / 8)); - xnor_avx2_popcnt(a_bit256_0, b_bit256_0, &count_sum); - } - int count = get_count_mula(count_sum); - const int f1 = (K % bit_step == 0) ? 0 : (bit_step - (K % bit_step)); - count = count - f1; // remove extra bits (from empty space for align only) - C[(i + i_d)*ldc + j] = (2 * count - K) * mean_val; - } - } - } - - for (i = (M / 2) * 2; i < M; i += 1) - { - float mean_val = mean_arr[i]; - int j, k; - for (j = 0; j < N; j += 1) - { // out_h*out_w - one channel output size [169 - 173056] - const int bit_step = 256; - __m256i count_sum = _mm256_set1_epi8(0); - - for (k = 0; k < K; k += bit_step) { // l.size*l.size*l.c - one filter size [27 - 9216] - __m256i a_bit256_0 = _mm256_loadu_si256((__m256i *)(A + ((i + 0)*lda + k) / 8)); - __m256i b_bit256_0 = _mm256_loadu_si256((__m256i *)(B + ((j + 0)*ldb + k) / 8)); - xnor_avx2_popcnt(a_bit256_0, b_bit256_0, &count_sum); - } - int count = get_count_mula(count_sum); - const int f1 = (K % bit_step == 0) ? 
0 : (bit_step - (K % bit_step)); - count = count - f1; // remove extra bits (from empty space for align only) - C[i*ldc + j] = (2 * count - K) * mean_val; - } - } -} - - - - -//From Berkeley Vision's Caffe! -//https://github.com/BVLC/caffe/blob/master/LICENSE -void im2col_cpu_custom_transpose(float* data_im, - int channels, int height, int width, - int ksize, int stride, int pad, float* data_col, int ldb_align) -{ - const int height_col = (height + 2 * pad - ksize) / stride + 1; - const int width_col = (width + 2 * pad - ksize) / stride + 1; - const int channels_col = channels * ksize * ksize; - int c; - - // optimized version - if (height_col == height && width_col == width && stride == 1 && pad == 1) - { - #pragma omp parallel for - for (c = 0; c < channels_col; ++c) { - int h, w; - int w_offset = c % ksize; - int h_offset = (c / ksize) % ksize; - int c_im = c / ksize / ksize; - for (h = pad; h < height_col - pad; ++h) { - for (w = pad; w < width_col - pad - 4; w+=8) { - int im_row = h_offset + h - pad; - int im_col = w_offset + w - pad; - //int col_index = (c * height_col + h) * width_col + w; - int col_index = (h * width_col + w)*ldb_align + c; // transposed & aligned - - //data_col[col_index] = data_im[im_col + width*(im_row + height*c_im)]; - __m256 src256 = _mm256_loadu_ps((float *)(&data_im[im_col + width*(im_row + height*c_im)])); - data_col[col_index + ldb_align * 0] = _mm256_extract_float32(src256, 0);// src256.m256_f32[0]; - data_col[col_index + ldb_align * 1] = _mm256_extract_float32(src256, 1);// src256.m256_f32[1]; - data_col[col_index + ldb_align * 2] = _mm256_extract_float32(src256, 2);// src256.m256_f32[2]; - data_col[col_index + ldb_align * 3] = _mm256_extract_float32(src256, 3);// src256.m256_f32[3]; - data_col[col_index + ldb_align * 4] = _mm256_extract_float32(src256, 4);// src256.m256_f32[4]; - data_col[col_index + ldb_align * 5] = _mm256_extract_float32(src256, 5);// src256.m256_f32[5]; - data_col[col_index + ldb_align * 6] = 
_mm256_extract_float32(src256, 6);// src256.m256_f32[6]; - data_col[col_index + ldb_align * 7] = _mm256_extract_float32(src256, 7);// src256.m256_f32[7]; - - //_mm256_storeu_ps(&data_col[col_index], src256); - } - - for (; w < width_col - pad; ++w) { - int im_row = h_offset + h - pad; - int im_col = w_offset + w - pad; - int col_index = (h * width_col + w)*ldb_align + c; // transposed & aligned - data_col[col_index] = data_im[im_col + width*(im_row + height*c_im)]; - } - } - - { - w = 0; - for (h = 0; h < height_col; ++h) { - int im_row = h_offset + h; - int im_col = w_offset + w; - int col_index = (h * width_col + w)*ldb_align + c; // transposed & aligned - data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, - im_row, im_col, c_im, pad); - } - } - - { - w = width_col - 1; - for (h = 0; h < height_col; ++h) { - int im_row = h_offset + h; - int im_col = w_offset + w; - int col_index = (h * width_col + w)*ldb_align + c; // transposed & aligned - data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, - im_row, im_col, c_im, pad); - } - } - - { - h = 0; - for (w = 0; w < width_col; ++w) { - int im_row = h_offset + h; - int im_col = w_offset + w; - int col_index = (h * width_col + w)*ldb_align + c; // transposed & aligned - data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, - im_row, im_col, c_im, pad); - } - } - - { - h = height_col - 1; - for (w = 0; w < width_col; ++w) { - int im_row = h_offset + h; - int im_col = w_offset + w; - int col_index = (h * width_col + w)*ldb_align + c; // transposed & aligned - data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, - im_row, im_col, c_im, pad); - } - } - } - - } - else { - #pragma omp parallel for - for (c = 0; c < channels_col; ++c) { - int h, w; - int w_offset = c % ksize; - int h_offset = (c / ksize) % ksize; - int c_im = c / ksize / ksize; - for (h = 0; h < height_col; ++h) { - for (w = 0; w < width_col; ++w) { - int im_row = h_offset + h * 
stride; - int im_col = w_offset + w * stride; - - int col_index = (h * width_col + w)*ldb_align + c; // transposed & aligned - data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, - im_row, im_col, c_im, pad); - } - } - } - } -} - - -//From Berkeley Vision's Caffe! -//https://github.com/BVLC/caffe/blob/master/LICENSE -void im2col_cpu_custom(float* data_im, - int channels, int height, int width, - int ksize, int stride, int pad, float* data_col) -{ - int c; - const int height_col = (height + 2 * pad - ksize) / stride + 1; - const int width_col = (width + 2 * pad - ksize) / stride + 1; - const int channels_col = channels * ksize * ksize; - - // optimized version - if (height_col == height && width_col == width && stride == 1 && pad == 1 && is_fma_avx2()) - { - #pragma omp parallel for - for (c = 0; c < channels_col; ++c) { - int h, w; - int w_offset = c % ksize; - int h_offset = (c / ksize) % ksize; - int c_im = c / ksize / ksize; - for (h = pad; h < height_col-pad; ++h) { - for (w = pad; w < width_col-pad-8; w += 8) { - int im_row = h_offset + h - pad; - int im_col = w_offset + w - pad; - int col_index = (c * height_col + h) * width_col + w; - - //data_col[col_index] = data_im[im_col + width*(im_row + height*c_im)]; - __m256 src256 = _mm256_loadu_ps((float *)(&data_im[im_col + width*(im_row + height*c_im)])); - _mm256_storeu_ps(&data_col[col_index], src256); - } - - for (; w < width_col - pad; ++w) { - int im_row = h_offset + h - pad; - int im_col = w_offset + w - pad; - int col_index = (c * height_col + h) * width_col + w; - - data_col[col_index] = data_im[im_col + width*(im_row + height*c_im)]; - } - } - - { - w = 0; - for (h = 0; h < height_col; ++h) { - int im_row = h_offset + h; - int im_col = w_offset + w; - int col_index = (c * height_col + h) * width_col + w; - data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, - im_row, im_col, c_im, pad); - } - } - - { - w = width_col-1; - for (h = 0; h < height_col; ++h) { - int 
im_row = h_offset + h; - int im_col = w_offset + w; - int col_index = (c * height_col + h) * width_col + w; - data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, - im_row, im_col, c_im, pad); - } - } - - { - h = 0; - for (w = 0; w < width_col; ++w) { - int im_row = h_offset + h; - int im_col = w_offset + w; - int col_index = (c * height_col + h) * width_col + w; - data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, - im_row, im_col, c_im, pad); - } - } - - { - h = height_col-1; - for (w = 0; w < width_col; ++w) { - int im_row = h_offset + h; - int im_col = w_offset + w; - int col_index = (c * height_col + h) * width_col + w; - data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, - im_row, im_col, c_im, pad); - } - } - } - - } - else { - //printf("\n Error: is no non-optimized version \n"); - im2col_cpu(data_im, channels, height, width, ksize, stride, pad, data_col); - } -} - -//From Berkeley Vision's Caffe! -//https://github.com/BVLC/caffe/blob/master/LICENSE -void im2col_cpu_custom_align(float* data_im, - int channels, int height, int width, - int ksize, int stride, int pad, float* data_col, int bit_align) -{ - int c; - const int height_col = (height + 2 * pad - ksize) / stride + 1; - const int width_col = (width + 2 * pad - ksize) / stride + 1; - const int channels_col = channels * ksize * ksize; - - // optimized version - if (height_col == height && width_col == width && stride == 1 && pad == 1 && is_fma_avx2()) - { - int new_ldb = bit_align; - - #pragma omp parallel for - for (c = 0; c < channels_col; ++c) { - int h, w; - int w_offset = c % ksize; - int h_offset = (c / ksize) % ksize; - int c_im = c / ksize / ksize; - for (h = pad; h < height_col - pad; ++h) { - for (w = pad; w < width_col - pad - 8; w += 8) { - int im_row = h_offset + h - pad; - int im_col = w_offset + w - pad; - //int col_index = (c * height_col + h) * width_col + w; - int col_index = c * new_ldb + h * width_col + w; - - 
//data_col[col_index] = data_im[im_col + width*(im_row + height*c_im)]; - __m256 src256 = _mm256_loadu_ps((float *)(&data_im[im_col + width*(im_row + height*c_im)])); - _mm256_storeu_ps(&data_col[col_index], src256); - } - - for (; w < width_col - pad; ++w) { - int im_row = h_offset + h - pad; - int im_col = w_offset + w - pad; - //int col_index = (c * height_col + h) * width_col + w; - int col_index = c * new_ldb + h * width_col + w; - data_col[col_index] = data_im[im_col + width*(im_row + height*c_im)]; - } - } - - { - w = 0; - for (h = 0; h < height_col; ++h) { - int im_row = h_offset + h; - int im_col = w_offset + w; - //int col_index = (c * height_col + h) * width_col + w; - int col_index = c * new_ldb + h * width_col + w; - data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); - } - } - - { - w = width_col - 1; - for (h = 0; h < height_col; ++h) { - int im_row = h_offset + h; - int im_col = w_offset + w; - //int col_index = (c * height_col + h) * width_col + w; - int col_index = c * new_ldb + h * width_col + w; - data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); - } - } - - { - h = 0; - for (w = 0; w < width_col; ++w) { - int im_row = h_offset + h; - int im_col = w_offset + w; - //int col_index = (c * height_col + h) * width_col + w; - int col_index = c * new_ldb + h * width_col + w; - data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); - } - } - - { - h = height_col - 1; - for (w = 0; w < width_col; ++w) { - int im_row = h_offset + h; - int im_col = w_offset + w; - //int col_index = (c * height_col + h) * width_col + w; - int col_index = c * new_ldb + h * width_col + w; - data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); - } - } - } - - } - else { - printf("\n Error: is no non-optimized version \n"); - //im2col_cpu(data_im, channels, height, width, ksize, stride, 
pad, data_col); // must be aligned for transpose after float_to_bin - // float_to_bit(b, t_input, src_size); - // transpose_bin(t_input, *t_bit_input, k, n, bit_align, new_ldb, 8); - } -} - - -//From Berkeley Vision's Caffe! -//https://github.com/BVLC/caffe/blob/master/LICENSE -void im2col_cpu_custom_bin(float* data_im, - int channels, int height, int width, - int ksize, int stride, int pad, float* data_col, int bit_align) -{ - int c; - const int height_col = (height + 2 * pad - ksize) / stride + 1; - const int width_col = (width + 2 * pad - ksize) / stride + 1; - const int channels_col = channels * ksize * ksize; - - // optimized version - if (height_col == height && width_col == width && stride == 1 && pad == 1 && is_fma_avx2()) - { - __m256i all256_sing1 = _mm256_set_epi32(0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000); - __m256 float_zero256 = _mm256_set1_ps(0.00); - - int new_ldb = bit_align; - - #pragma omp parallel for - for (c = 0; c < channels_col; ++c) { - int h, w; - int w_offset = c % ksize; - int h_offset = (c / ksize) % ksize; - int c_im = c / ksize / ksize; - for (h = pad; h < height_col - pad; ++h) { - for (w = pad; w < width_col - pad - 8; w += 8) { - int im_row = h_offset + h - pad; - int im_col = w_offset + w - pad; - //int col_index = (c * height_col + h) * width_col + w; - int col_index = c * new_ldb + h * width_col + w; - - //__m256i src256 = _mm256_loadu_si256((__m256i *)(&data_im[im_col + width*(im_row + height*c_im)])); - //__m256i result256 = _mm256_and_si256(src256, all256_sing1); // check sign in 8 x 32-bit floats - //uint16_t mask = _mm256_movemask_ps(_mm256_castsi256_ps(result256)); // (val >= 0) ? 0 : 1 - //mask = ~mask; // inverse mask, (val >= 0) ? 1 : 0 - - __m256 src256 = _mm256_loadu_ps((float *)(&data_im[im_col + width*(im_row + height*c_im)])); - __m256 result256 = _mm256_cmp_ps(src256, float_zero256, _CMP_GT_OS); - uint16_t mask = _mm256_movemask_ps(result256); // (val > 0) ? 
0 : 1 - - uint16_t* dst_ptr = (uint16_t*)&((uint8_t*)data_col)[col_index / 8]; - *dst_ptr |= (mask << (col_index % 8)); - } - - for (; w < width_col - pad; ++w) { - int im_row = h_offset + h - pad; - int im_col = w_offset + w - pad; - //int col_index = (c * height_col + h) * width_col + w; - int col_index = c * new_ldb + h * width_col + w; - - //data_col[col_index] = data_im[im_col + width*(im_row + height*c_im)]; - float val = data_im[im_col + width*(im_row + height*c_im)]; - if (val > 0) set_bit((unsigned char* const)data_col, col_index); - } - } - - { - w = 0; - for (h = 0; h < height_col; ++h) { - int im_row = h_offset + h; - int im_col = w_offset + w; - //int col_index = (c * height_col + h) * width_col + w; - int col_index = c * new_ldb + h * width_col + w; - - //data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); - float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); - if (val > 0) set_bit((unsigned char* const)data_col, col_index); - } - } - - { - w = width_col - 1; - for (h = 0; h < height_col; ++h) { - int im_row = h_offset + h; - int im_col = w_offset + w; - //int col_index = (c * height_col + h) * width_col + w; - int col_index = c * new_ldb + h * width_col + w; - - //data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); - float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); - if (val > 0) set_bit((unsigned char* const)data_col, col_index); - } - } - - { - h = 0; - for (w = 0; w < width_col; ++w) { - int im_row = h_offset + h; - int im_col = w_offset + w; - //int col_index = (c * height_col + h) * width_col + w; - int col_index = c * new_ldb + h * width_col + w; - - //data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); - float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); - if (val > 0) 
set_bit((unsigned char* const)data_col, col_index); - } - } - - { - h = height_col - 1; - for (w = 0; w < width_col; ++w) { - int im_row = h_offset + h; - int im_col = w_offset + w; - //int col_index = (c * height_col + h) * width_col + w; - int col_index = c * new_ldb + h * width_col + w; - - //data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); - float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); - if (val > 0) set_bit((unsigned char* const)data_col, col_index); - } - } - } - - } - else { - printf("\n Error: is no non-optimized version \n"); - //im2col_cpu(data_im, channels, height, width, ksize, stride, pad, data_col); // must be aligned for transpose after float_to_bin - // float_to_bit(b, t_input, src_size); - // transpose_bin(t_input, *t_bit_input, k, n, bit_align, new_ldb, 8); - } -} - - -void activate_array_cpu_custom(float *x, const int n, const ACTIVATION a) -{ - int i = 0; - if (a == LINEAR) - {} - else if (a == LEAKY) - { - if (is_fma_avx2()) { - __m256i all256_sing1 = _mm256_set_epi32(0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000); - __m256 all256_01 = _mm256_set1_ps(0.1F); - - for (i = 0; i < n - 8; i += 8) { - //x[i] = (x[i]>0) ? x[i] : .1*x[i]; - - __m256 src256 = _mm256_loadu_ps(&x[i]); - __m256 mult256 = _mm256_mul_ps((src256), all256_01); // mult * 0.1 - - __m256i sign256 = _mm256_and_si256(_mm256_castps_si256(src256), all256_sing1); // check sign in 8 x 32-bit floats - - __m256 result256 = _mm256_blendv_ps(src256, mult256, _mm256_castsi256_ps(sign256)); // (sign>0) ? src : mult; - _mm256_storeu_ps(&x[i], result256); - } - } - - for (; i < n; ++i) { - x[i] = (x[i]>0) ? 
x[i] : .1*x[i]; - } - } - else { - for (i = 0; i < n; ++i) { - x[i] = activate(x[i], a); - } - } -} - -void float_to_bit(float *src, unsigned char *dst, size_t size) -{ - size_t dst_size = size / 8 + 1; - memset(dst, 0, dst_size); - - size_t i; - //__m256i all256_sing1 = _mm256_set_epi32(0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000); - __m256 float_zero256 = _mm256_set1_ps(0.0); - - for (i = 0; i < size; i+=8) - { - //__m256i src256 = _mm256_loadu_si256((__m256i *)(&src[i])); - //__m256i result256 = _mm256_and_si256(src256, all256_sing1); // check sign in 8 x 32-bit floats - //uint32_t mask = _mm256_movemask_ps(_mm256_castsi256_ps(result256)); // (val >= 0) ? 0 : 1 - ////mask = ~mask; // inverse mask, (val >= 0) ? 1 : 0 - - __m256 src256 = _mm256_loadu_ps((float *)(&src[i])); - __m256 result256 = _mm256_cmp_ps(src256, float_zero256, _CMP_GT_OS); - uint32_t mask = _mm256_movemask_ps(result256); // (val > 0) ? 0 : 1 - - dst[i / 8] = mask; - } -} - -static inline void transpose4x4_SSE(float *A, float *B, const int lda, const int ldb) -{ - __m128 row1 = _mm_loadu_ps(&A[0 * lda]); - __m128 row2 = _mm_loadu_ps(&A[1 * lda]); - __m128 row3 = _mm_loadu_ps(&A[2 * lda]); - __m128 row4 = _mm_loadu_ps(&A[3 * lda]); - _MM_TRANSPOSE4_PS(row1, row2, row3, row4); - _mm_storeu_ps(&B[0 * ldb], row1); - _mm_storeu_ps(&B[1 * ldb], row2); - _mm_storeu_ps(&B[2 * ldb], row3); - _mm_storeu_ps(&B[3 * ldb], row4); -} - -void transpose_block_SSE4x4(float *A, float *B, const int n, const int m, - const int lda, const int ldb, const int block_size) -{ - int i; - #pragma omp parallel for - for (i = 0; i < n; i += block_size) { - int j, i2, j2; - //int max_i2 = (i + block_size < n) ? (i + block_size) : n; - if (i + block_size < n) { - int max_i2 = i + block_size; - for (j = 0; j < m; j += block_size) { - //int max_j2 = (j + block_size < m) ? 
(j + block_size) : m; - if (j + block_size < m) { - int max_j2 = j + block_size; - for (i2 = i; i2 < max_i2; i2 += 4) { - for (j2 = j; j2 < max_j2; j2 += 4) { - transpose4x4_SSE(&A[i2*lda + j2], &B[j2*ldb + i2], lda, ldb); - } - } - } - else { - for (i2 = i; i2 < max_i2; ++i2) { - for (j2 = j; j2 < m; ++j2) { - B[j2*ldb + i2] = A[i2*lda + j2]; - } - } - } - } - } - else { - for (i2 = i; i2 < n; ++i2) { - for (j2 = 0; j2 < m; ++j2) { - B[j2*ldb + i2] = A[i2*lda + j2]; - } - } - } - } -} - - -void forward_maxpool_layer_avx(float *src, float *dst, int *indexes, int size, int w, int h, int out_w, int out_h, int c, - int pad, int stride, int batch) -{ - - const int w_offset = -pad / 2; - const int h_offset = -pad / 2; - int b, k; - - for (b = 0; b < batch; ++b) { - #pragma omp parallel for - for (k = 0; k < c; ++k) { - int i, j, m, n; - for (i = 0; i < out_h; ++i) { - //for (j = 0; j < out_w; ++j) { - j = 0; - - if(stride == 1 && is_avx() == 1) { - for (j = 0; j < out_w - 8 - (size - 1); j += 8) { - int out_index = j + out_w*(i + out_h*(k + c*b)); - __m256 max256 = _mm256_set1_ps(-FLT_MAX); - for (n = 0; n < size; ++n) { - for (m = 0; m < size; ++m) { - int cur_h = h_offset + i*stride + n; - int cur_w = w_offset + j*stride + m; - int index = cur_w + w*(cur_h + h*(k + b*c)); - int valid = (cur_h >= 0 && cur_h < h && - cur_w >= 0 && cur_w < w); - if (!valid) continue; - - __m256 src256 = _mm256_loadu_ps(&src[index]); - max256 = _mm256_max_ps(src256, max256); - } - } - _mm256_storeu_ps(&dst[out_index], max256); - - } - } - else if (size == 2 && stride == 2 && is_avx() == 1) { - for (j = 0; j < out_w - 4; j += 4) { - int out_index = j + out_w*(i + out_h*(k + c*b)); - //float max = -FLT_MAX; - //int max_i = -1; - __m128 max128 = _mm_set1_ps(-FLT_MAX); - - for (n = 0; n < size; ++n) { - //for (m = 0; m < size; ++m) - m = 0; - { - int cur_h = h_offset + i*stride + n; - int cur_w = w_offset + j*stride + m; - int index = cur_w + w*(cur_h + h*(k + b*c)); - int valid = (cur_h >= 0 
&& cur_h < h && - cur_w >= 0 && cur_w < w); - if (!valid) continue; - - __m256 src256 = _mm256_loadu_ps(&src[index]); - __m256 src256_2 = _mm256_permute_ps(src256, (1 << 0) | (3 << 4)); - __m256 max256 = _mm256_max_ps(src256, src256_2); - - __m128 src128_0 = _mm256_extractf128_ps(max256, 0); - __m128 src128_1 = _mm256_extractf128_ps(max256, 1); - __m128 src128 = _mm_shuffle_ps(src128_0, src128_1, (2 << 2) | (2 << 6)); - - max128 = _mm_max_ps(src128, max128); - } - } - _mm_storeu_ps(&dst[out_index], max128); - } - } - - for (; j < out_w; ++j) { - int out_index = j + out_w*(i + out_h*(k + c*b)); - float max = -FLT_MAX; - int max_i = -1; - for (n = 0; n < size; ++n) { - for (m = 0; m < size; ++m) { - int cur_h = h_offset + i*stride + n; - int cur_w = w_offset + j*stride + m; - int index = cur_w + w*(cur_h + h*(k + b*c)); - int valid = (cur_h >= 0 && cur_h < h && - cur_w >= 0 && cur_w < w); - float val = (valid != 0) ? src[index] : -FLT_MAX; - max_i = (val > max) ? index : max_i; - max = (val > max) ? 
val : max; - } - } - dst[out_index] = max; - if (indexes) indexes[out_index] = max_i; - } - } - } - } -} - -#else // AVX - -int is_avx() { - return 0; -} - -int is_fma_avx2() { - return 0; -} - -void gemm_nn(int M, int N, int K, float ALPHA, - float *A, int lda, - float *B, int ldb, - float *C, int ldc) -{ - int i, j, k; - for (i = 0; i < M; ++i) { - for (k = 0; k < K; ++k) { - PUT_IN_REGISTER float A_PART = ALPHA * A[i * lda + k]; - for (j = 0; j < N; ++j) { - C[i*ldc + j] += A_PART*B[k*ldb + j]; - } - } - } -} - -void gemm_nn_fast(int M, int N, int K, float ALPHA, - float *A, int lda, - float *B, int ldb, - float *C, int ldc) -{ - int i, j, k; - #pragma omp parallel for - for (i = 0; i < M; ++i) { - for (k = 0; k < K; ++k) { - PUT_IN_REGISTER float A_PART = ALPHA*A[i*lda + k]; - for (j = 0; j < N; ++j) { - C[i*ldc + j] += A_PART*B[k*ldb + j]; - } - } - } -} - -void gemm_nn_bin_32bit_packed(int M, int N, int K, float ALPHA, - uint32_t *A, int lda, - uint32_t *B, int ldb, - float *C, int ldc, float *mean_arr) -{ - int i; - #pragma omp parallel for - for (i = 0; i < M; ++i) { // l.n - int j, s; - float mean_val = mean_arr[i]; - //printf(" l.mean_arr[i] = %d \n ", l.mean_arr[i]); - for (s = 0; s < K; ++s) // l.size*l.size*l.c/32 or (l.size*l.size*l.c) - { - //PUT_IN_REGISTER float A_PART = 1*a[i*k + s]; - PUT_IN_REGISTER uint32_t A_PART = A[i * lda + s]; - for (j = 0; j < N; ++j) // out_h*out_w; - { - //c[i*n + j] += A_PART*b[s*n + j]; - PUT_IN_REGISTER uint32_t B_PART = B[s * ldb + j]; - uint32_t xnor_result = ~(A_PART ^ B_PART); - //printf(" xnor_result = %d, ", xnor_result); - int32_t count = popcnt_32(xnor_result); // must be Signed int - - C[i*ldc + j] += (2 * count - 32) * mean_val; - //c[i*n + j] += count*mean; - } - } - } -} - - -void convolution_2d(int w, int h, int ksize, int n, int c, int pad, int stride, - float *weights, float *input, float *output, float *mean) -{ - const int out_h = (h + 2 * pad - ksize) / stride + 1; // output_height=input_height for 
stride=1 and pad=1 - const int out_w = (w + 2 * pad - ksize) / stride + 1; // output_width=input_width for stride=1 and pad=1 - //int i, f, j; - - int fil; - // filter index - #pragma omp parallel for // "omp parallel for" - automatic parallelization of loop by using OpenMP - for (fil = 0; fil < n; ++fil) { - int chan, y, x, f_y, f_x; - // channel index - for (chan = 0; chan < c; ++chan) - // input - y - for (y = 0; y < h; ++y) - // input - x - for (x = 0; x < w; ++x) - { - int const output_index = fil*w*h + y*w + x; - int const weights_pre_index = fil*c*ksize*ksize + chan*ksize*ksize; - int const input_pre_index = chan*w*h; - float sum = 0; - - // filter - y - for (f_y = 0; f_y < ksize; ++f_y) - { - int input_y = y + f_y - pad; - // filter - x - for (f_x = 0; f_x < ksize; ++f_x) - { - int input_x = x + f_x - pad; - if (input_y < 0 || input_x < 0 || input_y >= h || input_x >= w) continue; - - int input_index = input_pre_index + input_y*w + input_x; - int weights_index = weights_pre_index + f_y*ksize + f_x; - - sum += input[input_index] * weights[weights_index]; - } - } - // l.output[filters][width][height] += - // state.input[channels][width][height] * - // l.weights[filters][channels][filter_width][filter_height]; - output[output_index] += sum; - } - } -} - -static inline int popcnt_64(uint64_t val64) { -#ifdef WIN32 // Windows -#ifdef _WIN64 // Windows 64-bit - int tmp_count = __popcnt64(val64); -#else // Windows 32-bit - int tmp_count = __popcnt(val64); - tmp_count += __popcnt(val64 >> 32); -#endif -#else // Linux -#if defined(__x86_64__) || defined(__aarch64__) // Linux 64-bit - int tmp_count = __builtin_popcountll(val64); -#else // Linux 32-bit - int tmp_count = __builtin_popcount(val64); - tmp_count += __builtin_popcount(val64 >> 32); -#endif -#endif - return tmp_count; -} - -void gemm_nn_custom_bin_mean_transposed(int M, int N, int K, float ALPHA_UNUSED, - unsigned char *A, int lda, - unsigned char *B, int ldb, - float *C, int ldc, float *mean_arr) -{ - int 
i; - - #pragma omp parallel for - for (i = 0; i < M; ++i) { // l.n - filters [16 - 55 - 1024] - int j, k; - float mean_val = mean_arr[i]; - - for (j = 0; j < N; ++j) { // out_h*out_w - one channel output size [169 - 173056] - int count = 0; - - for (k = 0; k < K; k += 64) { // l.size*l.size*l.c - one filter size [27 - 9216] - uint64_t a_bit64 = *((uint64_t *)(A + (i*lda + k) / 8)); - uint64_t b_bit64 = *((uint64_t *)(B + (j*ldb + k) / 8)); - uint64_t c_bit64 = xnor_int64(a_bit64, b_bit64); - - int tmp_count = popcnt_64(c_bit64); - - if (K - k < 64) tmp_count = tmp_count - (64 - (K - k)); // remove extra bits - count += tmp_count; - //binary_int64_printf(c_bit64); - //printf(", count = %d \n\n", tmp_count); - } - - C[i*ldc + j] = (2 * count - K) * mean_val; - } - } -} - -void im2col_cpu_custom_transpose(float* data_im, - int channels, int height, int width, - int ksize, int stride, int pad, float* data_col, int ldb_align) -{ - printf("\n im2col_cpu_custom_transpose() isn't implemented without AVX \n"); -} - -//From Berkeley Vision's Caffe! 
-//https://github.com/BVLC/caffe/blob/master/LICENSE -void im2col_cpu_custom(float* data_im, - int channels, int height, int width, - int ksize, int stride, int pad, float* data_col) -{ - im2col_cpu(data_im, channels, height, width, ksize, stride, pad, data_col); - return; - - int c; - const int height_col = (height + 2 * pad - ksize) / stride + 1; - const int width_col = (width + 2 * pad - ksize) / stride + 1; - const int channels_col = channels * ksize * ksize; - - // optimized version - if (height_col == height && width_col == width && stride == 1 && pad == 1) - { - #pragma omp parallel for - for (c = 0; c < channels_col; ++c) { - int h, w; - int w_offset = c % ksize; - int h_offset = (c / ksize) % ksize; - int c_im = c / ksize / ksize; - for (h = pad; h < height_col - pad; ++h) { - for (w = pad; w < width_col - pad; ++w) { - int im_row = h_offset + h - pad; - int im_col = w_offset + w - pad; - int col_index = (c * height_col + h) * width_col + w; - - data_col[col_index] = data_im[im_col + width*(im_row + height*c_im)]; - } - - for (; w < width_col - pad; ++w) { - int im_row = h_offset + h - pad; - int im_col = w_offset + w - pad; - int col_index = (c * height_col + h) * width_col + w; - - data_col[col_index] = data_im[im_col + width*(im_row + height*c_im)]; - } - } - - { - w = 0; - for (h = 0; h < height_col; ++h) { - int im_row = h_offset + h; - int im_col = w_offset + w; - int col_index = (c * height_col + h) * width_col + w; - data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, - im_row, im_col, c_im, pad); - } - } - - { - w = width_col - 1; - for (h = 0; h < height_col; ++h) { - int im_row = h_offset + h; - int im_col = w_offset + w; - int col_index = (c * height_col + h) * width_col + w; - data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, - im_row, im_col, c_im, pad); - } - } - - { - h = 0; - for (w = 0; w < width_col; ++w) { - int im_row = h_offset + h; - int im_col = w_offset + w; - int col_index = (c * 
height_col + h) * width_col + w; - data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, - im_row, im_col, c_im, pad); - } - } - - { - h = height_col - 1; - for (w = 0; w < width_col; ++w) { - int im_row = h_offset + h; - int im_col = w_offset + w; - int col_index = (c * height_col + h) * width_col + w; - data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, - im_row, im_col, c_im, pad); - } - } - } - - } - else { - //printf("\n Error: is no non-optimized version \n"); - im2col_cpu(data_im, channels, height, width, ksize, stride, pad, data_col); - } -} - - -//From Berkeley Vision's Caffe! -//https://github.com/BVLC/caffe/blob/master/LICENSE -void im2col_cpu_custom_bin(float* data_im, - int channels, int height, int width, - int ksize, int stride, int pad, float* data_col, int bit_align) -{ - int c; - const int height_col = (height + 2 * pad - ksize) / stride + 1; - const int width_col = (width + 2 * pad - ksize) / stride + 1; - const int channels_col = channels * ksize * ksize; - - // optimized version - if (height_col == height && width_col == width && stride == 1 && pad == 1) - { - int new_ldb = bit_align; - - #pragma omp parallel for - for (c = 0; c < channels_col; ++c) { - int h, w; - int w_offset = c % ksize; - int h_offset = (c / ksize) % ksize; - int c_im = c / ksize / ksize; - for (h = pad; h < height_col - pad; ++h) { - for (w = pad; w < width_col - pad - 8; w += 1) { - int im_row = h_offset + h - pad; - int im_col = w_offset + w - pad; - //int col_index = (c * height_col + h) * width_col + w; - int col_index = c * new_ldb + h * width_col + w; - - float val = data_im[im_col + width*(im_row + height*c_im)]; - if (val > 0) set_bit((unsigned char*)data_col, col_index); - } - - for (; w < width_col - pad; ++w) { - int im_row = h_offset + h - pad; - int im_col = w_offset + w - pad; - //int col_index = (c * height_col + h) * width_col + w; - int col_index = c * new_ldb + h * width_col + w; - - //data_col[col_index] = 
data_im[im_col + width*(im_row + height*c_im)]; - float val = data_im[im_col + width*(im_row + height*c_im)]; - if (val > 0) set_bit((unsigned char*)data_col, col_index); - } - } - - { - w = 0; - for (h = 0; h < height_col; ++h) { - int im_row = h_offset + h; - int im_col = w_offset + w; - //int col_index = (c * height_col + h) * width_col + w; - int col_index = c * new_ldb + h * width_col + w; - - //data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); - float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); - if (val > 0) set_bit((unsigned char*)data_col, col_index); - } - } - - { - w = width_col - 1; - for (h = 0; h < height_col; ++h) { - int im_row = h_offset + h; - int im_col = w_offset + w; - //int col_index = (c * height_col + h) * width_col + w; - int col_index = c * new_ldb + h * width_col + w; - - //data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); - float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); - if (val > 0) set_bit((unsigned char*)data_col, col_index); - } - } - - { - h = 0; - for (w = 0; w < width_col; ++w) { - int im_row = h_offset + h; - int im_col = w_offset + w; - //int col_index = (c * height_col + h) * width_col + w; - int col_index = c * new_ldb + h * width_col + w; - - //data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); - float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); - if (val > 0) set_bit((unsigned char*)data_col, col_index); - } - } - - { - h = height_col - 1; - for (w = 0; w < width_col; ++w) { - int im_row = h_offset + h; - int im_col = w_offset + w; - //int col_index = (c * height_col + h) * width_col + w; - int col_index = c * new_ldb + h * width_col + w; - - //data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); - float 
val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); - if (val > 0) set_bit((unsigned char*)data_col, col_index); - } - } - } - - } - else { - printf("\n Error: is no non-optimized version \n"); - //im2col_cpu(data_im, channels, height, width, ksize, stride, pad, data_col); // must be aligned for transpose after float_to_bin - // float_to_bit(b, t_input, src_size); - // transpose_bin(t_input, *t_bit_input, k, n, bit_align, new_ldb, 8); - } -} - - -void activate_array_cpu_custom(float *x, const int n, const ACTIVATION a) -{ - int i; - if (a == LINEAR) - { - } - else if (a == LEAKY) - { - for (i = 0; i < n; ++i) { - x[i] = (x[i]>0) ? x[i] : .1*x[i]; - } - } - else { - for (i = 0; i < n; ++i) { - x[i] = activate(x[i], a); - } - } -} - -void float_to_bit(float *src, unsigned char *dst, size_t size) -{ - size_t dst_size = size / 8 + 1; - memset(dst, 0, dst_size); - - size_t i; - char* byte_arr = (char*)xcalloc(size, sizeof(char)); - for (i = 0; i < size; ++i) { - if (src[i] > 0) byte_arr[i] = 1; - } - - //for (i = 0; i < size; ++i) { - // dst[i / 8] |= byte_arr[i] << (i % 8); - //} - - for (i = 0; i < size; i += 8) { - char dst_tmp = 0; - dst_tmp |= byte_arr[i + 0] << 0; - dst_tmp |= byte_arr[i + 1] << 1; - dst_tmp |= byte_arr[i + 2] << 2; - dst_tmp |= byte_arr[i + 3] << 3; - dst_tmp |= byte_arr[i + 4] << 4; - dst_tmp |= byte_arr[i + 5] << 5; - dst_tmp |= byte_arr[i + 6] << 6; - dst_tmp |= byte_arr[i + 7] << 7; - dst[i / 8] = dst_tmp; - } - free(byte_arr); -} - -static inline void transpose_scalar_block(float *A, float *B, const int lda, const int ldb, const int block_size) -{ - int i; - //#pragma omp parallel for - for (i = 0; i= 0 && cur_h < h && - cur_w >= 0 && cur_w < w); - float val = (valid != 0) ? src[index] : -FLT_MAX; - max_i = (val > max) ? index : max_i; - max = (val > max) ? 
val : max; - } - } - dst[out_index] = max; - if (indexes) indexes[out_index] = max_i; - } - } - } - } -} - -#endif // AVX - - -// 32 channels -> 1 channel (with 32 floats) -// 256 channels -> 8 channels (with 32 floats) -void repack_input(float *input, float *re_packed_input, int w, int h, int c) -{ - const int items_per_channel = w * h; - int chan, i; - for (chan = 0; chan < c; chan += 32) - { - for (i = 0; i < items_per_channel; ++i) - { - int c_pack; - for (c_pack = 0; c_pack < 32; ++c_pack) { - float src = input[(chan + c_pack)*items_per_channel + i]; - - re_packed_input[chan*items_per_channel + i * 32 + c_pack] = src; - } - } - } -} - -void transpose_uint32(uint32_t *src, uint32_t *dst, int src_h, int src_w, int src_align, int dst_align) -{ - //l.bit_align - algined (n) by 32 - //new_ldb - aligned (k) by 256 - - int i; - //#pragma omp parallel for - for (i = 0; i < src_h; i += 1) // l.size*l.size*l.c; - { - int j; - for (j = 0; j < src_w; j += 1) // out_h*out_w; - { - ((uint32_t *)dst)[j*dst_align / 32 + i] = ((uint32_t *)src)[i*src_align + j]; - } - } -} - -void gemm_nn_bin_transposed_32bit_packed(int M, int N, int K, float ALPHA, - uint32_t *A, int lda, - uint32_t *B, int ldb, - float *C, int ldc, float *mean_arr) -{ - int i; - #pragma omp parallel for - for (i = 0; i < M; ++i) { // l.n - int j, s; - float mean_val = mean_arr[i]; - for (j = 0; j < N; ++j) // out_h*out_w; - { - float val = 0; - for (s = 0; s < K; ++s) // l.size*l.size*l.c/32 or (l.size*l.size*l.c) - { - PUT_IN_REGISTER uint32_t A_PART = ((uint32_t*)A)[i*lda + s]; - PUT_IN_REGISTER uint32_t B_PART = ((uint32_t*)B)[j * ldb + s]; - uint32_t xnor_result = ~(A_PART ^ B_PART); - int32_t count = popcnt_32(xnor_result); // must be Signed int - - val += (2 * count - 32) * mean_val; - } - C[i*ldc + j] += val; - } - } -} - -void convolution_repacked(uint32_t *packed_input, uint32_t *packed_weights, float *output, - int w, int h, int c, int n, int size, int pad, int new_lda, float *mean_arr) -{ - int 
fil; - // filter index - #pragma omp parallel for - for (fil = 0; fil < n; ++fil) { - float mean_val = mean_arr[fil]; - int chan, y, x, f_y, f_x; // c_pack - // channel index - for (chan = 0; chan < c / 32; ++chan) - //for (chan = 0; chan < l.c; chan += 32) - //for (c_pack = 0; c_pack < 32; ++c_pack) - // input - y - for (y = 0; y < h; ++y) - // input - x - for (x = 0; x < w; ++x) - { - int const output_index = fil*w*h + y*w + x; - float sum = 0; - - // filter - y - for (f_y = 0; f_y < size; ++f_y) - { - int input_y = y + f_y - pad; - // filter - x - for (f_x = 0; f_x < size; ++f_x) - { - int input_x = x + f_x - pad; - if (input_y < 0 || input_x < 0 || input_y >= h || input_x >= w) continue; - - // normal - //float input = state.input[(chan + c_pack)*l.w*l.h + input_y*l.w + input_x]; - //float weight = l.weights[fil*l.c*l.size*l.size + (chan + c_pack)*l.size*l.size + f_y*l.size + f_x]; - - // packed - //float input = re_packed_input[chan*l.w*l.h + (input_y*l.w + input_x) * 32 + c_pack]; - //float weight = l.weights[fil*l.c*l.size*l.size + chan*l.size*l.size + (f_y*l.size + f_x) * 32 + c_pack]; - //sum += input * weight; - - //float input = re_packed_input[chan*l.w*l.h + (input_y*l.w + input_x) * 32 + c_pack]; - //float weight = l.weights[fil*l.c*l.size*l.size + chan*l.size*l.size + (f_y*l.size + f_x) * 32 + c_pack]; - //uint32_t bit1 = input > 0; - //uint32_t bit2 = weight > 0; - //uint32_t count = (~(bit1 ^ bit2)) & 1; - //float result = (2 * (float)count - 1) * mean_val; - //printf("\n mul = %f, bit1 = %d, bit2 = %d, count = %d, mean = %f, result = %f ", input*weight, bit1, bit2, count, mean_val, result); - //sum += result; - - uint32_t input = ((uint32_t *)packed_input)[chan*w*h + input_y*w + input_x]; - //uint32_t weight = ((uint32_t *)l.align_bit_weights)[fil*l.c*l.size*l.size/32 + chan*l.size*l.size + f_y*l.size + f_x]; - uint32_t weight = ((uint32_t *)packed_weights)[fil*new_lda / 32 + chan*size*size + f_y*size + f_x]; - - uint32_t xnor_result = ~(input ^ 
weight); - int32_t count = popcnt_32(xnor_result); // mandatory Signed int - sum += (2 * count - 32) * mean_val; - } - } - // l.output[filters][width][height] += - // state.input[channels][width][height] * - // l.weights[filters][channels][filter_width][filter_height]; - output[output_index] += sum; - } - } -} - -void gemm_nt(int M, int N, int K, float ALPHA, - float *A, int lda, - float *B, int ldb, - float *C, int ldc) -{ - int i,j,k; - for(i = 0; i < M; ++i){ - for(j = 0; j < N; ++j){ - PUT_IN_REGISTER float sum = 0; - for(k = 0; k < K; ++k){ - sum += ALPHA*A[i*lda+k]*B[j*ldb + k]; - } - C[i*ldc+j] += sum; - } - } -} - -void gemm_tn(int M, int N, int K, float ALPHA, - float *A, int lda, - float *B, int ldb, - float *C, int ldc) -{ - int i,j,k; - for(i = 0; i < M; ++i){ - for(k = 0; k < K; ++k){ - PUT_IN_REGISTER float A_PART = ALPHA * A[k * lda + i]; - for(j = 0; j < N; ++j){ - C[i*ldc+j] += A_PART*B[k*ldb+j]; - } - } - } -} - -void gemm_tt(int M, int N, int K, float ALPHA, - float *A, int lda, - float *B, int ldb, - float *C, int ldc) -{ - int i,j,k; - for(i = 0; i < M; ++i){ - for(j = 0; j < N; ++j){ - PUT_IN_REGISTER float sum = 0; - for(k = 0; k < K; ++k){ - sum += ALPHA*A[i+k*lda]*B[k+j*ldb]; - } - C[i*ldc+j] += sum; - } - } -} - - -void gemm_cpu(int TA, int TB, int M, int N, int K, float ALPHA, - float *A, int lda, - float *B, int ldb, - float BETA, - float *C, int ldc) -{ - //printf("cpu: %d %d %d %d %d %f %d %d %f %d\n",TA, TB, M, N, K, ALPHA, lda, ldb, BETA, ldc); - if (BETA != 1){ - int i, j; - for(i = 0; i < M; ++i){ - for(j = 0; j < N; ++j){ - C[i*ldc + j] *= BETA; - } - } - } - - is_avx(); // initialize static variable - if (is_fma_avx2() && !TA && !TB) { - gemm_nn_fast(M, N, K, ALPHA, A, lda, B, ldb, C, ldc); - } - else { - int t; - #pragma omp parallel for - for (t = 0; t < M; ++t) { - if (!TA && !TB) - gemm_nn(1, N, K, ALPHA, A + t*lda, lda, B, ldb, C + t*ldc, ldc); - else if (TA && !TB) - gemm_tn(1, N, K, ALPHA, A + t, lda, B, ldb, C + t*ldc, 
ldc); - else if (!TA && TB) - gemm_nt(1, N, K, ALPHA, A + t*lda, lda, B, ldb, C + t*ldc, ldc); - else - gemm_tt(1, N, K, ALPHA, A + t, lda, B, ldb, C + t*ldc, ldc); - } - } -} - -#ifdef GPU - -#include - -void gemm_ongpu(int TA, int TB, int M, int N, int K, float ALPHA, - float *A_gpu, int lda, - float *B_gpu, int ldb, - float BETA, - float *C_gpu, int ldc) -{ - cublasHandle_t handle = blas_handle(); - cudaError_t stream_status = (cudaError_t)cublasSetStream(handle, get_cuda_stream()); - CHECK_CUDA(stream_status); - cudaError_t status = (cudaError_t)cublasSgemm(handle, (TB ? CUBLAS_OP_T : CUBLAS_OP_N), - (TA ? CUBLAS_OP_T : CUBLAS_OP_N), N, M, K, &ALPHA, B_gpu, ldb, A_gpu, lda, &BETA, C_gpu, ldc); - CHECK_CUDA(status); -} - -void gemm_gpu(int TA, int TB, int M, int N, int K, float ALPHA, - float *A, int lda, - float *B, int ldb, - float BETA, - float *C, int ldc) -{ - float *A_gpu = cuda_make_array(A, (TA ? lda*K:lda*M)); - float *B_gpu = cuda_make_array(B, (TB ? ldb*N : ldb*K)); - float *C_gpu = cuda_make_array(C, ldc*M); - - gemm_ongpu(TA, TB, M, N, K, ALPHA, A_gpu, lda, B_gpu, ldb, BETA, C_gpu, ldc); - - cuda_pull_array(C_gpu, C, ldc*M); - cuda_free(A_gpu); - cuda_free(B_gpu); - cuda_free(C_gpu); -} - -#include -#include -#include -#include - -void time_gpu_random_matrix(int TA, int TB, int m, int k, int n) -{ - float *a; - if(!TA) a = random_matrix(m,k); - else a = random_matrix(k,m); - int lda = (!TA)?k:m; - float *b; - if(!TB) b = random_matrix(k,n); - else b = random_matrix(n,k); - int ldb = (!TB)?n:k; - - float *c = random_matrix(m,n); - int i; - clock_t start = clock(), end; - for(i = 0; i<32; ++i){ - gemm_gpu(TA,TB,m,n,k,1,a,lda,b,ldb,1,c,n); - } - end = clock(); - printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf s\n",m,k,k,n, TA, TB, (float)(end-start)/CLOCKS_PER_SEC); - free(a); - free(b); - free(c); -} - -void time_ongpu(int TA, int TB, int m, int k, int n) -{ - int iter = 10; - float *a = random_matrix(m,k); - float *b = 
random_matrix(k,n); - - int lda = (!TA)?k:m; - int ldb = (!TB)?n:k; - - float *c = random_matrix(m,n); - - float *a_cl = cuda_make_array(a, m*k); - float *b_cl = cuda_make_array(b, k*n); - float *c_cl = cuda_make_array(c, m*n); - - int i; - clock_t start = clock(), end; - for(i = 0; i -#include -#ifdef __cplusplus -extern "C" { -#endif - -void convolution_2d(int w, int h, int ksize, int n, int c, int pad, int stride, - float *weights, float *input, float *output, float *mean); - -static inline void set_bit(unsigned char *const dst, size_t index) { - size_t dst_i = index / 8; - int dst_shift = index % 8; - dst[dst_i] |= 1 << dst_shift; - //dst[dst_i] |= 1 << (8 - dst_shift); -} - -static inline unsigned char get_bit(unsigned char const*const src, size_t index) { - size_t src_i = index / 8; - int src_shift = index % 8; - unsigned char val = (src[src_i] & (1 << src_shift)) > 0; - //unsigned char val = (src[src_i] & (1 << (8 - src_shift))) > 0; - return val; -} - -int is_avx(); -int is_fma_avx2(); - -void float_to_bit(float *src, unsigned char *dst, size_t size); - -void transpose_block_SSE4x4(float *A, float *B, const int n, const int m, - const int lda, const int ldb, const int block_size); - -void transpose_bin(uint32_t *A, uint32_t *B, const int n, const int m, - const int lda, const int ldb, const int block_size); - -void gemm_nn_custom_bin_mean_transposed(int M, int N, int K, float ALPHA_UNUSED, - unsigned char *A, int lda, - unsigned char *B, int ldb, - float *C, int ldc, float *mean_arr); - -void im2col_cpu_custom(float* data_im, - int channels, int height, int width, - int ksize, int stride, int pad, float* data_col); - -void im2col_cpu_custom_align(float* data_im, - int channels, int height, int width, - int ksize, int stride, int pad, float* data_col, int bit_align); - -void im2col_cpu_custom_bin(float* data_im, - int channels, int height, int width, - int ksize, int stride, int pad, float* data_col, int bit_align); - -void im2col_cpu_custom_transpose(float* 
data_im, - int channels, int height, int width, - int ksize, int stride, int pad, float* data_col, int ldb_align); - -void activate_array_cpu_custom(float *x, const int n, const ACTIVATION a); - -void transpose_32x32_bits_reversed_diagonale(uint32_t *A, uint32_t *B, int m, int n); - -void gemm_bin(int M, int N, int K, float ALPHA, - char *A, int lda, - float *B, int ldb, - float *C, int ldc); - -void repack_input(float *input, float *re_packed_input, int w, int h, int c); - -void convolution_repacked(uint32_t *packed_input, uint32_t *packed_weights, float *output, - int w, int h, int c, int n, int size, int pad, int new_lda, float *mean_arr); - -void gemm_nn_bin_32bit_packed(int M, int N, int K, float ALPHA, - uint32_t *A, int lda, - uint32_t *B, int ldb, - float *C, int ldc, float *mean_arr); - -void transpose_uint32(uint32_t *src, uint32_t *dst, int src_h, int src_w, int src_align, int dst_align); - -void gemm_nn_bin_transposed_32bit_packed(int M, int N, int K, float ALPHA, - uint32_t *A, int lda, - uint32_t *B, int ldb, - float *C, int ldc, float *mean_arr); - - -void forward_maxpool_layer_avx(float *src, float *dst, int *indexes, int size, int w, int h, int out_w, int out_h, int c, - int pad, int stride, int batch); - - -void gemm(int TA, int TB, int M, int N, int K, float ALPHA, - float *A, int lda, - float *B, int ldb, - float BETA, - float *C, int ldc); - -void gemm_cpu(int TA, int TB, int M, int N, int K, float ALPHA, - float *A, int lda, - float *B, int ldb, - float BETA, - float *C, int ldc); - -#ifdef GPU -void gemm_ongpu(int TA, int TB, int M, int N, int K, float ALPHA, - float *A_gpu, int lda, - float *B_gpu, int ldb, - float BETA, - float *C_gpu, int ldc); - -void gemm_gpu(int TA, int TB, int M, int N, int K, float ALPHA, - float *A, int lda, - float *B, int ldb, - float BETA, - float *C, int ldc); -#endif -#ifdef __cplusplus -} -#endif -#endif diff --git a/src/Detector/darknet/src/getopt.c b/src/Detector/darknet/src/getopt.c deleted file mode 100644 
index 45d2b8e63..000000000 --- a/src/Detector/darknet/src/getopt.c +++ /dev/null @@ -1,498 +0,0 @@ -#ifdef _MSC_VER -#include "getopt.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef REPLACE_GETOPT -int opterr = 1; /* if error message should be printed */ -int optind = 1; /* index into parent argv vector */ -int optopt = '?'; /* character checked for validity */ -#undef optreset /* see getopt.h */ -#define optreset __mingw_optreset -int optreset; /* reset getopt */ -char* optarg; /* argument associated with option */ -#endif - -static void -_vwarnx(const char* fmt, va_list ap) -{ - (void)fprintf(stderr, "%s: ", __progname); - if (fmt != NULL) - (void)vfprintf(stderr, fmt, ap); - (void)fprintf(stderr, "\n"); -} - -static void -warnx(const char* fmt, ...) -{ - va_list ap; - va_start(ap, fmt); - _vwarnx(fmt, ap); - va_end(ap); -} - -/* - * Compute the greatest common divisor of a and b. - */ -static int -gcd(int a, int b) -{ - int c; - - c = a % b; - while (c != 0) { - a = b; - b = c; - c = a % b; - } - - return (b); -} - -/* - * Exchange the block from nonopt_start to nonopt_end with the block - * from nonopt_end to opt_end (keeping the same order of arguments - * in each block). 
- */ -static void -permute_args(int panonopt_start, int panonopt_end, int opt_end, - char* const* nargv) -{ - int cstart, cyclelen, i, j, ncycle, nnonopts, nopts, pos; - char* swap; - - /* - * compute lengths of blocks and number and size of cycles - */ - nnonopts = panonopt_end - panonopt_start; - nopts = opt_end - panonopt_end; - ncycle = gcd(nnonopts, nopts); - cyclelen = (opt_end - panonopt_start) / ncycle; - - for (i = 0; i < ncycle; i++) { - cstart = panonopt_end + i; - pos = cstart; - for (j = 0; j < cyclelen; j++) { - if (pos >= panonopt_end) - pos -= nnonopts; - else - pos += nopts; - swap = nargv[pos]; - /* LINTED const cast */ - ((char**)nargv)[pos] = nargv[cstart]; - /* LINTED const cast */ - ((char**)nargv)[cstart] = swap; - } - } -} - -#ifdef REPLACE_GETOPT -/* - * getopt -- - * Parse argc/argv argument vector. - * - * [eventually this will replace the BSD getopt] - */ -int getopt(int nargc, char* const* nargv, const char* options) -{ - - /* - * We don't pass FLAG_PERMUTE to getopt_internal() since - * the BSD getopt(3) (unlike GNU) has never done this. - * - * Furthermore, since many privileged programs call getopt() - * before dropping privileges it makes sense to keep things - * as simple (and bug-free) as possible. - */ - return (getopt_internal(nargc, nargv, options, NULL, NULL, 0)); -} -#endif /* REPLACE_GETOPT */ - -//extern int getopt(int nargc, char * const *nargv, const char *options); - -#ifdef __cplusplus -} -#endif -/* - * POSIX requires the `getopt' API to be specified in `unistd.h'; - * thus, `unistd.h' includes this header. However, we do not want - * to expose the `getopt_long' or `getopt_long_only' APIs, when - * included in this manner. Thus, close the standard __GETOPT_H__ - * declarations block, and open an additional __GETOPT_LONG_H__ - * specific block, only when *not* __UNISTD_H_SOURCED__, in which - * to declare the extended API. 
- */ - -#ifdef __cplusplus -extern "C" { -#endif - -struct option /* specification for a long form option... */ -{ - const char* name; /* option name, without leading hyphens */ - int has_arg; /* does it take an argument? */ - int* flag; /* where to save its status, or NULL */ - int val; /* its associated status value */ -}; - -enum /* permitted values for its `has_arg' field... */ -{ - no_argument = 0, /* option never takes an argument */ - required_argument, /* option always requires an argument */ - optional_argument /* option may take an argument */ -}; - -/* - * parse_long_options -- - * Parse long options in argc/argv argument vector. - * Returns -1 if short_too is set and the option does not match long_options. - */ -static int -parse_long_options(char* const* nargv, const char* options, - const struct option* long_options, int* idx, int short_too) -{ - char *current_argv, *has_equal; - size_t current_argv_len; - int i, ambiguous, match; - -#define IDENTICAL_INTERPRETATION(_x, _y) \ - (long_options[(_x)].has_arg == long_options[(_y)].has_arg && long_options[(_x)].flag == long_options[(_y)].flag && long_options[(_x)].val == long_options[(_y)].val) - - current_argv = place; - match = -1; - ambiguous = 0; - - optind++; - - if ((has_equal = strchr(current_argv, '=')) != NULL) { - /* argument found (--option=arg) */ - current_argv_len = has_equal - current_argv; - has_equal++; - } else - current_argv_len = strlen(current_argv); - - for (i = 0; long_options[i].name; i++) { - /* find matching long option */ - if (strncmp(current_argv, long_options[i].name, - current_argv_len)) - continue; - - if (strlen(long_options[i].name) == current_argv_len) { - /* exact match */ - match = i; - ambiguous = 0; - break; - } - /* - * If this is a known short option, don't allow - * a partial match of a single character. 
- */ - if (short_too && current_argv_len == 1) - continue; - - if (match == -1) /* partial match */ - match = i; - else if (!IDENTICAL_INTERPRETATION(i, match)) - ambiguous = 1; - } - if (ambiguous) { - /* ambiguous abbreviation */ - if (PRINT_ERROR) - warnx(ambig, (int)current_argv_len, - current_argv); - optopt = 0; - return (BADCH); - } - if (match != -1) { /* option found */ - if (long_options[match].has_arg == no_argument - && has_equal) { - if (PRINT_ERROR) - warnx(noarg, (int)current_argv_len, - current_argv); - /* - * XXX: GNU sets optopt to val regardless of flag - */ - if (long_options[match].flag == NULL) - optopt = long_options[match].val; - else - optopt = 0; - return (BADARG); - } - if (long_options[match].has_arg == required_argument || long_options[match].has_arg == optional_argument) { - if (has_equal) - optarg = has_equal; - else if (long_options[match].has_arg == required_argument) { - /* - * optional argument doesn't use next nargv - */ - optarg = nargv[optind++]; - } - } - if ((long_options[match].has_arg == required_argument) - && (optarg == NULL)) { - /* - * Missing argument; leading ':' indicates no error - * should be generated. - */ - if (PRINT_ERROR) - warnx(recargstring, - current_argv); - /* - * XXX: GNU sets optopt to val regardless of flag - */ - if (long_options[match].flag == NULL) - optopt = long_options[match].val; - else - optopt = 0; - --optind; - return (BADARG); - } - } else { /* unknown option */ - if (short_too) { - --optind; - return (-1); - } - if (PRINT_ERROR) - warnx(illoptstring, current_argv); - optopt = 0; - return (BADCH); - } - if (idx) - *idx = match; - if (long_options[match].flag) { - *long_options[match].flag = long_options[match].val; - return (0); - } else - return (long_options[match].val); -#undef IDENTICAL_INTERPRETATION -} - -/* - * getopt_internal -- - * Parse argc/argv argument vector. Called by user level routines. 
- */ -static int -getopt_internal(int nargc, char* const* nargv, const char* options, - const struct option* long_options, int* idx, int flags) -{ - char* oli; /* option letter list index */ - int optchar, short_too; - static int posixly_correct = -1; - - if (options == NULL) - return (-1); - - /* - * XXX Some GNU programs (like cvs) set optind to 0 instead of - * XXX using optreset. Work around this braindamage. - */ - if (optind == 0) - optind = optreset = 1; - - /* - * Disable GNU extensions if POSIXLY_CORRECT is set or options - * string begins with a '+'. - * - * CV, 2009-12-14: Check POSIXLY_CORRECT anew if optind == 0 or - * optreset != 0 for GNU compatibility. - */ - if (posixly_correct == -1 || optreset != 0) - posixly_correct = (getenv("POSIXLY_CORRECT") != NULL); - if (*options == '-') - flags |= FLAG_ALLARGS; - else if (posixly_correct || *options == '+') - flags &= ~FLAG_PERMUTE; - if (*options == '+' || *options == '-') - options++; - - optarg = NULL; - if (optreset) - nonopt_start = nonopt_end = -1; -start: - if (optreset || !*place) { /* update scanning pointer */ - optreset = 0; - if (optind >= nargc) { /* end of argument vector */ - place = EMSG; - if (nonopt_end != -1) { - /* do permutation, if we have to */ - permute_args(nonopt_start, nonopt_end, - optind, nargv); - optind -= nonopt_end - nonopt_start; - } else if (nonopt_start != -1) { - /* - * If we skipped non-options, set optind - * to the first of them. - */ - optind = nonopt_start; - } - nonopt_start = nonopt_end = -1; - return (-1); - } - if (*(place = nargv[optind]) != '-' || (place[1] == '\0' && strchr(options, '-') == NULL)) { - place = EMSG; /* found non-option */ - if (flags & FLAG_ALLARGS) { - /* - * GNU extension: - * return non-option as argument to option 1 - */ - optarg = nargv[optind++]; - return (INORDER); - } - if (!(flags & FLAG_PERMUTE)) { - /* - * If no permutation wanted, stop parsing - * at first non-option. 
- */ - return (-1); - } - /* do permutation */ - if (nonopt_start == -1) - nonopt_start = optind; - else if (nonopt_end != -1) { - permute_args(nonopt_start, nonopt_end, - optind, nargv); - nonopt_start = optind - (nonopt_end - nonopt_start); - nonopt_end = -1; - } - optind++; - /* process next argument */ - goto start; - } - if (nonopt_start != -1 && nonopt_end == -1) - nonopt_end = optind; - - /* - * If we have "-" do nothing, if "--" we are done. - */ - if (place[1] != '\0' && *++place == '-' && place[1] == '\0') { - optind++; - place = EMSG; - /* - * We found an option (--), so if we skipped - * non-options, we have to permute. - */ - if (nonopt_end != -1) { - permute_args(nonopt_start, nonopt_end, - optind, nargv); - optind -= nonopt_end - nonopt_start; - } - nonopt_start = nonopt_end = -1; - return (-1); - } - } - - /* - * Check long options if: - * 1) we were passed some - * 2) the arg is not just "-" - * 3) either the arg starts with -- we are getopt_long_only() - */ - if (long_options != NULL && place != nargv[optind] && (*place == '-' || (flags & FLAG_LONGONLY))) { - short_too = 0; - if (*place == '-') - place++; /* --foo long option */ - else if (*place != ':' && strchr(options, *place) != NULL) - short_too = 1; /* could be short option too */ - - optchar = parse_long_options(nargv, options, long_options, - idx, short_too); - if (optchar != -1) { - place = EMSG; - return (optchar); - } - } - - if ((optchar = (int)*place++) == (int)':' || (optchar == (int)'-' && *place != '\0') || (oli = (char*)strchr(options, optchar)) == NULL) { - /* - * If the user specified "-" and '-' isn't listed in - * options, return -1 (non-option) as per POSIX. - * Otherwise, it is an unknown option character (or ':'). 
- */ - if (optchar == (int)'-' && *place == '\0') - return (-1); - if (!*place) - ++optind; - if (PRINT_ERROR) - warnx(illoptchar, optchar); - optopt = optchar; - return (BADCH); - } - if (long_options != NULL && optchar == 'W' && oli[1] == ';') { - /* -W long-option */ - if (*place) /* no space */ - /* NOTHING */; - else if (++optind >= nargc) { /* no arg */ - place = EMSG; - if (PRINT_ERROR) - warnx(recargchar, optchar); - optopt = optchar; - return (BADARG); - } else /* white space */ - place = nargv[optind]; - optchar = parse_long_options(nargv, options, long_options, - idx, 0); - place = EMSG; - return (optchar); - } - if (*++oli != ':') { /* doesn't take argument */ - if (!*place) - ++optind; - } else { /* takes (optional) argument */ - optarg = NULL; - if (*place) /* no white space */ - optarg = place; - else if (oli[1] != ':') { /* arg not optional */ - if (++optind >= nargc) { /* no arg */ - place = EMSG; - if (PRINT_ERROR) - warnx(recargchar, optchar); - optopt = optchar; - return (BADARG); - } else - optarg = nargv[optind]; - } - place = EMSG; - ++optind; - } - /* dump back option letter */ - return (optchar); -} - -/* - * getopt_long -- - * Parse argc/argv argument vector. - */ -int getopt_long(int nargc, char* const* nargv, const char* options, - const struct option* long_options, int* idx) -{ - - return (getopt_internal(nargc, nargv, options, long_options, idx, - FLAG_PERMUTE)); -} - -/* - * getopt_long_only -- - * Parse argc/argv argument vector. 
- */ -int getopt_long_only(int nargc, char* const* nargv, const char* options, - const struct option* long_options, int* idx) -{ - - return (getopt_internal(nargc, nargv, options, long_options, idx, - FLAG_PERMUTE | FLAG_LONGONLY)); -} - -//extern int getopt_long(int nargc, char * const *nargv, const char *options, -// const struct option *long_options, int *idx); -//extern int getopt_long_only(int nargc, char * const *nargv, const char *options, -// const struct option *long_options, int *idx); -/* - * Previous MinGW implementation had... - */ - -#ifdef __cplusplus -} -#endif -#endif diff --git a/src/Detector/darknet/src/getopt.h b/src/Detector/darknet/src/getopt.h deleted file mode 100644 index 8266c734e..000000000 --- a/src/Detector/darknet/src/getopt.h +++ /dev/null @@ -1,228 +0,0 @@ -#ifdef _MSC_VER -#ifndef __GETOPT_H__ -/** - * DISCLAIMER - * This file is part of the mingw-w64 runtime package. - * - * The mingw-w64 runtime package and its code is distributed in the hope that it - * will be useful but WITHOUT ANY WARRANTY. ALL WARRANTIES, EXPRESSED OR - * IMPLIED ARE HEREBY DISCLAIMED. This includes but is not limited to - * warranties of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - */ -/* - * Copyright (c) 2002 Todd C. Miller - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - * - * Sponsored in part by the Defense Advanced Research Projects - * Agency (DARPA) and Air Force Research Laboratory, Air Force - * Materiel Command, USAF, under agreement number F39502-99-1-0512. - */ -/*- - * Copyright (c) 2000 The NetBSD Foundation, Inc. - * All rights reserved. - * - * This code is derived from software contributed to The NetBSD Foundation - * by Dieter Baron and Thomas Klausner. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#define __GETOPT_H__ - -/* All the headers include this file. */ -#include -#include -#include -#include -#include -#include -#define WIN32_LEAN_AND_MEAN -#include - -#ifdef __cplusplus -extern "C" { -#endif - -#define REPLACE_GETOPT /* use this getopt as the system getopt(3) */ - -//extern int optind; /* index of first non-option in argv */ -//extern int optopt; /* single option character, as parsed */ -//extern int opterr; /* flag to enable built-in diagnostics... */ -// /* (user may set to zero, to suppress) */ -// -//extern char *optarg; /* pointer to argument of current option */ - -#define PRINT_ERROR ((opterr) && (*options != ':')) - -#define FLAG_PERMUTE 0x01 /* permute non-options to the end of argv */ -#define FLAG_ALLARGS 0x02 /* treat non-options as args to option "-1" */ -#define FLAG_LONGONLY 0x04 /* operate as getopt_long_only */ - -/* return values */ -#define BADCH (int)'?' -#define BADARG ((*options == ':') ? 
(int)':' : (int)'?') -#define INORDER (int)1 - -#ifndef __CYGWIN__ -#define __progname __argv[0] -#else -extern char __declspec(dllimport) * __progname; -#endif - -#ifdef __CYGWIN__ -static char EMSG[] = ""; -#else -#define EMSG "" -#endif - -static int getopt_internal(int, char* const*, const char*, - const struct option*, int*, int); -static int parse_long_options(char* const*, const char*, - const struct option*, int*, int); -static int gcd(int, int); -static void permute_args(int, int, int, char* const*); - -static char* place = EMSG; /* option letter processing */ - -/* XXX: set optreset to 1 rather than these two */ -static int nonopt_start = -1; /* first non option argument (for permute) */ -static int nonopt_end = -1; /* first option after non options (for permute) */ - -/* Error messages */ -static const char recargchar[] = "option requires an argument -- %c"; -static const char recargstring[] = "option requires an argument -- %s"; -static const char ambig[] = "ambiguous option -- %.*s"; -static const char noarg[] = "option doesn't take an argument -- %.*s"; -static const char illoptchar[] = "unknown option -- %c"; -static const char illoptstring[] = "unknown option -- %s"; - -static void _vwarnx(const char* fmt, va_list ap); - -static void warnx(const char* fmt, ...); - -/* - * Compute the greatest common divisor of a and b. - */ -static int gcd(int a, int b); - -/* - * Exchange the block from nonopt_start to nonopt_end with the block - * from nonopt_end to opt_end (keeping the same order of arguments - * in each block). - */ -static void permute_args(int panonopt_start, int panonopt_end, int opt_end, char* const* nargv); - -#ifdef REPLACE_GETOPT -/* - * getopt -- - * Parse argc/argv argument vector. 
- * - * [eventually this will replace the BSD getopt] - */ -int getopt(int nargc, char* const* nargv, const char* options); -#endif /* REPLACE_GETOPT */ - -//extern int getopt(int nargc, char * const *nargv, const char *options); - -#ifdef _BSD_SOURCE -/* - * BSD adds the non-standard `optreset' feature, for reinitialisation - * of `getopt' parsing. We support this feature, for applications which - * proclaim their BSD heritage, before including this header; however, - * to maintain portability, developers are advised to avoid it. - */ -#define optreset __mingw_optreset -extern int optreset; -#endif -#ifdef __cplusplus -} -#endif -/* - * POSIX requires the `getopt' API to be specified in `unistd.h'; - * thus, `unistd.h' includes this header. However, we do not want - * to expose the `getopt_long' or `getopt_long_only' APIs, when - * included in this manner. Thus, close the standard __GETOPT_H__ - * declarations block, and open an additional __GETOPT_LONG_H__ - * specific block, only when *not* __UNISTD_H_SOURCED__, in which - * to declare the extended API. - */ -#endif /* !defined(__GETOPT_H__) */ - -#if !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__) -#define __GETOPT_LONG_H__ - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * parse_long_options -- - * Parse long options in argc/argv argument vector. - * Returns -1 if short_too is set and the option does not match long_options. - */ -/* static int parse_long_options(char* const* nargv, const char* options, const struct option* long_options, int* idx, int short_too); */ - -/* - * getopt_internal -- - * Parse argc/argv argument vector. Called by user level routines. - */ -/* static int getopt_internal(int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx, int flags); */ - -/* - * getopt_long -- - * Parse argc/argv argument vector. 
- */ -int getopt_long(int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx); - -/* - * getopt_long_only -- - * Parse argc/argv argument vector. - */ -int getopt_long_only(int nargc, char* const* nargv, const char* options, const struct option* long_options, int* idx); - -/* - * Previous MinGW implementation had... - */ -#ifndef HAVE_DECL_GETOPT -/* - * ...for the long form API only; keep this for compatibility. - */ -#define HAVE_DECL_GETOPT 1 -#endif - -#ifdef __cplusplus -} -#endif - -#endif /* !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__) */ -#endif diff --git a/src/Detector/darknet/src/gettimeofday.c b/src/Detector/darknet/src/gettimeofday.c deleted file mode 100644 index 74f6789b9..000000000 --- a/src/Detector/darknet/src/gettimeofday.c +++ /dev/null @@ -1,43 +0,0 @@ -#ifdef _MSC_VER -#include "gettimeofday.h" - -int gettimeofday(struct timeval* tp, struct timezone* tzp) -{ - static const uint64_t EPOCH = ((uint64_t)116444736000000000ULL); - SYSTEMTIME system_time; - FILETIME file_time; - uint64_t time; - - - GetSystemTime(&system_time); - SystemTimeToFileTime(&system_time, &file_time); - time = ((uint64_t)file_time.dwLowDateTime); - time += ((uint64_t)file_time.dwHighDateTime) << 32; - /*converting file time to unix epoch*/ - tp->tv_sec = (long)((time - EPOCH) / 10000000L); - tp->tv_usec = (long)(system_time.wMilliseconds * 1000); - return 0; - } - -int clock_gettime(int dummy, struct timespec* ct) - { - LARGE_INTEGER count; - - if (g_first_time) { - g_first_time = 0; - - if (0 == QueryPerformanceFrequency(&g_counts_per_sec)) { - g_counts_per_sec.QuadPart = 0; - } - } - - if ((NULL == ct) || (g_counts_per_sec.QuadPart <= 0) || (0 == QueryPerformanceCounter(&count))) { - return -1; -} - - ct->tv_sec = count.QuadPart / g_counts_per_sec.QuadPart; - ct->tv_nsec = ((count.QuadPart % g_counts_per_sec.QuadPart) * BILLION) / g_counts_per_sec.QuadPart; - - return 0; -} -#endif diff --git 
a/src/Detector/darknet/src/gettimeofday.h b/src/Detector/darknet/src/gettimeofday.h deleted file mode 100644 index 86fef1010..000000000 --- a/src/Detector/darknet/src/gettimeofday.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifdef _MSC_VER -#define WIN32_LEAN_AND_MEAN -#include -#include -#include -#include -#include "darknet.h" - -#define CLOCK_REALTIME (1) -#define BILLION (1E9) - -#ifndef timersub -#define timersub(a, b, result) \ - do { \ - (result)->tv_sec = (a)->tv_sec - (b)->tv_sec; \ - (result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \ - if ((result)->tv_usec < 0) { \ - --(result)->tv_sec; \ - (result)->tv_usec += 1000000; \ - } \ - } while (0) -#endif // timersub - -#ifdef __cplusplus -extern "C" { -#endif - -static unsigned char g_first_time = 1; -static LARGE_INTEGER g_counts_per_sec; - -int gettimeofday(struct timeval*, struct timezone*); -int clock_gettime(int, struct timespec*); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/Detector/darknet/src/go.c b/src/Detector/darknet/src/go.c deleted file mode 100644 index 1c7a1a083..000000000 --- a/src/Detector/darknet/src/go.c +++ /dev/null @@ -1,849 +0,0 @@ -#include "network.h" -#include "utils.h" -#include "parser.h" -#include "option_list.h" -#include "blas.h" - - -int inverted = 1; -int noi = 1; -//static const unsigned int n_ind = 5; -#define n_ind 5 - -typedef struct { - char **data; - int n; -} moves; - -char *fgetgo(FILE *fp) -{ - if(feof(fp)) return 0; - size_t size = 94; - char* line = (char*)xmalloc(size * sizeof(char)); - if(size != fread(line, sizeof(char), size, fp)){ - free(line); - return 0; - } - - return line; -} - -moves load_go_moves(char *filename) -{ - moves m; - m.n = 128; - m.data = (char**)xcalloc(128, sizeof(char*)); - FILE *fp = fopen(filename, "rb"); - int count = 0; - char *line = 0; - while((line = fgetgo(fp))){ - if(count >= m.n){ - m.n *= 2; - m.data = (char**)xrealloc(m.data, m.n * sizeof(char*)); - } - m.data[count] = line; - ++count; - } - printf("%d\n", count); - m.n = 
count; - m.data = (char**)xrealloc(m.data, count * sizeof(char*)); - fclose(fp); - return m; -} - -void string_to_board(char *s, float *board) -{ - int i, j; - //memset(board, 0, 1*19*19*sizeof(float)); - int count = 0; - for(i = 0; i < 91; ++i){ - char c = s[i]; - for(j = 0; j < 4; ++j){ - int me = (c >> (2*j)) & 1; - int you = (c >> (2*j + 1)) & 1; - if (me) board[count] = 1; - else if (you) board[count] = -1; - else board[count] = 0; - ++count; - if(count >= 19*19) break; - } - } -} - -void board_to_string(char *s, float *board) -{ - int i, j; - memset(s, 0, (19*19/4+1)*sizeof(char)); - int count = 0; - for(i = 0; i < 91; ++i){ - for(j = 0; j < 4; ++j){ - int me = (board[count] == 1); - int you = (board[count] == -1); - if (me) s[i] = s[i] | (1<<(2*j)); - if (you) s[i] = s[i] | (1<<(2*j + 1)); - ++count; - if(count >= 19*19) break; - } - } -} - -void random_go_moves(moves m, float *boards, float *labels, int n) -{ - int i; - memset(labels, 0, 19*19*n*sizeof(float)); - for(i = 0; i < n; ++i){ - char *b = m.data[rand()%m.n]; - int row = b[0]; - int col = b[1]; - labels[col + 19*(row + i*19)] = 1; - string_to_board(b+2, boards+i*19*19); - boards[col + 19*(row + i*19)] = 0; - - int flip = rand()%2; - int rotate = rand()%4; - image in = float_to_image(19, 19, 1, boards+i*19*19); - image out = float_to_image(19, 19, 1, labels+i*19*19); - if(flip){ - flip_image(in); - flip_image(out); - } - rotate_image_cw(in, rotate); - rotate_image_cw(out, rotate); - } -} - - -void train_go(char *cfgfile, char *weightfile) -{ - srand(time(0)); - float avg_loss = -1; - char *base = basecfg(cfgfile); - printf("%s\n", base); - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); - - char* backup_directory = "backup/"; - - char buff[256]; - float* board = (float*)xcalloc(19 * 19 * net.batch, sizeof(float)); - float* move = (float*)xcalloc(19 * 
19 * net.batch, sizeof(float)); - moves m = load_go_moves("backup/go.train"); - //moves m = load_go_moves("games.txt"); - - int N = m.n; - int epoch = (*net.seen)/N; - while(get_current_batch(net) < net.max_batches || net.max_batches == 0){ - clock_t time=clock(); - - random_go_moves(m, board, move, net.batch); - float loss = train_network_datum(net, board, move) / net.batch; - if(avg_loss == -1) avg_loss = loss; - avg_loss = avg_loss*.95 + loss*.05; - printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); - if(*net.seen/N > epoch){ - epoch = *net.seen/N; - char buff[256]; - sprintf(buff, "%s/%s_%d.weights", backup_directory,base, epoch); - save_weights(net, buff); - - } - if(get_current_batch(net)%100 == 0){ - char buff[256]; - sprintf(buff, "%s/%s.backup",backup_directory,base); - save_weights(net, buff); - } - if(get_current_batch(net)%10000 == 0){ - char buff[256]; - sprintf(buff, "%s/%s_%d.backup",backup_directory,base,get_current_batch(net)); - save_weights(net, buff); - } - } - sprintf(buff, "%s/%s.weights", backup_directory, base); - save_weights(net, buff); - - free_network(net); - free(base); - free(board); - free(move); -} - -void propagate_liberty(float *board, int *lib, int *visited, int row, int col, int side) -{ - if (row < 0 || row > 18 || col < 0 || col > 18) return; - int index = row*19 + col; - if (board[index] != side) return; - if (visited[index]) return; - visited[index] = 1; - lib[index] += 1; - propagate_liberty(board, lib, visited, row+1, col, side); - propagate_liberty(board, lib, visited, row-1, col, side); - propagate_liberty(board, lib, visited, row, col+1, side); - propagate_liberty(board, lib, visited, row, col-1, side); -} - - -int *calculate_liberties(float *board) -{ - int* lib = (int*)xcalloc(19 * 19, sizeof(int)); - int visited[361]; - int i, j; - for(j = 0; j < 19; ++j){ - for(i = 0; i < 19; ++i){ - 
memset(visited, 0, 19*19*sizeof(int)); - int index = j*19 + i; - if(board[index] == 0){ - if ((i > 0) && board[index - 1]) propagate_liberty(board, lib, visited, j, i-1, board[index-1]); - if ((i < 18) && board[index + 1]) propagate_liberty(board, lib, visited, j, i+1, board[index+1]); - if ((j > 0) && board[index - 19]) propagate_liberty(board, lib, visited, j-1, i, board[index-19]); - if ((j < 18) && board[index + 19]) propagate_liberty(board, lib, visited, j+1, i, board[index+19]); - } - } - } - return lib; -} - -void print_board(float *board, int swap, int *indexes) -{ - //FILE *stream = stdout; - FILE *stream = stderr; - int i,j,n; - fprintf(stream, "\n\n"); - fprintf(stream, " "); - for(i = 0; i < 19; ++i){ - fprintf(stream, "%c ", 'A' + i + 1*(i > 7 && noi)); - } - fprintf(stream, "\n"); - for(j = 0; j < 19; ++j){ - fprintf(stream, "%2d", (inverted) ? 19-j : j+1); - for(i = 0; i < 19; ++i){ - int index = j*19 + i; - if(indexes){ - int found = 0; - for (n = 0; n < n_ind; ++n) { - if(index == indexes[n]){ - found = 1; - /* - if(n == 0) fprintf(stream, "\uff11"); - else if(n == 1) fprintf(stream, "\uff12"); - else if(n == 2) fprintf(stream, "\uff13"); - else if(n == 3) fprintf(stream, "\uff14"); - else if(n == 4) fprintf(stream, "\uff15"); - */ - if(n == 0) fprintf(stream, " 1"); - else if(n == 1) fprintf(stream, " 2"); - else if(n == 2) fprintf(stream, " 3"); - else if(n == 3) fprintf(stream, " 4"); - else if(n == 4) fprintf(stream, " 5"); - } - } - if(found) continue; - } - //if(board[index]*-swap > 0) fprintf(stream, "\u25C9 "); - //else if(board[index]*-swap < 0) fprintf(stream, "\u25EF "); - if(board[index]*-swap > 0) fprintf(stream, " O"); - else if(board[index]*-swap < 0) fprintf(stream, " X"); - else fprintf(stream, " "); - } - fprintf(stream, "\n"); - } -} - -void flip_board(float *board) -{ - int i; - for(i = 0; i < 19*19; ++i){ - board[i] = -board[i]; - } -} - -void predict_move(network net, float *board, float *move, int multi) -{ - float *output = 
network_predict(net, board); - copy_cpu(19*19, output, 1, move, 1); - int i; - if(multi){ - image bim = float_to_image(19, 19, 1, board); - for(i = 1; i < 8; ++i){ - rotate_image_cw(bim, i); - if(i >= 4) flip_image(bim); - - float *output = network_predict(net, board); - image oim = float_to_image(19, 19, 1, output); - - if(i >= 4) flip_image(oim); - rotate_image_cw(oim, -i); - - axpy_cpu(19*19, 1, output, 1, move, 1); - - if(i >= 4) flip_image(bim); - rotate_image_cw(bim, -i); - } - scal_cpu(19*19, 1./8., move, 1); - } - for(i = 0; i < 19*19; ++i){ - if(board[i]) move[i] = 0; - } -} - -void remove_connected(float *b, int *lib, int p, int r, int c) -{ - if (r < 0 || r >= 19 || c < 0 || c >= 19) return; - if (b[r*19 + c] != p) return; - if (lib[r*19 + c] != 1) return; - b[r*19 + c] = 0; - remove_connected(b, lib, p, r+1, c); - remove_connected(b, lib, p, r-1, c); - remove_connected(b, lib, p, r, c+1); - remove_connected(b, lib, p, r, c-1); -} - - -void move_go(float *b, int p, int r, int c) -{ - int *l = calculate_liberties(b); - b[r*19 + c] = p; - remove_connected(b, l, -p, r+1, c); - remove_connected(b, l, -p, r-1, c); - remove_connected(b, l, -p, r, c+1); - remove_connected(b, l, -p, r, c-1); - free(l); -} - -int makes_safe_go(float *b, int *lib, int p, int r, int c){ - if (r < 0 || r >= 19 || c < 0 || c >= 19) return 0; - if (b[r*19 + c] == -p){ - if (lib[r*19 + c] > 1) return 0; - else return 1; - } - if (b[r*19 + c] == 0) return 1; - if (lib[r*19 + c] > 1) return 1; - return 0; -} - -int suicide_go(float *b, int p, int r, int c) -{ - int *l = calculate_liberties(b); - int safe = 0; - safe = safe || makes_safe_go(b, l, p, r+1, c); - safe = safe || makes_safe_go(b, l, p, r-1, c); - safe = safe || makes_safe_go(b, l, p, r, c+1); - safe = safe || makes_safe_go(b, l, p, r, c-1); - free(l); - return !safe; -} - -int legal_go(float *b, char *ko, int p, int r, int c) -{ - if (b[r*19 + c]) return 0; - char curr[91]; - char next[91]; - board_to_string(curr, b); - 
move_go(b, p, r, c); - board_to_string(next, b); - string_to_board(curr, b); - if(memcmp(next, ko, 91) == 0) return 0; - return 1; -} - -int generate_move(network net, int player, float *board, int multi, float thresh, float temp, char *ko, int print) -{ - int i, j; - for(i = 0; i < net.n; ++i) net.layers[i].temperature = temp; - - float move[361]; - if (player < 0) flip_board(board); - predict_move(net, board, move, multi); - if (player < 0) flip_board(board); - - - for(i = 0; i < 19; ++i){ - for(j = 0; j < 19; ++j){ - if (!legal_go(board, ko, player, i, j)) move[i*19 + j] = 0; - } - } - - int indexes[n_ind]; - top_k(move, 19*19, n_ind, indexes); - if(thresh > move[indexes[0]]) thresh = move[indexes[n_ind-1]]; - - for(i = 0; i < 19; ++i){ - for(j = 0; j < 19; ++j){ - if (move[i*19 + j] < thresh) move[i*19 + j] = 0; - } - } - - - int max = max_index(move, 19*19); - int row = max / 19; - int col = max % 19; - int index = sample_array(move, 19*19); - - if(print){ - top_k(move, 19*19, n_ind, indexes); - for(i = 0; i < n_ind; ++i){ - if (!move[indexes[i]]) indexes[i] = -1; - } - print_board(board, player, indexes); - for(i = 0; i < n_ind; ++i){ - fprintf(stderr, "%d: %f\n", i+1, move[indexes[i]]); - } - } - - if(suicide_go(board, player, row, col)){ - return -1; - } - if(suicide_go(board, player, index/19, index%19)) index = max; - return index; -} - -void valid_go(char *cfgfile, char *weightfile, int multi) -{ - srand(time(0)); - char *base = basecfg(cfgfile); - printf("%s\n", base); - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - set_batch_network(&net, 1); - printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); - - float* board = (float*)xcalloc(19 * 19, sizeof(float)); - float* move = (float*)xcalloc(19 * 19, sizeof(float)); - moves m = load_go_moves("backup/go.test"); - - int N = m.n; - int i; - int correct = 0; - for(i = 0; i = 'A' && c <= 'Z') c = c - 'A'; - if(c 
>= 'a' && c <= 'z') c = c - 'a'; - if(c >= 8) --c; - r = 19 - r; - fprintf(stderr, "move: %d %d\n", r, c); - - char *swap = two; - two = one; - one = swap; - move_go(board, player, r, c); - board_to_string(one, board); - - printf("=%s \n\n", ids); - print_board(board, 1, 0); - } else if (!strcmp(buff, "genmove")){ - char color[256]; - scanf("%s", color); - int player = (color[0] == 'b' || color[0] == 'B') ? 1 : -1; - - int index = generate_move(net, player, board, multi, .1, .7, two, 1); - if(passed || index < 0){ - printf("=%s pass\n\n", ids); - passed = 0; - } else { - int row = index / 19; - int col = index % 19; - - char *swap = two; - two = one; - one = swap; - - move_go(board, player, row, col); - board_to_string(one, board); - row = 19 - row; - if (col >= 8) ++col; - printf("=%s %c%d\n\n", ids, 'A' + col, row); - print_board(board, 1, 0); - } - - } else if (!strcmp(buff, "p")){ - //print_board(board, 1, 0); - } else if (!strcmp(buff, "final_status_list")){ - char type[256]; - scanf("%s", type); - fprintf(stderr, "final_status\n"); - char *line = fgetl(stdin); - free(line); - if(type[0] == 'd' || type[0] == 'D'){ - FILE *f = fopen("game.txt", "w"); - int i, j; - int count = 2; - fprintf(f, "boardsize 19\n"); - fprintf(f, "clear_board\n"); - for(j = 0; j < 19; ++j){ - for(i = 0; i < 19; ++i){ - if(board[j*19 + i] == 1) fprintf(f, "play black %c%d\n", 'A'+i+(i>=8), 19-j); - if(board[j*19 + i] == -1) fprintf(f, "play white %c%d\n", 'A'+i+(i>=8), 19-j); - if(board[j*19 + i]) ++count; - } - } - fprintf(f, "final_status_list dead\n"); - fclose(f); -#ifdef _WIN32 - FILE *p = _popen("./gnugo --mode gtp < game.txt", "r"); -#else - FILE *p = popen("./gnugo --mode gtp < game.txt", "r"); -#endif - for(i = 0; i < count; ++i){ - free(fgetl(p)); - free(fgetl(p)); - } - char *l = 0; - while((l = fgetl(p))){ - printf("%s\n", l); - free(l); - } - } else { - printf("?%s unknown command\n\n", ids); - } - } else { - char *line = fgetl(stdin); - free(line); - printf("?%s unknown 
command\n\n", ids); - } - fflush(stdout); - fflush(stderr); - } -} - -void test_go(char *cfg, char *weights, int multi) -{ - network net = parse_network_cfg(cfg); - if(weights){ - load_weights(&net, weights); - } - srand(time(0)); - set_batch_network(&net, 1); - float* board = (float*)xcalloc(19 * 19, sizeof(float)); - float* move = (float*)xcalloc(19 * 19, sizeof(float)); - int color = 1; - while(1){ - float *output = network_predict(net, board); - copy_cpu(19*19, output, 1, move, 1); - int i; - if(multi){ - image bim = float_to_image(19, 19, 1, board); - for(i = 1; i < 8; ++i){ - rotate_image_cw(bim, i); - if(i >= 4) flip_image(bim); - - float *output = network_predict(net, board); - image oim = float_to_image(19, 19, 1, output); - - if(i >= 4) flip_image(oim); - rotate_image_cw(oim, -i); - - axpy_cpu(19*19, 1, output, 1, move, 1); - - if(i >= 4) flip_image(bim); - rotate_image_cw(bim, -i); - } - scal_cpu(19*19, 1./8., move, 1); - } - for(i = 0; i < 19*19; ++i){ - if(board[i]) move[i] = 0; - } - - int indexes[n_ind]; - int row, col; - top_k(move, 19 * 19, n_ind, indexes); - print_board(board, color, indexes); - for (i = 0; i < n_ind; ++i) { - int index = indexes[i]; - row = index / 19; - col = index % 19; - printf("%d: %c %d, %.2f%%\n", i+1, col + 'A' + 1*(col > 7 && noi), (inverted)?19 - row : row+1, move[index]*100); - } - //if(color == 1) printf("\u25EF Enter move: "); - //else printf("\u25C9 Enter move: "); - if(color == 1) printf("X Enter move: "); - else printf("O Enter move: "); - - char c; - char *line = fgetl(stdin); - int picked = 1; - int dnum = sscanf(line, "%d", &picked); - int cnum = sscanf(line, "%c", &c); - if (strlen(line) == 0 || dnum) { - --picked; - if (picked < n_ind){ - int index = indexes[picked]; - row = index / 19; - col = index % 19; - board[row*19 + col] = 1; - } - } else if (cnum){ - if (c <= 'T' && c >= 'A'){ - int num = sscanf(line, "%c %d", &c, &row); - row = (inverted)?19 - row : row-1; - col = c - 'A'; - if (col > 7 && noi) col -= 
1; - if (num == 2) board[row*19 + col] = 1; - } else if (c == 'p') { - // Pass - } else if(c=='b' || c == 'w'){ - char g; - int num = sscanf(line, "%c %c %d", &g, &c, &row); - row = (inverted)?19 - row : row-1; - col = c - 'A'; - if (col > 7 && noi) col -= 1; - if (num == 3) board[row*19 + col] = (g == 'b') ? color : -color; - } else if(c == 'c'){ - char g; - int num = sscanf(line, "%c %c %d", &g, &c, &row); - row = (inverted)?19 - row : row-1; - col = c - 'A'; - if (col > 7 && noi) col -= 1; - if (num == 3) board[row*19 + col] = 0; - } - } - free(line); - flip_board(board); - color = -color; - } -} - -float score_game(float *board) -{ - FILE *f = fopen("game.txt", "w"); - int i, j; - int count = 3; - fprintf(f, "komi 6.5\n"); - fprintf(f, "boardsize 19\n"); - fprintf(f, "clear_board\n"); - for(j = 0; j < 19; ++j){ - for(i = 0; i < 19; ++i){ - if(board[j*19 + i] == 1) fprintf(f, "play black %c%d\n", 'A'+i+(i>=8), 19-j); - if(board[j*19 + i] == -1) fprintf(f, "play white %c%d\n", 'A'+i+(i>=8), 19-j); - if(board[j*19 + i]) ++count; - } - } - fprintf(f, "final_score\n"); - fclose(f); -#ifdef _WIN32 - FILE *p = _popen("./gnugo --mode gtp < game.txt", "r"); -#else - FILE *p = popen("./gnugo --mode gtp < game.txt", "r"); -#endif - for(i = 0; i < count; ++i){ - free(fgetl(p)); - free(fgetl(p)); - } - char *l = 0; - float score = 0; - char player = 0; - while((l = fgetl(p))){ - fprintf(stderr, "%s \t", l); - int n = sscanf(l, "= %c+%f", &player, &score); - free(l); - if (n == 2) break; - } - if(player == 'W') score = -score; -#ifdef _WIN32 - _pclose(p); -#else - pclose(p); -#endif - return score; -} - -void self_go(char *filename, char *weightfile, char *f2, char *w2, int multi) -{ - network net = parse_network_cfg(filename); - if(weightfile){ - load_weights(&net, weightfile); - } - - network net2 = net; - if(f2){ - net2 = parse_network_cfg(f2); - if(w2){ - load_weights(&net2, w2); - } - } - srand(time(0)); - char boards[300][93]; - int count = 0; - set_batch_network(&net, 
1); - set_batch_network(&net2, 1); - float* board = (float*)xcalloc(19 * 19, sizeof(float)); - char* one = (char*)xcalloc(91, sizeof(char)); - char* two = (char*)xcalloc(91, sizeof(char)); - int done = 0; - int player = 1; - int p1 = 0; - int p2 = 0; - int total = 0; - while(1){ - if (done || count >= 300){ - float score = score_game(board); - int i = (score > 0)? 0 : 1; - if((score > 0) == (total%2==0)) ++p1; - else ++p2; - ++total; - fprintf(stderr, "Total: %d, Player 1: %f, Player 2: %f\n", total, (float)p1/total, (float)p2/total); - int j; - for(; i < count; i += 2){ - for(j = 0; j < 93; ++j){ - printf("%c", boards[i][j]); - } - printf("\n"); - } - memset(board, 0, 19*19*sizeof(float)); - player = 1; - done = 0; - count = 0; - fflush(stdout); - fflush(stderr); - } - //print_board(board, 1, 0); - //sleep(1); - network use = ((total%2==0) == (player==1)) ? net : net2; - int index = generate_move(use, player, board, multi, .1, .7, two, 0); - if(index < 0){ - done = 1; - continue; - } - int row = index / 19; - int col = index % 19; - - char *swap = two; - two = one; - one = swap; - - if(player < 0) flip_board(board); - boards[count][0] = row; - boards[count][1] = col; - board_to_string(boards[count] + 2, board); - if(player < 0) flip_board(board); - ++count; - - move_go(board, player, row, col); - board_to_string(one, board); - - player = -player; - } - free(board); - free(one); - free(two); -} - -void run_go(int argc, char **argv) -{ - //boards_go(); - if(argc < 4){ - fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); - return; - } - - char *cfg = argv[3]; - char *weights = (argc > 4) ? argv[4] : 0; - char *c2 = (argc > 5) ? argv[5] : 0; - char *w2 = (argc > 6) ? 
argv[6] : 0; - int multi = find_arg(argc, argv, "-multi"); - if(0==strcmp(argv[2], "train")) train_go(cfg, weights); - else if(0==strcmp(argv[2], "valid")) valid_go(cfg, weights, multi); - else if(0==strcmp(argv[2], "self")) self_go(cfg, weights, c2, w2, multi); - else if(0==strcmp(argv[2], "test")) test_go(cfg, weights, multi); - else if(0==strcmp(argv[2], "engine")) engine_go(cfg, weights, multi); -} diff --git a/src/Detector/darknet/src/gru_layer.c b/src/Detector/darknet/src/gru_layer.c deleted file mode 100644 index de301df3e..000000000 --- a/src/Detector/darknet/src/gru_layer.c +++ /dev/null @@ -1,398 +0,0 @@ -#include "gru_layer.h" -#include "connected_layer.h" -#include "utils.h" -#include "dark_cuda.h" -#include "blas.h" -#include "gemm.h" - -#include -#include -#include -#include - -static void increment_layer(layer *l, int steps) -{ - int num = l->outputs*l->batch*steps; - l->output += num; - l->delta += num; - l->x += num; - l->x_norm += num; - -#ifdef GPU - l->output_gpu += num; - l->delta_gpu += num; - l->x_gpu += num; - l->x_norm_gpu += num; -#endif -} - -layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_normalize) -{ - fprintf(stderr, "GRU Layer: %d inputs, %d outputs\n", inputs, outputs); - batch = batch / steps; - layer l = { (LAYER_TYPE)0 }; - l.batch = batch; - l.type = GRU; - l.steps = steps; - l.inputs = inputs; - - l.input_z_layer = (layer*)xcalloc(1,sizeof(layer)); - fprintf(stderr, "\t\t"); - *(l.input_z_layer) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize); - l.input_z_layer->batch = batch; - - l.state_z_layer = (layer*)xcalloc(1,sizeof(layer)); - fprintf(stderr, "\t\t"); - *(l.state_z_layer) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize); - l.state_z_layer->batch = batch; - - - - l.input_r_layer = (layer*)xcalloc(1,sizeof(layer)); - fprintf(stderr, "\t\t"); - *(l.input_r_layer) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, 
batch_normalize); - l.input_r_layer->batch = batch; - - l.state_r_layer = (layer*)xcalloc(1,sizeof(layer)); - fprintf(stderr, "\t\t"); - *(l.state_r_layer) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize); - l.state_r_layer->batch = batch; - - - - l.input_h_layer = (layer*)xcalloc(1,sizeof(layer)); - fprintf(stderr, "\t\t"); - *(l.input_h_layer) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize); - l.input_h_layer->batch = batch; - - l.state_h_layer = (layer*)xcalloc(1,sizeof(layer)); - fprintf(stderr, "\t\t"); - *(l.state_h_layer) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize); - l.state_h_layer->batch = batch; - - l.batch_normalize = batch_normalize; - - - l.outputs = outputs; - l.output = (float*)xcalloc(outputs * batch * steps, sizeof(float)); - l.delta = (float*)xcalloc(outputs * batch * steps, sizeof(float)); - l.state = (float*)xcalloc(outputs * batch, sizeof(float)); - l.prev_state = (float*)xcalloc(outputs * batch, sizeof(float)); - l.forgot_state = (float*)xcalloc(outputs * batch, sizeof(float)); - l.forgot_delta = (float*)xcalloc(outputs * batch, sizeof(float)); - - l.r_cpu = (float*)xcalloc(outputs * batch, sizeof(float)); - l.z_cpu = (float*)xcalloc(outputs * batch, sizeof(float)); - l.h_cpu = (float*)xcalloc(outputs * batch, sizeof(float)); - - l.forward = forward_gru_layer; - l.backward = backward_gru_layer; - l.update = update_gru_layer; - -#ifdef GPU - l.forward_gpu = forward_gru_layer_gpu; - l.backward_gpu = backward_gru_layer_gpu; - l.update_gpu = update_gru_layer_gpu; - - l.forgot_state_gpu = cuda_make_array(l.output, batch*outputs); - l.forgot_delta_gpu = cuda_make_array(l.output, batch*outputs); - l.prev_state_gpu = cuda_make_array(l.output, batch*outputs); - l.state_gpu = cuda_make_array(l.output, batch*outputs); - l.output_gpu = cuda_make_array(l.output, batch*outputs*steps); - l.delta_gpu = cuda_make_array(l.delta, batch*outputs*steps); - l.r_gpu = 
cuda_make_array(l.output_gpu, batch*outputs); - l.z_gpu = cuda_make_array(l.output_gpu, batch*outputs); - l.h_gpu = cuda_make_array(l.output_gpu, batch*outputs); -#endif - - return l; -} - -void update_gru_layer(layer l, int batch, float learning_rate, float momentum, float decay) -{ - update_connected_layer(*(l.input_layer), batch, learning_rate, momentum, decay); - update_connected_layer(*(l.self_layer), batch, learning_rate, momentum, decay); - update_connected_layer(*(l.output_layer), batch, learning_rate, momentum, decay); -} - -void forward_gru_layer(layer l, network_state state) -{ - network_state s = {0}; - s.train = state.train; - s.workspace = state.workspace; - int i; - layer input_z_layer = *(l.input_z_layer); - layer input_r_layer = *(l.input_r_layer); - layer input_h_layer = *(l.input_h_layer); - - layer state_z_layer = *(l.state_z_layer); - layer state_r_layer = *(l.state_r_layer); - layer state_h_layer = *(l.state_h_layer); - - fill_cpu(l.outputs * l.batch * l.steps, 0, input_z_layer.delta, 1); - fill_cpu(l.outputs * l.batch * l.steps, 0, input_r_layer.delta, 1); - fill_cpu(l.outputs * l.batch * l.steps, 0, input_h_layer.delta, 1); - - fill_cpu(l.outputs * l.batch * l.steps, 0, state_z_layer.delta, 1); - fill_cpu(l.outputs * l.batch * l.steps, 0, state_r_layer.delta, 1); - fill_cpu(l.outputs * l.batch * l.steps, 0, state_h_layer.delta, 1); - if(state.train) { - fill_cpu(l.outputs * l.batch * l.steps, 0, l.delta, 1); - copy_cpu(l.outputs*l.batch, l.state, 1, l.prev_state, 1); - } - - for (i = 0; i < l.steps; ++i) { - s.input = l.state; - forward_connected_layer(state_z_layer, s); - forward_connected_layer(state_r_layer, s); - - s.input = state.input; - forward_connected_layer(input_z_layer, s); - forward_connected_layer(input_r_layer, s); - forward_connected_layer(input_h_layer, s); - - - copy_cpu(l.outputs*l.batch, input_z_layer.output, 1, l.z_cpu, 1); - axpy_cpu(l.outputs*l.batch, 1, state_z_layer.output, 1, l.z_cpu, 1); - - 
copy_cpu(l.outputs*l.batch, input_r_layer.output, 1, l.r_cpu, 1); - axpy_cpu(l.outputs*l.batch, 1, state_r_layer.output, 1, l.r_cpu, 1); - - activate_array(l.z_cpu, l.outputs*l.batch, LOGISTIC); - activate_array(l.r_cpu, l.outputs*l.batch, LOGISTIC); - - copy_cpu(l.outputs*l.batch, l.state, 1, l.forgot_state, 1); - mul_cpu(l.outputs*l.batch, l.r_cpu, 1, l.forgot_state, 1); - - s.input = l.forgot_state; - forward_connected_layer(state_h_layer, s); - - copy_cpu(l.outputs*l.batch, input_h_layer.output, 1, l.h_cpu, 1); - axpy_cpu(l.outputs*l.batch, 1, state_h_layer.output, 1, l.h_cpu, 1); - - #ifdef USET - activate_array(l.h_cpu, l.outputs*l.batch, TANH); - #else - activate_array(l.h_cpu, l.outputs*l.batch, LOGISTIC); - #endif - - weighted_sum_cpu(l.state, l.h_cpu, l.z_cpu, l.outputs*l.batch, l.output); - - copy_cpu(l.outputs*l.batch, l.output, 1, l.state, 1); - - state.input += l.inputs*l.batch; - l.output += l.outputs*l.batch; - increment_layer(&input_z_layer, 1); - increment_layer(&input_r_layer, 1); - increment_layer(&input_h_layer, 1); - - increment_layer(&state_z_layer, 1); - increment_layer(&state_r_layer, 1); - increment_layer(&state_h_layer, 1); - } -} - -void backward_gru_layer(layer l, network_state state) -{ -} - -#ifdef GPU - -void pull_gru_layer(layer l) -{ -} - -void push_gru_layer(layer l) -{ -} - -void update_gru_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay, float loss_scale) -{ - update_connected_layer_gpu(*(l.input_r_layer), batch, learning_rate, momentum, decay, loss_scale); - update_connected_layer_gpu(*(l.input_z_layer), batch, learning_rate, momentum, decay, loss_scale); - update_connected_layer_gpu(*(l.input_h_layer), batch, learning_rate, momentum, decay, loss_scale); - update_connected_layer_gpu(*(l.state_r_layer), batch, learning_rate, momentum, decay, loss_scale); - update_connected_layer_gpu(*(l.state_z_layer), batch, learning_rate, momentum, decay, loss_scale); - 
update_connected_layer_gpu(*(l.state_h_layer), batch, learning_rate, momentum, decay, loss_scale); -} - -void forward_gru_layer_gpu(layer l, network_state state) -{ - network_state s = {0}; - s.train = state.train; - s.workspace = state.workspace; - int i; - layer input_z_layer = *(l.input_z_layer); - layer input_r_layer = *(l.input_r_layer); - layer input_h_layer = *(l.input_h_layer); - - layer state_z_layer = *(l.state_z_layer); - layer state_r_layer = *(l.state_r_layer); - layer state_h_layer = *(l.state_h_layer); - - fill_ongpu(l.outputs * l.batch * l.steps, 0, input_z_layer.delta_gpu, 1); - fill_ongpu(l.outputs * l.batch * l.steps, 0, input_r_layer.delta_gpu, 1); - fill_ongpu(l.outputs * l.batch * l.steps, 0, input_h_layer.delta_gpu, 1); - - fill_ongpu(l.outputs * l.batch * l.steps, 0, state_z_layer.delta_gpu, 1); - fill_ongpu(l.outputs * l.batch * l.steps, 0, state_r_layer.delta_gpu, 1); - fill_ongpu(l.outputs * l.batch * l.steps, 0, state_h_layer.delta_gpu, 1); - if(state.train) { - fill_ongpu(l.outputs * l.batch * l.steps, 0, l.delta_gpu, 1); - copy_ongpu(l.outputs*l.batch, l.state_gpu, 1, l.prev_state_gpu, 1); - } - - for (i = 0; i < l.steps; ++i) { - s.input = l.state_gpu; - forward_connected_layer_gpu(state_z_layer, s); - forward_connected_layer_gpu(state_r_layer, s); - - s.input = state.input; - forward_connected_layer_gpu(input_z_layer, s); - forward_connected_layer_gpu(input_r_layer, s); - forward_connected_layer_gpu(input_h_layer, s); - - - copy_ongpu(l.outputs*l.batch, input_z_layer.output_gpu, 1, l.z_gpu, 1); - axpy_ongpu(l.outputs*l.batch, 1, state_z_layer.output_gpu, 1, l.z_gpu, 1); - - copy_ongpu(l.outputs*l.batch, input_r_layer.output_gpu, 1, l.r_gpu, 1); - axpy_ongpu(l.outputs*l.batch, 1, state_r_layer.output_gpu, 1, l.r_gpu, 1); - - activate_array_ongpu(l.z_gpu, l.outputs*l.batch, LOGISTIC); - activate_array_ongpu(l.r_gpu, l.outputs*l.batch, LOGISTIC); - - copy_ongpu(l.outputs*l.batch, l.state_gpu, 1, l.forgot_state_gpu, 1); - 
mul_ongpu(l.outputs*l.batch, l.r_gpu, 1, l.forgot_state_gpu, 1); - - s.input = l.forgot_state_gpu; - forward_connected_layer_gpu(state_h_layer, s); - - copy_ongpu(l.outputs*l.batch, input_h_layer.output_gpu, 1, l.h_gpu, 1); - axpy_ongpu(l.outputs*l.batch, 1, state_h_layer.output_gpu, 1, l.h_gpu, 1); - - #ifdef USET - activate_array_ongpu(l.h_gpu, l.outputs*l.batch, TANH); - #else - activate_array_ongpu(l.h_gpu, l.outputs*l.batch, LOGISTIC); - #endif - - weighted_sum_gpu(l.state_gpu, l.h_gpu, l.z_gpu, l.outputs*l.batch, l.output_gpu); - - copy_ongpu(l.outputs*l.batch, l.output_gpu, 1, l.state_gpu, 1); - - state.input += l.inputs*l.batch; - l.output_gpu += l.outputs*l.batch; - increment_layer(&input_z_layer, 1); - increment_layer(&input_r_layer, 1); - increment_layer(&input_h_layer, 1); - - increment_layer(&state_z_layer, 1); - increment_layer(&state_r_layer, 1); - increment_layer(&state_h_layer, 1); - } -} - -void backward_gru_layer_gpu(layer l, network_state state) -{ - network_state s = {0}; - s.train = state.train; - s.workspace = state.workspace; - int i; - layer input_z_layer = *(l.input_z_layer); - layer input_r_layer = *(l.input_r_layer); - layer input_h_layer = *(l.input_h_layer); - - layer state_z_layer = *(l.state_z_layer); - layer state_r_layer = *(l.state_r_layer); - layer state_h_layer = *(l.state_h_layer); - - increment_layer(&input_z_layer, l.steps - 1); - increment_layer(&input_r_layer, l.steps - 1); - increment_layer(&input_h_layer, l.steps - 1); - - increment_layer(&state_z_layer, l.steps - 1); - increment_layer(&state_r_layer, l.steps - 1); - increment_layer(&state_h_layer, l.steps - 1); - - state.input += l.inputs*l.batch*(l.steps-1); - if(state.delta) state.delta += l.inputs*l.batch*(l.steps-1); - l.output_gpu += l.outputs*l.batch*(l.steps-1); - l.delta_gpu += l.outputs*l.batch*(l.steps-1); - for (i = l.steps-1; i >= 0; --i) { - if(i != 0) copy_ongpu(l.outputs*l.batch, l.output_gpu - l.outputs*l.batch, 1, l.prev_state_gpu, 1); - float 
*prev_delta_gpu = (i == 0) ? 0 : l.delta_gpu - l.outputs*l.batch; - - copy_ongpu(l.outputs*l.batch, input_z_layer.output_gpu, 1, l.z_gpu, 1); - axpy_ongpu(l.outputs*l.batch, 1, state_z_layer.output_gpu, 1, l.z_gpu, 1); - - copy_ongpu(l.outputs*l.batch, input_r_layer.output_gpu, 1, l.r_gpu, 1); - axpy_ongpu(l.outputs*l.batch, 1, state_r_layer.output_gpu, 1, l.r_gpu, 1); - - activate_array_ongpu(l.z_gpu, l.outputs*l.batch, LOGISTIC); - activate_array_ongpu(l.r_gpu, l.outputs*l.batch, LOGISTIC); - - copy_ongpu(l.outputs*l.batch, input_h_layer.output_gpu, 1, l.h_gpu, 1); - axpy_ongpu(l.outputs*l.batch, 1, state_h_layer.output_gpu, 1, l.h_gpu, 1); - - #ifdef USET - activate_array_ongpu(l.h_gpu, l.outputs*l.batch, TANH); - #else - activate_array_ongpu(l.h_gpu, l.outputs*l.batch, LOGISTIC); - #endif - - weighted_delta_gpu(l.prev_state_gpu, l.h_gpu, l.z_gpu, prev_delta_gpu, input_h_layer.delta_gpu, input_z_layer.delta_gpu, l.outputs*l.batch, l.delta_gpu); - - #ifdef USET - gradient_array_ongpu(l.h_gpu, l.outputs*l.batch, TANH, input_h_layer.delta_gpu); - #else - gradient_array_ongpu(l.h_gpu, l.outputs*l.batch, LOGISTIC, input_h_layer.delta_gpu); - #endif - - copy_ongpu(l.outputs*l.batch, input_h_layer.delta_gpu, 1, state_h_layer.delta_gpu, 1); - - copy_ongpu(l.outputs*l.batch, l.prev_state_gpu, 1, l.forgot_state_gpu, 1); - mul_ongpu(l.outputs*l.batch, l.r_gpu, 1, l.forgot_state_gpu, 1); - fill_ongpu(l.outputs*l.batch, 0, l.forgot_delta_gpu, 1); - - s.input = l.forgot_state_gpu; - s.delta = l.forgot_delta_gpu; - - backward_connected_layer_gpu(state_h_layer, s); - if(prev_delta_gpu) mult_add_into_gpu(l.outputs*l.batch, l.forgot_delta_gpu, l.r_gpu, prev_delta_gpu); - mult_add_into_gpu(l.outputs*l.batch, l.forgot_delta_gpu, l.prev_state_gpu, input_r_layer.delta_gpu); - - gradient_array_ongpu(l.r_gpu, l.outputs*l.batch, LOGISTIC, input_r_layer.delta_gpu); - copy_ongpu(l.outputs*l.batch, input_r_layer.delta_gpu, 1, state_r_layer.delta_gpu, 1); - - gradient_array_ongpu(l.z_gpu, 
l.outputs*l.batch, LOGISTIC, input_z_layer.delta_gpu); - copy_ongpu(l.outputs*l.batch, input_z_layer.delta_gpu, 1, state_z_layer.delta_gpu, 1); - - s.input = l.prev_state_gpu; - s.delta = prev_delta_gpu; - - backward_connected_layer_gpu(state_r_layer, s); - backward_connected_layer_gpu(state_z_layer, s); - - s.input = state.input; - s.delta = state.delta; - - backward_connected_layer_gpu(input_h_layer, s); - backward_connected_layer_gpu(input_r_layer, s); - backward_connected_layer_gpu(input_z_layer, s); - - - state.input -= l.inputs*l.batch; - if(state.delta) state.delta -= l.inputs*l.batch; - l.output_gpu -= l.outputs*l.batch; - l.delta_gpu -= l.outputs*l.batch; - increment_layer(&input_z_layer, -1); - increment_layer(&input_r_layer, -1); - increment_layer(&input_h_layer, -1); - - increment_layer(&state_z_layer, -1); - increment_layer(&state_r_layer, -1); - increment_layer(&state_h_layer, -1); - } -} -#endif diff --git a/src/Detector/darknet/src/gru_layer.h b/src/Detector/darknet/src/gru_layer.h deleted file mode 100644 index c13e46e8e..000000000 --- a/src/Detector/darknet/src/gru_layer.h +++ /dev/null @@ -1,30 +0,0 @@ - -#ifndef GRU_LAYER_H -#define GRU_LAYER_H - -#include "activations.h" -#include "layer.h" -#include "network.h" - -#ifdef __cplusplus -extern "C" { -#endif -layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_normalize); - -void forward_gru_layer(layer l, network_state state); -void backward_gru_layer(layer l, network_state state); -void update_gru_layer(layer l, int batch, float learning_rate, float momentum, float decay); - -#ifdef GPU -void forward_gru_layer_gpu(layer l, network_state state); -void backward_gru_layer_gpu(layer l, network_state state); -void update_gru_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay, float loss_scale); -void push_gru_layer(layer l); -void pull_gru_layer(layer l); -#endif - -#ifdef __cplusplus -} -#endif - -#endif diff --git 
a/src/Detector/darknet/src/http_stream.cpp b/src/Detector/darknet/src/http_stream.cpp deleted file mode 100644 index 3ec7e8515..000000000 --- a/src/Detector/darknet/src/http_stream.cpp +++ /dev/null @@ -1,937 +0,0 @@ -#define _XOPEN_SOURCE -#include "image.h" -#include "http_stream.h" - -// -// a single-threaded, multi client(using select), debug webserver - streaming out mjpg. -// on win, _WIN32 has to be defined, must link against ws2_32.lib (socks on linux are for free) -// - -#include -#include -#include -#include -#include -#include -#include -#include -#include -using std::cerr; -using std::endl; - -// -// socket related abstractions: -// -#ifdef _WIN32 -#ifndef USE_CMAKE_LIBS -#pragma comment(lib, "ws2_32.lib") -#endif -#define WIN32_LEAN_AND_MEAN -#define _WINSOCK_DEPRECATED_NO_WARNINGS -#include -#include -#include -#include "gettimeofday.h" -#define PORT unsigned long -#define ADDRPOINTER int* -struct _INIT_W32DATA -{ - WSADATA w; - _INIT_W32DATA() { WSAStartup(MAKEWORD(2, 1), &w); } -} _init_once; - -// Graceful closes will first close their output channels and then wait for the peer -// on the other side of the connection to close its output channels. When both sides are done telling -// each other they won,t be sending any more data (i.e., closing output channels), -// the connection can be closed fully, with no risk of reset. 
-static int close_socket(SOCKET s) { - int close_output = ::shutdown(s, 1); // 0 close input, 1 close output, 2 close both - char *buf = (char *)calloc(1024, sizeof(char)); - ::recv(s, buf, 1024, 0); - free(buf); - int close_input = ::shutdown(s, 0); - int result = ::closesocket(s); - cerr << "Close socket: out = " << close_output << ", in = " << close_input << " \n"; - return result; -} -#else // _WIN32 - else: nix -#include "darkunistd.h" -#include -#include -#include -#include -#include -#include -#include -#include -#define PORT unsigned short -#define SOCKET int -#define HOSTENT struct hostent -#define SOCKADDR struct sockaddr -#define SOCKADDR_IN struct sockaddr_in -#define ADDRPOINTER unsigned int* -#define INVALID_SOCKET -1 -#define SOCKET_ERROR -1 -struct _IGNORE_PIPE_SIGNAL -{ - struct sigaction new_actn, old_actn; - _IGNORE_PIPE_SIGNAL() { - new_actn.sa_handler = SIG_IGN; // ignore the broken pipe signal - sigemptyset(&new_actn.sa_mask); - new_actn.sa_flags = 0; - sigaction(SIGPIPE, &new_actn, &old_actn); - // sigaction (SIGPIPE, &old_actn, NULL); // - to restore the previous signal handling - } -} _init_once; - -static int close_socket(SOCKET s) { - int close_output = ::shutdown(s, 1); // 0 close input, 1 close output, 2 close both - char *buf = (char *)calloc(1024, sizeof(char)); - ::recv(s, buf, 1024, 0); - free(buf); - int close_input = ::shutdown(s, 0); - int result = close(s); - std::cerr << "Close socket: out = " << close_output << ", in = " << close_input << " \n"; - return result; -} -#endif // _WIN32 - - -class JSON_sender -{ - SOCKET sock; - SOCKET maxfd; - fd_set master; - int timeout; // master sock timeout, shutdown after timeout usec. 
- int close_all_sockets; - - int _write(int sock, char const*const s, int len) - { - if (len < 1) { len = strlen(s); } - return ::send(sock, s, len, 0); - } - -public: - - JSON_sender(int port = 0, int _timeout = 400000) - : sock(INVALID_SOCKET) - , timeout(_timeout) - { - close_all_sockets = 0; - FD_ZERO(&master); - if (port) - open(port); - } - - ~JSON_sender() - { - close_all(); - release(); - } - - bool release() - { - if (sock != INVALID_SOCKET) - ::shutdown(sock, 2); - sock = (INVALID_SOCKET); - return false; - } - - void close_all() - { - close_all_sockets = 1; - write("\n]"); // close JSON array - } - - bool open(int port) - { - sock = ::socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); - - SOCKADDR_IN address; - address.sin_addr.s_addr = INADDR_ANY; - address.sin_family = AF_INET; - address.sin_port = htons(port); // ::htons(port); - int reuse = 1; - if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (const char*)&reuse, sizeof(reuse)) < 0) - cerr << "setsockopt(SO_REUSEADDR) failed" << endl; - - // Non-blocking sockets - // Windows: ioctlsocket() and FIONBIO - // Linux: fcntl() and O_NONBLOCK -#ifdef WIN32 - unsigned long i_mode = 1; - int result = ioctlsocket(sock, FIONBIO, &i_mode); - if (result != NO_ERROR) { - std::cerr << "ioctlsocket(FIONBIO) failed with error: " << result << std::endl; - } -#else // WIN32 - int flags = fcntl(sock, F_GETFL, 0); - fcntl(sock, F_SETFL, flags | O_NONBLOCK); -#endif // WIN32 - -#ifdef SO_REUSEPORT - if (setsockopt(sock, SOL_SOCKET, SO_REUSEPORT, (const char*)&reuse, sizeof(reuse)) < 0) - cerr << "setsockopt(SO_REUSEPORT) failed" << endl; -#endif - if (::bind(sock, (SOCKADDR*)&address, sizeof(SOCKADDR_IN)) == SOCKET_ERROR) - { - cerr << "error JSON_sender: couldn't bind sock " << sock << " to port " << port << "!" << endl; - return release(); - } - if (::listen(sock, 10) == SOCKET_ERROR) - { - cerr << "error JSON_sender: couldn't listen on sock " << sock << " on port " << port << " !" 
<< endl; - return release(); - } - FD_ZERO(&master); - FD_SET(sock, &master); - maxfd = sock; - return true; - } - - bool isOpened() - { - return sock != INVALID_SOCKET; - } - - bool write(char const* outputbuf) - { - fd_set rread = master; - struct timeval select_timeout = { 0, 0 }; - struct timeval socket_timeout = { 0, timeout }; - if (::select(maxfd + 1, &rread, NULL, NULL, &select_timeout) <= 0) - return true; // nothing broken, there's just noone listening - - int outlen = static_cast(strlen(outputbuf)); - -#ifdef _WIN32 - for (unsigned i = 0; iclient ? maxfd : client); - FD_SET(client, &master); - _write(client, "HTTP/1.0 200 OK\r\n", 0); - _write(client, - "Server: Mozarella/2.2\r\n" - "Accept-Range: bytes\r\n" - "Connection: close\r\n" - "Max-Age: 0\r\n" - "Expires: 0\r\n" - "Cache-Control: no-cache, private\r\n" - "Pragma: no-cache\r\n" - "Content-Type: application/json\r\n" - //"Content-Type: multipart/x-mixed-replace; boundary=boundary\r\n" - "\r\n", 0); - _write(client, "[\n", 0); // open JSON array - int n = _write(client, outputbuf, outlen); - cerr << "JSON_sender: new client " << client << endl; - } - else // existing client, just stream pix - { - //char head[400]; - // application/x-resource+json or application/x-collection+json - when you are representing REST resources and collections - // application/json or text/json or text/javascript or text/plain. 
- // https://stackoverflow.com/questions/477816/what-is-the-correct-json-content-type - //sprintf(head, "\r\nContent-Length: %zu\r\n\r\n", outlen); - //sprintf(head, "--boundary\r\nContent-Type: application/json\r\nContent-Length: %zu\r\n\r\n", outlen); - //_write(s, head, 0); - if (!close_all_sockets) _write(s, ", \n", 0); - int n = _write(s, outputbuf, outlen); - if (n < (int)outlen) - { - cerr << "JSON_sender: kill client " << s << endl; - close_socket(s); - //::shutdown(s, 2); - FD_CLR(s, &master); - } - - if (close_all_sockets) { - int result = close_socket(s); - cerr << "JSON_sender: close clinet: " << result << " \n"; - continue; - } - } - } - if (close_all_sockets) { - int result = close_socket(sock); - cerr << "JSON_sender: close acceptor: " << result << " \n\n"; - } - return true; - } -}; -// ---------------------------------------- - -static std::unique_ptr js_ptr; -static std::mutex mtx; - -void delete_json_sender() -{ - std::lock_guard lock(mtx); - js_ptr.release(); -} - -void send_json_custom(char const* send_buf, int port, int timeout) -{ - try { - std::lock_guard lock(mtx); - if(!js_ptr) js_ptr.reset(new JSON_sender(port, timeout)); - - js_ptr->write(send_buf); - } - catch (...) { - cerr << " Error in send_json_custom() function \n"; - } -} - -void send_json(detection *dets, int nboxes, int classes, char **names, long long int frame_id, int port, int timeout) -{ - try { - char *send_buf = detection_to_json(dets, nboxes, classes, names, frame_id, NULL); - - send_json_custom(send_buf, port, timeout); - std::cout << " JSON-stream sent. \n"; - - free(send_buf); - } - catch (...) { - cerr << " Error in send_json() function \n"; - } -} -// ---------------------------------------- - - -#ifdef OPENCV - -#include -#include -#include -#include -#ifndef CV_VERSION_EPOCH -#include -#endif -using namespace cv; - - - -class MJPG_sender -{ - SOCKET sock; - SOCKET maxfd; - fd_set master; - int timeout; // master sock timeout, shutdown after timeout usec. 
- int quality; // jpeg compression [1..100] - int close_all_sockets; - - int _write(int sock, char const*const s, int len) - { - if (len < 1) { len = strlen(s); } - return ::send(sock, s, len, 0); - } - -public: - - MJPG_sender(int port = 0, int _timeout = 400000, int _quality = 30) - : sock(INVALID_SOCKET) - , timeout(_timeout) - , quality(_quality) - { - close_all_sockets = 0; - FD_ZERO(&master); - if (port) - open(port); - } - - ~MJPG_sender() - { - close_all(); - release(); - } - - bool release() - { - if (sock != INVALID_SOCKET) - ::shutdown(sock, 2); - sock = (INVALID_SOCKET); - return false; - } - - void close_all() - { - close_all_sockets = 1; - cv::Mat tmp(cv::Size(10, 10), CV_8UC3); - write(tmp); - } - - bool open(int port) - { - sock = ::socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); - - SOCKADDR_IN address; - address.sin_addr.s_addr = INADDR_ANY; - address.sin_family = AF_INET; - address.sin_port = htons(port); // ::htons(port); - int reuse = 1; - if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (const char*)&reuse, sizeof(reuse)) < 0) - cerr << "setsockopt(SO_REUSEADDR) failed" << endl; - - // Non-blocking sockets - // Windows: ioctlsocket() and FIONBIO - // Linux: fcntl() and O_NONBLOCK -#ifdef WIN32 - unsigned long i_mode = 1; - int result = ioctlsocket(sock, FIONBIO, &i_mode); - if (result != NO_ERROR) { - std::cerr << "ioctlsocket(FIONBIO) failed with error: " << result << std::endl; - } -#else // WIN32 - int flags = fcntl(sock, F_GETFL, 0); - fcntl(sock, F_SETFL, flags | O_NONBLOCK); -#endif // WIN32 - -#ifdef SO_REUSEPORT - if (setsockopt(sock, SOL_SOCKET, SO_REUSEPORT, (const char*)&reuse, sizeof(reuse)) < 0) - cerr << "setsockopt(SO_REUSEPORT) failed" << endl; -#endif - if (::bind(sock, (SOCKADDR*)&address, sizeof(SOCKADDR_IN)) == SOCKET_ERROR) - { - cerr << "error MJPG_sender: couldn't bind sock " << sock << " to port " << port << "!" 
<< endl; - return release(); - } - if (::listen(sock, 10) == SOCKET_ERROR) - { - cerr << "error MJPG_sender: couldn't listen on sock " << sock << " on port " << port << " !" << endl; - return release(); - } - FD_ZERO(&master); - FD_SET(sock, &master); - maxfd = sock; - return true; - } - - bool isOpened() - { - return sock != INVALID_SOCKET; - } - - bool write(const Mat & frame) - { - fd_set rread = master; - struct timeval select_timeout = { 0, 0 }; - struct timeval socket_timeout = { 0, timeout }; - if (::select(maxfd + 1, &rread, NULL, NULL, &select_timeout) <= 0) - return true; // nothing broken, there's just noone listening - - std::vector outbuf; - std::vector params; - params.push_back(IMWRITE_JPEG_QUALITY); - params.push_back(quality); - cv::imencode(".jpg", frame, outbuf, params); //REMOVED FOR COMPATIBILITY - // https://docs.opencv.org/3.4/d4/da8/group__imgcodecs.html#ga292d81be8d76901bff7988d18d2b42ac - //std::cerr << "cv::imencode call disabled!" << std::endl; - int outlen = static_cast(outbuf.size()); - -#ifdef _WIN32 - for (unsigned i = 0; iclient ? 
maxfd : client); - FD_SET(client, &master); - _write(client, "HTTP/1.0 200 OK\r\n", 0); - _write(client, - "Server: Mozarella/2.2\r\n" - "Accept-Range: bytes\r\n" - "Connection: close\r\n" - "Max-Age: 0\r\n" - "Expires: 0\r\n" - "Cache-Control: no-cache, private\r\n" - "Pragma: no-cache\r\n" - "Content-Type: multipart/x-mixed-replace; boundary=mjpegstream\r\n" - "\r\n", 0); - cerr << "MJPG_sender: new client " << client << endl; - } - else // existing client, just stream pix - { - if (close_all_sockets) { - int result = close_socket(s); - cerr << "MJPG_sender: close clinet: " << result << " \n"; - continue; - } - - char head[400]; - sprintf(head, "--mjpegstream\r\nContent-Type: image/jpeg\r\nContent-Length: %zu\r\n\r\n", outlen); - _write(s, head, 0); - int n = _write(s, (char*)(&outbuf[0]), outlen); - cerr << "known client: " << s << ", sent = " << n << ", must be sent outlen = " << outlen << endl; - if (n < (int)outlen) - { - cerr << "MJPG_sender: kill client " << s << endl; - //::shutdown(s, 2); - close_socket(s); - FD_CLR(s, &master); - } - } - } - if (close_all_sockets) { - int result = close_socket(sock); - cerr << "MJPG_sender: close acceptor: " << result << " \n\n"; - } - return true; - } -}; -// ---------------------------------------- - -static std::mutex mtx_mjpeg; - -//struct mat_cv : cv::Mat { int a[0]; }; - -void send_mjpeg(mat_cv* mat, int port, int timeout, int quality) -{ - try { - std::lock_guard lock(mtx_mjpeg); - static MJPG_sender wri(port, timeout, quality); - //cv::Mat mat = cv::cvarrToMat(ipl); - wri.write(*(cv::Mat*)mat); - std::cout << " MJPEG-stream sent. \n"; - } - catch (...) 
{ - cerr << " Error in send_mjpeg() function \n"; - } -} -// ---------------------------------------- - -std::string get_system_frame_time_string() -{ - std::time_t t = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); - static std::mutex mtx; - std::lock_guard lock(mtx); - struct tm *tmp_buf = localtime(&t); - char buff[256]; - std::strftime(buff, 256, "%A %F %T", tmp_buf); - std::string system_frame_time = buff; - return system_frame_time; -} -// ---------------------------------------- - - -#ifdef __CYGWIN__ -int send_http_post_request(char *http_post_host, int server_port, const char *videosource, - detection *dets, int nboxes, int classes, char **names, long long int frame_id, int ext_output, int timeout) -{ - std::cerr << " send_http_post_request() isn't implemented \n"; - return 0; -} -#else // __CYGWIN__ - -#ifndef NI_MAXHOST -#define NI_MAXHOST 1025 -#endif - -#ifndef NI_NUMERICHOST -#define NI_NUMERICHOST 0x02 -#endif - -//#define CPPHTTPLIB_OPENSSL_SUPPORT -#include "httplib.h" - -// https://webhook.site/ -// https://github.com/yhirose/cpp-httplib -// sent POST http request -int send_http_post_request(char *http_post_host, int server_port, const char *videosource, - detection *dets, int nboxes, int classes, char **names, long long int frame_id, int ext_output, int timeout) -{ - const float thresh = 0.005; // function get_network_boxes() has already filtred dets by actual threshold - - std::string message; - - for (int i = 0; i < nboxes; ++i) { - char labelstr[4096] = { 0 }; - int class_id = -1; - for (int j = 0; j < classes; ++j) { - int show = strncmp(names[j], "dont_show", 9); - if (dets[i].prob[j] > thresh && show) { - if (class_id < 0) { - strcat(labelstr, names[j]); - class_id = j; - char buff[10]; - sprintf(buff, " (%2.0f%%)", dets[i].prob[j] * 100); - strcat(labelstr, buff); - } - else { - strcat(labelstr, ", "); - strcat(labelstr, names[j]); - } - printf("%s: %.0f%% ", names[j], dets[i].prob[j] * 100); - } - } - if (class_id 
>= 0) { - message += std::string(names[class_id]) + std::string(", id: ") + std::to_string(class_id) + "\n"; - } - } - - if (!message.empty()) - { - std::string time = get_system_frame_time_string(); - message += "\ntime:\n" + time + "\n"; - message += "videosource:\n" + std::string(videosource); - - std::string http_post_host_str = http_post_host; - int slash_index = http_post_host_str.find("/"); - - std::string http_path = http_post_host_str.substr(slash_index, http_post_host_str.length() - slash_index); - http_post_host_str = http_post_host_str.substr(0, slash_index); - - // send HTTP-Post request - httplib::Client cli(http_post_host_str.c_str(), server_port, timeout); - auto res = cli.Post(http_path.c_str(), message, "text/plain"); - - return 1; - } - - return 0; -} -#endif // __CYGWIN__ - -#endif // OPENCV - -// ----------------------------------------------------- - -#if __cplusplus >= 201103L || _MSC_VER >= 1900 // C++11 - -#include -#include - -static std::chrono::steady_clock::time_point steady_start, steady_end; -static double total_time; - -double get_time_point() { - std::chrono::steady_clock::time_point current_time = std::chrono::steady_clock::now(); - //uint64_t now = std::chrono::duration_cast(current_time.time_since_epoch()).count(); - return std::chrono::duration_cast(current_time.time_since_epoch()).count(); -} - -void start_timer() { - steady_start = std::chrono::steady_clock::now(); -} - -void stop_timer() { - steady_end = std::chrono::steady_clock::now(); -} - -double get_time() { - double took_time = std::chrono::duration(steady_end - steady_start).count(); - total_time += took_time; - return took_time; -} - -void stop_timer_and_show() { - stop_timer(); - std::cout << " " << get_time() * 1000 << " msec" << std::endl; -} - -void stop_timer_and_show_name(char *name) { - stop_timer(); - std::cout << " " << name; - std::cout << " " << get_time() * 1000 << " msec" << std::endl; -} - -void show_total_time() { - std::cout << " Total: " << total_time 
* 1000 << " msec" << std::endl; -} - - -int custom_create_thread(custom_thread_t * tid, const custom_attr_t * attr, void *(*func) (void *), void *arg) -{ - std::thread *ptr = new std::thread(func, arg); - *tid = (custom_thread_t *)ptr; - if (tid) return 0; - else return -1; -} - -int custom_join(custom_thread_t tid, void **value_ptr) -{ - std::thread *ptr = (std::thread *)tid; - if (ptr) { - ptr->join(); - delete ptr; - return 0; - } - else printf(" Error: ptr of thread is NULL in custom_join() \n"); - - return -1; -} - -int custom_atomic_load_int(volatile int* obj) -{ - const volatile std::atomic* ptr_a = (const volatile std::atomic*)obj; - return std::atomic_load(ptr_a); -} - -void custom_atomic_store_int(volatile int* obj, int desr) -{ - volatile std::atomic* ptr_a = (volatile std::atomic*)obj; - std::atomic_store(ptr_a, desr); -} - -int get_num_threads() -{ - return std::thread::hardware_concurrency(); -} - -#if !defined(__MINGW64__) -void this_thread_sleep_for(int ms_time) -{ - std::chrono::milliseconds dura(ms_time); - std::this_thread::sleep_for(dura); -} -#else -void this_thread_sleep_for(int ms_time) -{ - std::cerr << " this_thread_sleep_for() isn't implemented \n"; - return; -} -#endif - -void this_thread_yield() -{ - std::this_thread::yield(); -} - -#else // C++11 -#include - -double get_time_point() { return 0; } -void start_timer() {} -void stop_timer() {} -double get_time() { return 0; } -void stop_timer_and_show() { - std::cout << " stop_timer_and_show() isn't implemented " << std::endl; -} -void stop_timer_and_show_name(char *name) { stop_timer_and_show(); } -void total_time() {} -#endif // C++11 - -#include -#include -#include -#include "blas.h" -#include "utils.h" - -struct similarity_detections_t { - int old_id, new_id; - float sim; -}; - -int check_prob(detection det, float thresh) -{ - for (int i = 0; i < det.classes; ++i) { - if (det.prob[i] > thresh) return 1; - } - return 0; -} - -int check_classes_id(detection det1, detection det2, float 
thresh) -{ - if (det1.classes != det2.classes) { - printf(" Error: det1.classes != det2.classes \n"); - getchar(); - } - - int det1_id = -1; - float det1_prob = 0; - int det2_id = -1; - float det2_prob = 0; - - for (int i = 0; i < det1.classes; ++i) { - if (det1.prob[i] > thresh && det1.prob[i] > det1_prob) { - det1_prob = det1.prob[i]; - det1_id = i; - } - if (det2.prob[i] > thresh && det2.prob[i] > det2_prob) { - det2_prob = det2.prob[i]; - det2_id = i; - } - } - - if (det1_id == det2_id && det2_id != -1) return 1; - - //for (int i = 0; i < det1.classes; ++i) { - // if (det1.prob[i] > thresh && det2.prob[i] > thresh) return 1; - //} - return 0; -} - -int fill_remaining_id(detection *new_dets, int new_dets_num, int new_track_id, float thresh, int detection_count) -{ - for (int i = 0; i < new_dets_num; ++i) { - if (new_dets[i].track_id == 0 && check_prob(new_dets[i], thresh)) { - //printf(" old_tid = %d, new_tid = %d, sim = %f \n", new_dets[i].track_id, new_track_id, new_dets[i].sim); - if (new_dets[i].sort_class > detection_count) { - new_dets[i].track_id = new_track_id; - new_track_id++; - } - } - } - return new_track_id; -} - -float *make_float_array(float* src, size_t size) -{ - float *dst = (float*)xcalloc(size, sizeof(float)); - memcpy(dst, src, size*sizeof(float)); - return dst; -} - -struct detection_t : detection { - int det_count; - detection_t(detection det) : detection(det), det_count(0) - { - if (embeddings) embeddings = make_float_array(det.embeddings, embedding_size); - if (prob) prob = make_float_array(det.prob, classes); - if (uc) uc = make_float_array(det.uc, 4); - } - - detection_t(detection_t const& det) : detection(det) - { - if (embeddings) embeddings = make_float_array(det.embeddings, embedding_size); - if (prob) prob = make_float_array(det.prob, classes); - if (uc) uc = make_float_array(det.uc, 4); - } - - ~detection_t() { - if (embeddings) free(embeddings); - if (prob) free(prob); - if (uc) free(uc); - } -}; - - - -void 
set_track_id(detection *new_dets, int new_dets_num, float thresh, float sim_thresh, float track_ciou_norm, int deque_size, int dets_for_track, int dets_for_show) -{ - static int new_track_id = 1; - static std::deque> old_dets_dq; - - // copy detections from queue of vectors to the one vector - std::vector old_dets; - for (std::vector &v : old_dets_dq) { - for (int i = 0; i < v.size(); ++i) { - old_dets.push_back(v[i]); - } - } - - std::vector sim_det(old_dets.size() * new_dets_num); - - // calculate similarity - for (int old_id = 0; old_id < old_dets.size(); ++old_id) { - for (int new_id = 0; new_id < new_dets_num; ++new_id) { - const int index = old_id*new_dets_num + new_id; - const float sim = cosine_similarity(new_dets[new_id].embeddings, old_dets[old_id].embeddings, old_dets[0].embedding_size); - sim_det[index].new_id = new_id; - sim_det[index].old_id = old_id; - sim_det[index].sim = sim; - } - } - - // sort similarity - std::sort(sim_det.begin(), sim_det.end(), [](similarity_detections_t v1, similarity_detections_t v2) { return v1.sim > v2.sim; }); - //if(sim_det.size() > 0) printf(" sim_det_first = %f, sim_det_end = %f \n", sim_det.begin()->sim, sim_det.rbegin()->sim); - - std::vector new_idx(new_dets_num, 1); - std::vector old_idx(old_dets.size(), 1); - std::vector track_idx(new_track_id, 1); - - // match objects - for (int index = 0; index < new_dets_num*old_dets.size(); ++index) { - const int new_id = sim_det[index].new_id; - const int old_id = sim_det[index].old_id; - const int track_id = old_dets[old_id].track_id; - const int det_count = old_dets[old_id].sort_class; - //printf(" ciou = %f \n", box_ciou(new_dets[new_id].bbox, old_dets[old_id].bbox)); - if (track_idx[track_id] && new_idx[new_id] && old_idx[old_id] && check_classes_id(new_dets[new_id], old_dets[old_id], thresh)) { - float sim = sim_det[index].sim; - //float ciou = box_ciou(new_dets[new_id].bbox, old_dets[old_id].bbox); - float ciou = box_iou(new_dets[new_id].bbox, old_dets[old_id].bbox); - 
sim = sim * (1 - track_ciou_norm) + ciou * track_ciou_norm; - if (sim_thresh < sim && new_dets[new_id].sim < sim) { - new_dets[new_id].sim = sim; - new_dets[new_id].track_id = track_id; - new_dets[new_id].sort_class = det_count + 1; - //new_idx[new_id] = 0; - old_idx[old_id] = 0; - if(track_id) track_idx[track_id] = 0; - } - } - } - - // set new track_id - new_track_id = fill_remaining_id(new_dets, new_dets_num, new_track_id, thresh, dets_for_track); - - // store new_detections to the queue of vectors - std::vector new_det_vec; - for (int i = 0; i < new_dets_num; ++i) { - if (check_prob(new_dets[i], thresh)) { - new_det_vec.push_back(new_dets[i]); - } - } - - // add new - old_dets_dq.push_back(new_det_vec); - // remove old - if (old_dets_dq.size() > deque_size) old_dets_dq.pop_front(); - - // remove detection which were detected only on few frames - for (int i = 0; i < new_dets_num; ++i) { - if (new_dets[i].sort_class < dets_for_show) { - for (int j = 0; j < new_dets[i].classes; ++j) { - new_dets[i].prob[j] = 0; - } - } - } -} - diff --git a/src/Detector/darknet/src/http_stream.h b/src/Detector/darknet/src/http_stream.h deleted file mode 100644 index aace9d741..000000000 --- a/src/Detector/darknet/src/http_stream.h +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef HTTP_STREAM_H -#define HTTP_STREAM_H -#include "darknet.h" - -#ifdef __cplusplus -extern "C" { -#endif -#include "image.h" -#include - -void send_json(detection *dets, int nboxes, int classes, char **names, long long int frame_id, int port, int timeout); - -#ifdef OPENCV -void send_mjpeg(mat_cv* mat, int port, int timeout, int quality); - -int send_http_post_request(char *http_post_host, int server_port, const char *videosource, - detection *dets, int nboxes, int classes, char **names, long long int frame_id, int ext_output, int timeout); - -#endif // OPENCV - -typedef void* custom_thread_t; -typedef void* custom_attr_t; - -int custom_create_thread(custom_thread_t * tid, const custom_attr_t * attr, void *(*func) 
(void *), void *arg); -int custom_join(custom_thread_t thread, void **value_ptr); - -int custom_atomic_load_int(volatile int* obj); -void custom_atomic_store_int(volatile int* obj, int desr); -int get_num_threads(); -void this_thread_sleep_for(int ms_time); -void this_thread_yield(); - -#ifdef __cplusplus -} -#endif - -#endif // HTTP_STREAM_H diff --git a/src/Detector/darknet/src/httplib.h b/src/Detector/darknet/src/httplib.h deleted file mode 100644 index 41fbfb194..000000000 --- a/src/Detector/darknet/src/httplib.h +++ /dev/null @@ -1,4036 +0,0 @@ -// -// httplib.h -// -// Copyright (c) 2019 Yuji Hirose. All rights reserved. -// MIT License -// - -#ifndef CPPHTTPLIB_HTTPLIB_H -#define CPPHTTPLIB_HTTPLIB_H - -/* - * Configuration - */ - -#ifndef CPPHTTPLIB_KEEPALIVE_TIMEOUT_SECOND -#define CPPHTTPLIB_KEEPALIVE_TIMEOUT_SECOND 5 -#endif - -#ifndef CPPHTTPLIB_KEEPALIVE_TIMEOUT_USECOND -#define CPPHTTPLIB_KEEPALIVE_TIMEOUT_USECOND 0 -#endif - -#ifndef CPPHTTPLIB_KEEPALIVE_MAX_COUNT -#define CPPHTTPLIB_KEEPALIVE_MAX_COUNT 5 -#endif - -#ifndef CPPHTTPLIB_READ_TIMEOUT_SECOND -#define CPPHTTPLIB_READ_TIMEOUT_SECOND 5 -#endif - -#ifndef CPPHTTPLIB_READ_TIMEOUT_USECOND -#define CPPHTTPLIB_READ_TIMEOUT_USECOND 0 -#endif - -#ifndef CPPHTTPLIB_REQUEST_URI_MAX_LENGTH -#define CPPHTTPLIB_REQUEST_URI_MAX_LENGTH 8192 -#endif - -#ifndef CPPHTTPLIB_REDIRECT_MAX_COUNT -#define CPPHTTPLIB_REDIRECT_MAX_COUNT 20 -#endif - -#ifndef CPPHTTPLIB_PAYLOAD_MAX_LENGTH -#define CPPHTTPLIB_PAYLOAD_MAX_LENGTH (std::numeric_limits::max)() -#endif - -#ifndef CPPHTTPLIB_RECV_BUFSIZ -#define CPPHTTPLIB_RECV_BUFSIZ size_t(4096u) -#endif - -#ifndef CPPHTTPLIB_THREAD_POOL_COUNT -#define CPPHTTPLIB_THREAD_POOL_COUNT 8 -#endif - -/* - * Headers - */ - -#ifdef _WIN32 -#ifndef _CRT_SECURE_NO_WARNINGS -#define _CRT_SECURE_NO_WARNINGS -#endif //_CRT_SECURE_NO_WARNINGS - -#ifndef _CRT_NONSTDC_NO_DEPRECATE -#define _CRT_NONSTDC_NO_DEPRECATE -#endif //_CRT_NONSTDC_NO_DEPRECATE - -#if defined(_MSC_VER) -#ifdef 
_WIN64 -using ssize_t = __int64; -#else -using ssize_t = int; -#endif - -#if _MSC_VER < 1900 -#define snprintf _snprintf_s -#endif -#endif // _MSC_VER - -#ifndef S_ISREG -#define S_ISREG(m) (((m)&S_IFREG) == S_IFREG) -#endif // S_ISREG - -#ifndef S_ISDIR -#define S_ISDIR(m) (((m)&S_IFDIR) == S_IFDIR) -#endif // S_ISDIR - -#ifndef NOMINMAX -#define NOMINMAX -#endif // NOMINMAX - -#include -#include -#include - -#ifndef WSA_FLAG_NO_HANDLE_INHERIT -#define WSA_FLAG_NO_HANDLE_INHERIT 0x80 -#endif - -#ifdef _MSC_VER -#pragma comment(lib, "ws2_32.lib") -#endif - -#ifndef strcasecmp -#define strcasecmp _stricmp -#endif // strcasecmp - -using socket_t = SOCKET; -#ifdef CPPHTTPLIB_USE_POLL -#define poll(fds, nfds, timeout) WSAPoll(fds, nfds, timeout) -#endif - -#else // not _WIN32 - -#include -#include -#include -#include -#ifdef CPPHTTPLIB_USE_POLL -#include -#endif -#include -#include -#include -#include -#include - -using socket_t = int; -#define INVALID_SOCKET (-1) -#endif //_WIN32 - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT -#include -#include -#include - -// #if OPENSSL_VERSION_NUMBER < 0x1010100fL -// #error Sorry, OpenSSL versions prior to 1.1.1 are not supported -// #endif - -#if OPENSSL_VERSION_NUMBER < 0x10100000L -#include -inline const unsigned char *ASN1_STRING_get0_data(const ASN1_STRING *asn1) { - return M_ASN1_STRING_data(asn1); -} -#endif -#endif - -#ifdef CPPHTTPLIB_ZLIB_SUPPORT -#include -#endif - -/* - * Declaration - */ -namespace httplib { - -namespace detail { - -struct ci { - bool operator()(const std::string &s1, const std::string &s2) const { - return std::lexicographical_compare( - s1.begin(), s1.end(), s2.begin(), s2.end(), - [](char c1, char c2) { return ::tolower(c1) < ::tolower(c2); }); - } -}; - -} // namespace detail - -enum class HttpVersion { v1_0 = 0, v1_1 }; - -using Headers 
= std::multimap; - -using Params = std::multimap; -using Match = std::smatch; - -using DataSink = std::function; - -using Done = std::function; - -using ContentProvider = std::function; - -using ContentProviderWithCloser = std::function; - -using ContentReceiver = std::function; - -using ContentReader = std::function; - -using Progress = std::function; - -struct Response; -using ResponseHandler = std::function; - -struct MultipartFile { - std::string filename; - std::string content_type; - size_t offset = 0; - size_t length = 0; -}; -using MultipartFiles = std::multimap; - -struct MultipartFormData { - std::string name; - std::string content; - std::string filename; - std::string content_type; -}; -using MultipartFormDataItems = std::vector; - -using Range = std::pair; -using Ranges = std::vector; - -struct Request { - std::string method; - std::string path; - Headers headers; - std::string body; - - // for server - std::string version; - std::string target; - Params params; - MultipartFiles files; - Ranges ranges; - Match matches; - - // for client - size_t redirect_count = CPPHTTPLIB_REDIRECT_MAX_COUNT; - ResponseHandler response_handler; - ContentReceiver content_receiver; - Progress progress; - -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT - const SSL *ssl; -#endif - - bool has_header(const char *key) const; - std::string get_header_value(const char *key, size_t id = 0) const; - size_t get_header_value_count(const char *key) const; - void set_header(const char *key, const char *val); - void set_header(const char *key, const std::string &val); - - bool has_param(const char *key) const; - std::string get_param_value(const char *key, size_t id = 0) const; - size_t get_param_value_count(const char *key) const; - - bool has_file(const char *key) const; - MultipartFile get_file_value(const char *key) const; - - // private members... 
- size_t content_length; - ContentProvider content_provider; -}; - -struct Response { - std::string version; - int status; - Headers headers; - std::string body; - - bool has_header(const char *key) const; - std::string get_header_value(const char *key, size_t id = 0) const; - size_t get_header_value_count(const char *key) const; - void set_header(const char *key, const char *val); - void set_header(const char *key, const std::string &val); - - void set_redirect(const char *url); - void set_content(const char *s, size_t n, const char *content_type); - void set_content(const std::string &s, const char *content_type); - - void set_content_provider( - size_t length, - std::function provider, - std::function resource_releaser = [] {}); - - void set_chunked_content_provider( - std::function provider, - std::function resource_releaser = [] {}); - - Response() : status(-1), content_length(0) {} - - ~Response() { - if (content_provider_resource_releaser) { - content_provider_resource_releaser(); - } - } - - // private members... - size_t content_length; - ContentProviderWithCloser content_provider; - std::function content_provider_resource_releaser; -}; - -class Stream { -public: - virtual ~Stream() = default; - virtual int read(char *ptr, size_t size) = 0; - virtual int write(const char *ptr, size_t size1) = 0; - virtual int write(const char *ptr) = 0; - virtual int write(const std::string &s) = 0; - virtual std::string get_remote_addr() const = 0; - - template - int write_format(const char *fmt, const Args &... 
args); -}; - -class SocketStream : public Stream { -public: - SocketStream(socket_t sock, time_t read_timeout_sec, - time_t read_timeout_usec); - ~SocketStream() override; - - int read(char *ptr, size_t size) override; - int write(const char *ptr, size_t size) override; - int write(const char *ptr) override; - int write(const std::string &s) override; - std::string get_remote_addr() const override; - -private: - socket_t sock_; - time_t read_timeout_sec_; - time_t read_timeout_usec_; -}; - -class BufferStream : public Stream { -public: - BufferStream() = default; - ~BufferStream() override = default; - - int read(char *ptr, size_t size) override; - int write(const char *ptr, size_t size) override; - int write(const char *ptr) override; - int write(const std::string &s) override; - std::string get_remote_addr() const override; - - const std::string &get_buffer() const; - -private: - std::string buffer; -}; - -class TaskQueue { -public: - TaskQueue() = default; - virtual ~TaskQueue() = default; - virtual void enqueue(std::function fn) = 0; - virtual void shutdown() = 0; -}; - -#if CPPHTTPLIB_THREAD_POOL_COUNT > 0 -class ThreadPool : public TaskQueue { -public: - explicit ThreadPool(size_t n) : shutdown_(false) { - while (n) { - threads_.emplace_back(worker(*this)); - n--; - } - } - - ThreadPool(const ThreadPool &) = delete; - ~ThreadPool() override = default; - - void enqueue(std::function fn) override { - std::unique_lock lock(mutex_); - jobs_.push_back(fn); - cond_.notify_one(); - } - - void shutdown() override { - // Stop all worker threads... - { - std::unique_lock lock(mutex_); - shutdown_ = true; - } - - cond_.notify_all(); - - // Join... 
- for (auto& t : threads_) { - t.join(); - } - } - -private: - struct worker { - explicit worker(ThreadPool &pool) : pool_(pool) {} - - void operator()() { - for (;;) { - std::function fn; - { - std::unique_lock lock(pool_.mutex_); - - pool_.cond_.wait( - lock, [&] { return !pool_.jobs_.empty() || pool_.shutdown_; }); - - if (pool_.shutdown_ && pool_.jobs_.empty()) { break; } - - fn = pool_.jobs_.front(); - pool_.jobs_.pop_front(); - } - - assert(true == static_cast(fn)); - fn(); - } - } - - ThreadPool &pool_; - }; - friend struct worker; - - std::vector threads_; - std::list> jobs_; - - bool shutdown_; - - std::condition_variable cond_; - std::mutex mutex_; -}; -#elif CPPHTTPLIB_THREAD_POOL_COUNT == 0 -class Threads : public TaskQueue { -public: - Threads() : running_threads_(0) {} - virtual ~Threads() {} - - virtual void enqueue(std::function fn) override { - std::thread([=]() { - { - std::lock_guard guard(running_threads_mutex_); - running_threads_++; - } - - fn(); - - { - std::lock_guard guard(running_threads_mutex_); - running_threads_--; - } - }).detach(); - } - - virtual void shutdown() override { - for (;;) { - std::this_thread::sleep_for(std::chrono::milliseconds(10)); - std::lock_guard guard(running_threads_mutex_); - if (!running_threads_) { break; } - } - } - -private: - std::mutex running_threads_mutex_; - int running_threads_; -}; -#else -class NoThread : public TaskQueue { -public: - NoThread() {} - virtual ~NoThread() {} - - virtual void enqueue(std::function fn) override { - fn(); - } - - virtual void shutdown() override { - } -}; -#endif - -class Server { -public: - using Handler = std::function; - using HandlerWithContentReader = std::function; - using Logger = std::function; - - Server(); - - virtual ~Server(); - - virtual bool is_valid() const; - - Server &Get(const char *pattern, Handler handler); - Server &Post(const char *pattern, Handler handler); - Server &Post(const char *pattern, HandlerWithContentReader handler); - Server &Put(const 
char *pattern, Handler handler); - Server &Put(const char *pattern, HandlerWithContentReader handler); - Server &Patch(const char *pattern, Handler handler); - Server &Patch(const char *pattern, HandlerWithContentReader handler); - Server &Delete(const char *pattern, Handler handler); - Server &Options(const char *pattern, Handler handler); - - bool set_base_dir(const char *dir, const char *mount_point = nullptr); - void set_file_request_handler(Handler handler); - - void set_error_handler(Handler handler); - void set_logger(Logger logger); - - void set_keep_alive_max_count(size_t count); - void set_read_timeout(time_t sec, time_t usec); - void set_payload_max_length(size_t length); - - bool bind_to_port(const char *host, int port, int socket_flags = 0); - int bind_to_any_port(const char *host, int socket_flags = 0); - bool listen_after_bind(); - - bool listen(const char *host, int port, int socket_flags = 0); - - bool is_running() const; - void stop(); - - std::function new_task_queue; - -protected: - bool process_request(Stream &strm, bool last_connection, - bool &connection_close, - const std::function& setup_request); - - size_t keep_alive_max_count_; - time_t read_timeout_sec_; - time_t read_timeout_usec_; - size_t payload_max_length_; - -private: - using Handlers = std::vector>; - using HandersForContentReader = std::vector>; - - socket_t create_server_socket(const char *host, int port, - int socket_flags) const; - int bind_internal(const char *host, int port, int socket_flags); - bool listen_internal(); - - bool routing(Request &req, Response &res, Stream &strm, bool last_connection); - bool handle_file_request(Request &req, Response &res); - bool dispatch_request(Request &req, Response &res, Handlers &handlers); - bool dispatch_request_for_content_reader(Request &req, Response &res, - ContentReader content_reader, - HandersForContentReader &handlers); - - bool parse_request_line(const char *s, Request &req); - bool write_response(Stream &strm, bool 
last_connection, const Request &req, - Response &res); - bool write_content_with_provider(Stream &strm, const Request &req, - Response &res, const std::string &boundary, - const std::string &content_type); - bool read_content(Stream &strm, bool last_connection, Request &req, - Response &res); - bool read_content_with_content_receiver(Stream &strm, bool last_connection, - Request &req, Response &res, - ContentReceiver reveiver); - - virtual bool process_and_close_socket(socket_t sock); - - std::atomic is_running_; - std::atomic svr_sock_; - std::vector> base_dirs_; - Handler file_request_handler_; - Handlers get_handlers_; - Handlers post_handlers_; - HandersForContentReader post_handlers_for_content_reader; - Handlers put_handlers_; - HandersForContentReader put_handlers_for_content_reader; - Handlers patch_handlers_; - HandersForContentReader patch_handlers_for_content_reader; - Handlers delete_handlers_; - Handlers options_handlers_; - Handler error_handler_; - Logger logger_; -}; - -class Client { -public: - explicit Client(const char *host, int port = 80, time_t timeout_sec = 300); - - virtual ~Client(); - - virtual bool is_valid() const; - - std::shared_ptr Get(const char *path); - - std::shared_ptr Get(const char *path, const Headers &headers); - - std::shared_ptr Get(const char *path, Progress progress); - - std::shared_ptr Get(const char *path, const Headers &headers, - Progress progress); - - std::shared_ptr Get(const char *path, - ContentReceiver content_receiver); - - std::shared_ptr Get(const char *path, const Headers &headers, - ContentReceiver content_receiver); - - std::shared_ptr - Get(const char *path, ContentReceiver content_receiver, Progress progress); - - std::shared_ptr Get(const char *path, const Headers &headers, - ContentReceiver content_receiver, - Progress progress); - - std::shared_ptr Get(const char *path, const Headers &headers, - ResponseHandler response_handler, - ContentReceiver content_receiver); - - std::shared_ptr Get(const char 
*path, const Headers &headers, - ResponseHandler response_handler, - ContentReceiver content_receiver, - Progress progress); - - std::shared_ptr Head(const char *path); - - std::shared_ptr Head(const char *path, const Headers &headers); - - std::shared_ptr Post(const char *path, const std::string &body, - const char *content_type, - bool compress = false); - - std::shared_ptr Post(const char *path, const Headers &headers, - const std::string &body, - const char *content_type, - bool compress = false); - - std::shared_ptr Post(const char *path, size_t content_length, - ContentProvider content_provider, - const char *content_type, - bool compress = false); - - std::shared_ptr Post(const char *path, const Headers &headers, - size_t content_length, - ContentProvider content_provider, - const char *content_type, - bool compress = false); - - std::shared_ptr Post(const char *path, const Params ¶ms, - bool compress = false); - - std::shared_ptr Post(const char *path, const Headers &headers, - const Params ¶ms, bool compress = false); - - std::shared_ptr Post(const char *path, - const MultipartFormDataItems &items, - bool compress = false); - - std::shared_ptr Post(const char *path, const Headers &headers, - const MultipartFormDataItems &items, - bool compress = false); - - std::shared_ptr Put(const char *path, const std::string &body, - const char *content_type, - bool compress = false); - - std::shared_ptr Put(const char *path, const Headers &headers, - const std::string &body, - const char *content_type, - bool compress = false); - - std::shared_ptr Put(const char *path, size_t content_length, - ContentProvider content_provider, - const char *content_type, - bool compress = false); - - std::shared_ptr Put(const char *path, const Headers &headers, - size_t content_length, - ContentProvider content_provider, - const char *content_type, - bool compress = false); - - std::shared_ptr Patch(const char *path, const std::string &body, - const char *content_type, - bool compress 
= false); - - std::shared_ptr Patch(const char *path, const Headers &headers, - const std::string &body, - const char *content_type, - bool compress = false); - - std::shared_ptr Patch(const char *path, size_t content_length, - ContentProvider content_provider, - const char *content_type, - bool compress = false); - - std::shared_ptr Patch(const char *path, const Headers &headers, - size_t content_length, - ContentProvider content_provider, - const char *content_type, - bool compress = false); - - std::shared_ptr Delete(const char *path); - - std::shared_ptr Delete(const char *path, const std::string &body, - const char *content_type); - - std::shared_ptr Delete(const char *path, const Headers &headers); - - std::shared_ptr Delete(const char *path, const Headers &headers, - const std::string &body, - const char *content_type); - - std::shared_ptr Options(const char *path); - - std::shared_ptr Options(const char *path, const Headers &headers); - - bool send(const Request &req, Response &res); - - bool send(const std::vector &requests, - std::vector &responses); - - void set_keep_alive_max_count(size_t count); - void set_read_timeout(time_t sec, time_t usec); - - void follow_location(bool on); - -protected: - bool process_request(Stream &strm, const Request &req, Response &res, - bool last_connection, bool &connection_close); - - const std::string host_; - const int port_; - time_t timeout_sec_; - const std::string host_and_port_; - size_t keep_alive_max_count_; - time_t read_timeout_sec_; - time_t read_timeout_usec_; - size_t follow_location_; - -private: - socket_t create_client_socket() const; - bool read_response_line(Stream &strm, Response &res); - void write_request(Stream &strm, const Request &req, bool last_connection); - bool redirect(const Request &req, Response &res); - - std::shared_ptr - send_with_content_provider(const char *method, const char *path, - const Headers &headers, const std::string &body, - size_t content_length, - ContentProvider 
content_provider, - const char *content_type, bool compress); - - virtual bool process_and_close_socket( - socket_t sock, size_t request_count, - std::function - callback); - - virtual bool is_ssl() const; -}; - -inline void Get(std::vector &requests, const char *path, - const Headers &headers) { - Request req; - req.method = "GET"; - req.path = path; - req.headers = headers; - requests.emplace_back(std::move(req)); -} - -inline void Get(std::vector &requests, const char *path) { - Get(requests, path, Headers()); -} - -inline void Post(std::vector &requests, const char *path, - const Headers &headers, const std::string &body, - const char *content_type) { - Request req; - req.method = "POST"; - req.path = path; - req.headers = headers; - req.headers.emplace("Content-Type", content_type); - req.body = body; - requests.emplace_back(std::move(req)); -} - -inline void Post(std::vector &requests, const char *path, - const std::string &body, const char *content_type) { - Post(requests, path, Headers(), body, content_type); -} - -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT -class SSLSocketStream : public Stream { -public: - SSLSocketStream(socket_t sock, SSL *ssl, time_t read_timeout_sec, - time_t read_timeout_usec); - virtual ~SSLSocketStream(); - - virtual int read(char *ptr, size_t size); - virtual int write(const char *ptr, size_t size); - virtual int write(const char *ptr); - virtual int write(const std::string &s); - virtual std::string get_remote_addr() const; - -private: - socket_t sock_; - SSL *ssl_; - time_t read_timeout_sec_; - time_t read_timeout_usec_; -}; - -class SSLServer : public Server { -public: - SSLServer(const char *cert_path, const char *private_key_path, - const char *client_ca_cert_file_path = nullptr, - const char *client_ca_cert_dir_path = nullptr); - - virtual ~SSLServer(); - - virtual bool is_valid() const; - -private: - virtual bool process_and_close_socket(socket_t sock); - - SSL_CTX *ctx_; - std::mutex ctx_mutex_; -}; - -class SSLClient : public 
Client { -public: - SSLClient(const char *host, int port = 443, time_t timeout_sec = 300, - const char *client_cert_path = nullptr, - const char *client_key_path = nullptr); - - virtual ~SSLClient(); - - virtual bool is_valid() const; - - void set_ca_cert_path(const char *ca_ceert_file_path, - const char *ca_cert_dir_path = nullptr); - void enable_server_certificate_verification(bool enabled); - - long get_openssl_verify_result() const; - - SSL_CTX *ssl_context() const noexcept; - -private: - virtual bool process_and_close_socket( - socket_t sock, size_t request_count, - std::function - callback); - virtual bool is_ssl() const; - - bool verify_host(X509 *server_cert) const; - bool verify_host_with_subject_alt_name(X509 *server_cert) const; - bool verify_host_with_common_name(X509 *server_cert) const; - bool check_host_name(const char *pattern, size_t pattern_len) const; - - SSL_CTX *ctx_; - std::mutex ctx_mutex_; - std::vector host_components_; - std::string ca_cert_file_path_; - std::string ca_cert_dir_path_; - bool server_certificate_verification_ = false; - long verify_result_ = 0; -}; -#endif - -/* - * Implementation - */ - -namespace detail { - -inline bool is_hex(char c, int &v) { - if (0x20 <= c && isdigit(c)) { - v = c - '0'; - return true; - } else if ('A' <= c && c <= 'F') { - v = c - 'A' + 10; - return true; - } else if ('a' <= c && c <= 'f') { - v = c - 'a' + 10; - return true; - } - return false; -} - -inline bool from_hex_to_i(const std::string &s, size_t i, size_t cnt, - int &val) { - if (i >= s.size()) { return false; } - - val = 0; - for (; cnt; i++, cnt--) { - if (!s[i]) { return false; } - int v = 0; - if (is_hex(s[i], v)) { - val = val * 16 + v; - } else { - return false; - } - } - return true; -} - -inline std::string from_i_to_hex(size_t n) { - const char *charset = "0123456789abcdef"; - std::string ret; - do { - ret = charset[n & 15] + ret; - n >>= 4; - } while (n > 0); - return ret; -} - -inline size_t to_utf8(int code, char *buff) { - if 
(code < 0x0080) { - buff[0] = (code & 0x7F); - return 1; - } else if (code < 0x0800) { - buff[0] = (0xC0 | ((code >> 6) & 0x1F)); - buff[1] = (0x80 | (code & 0x3F)); - return 2; - } else if (code < 0xD800) { - buff[0] = (0xE0 | ((code >> 12) & 0xF)); - buff[1] = (0x80 | ((code >> 6) & 0x3F)); - buff[2] = (0x80 | (code & 0x3F)); - return 3; - } else if (code < 0xE000) { // D800 - DFFF is invalid... - return 0; - } else if (code < 0x10000) { - buff[0] = (0xE0 | ((code >> 12) & 0xF)); - buff[1] = (0x80 | ((code >> 6) & 0x3F)); - buff[2] = (0x80 | (code & 0x3F)); - return 3; - } else if (code < 0x110000) { - buff[0] = (0xF0 | ((code >> 18) & 0x7)); - buff[1] = (0x80 | ((code >> 12) & 0x3F)); - buff[2] = (0x80 | ((code >> 6) & 0x3F)); - buff[3] = (0x80 | (code & 0x3F)); - return 4; - } - - // NOTREACHED - return 0; -} - -// NOTE: This code came up with the following stackoverflow post: -// https://stackoverflow.com/questions/180947/base64-decode-snippet-in-c -inline std::string base64_encode(const std::string &in) { - static const auto lookup = - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; - - std::string out; - out.reserve(in.size()); - - int val = 0; - int valb = -6; - - for (uint8_t c : in) { - val = (val << 8) + c; - valb += 8; - while (valb >= 0) { - out.push_back(lookup[(val >> valb) & 0x3F]); - valb -= 6; - } - } - - if (valb > -6) { out.push_back(lookup[((val << 8) >> (valb + 8)) & 0x3F]); } - - while (out.size() % 4) { - out.push_back('='); - } - - return out; -} - -inline bool is_file(const std::string &path) { - struct stat st; - return stat(path.c_str(), &st) >= 0 && S_ISREG(st.st_mode); -} - -inline bool is_dir(const std::string &path) { - struct stat st; - return stat(path.c_str(), &st) >= 0 && S_ISDIR(st.st_mode); -} - -inline bool is_valid_path(const std::string &path) { - size_t level = 0; - size_t i = 0; - - // Skip slash - while (i < path.size() && path[i] == '/') { - i++; - } - - while (i < path.size()) { - // Read component - 
auto beg = i; - while (i < path.size() && path[i] != '/') { - i++; - } - - auto len = i - beg; - assert(len > 0); - - if (!path.compare(beg, len, ".")) { - ; - } else if (!path.compare(beg, len, "..")) { - if (level == 0) { return false; } - level--; - } else { - level++; - } - - // Skip slash - while (i < path.size() && path[i] == '/') { - i++; - } - } - - return true; -} - -inline void read_file(const std::string &path, std::string &out) { - std::ifstream fs(path, std::ios_base::binary); - fs.seekg(0, std::ios_base::end); - auto size = fs.tellg(); - fs.seekg(0); - out.resize(static_cast(size)); - fs.read(&out[0], size); -} - -inline std::string file_extension(const std::string &path) { - std::smatch m; - auto re = std::regex("\\.([a-zA-Z0-9]+)$"); - if (std::regex_search(path, m, re)) { return m[1].str(); } - return std::string(); -} - -template void split(const char *b, const char *e, char d, Fn fn) { - int i = 0; - int beg = 0; - - while (e ? (b + i != e) : (b[i] != '\0')) { - if (b[i] == d) { - fn(&b[beg], &b[i]); - beg = i + 1; - } - i++; - } - - if (i) { fn(&b[beg], &b[i]); } -} - -// NOTE: until the read size reaches `fixed_buffer_size`, use `fixed_buffer` -// to store data. The call can set memory on stack for performance. 
-class stream_line_reader { -public: - stream_line_reader(Stream &strm, char *fixed_buffer, size_t fixed_buffer_size) - : strm_(strm), fixed_buffer_(fixed_buffer), - fixed_buffer_size_(fixed_buffer_size) {} - - const char *ptr() const { - if (glowable_buffer_.empty()) { - return fixed_buffer_; - } else { - return glowable_buffer_.data(); - } - } - - size_t size() const { - if (glowable_buffer_.empty()) { - return fixed_buffer_used_size_; - } else { - return glowable_buffer_.size(); - } - } - - bool getline() { - fixed_buffer_used_size_ = 0; - glowable_buffer_.clear(); - - for (size_t i = 0;; i++) { - char byte; - auto n = strm_.read(&byte, 1); - - if (n < 0) { - return false; - } else if (n == 0) { - if (i == 0) { - return false; - } else { - break; - } - } - - append(byte); - - if (byte == '\n') { break; } - } - - return true; - } - -private: - void append(char c) { - if (fixed_buffer_used_size_ < fixed_buffer_size_ - 1) { - fixed_buffer_[fixed_buffer_used_size_++] = c; - fixed_buffer_[fixed_buffer_used_size_] = '\0'; - } else { - if (glowable_buffer_.empty()) { - assert(fixed_buffer_[fixed_buffer_used_size_] == '\0'); - glowable_buffer_.assign(fixed_buffer_, fixed_buffer_used_size_); - } - glowable_buffer_ += c; - } - } - - Stream &strm_; - char *fixed_buffer_; - const size_t fixed_buffer_size_; - size_t fixed_buffer_used_size_ = 0; - std::string glowable_buffer_; -}; - -inline int close_socket(socket_t sock) { -#ifdef _WIN32 - return closesocket(sock); -#else - return close(sock); -#endif -} - -inline int select_read(socket_t sock, time_t sec, time_t usec) { -#ifdef CPPHTTPLIB_USE_POLL - struct pollfd pfd_read; - pfd_read.fd = sock; - pfd_read.events = POLLIN; - - auto timeout = static_cast(sec * 1000 + usec / 1000); - - return poll(&pfd_read, 1, timeout); -#else - fd_set fds; - FD_ZERO(&fds); - FD_SET(sock, &fds); - - timeval tv; - tv.tv_sec = static_cast(sec); - tv.tv_usec = static_cast(usec); - - return select(static_cast(sock + 1), &fds, nullptr, nullptr, 
&tv); -#endif -} - -inline bool wait_until_socket_is_ready(socket_t sock, time_t sec, time_t usec) { -#ifdef CPPHTTPLIB_USE_POLL - struct pollfd pfd_read; - pfd_read.fd = sock; - pfd_read.events = POLLIN | POLLOUT; - - auto timeout = static_cast(sec * 1000 + usec / 1000); - - if (poll(&pfd_read, 1, timeout) > 0 && - pfd_read.revents & (POLLIN | POLLOUT)) { - int error = 0; - socklen_t len = sizeof(error); - return getsockopt(sock, SOL_SOCKET, SO_ERROR, - reinterpret_cast(&error), &len) >= 0 && - !error; - } - return false; -#else - fd_set fdsr; - FD_ZERO(&fdsr); - FD_SET(sock, &fdsr); - - auto fdsw = fdsr; - auto fdse = fdsr; - - timeval tv; - tv.tv_sec = static_cast(sec); - tv.tv_usec = static_cast(usec); - - if (select(static_cast(sock + 1), &fdsr, &fdsw, &fdse, &tv) > 0 && - (FD_ISSET(sock, &fdsr) || FD_ISSET(sock, &fdsw))) { - int error = 0; - socklen_t len = sizeof(error); - return getsockopt(sock, SOL_SOCKET, SO_ERROR, reinterpret_cast(&error), &len) >= 0 && - !error; - } - return false; -#endif -} - -template -inline bool process_and_close_socket(bool is_client_request, socket_t sock, - size_t keep_alive_max_count, - time_t read_timeout_sec, - time_t read_timeout_usec, T callback) { - assert(keep_alive_max_count > 0); - - bool ret = false; - - if (keep_alive_max_count > 1) { - auto count = keep_alive_max_count; - while (count > 0 && - (is_client_request || - detail::select_read(sock, CPPHTTPLIB_KEEPALIVE_TIMEOUT_SECOND, - CPPHTTPLIB_KEEPALIVE_TIMEOUT_USECOND) > 0)) { - SocketStream strm(sock, read_timeout_sec, read_timeout_usec); - auto last_connection = count == 1; - auto connection_close = false; - - ret = callback(strm, last_connection, connection_close); - if (!ret || connection_close) { break; } - - count--; - } - } else { - SocketStream strm(sock, read_timeout_sec, read_timeout_usec); - auto dummy_connection_close = false; - ret = callback(strm, true, dummy_connection_close); - } - - close_socket(sock); - return ret; -} - -inline int 
shutdown_socket(socket_t sock) { -#ifdef _WIN32 - return shutdown(sock, SD_BOTH); -#else - return shutdown(sock, SHUT_RDWR); -#endif -} - -template -socket_t create_socket(const char *host, int port, Fn fn, - int socket_flags = 0) { -#ifdef _WIN32 -#define SO_SYNCHRONOUS_NONALERT 0x20 -#define SO_OPENTYPE 0x7008 - - int opt = SO_SYNCHRONOUS_NONALERT; - setsockopt(INVALID_SOCKET, SOL_SOCKET, SO_OPENTYPE, (char *)&opt, - sizeof(opt)); -#endif - - // Get address info - struct addrinfo hints; - struct addrinfo *result; - - memset(&hints, 0, sizeof(struct addrinfo)); - hints.ai_family = AF_UNSPEC; - hints.ai_socktype = SOCK_STREAM; - hints.ai_flags = socket_flags; - hints.ai_protocol = 0; - - auto service = std::to_string(port); - - if (getaddrinfo(host, service.c_str(), &hints, &result)) { - return INVALID_SOCKET; - } - - for (auto rp = result; rp; rp = rp->ai_next) { - // Create a socket -#ifdef _WIN32 - auto sock = WSASocketW(rp->ai_family, rp->ai_socktype, rp->ai_protocol, - nullptr, 0, WSA_FLAG_NO_HANDLE_INHERIT); -#else - auto sock = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol); -#endif - if (sock == INVALID_SOCKET) { continue; } - -#ifndef _WIN32 - if (fcntl(sock, F_SETFD, FD_CLOEXEC) == -1) { continue; } -#endif - - // Make 'reuse address' option available - int yes = 1; - setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, reinterpret_cast(&yes), - sizeof(yes)); -#ifdef SO_REUSEPORT - setsockopt(sock, SOL_SOCKET, SO_REUSEPORT, reinterpret_cast(&yes), - sizeof(yes)); -#endif - - // bind or connect - if (fn(sock, *rp)) { - freeaddrinfo(result); - return sock; - } - - close_socket(sock); - } - - freeaddrinfo(result); - return INVALID_SOCKET; -} - -inline void set_nonblocking(socket_t sock, bool nonblocking) { -#ifdef _WIN32 - auto flags = nonblocking ? 1UL : 0UL; - ioctlsocket(sock, FIONBIO, &flags); -#else - auto flags = fcntl(sock, F_GETFL, 0); - fcntl(sock, F_SETFL, - nonblocking ? 
(flags | O_NONBLOCK) : (flags & (~O_NONBLOCK))); -#endif -} - -inline bool is_connection_error() { -#ifdef _WIN32 - return WSAGetLastError() != WSAEWOULDBLOCK; -#else - return errno != EINPROGRESS; -#endif -} - -inline std::string get_remote_addr(socket_t sock) { - struct sockaddr_storage addr; - socklen_t len = sizeof(addr); - - if (!getpeername(sock, reinterpret_cast(&addr), &len)) { - std::array ipstr{}; - - if (!getnameinfo(reinterpret_cast(&addr), len, ipstr.data(), ipstr.size(), - nullptr, 0, NI_NUMERICHOST)) { - return ipstr.data(); - } - } - - return std::string(); -} - -inline const char *find_content_type(const std::string &path) { - auto ext = file_extension(path); - if (ext == "txt") { - return "text/plain"; - } else if (ext == "html" || ext == "htm") { - return "text/html"; - } else if (ext == "css") { - return "text/css"; - } else if (ext == "jpeg" || ext == "jpg") { - return "image/jpg"; - } else if (ext == "png") { - return "image/png"; - } else if (ext == "gif") { - return "image/gif"; - } else if (ext == "svg") { - return "image/svg+xml"; - } else if (ext == "ico") { - return "image/x-icon"; - } else if (ext == "json") { - return "application/json"; - } else if (ext == "pdf") { - return "application/pdf"; - } else if (ext == "js") { - return "application/javascript"; - } else if (ext == "xml") { - return "application/xml"; - } else if (ext == "xhtml") { - return "application/xhtml+xml"; - } - return nullptr; -} - -inline const char *status_message(int status) { - switch (status) { - case 200: return "OK"; - case 206: return "Partial Content"; - case 301: return "Moved Permanently"; - case 302: return "Found"; - case 303: return "See Other"; - case 304: return "Not Modified"; - case 400: return "Bad Request"; - case 403: return "Forbidden"; - case 404: return "Not Found"; - case 413: return "Payload Too Large"; - case 414: return "Request-URI Too Long"; - case 415: return "Unsupported Media Type"; - case 416: return "Range Not Satisfiable"; - - 
default: - case 500: return "Internal Server Error"; - } -} - -#ifdef CPPHTTPLIB_ZLIB_SUPPORT -inline bool can_compress(const std::string &content_type) { - return !content_type.find("text/") || content_type == "image/svg+xml" || - content_type == "application/javascript" || - content_type == "application/json" || - content_type == "application/xml" || - content_type == "application/xhtml+xml"; -} - -inline bool compress(std::string &content) { - z_stream strm; - strm.zalloc = Z_NULL; - strm.zfree = Z_NULL; - strm.opaque = Z_NULL; - - auto ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, 31, 8, - Z_DEFAULT_STRATEGY); - if (ret != Z_OK) { return false; } - - strm.avail_in = content.size(); - strm.next_in = - const_cast(reinterpret_cast(content.data())); - - std::string compressed; - - std::array buff{}; - do { - strm.avail_out = buff.size(); - strm.next_out = reinterpret_cast(buff.data()); - ret = deflate(&strm, Z_FINISH); - assert(ret != Z_STREAM_ERROR); - compressed.append(buff.data(), buff.size() - strm.avail_out); - } while (strm.avail_out == 0); - - assert(ret == Z_STREAM_END); - assert(strm.avail_in == 0); - - content.swap(compressed); - - deflateEnd(&strm); - return true; -} - -class decompressor { -public: - decompressor() { - strm.zalloc = Z_NULL; - strm.zfree = Z_NULL; - strm.opaque = Z_NULL; - - // 15 is the value of wbits, which should be at the maximum possible value - // to ensure that any gzip stream can be decoded. The offset of 16 specifies - // that the stream to decompress will be formatted with a gzip wrapper. 
- is_valid_ = inflateInit2(&strm, 16 + 15) == Z_OK; - } - - ~decompressor() { inflateEnd(&strm); } - - bool is_valid() const { return is_valid_; } - - template - bool decompress(const char *data, size_t data_length, T callback) { - int ret = Z_OK; - - strm.avail_in = data_length; - strm.next_in = const_cast(reinterpret_cast(data)); - - std::array buff{}; - do { - strm.avail_out = buff.size(); - strm.next_out = reinterpret_cast(buff.data()); - - ret = inflate(&strm, Z_NO_FLUSH); - assert(ret != Z_STREAM_ERROR); - switch (ret) { - case Z_NEED_DICT: - case Z_DATA_ERROR: - case Z_MEM_ERROR: inflateEnd(&strm); return false; - } - - if (!callback(buff.data(), buff.size() - strm.avail_out)) { return false; } - } while (strm.avail_out == 0); - - return ret == Z_OK || ret == Z_STREAM_END; - } - -private: - bool is_valid_; - z_stream strm; -}; -#endif - -inline bool has_header(const Headers &headers, const char *key) { - return headers.find(key) != headers.end(); -} - -inline const char *get_header_value(const Headers &headers, const char *key, - size_t id = 0, const char *def = nullptr) { - auto it = headers.find(key); - std::advance(it, id); - if (it != headers.end()) { return it->second.c_str(); } - return def; -} - -inline uint64_t get_header_value_uint64(const Headers &headers, const char *key, - int def = 0) { - auto it = headers.find(key); - if (it != headers.end()) { - return std::strtoull(it->second.data(), nullptr, 10); - } - return def; -} - -inline bool read_headers(Stream &strm, Headers &headers) { - static std::regex re(R"((.+?):\s*(.+?)\s*\r\n)"); - - const auto bufsiz = 2048; - char buf[bufsiz]; - - stream_line_reader line_reader(strm, buf, bufsiz); - - for (;;) { - if (!line_reader.getline()) { return false; } - if (!strcmp(line_reader.ptr(), "\r\n")) { break; } - std::cmatch m; - if (std::regex_match(line_reader.ptr(), m, re)) { - auto key = std::string(m[1]); - auto val = std::string(m[2]); - headers.emplace(key, val); - } - } - - return true; -} - -inline 
bool read_content_with_length(Stream &strm, uint64_t len, - Progress progress, ContentReceiver out) { - char buf[CPPHTTPLIB_RECV_BUFSIZ]; - - uint64_t r = 0; - while (r < len) { - auto read_len = static_cast(len - r); - auto n = strm.read(buf, std::min(read_len, CPPHTTPLIB_RECV_BUFSIZ)); - if (n <= 0) { return false; } - - if (!out(buf, n)) { return false; } - - r += n; - - if (progress) { - if (!progress(r, len)) { return false; } - } - } - - return true; -} - -inline void skip_content_with_length(Stream &strm, uint64_t len) { - char buf[CPPHTTPLIB_RECV_BUFSIZ]; - uint64_t r = 0; - while (r < len) { - auto read_len = static_cast(len - r); - auto n = strm.read(buf, std::min(read_len, CPPHTTPLIB_RECV_BUFSIZ)); - if (n <= 0) { return; } - r += n; - } -} - -inline bool read_content_without_length(Stream &strm, ContentReceiver out) { - char buf[CPPHTTPLIB_RECV_BUFSIZ]; - for (;;) { - auto n = strm.read(buf, CPPHTTPLIB_RECV_BUFSIZ); - if (n < 0) { - return false; - } else if (n == 0) { - return true; - } - if (!out(buf, n)) { return false; } - } - - return true; -} - -inline bool read_content_chunked(Stream &strm, ContentReceiver out) { - const auto bufsiz = 16; - char buf[bufsiz]; - - stream_line_reader line_reader(strm, buf, bufsiz); - - if (!line_reader.getline()) { return false; } - - auto chunk_len = std::stoi(line_reader.ptr(), 0, 16); - - while (chunk_len > 0) { - if (!read_content_with_length(strm, chunk_len, nullptr, out)) { - return false; - } - - if (!line_reader.getline()) { return false; } - - if (strcmp(line_reader.ptr(), "\r\n")) { break; } - - if (!line_reader.getline()) { return false; } - - chunk_len = std::stoi(line_reader.ptr(), 0, 16); - } - - if (chunk_len == 0) { - // Reader terminator after chunks - if (!line_reader.getline() || strcmp(line_reader.ptr(), "\r\n")) - return false; - } - - return true; -} - -inline bool is_chunked_transfer_encoding(const Headers &headers) { - return !strcasecmp(get_header_value(headers, "Transfer-Encoding", 0, ""), 
- "chunked"); -} - -template -bool read_content(Stream &strm, T &x, size_t payload_max_length, int &status, - Progress progress, ContentReceiver receiver) { - - ContentReceiver out = [&](const char *buf, size_t n) { - return receiver(buf, n); - }; - -#ifdef CPPHTTPLIB_ZLIB_SUPPORT - detail::decompressor decompressor; - - if (!decompressor.is_valid()) { - status = 500; - return false; - } - - if (x.get_header_value("Content-Encoding") == "gzip") { - out = [&](const char *buf, size_t n) { - return decompressor.decompress( - buf, n, [&](const char *buf, size_t n) { return receiver(buf, n); }); - }; - } -#else - if (x.get_header_value("Content-Encoding") == "gzip") { - status = 415; - return false; - } -#endif - - auto ret = true; - auto exceed_payload_max_length = false; - - if (is_chunked_transfer_encoding(x.headers)) { - ret = read_content_chunked(strm, out); - } else if (!has_header(x.headers, "Content-Length")) { - ret = read_content_without_length(strm, out); - } else { - auto len = get_header_value_uint64(x.headers, "Content-Length", 0); - if (len > payload_max_length) { - exceed_payload_max_length = true; - skip_content_with_length(strm, len); - ret = false; - } else if (len > 0) { - ret = read_content_with_length(strm, len, progress, out); - } - } - - if (!ret) { status = exceed_payload_max_length ? 
413 : 400; } - - return ret; -} - -template -inline int write_headers(Stream &strm, const T &info, const Headers &headers) { - auto write_len = 0; - for (const auto &x : info.headers) { - auto len = - strm.write_format("%s: %s\r\n", x.first.c_str(), x.second.c_str()); - if (len < 0) { return len; } - write_len += len; - } - for (const auto &x : headers) { - auto len = - strm.write_format("%s: %s\r\n", x.first.c_str(), x.second.c_str()); - if (len < 0) { return len; } - write_len += len; - } - auto len = strm.write("\r\n"); - if (len < 0) { return len; } - write_len += len; - return write_len; -} - -inline ssize_t write_content(Stream &strm, - ContentProviderWithCloser content_provider, - size_t offset, size_t length) { - size_t begin_offset = offset; - size_t end_offset = offset + length; - while (offset < end_offset) { - ssize_t written_length = 0; - content_provider( - offset, end_offset - offset, - [&](const char *d, size_t l) { - offset += l; - written_length = strm.write(d, l); - }, - [&](void) { written_length = -1; }); - if (written_length < 0) { return written_length; } - } - return static_cast(offset - begin_offset); -} - -inline ssize_t -write_content_chunked(Stream &strm, - ContentProviderWithCloser content_provider) { - size_t offset = 0; - auto data_available = true; - ssize_t total_written_length = 0; - while (data_available) { - ssize_t written_length = 0; - content_provider( - offset, 0, - [&](const char *d, size_t l) { - data_available = l > 0; - offset += l; - - // Emit chunked response header and footer for each chunk - auto chunk = from_i_to_hex(l) + "\r\n" + std::string(d, l) + "\r\n"; - written_length = strm.write(chunk); - }, - [&](void) { - data_available = false; - written_length = strm.write("0\r\n\r\n"); - }); - - if (written_length < 0) { return written_length; } - total_written_length += written_length; - } - return total_written_length; -} - -template -inline bool redirect(T &cli, const Request &req, Response &res, - const std::string 
&path) { - Request new_req; - new_req.method = req.method; - new_req.path = path; - new_req.headers = req.headers; - new_req.body = req.body; - new_req.redirect_count = req.redirect_count - 1; - new_req.response_handler = req.response_handler; - new_req.content_receiver = req.content_receiver; - new_req.progress = req.progress; - - Response new_res; - auto ret = cli.send(new_req, new_res); - if (ret) { res = new_res; } - return ret; -} - -inline std::string encode_url(const std::string &s) { - std::string result; - - for (auto i = 0; s[i]; i++) { - switch (s[i]) { - case ' ': result += "%20"; break; - case '+': result += "%2B"; break; - case '\r': result += "%0D"; break; - case '\n': result += "%0A"; break; - case '\'': result += "%27"; break; - case ',': result += "%2C"; break; - case ':': result += "%3A"; break; - case ';': result += "%3B"; break; - default: - auto c = static_cast(s[i]); - if (c >= 0x80) { - result += '%'; - char hex[4]; - size_t len = snprintf(hex, sizeof(hex) - 1, "%02X", c); - assert(len == 2); - result.append(hex, len); - } else { - result += s[i]; - } - break; - } - } - - return result; -} - -inline std::string decode_url(const std::string &s) { - std::string result; - - for (size_t i = 0; i < s.size(); i++) { - if (s[i] == '%' && i + 1 < s.size()) { - if (s[i + 1] == 'u') { - int val = 0; - if (from_hex_to_i(s, i + 2, 4, val)) { - // 4 digits Unicode codes - char buff[4]; - size_t len = to_utf8(val, buff); - if (len > 0) { result.append(buff, len); } - i += 5; // 'u0000' - } else { - result += s[i]; - } - } else { - int val = 0; - if (from_hex_to_i(s, i + 1, 2, val)) { - // 2 digits hex codes - result += static_cast(val); - i += 2; // '00' - } else { - result += s[i]; - } - } - } else if (s[i] == '+') { - result += ' '; - } else { - result += s[i]; - } - } - - return result; -} - -inline void parse_query_text(const std::string &s, Params ¶ms) { - split(&s[0], &s[s.size()], '&', [&](const char *b, const char *e) { - std::string key; - 
std::string val; - split(b, e, '=', [&](const char *b, const char *e) { - if (key.empty()) { - key.assign(b, e); - } else { - val.assign(b, e); - } - }); - params.emplace(key, decode_url(val)); - }); -} - -inline bool parse_multipart_boundary(const std::string &content_type, - std::string &boundary) { - auto pos = content_type.find("boundary="); - if (pos == std::string::npos) { return false; } - - boundary = content_type.substr(pos + 9); - return true; -} - -inline bool parse_multipart_formdata(const std::string &boundary, - const std::string &body, - MultipartFiles &files) { - static std::string dash = "--"; - static std::string crlf = "\r\n"; - - static std::regex re_content_type("Content-Type: (.*?)$", - std::regex_constants::icase); - - static std::regex re_content_disposition( - "Content-Disposition: form-data; name=\"(.*?)\"(?:; filename=\"(.*?)\")?", - std::regex_constants::icase); - - auto dash_boundary = dash + boundary; - - auto pos = body.find(dash_boundary); - if (pos != 0) { return false; } - - pos += dash_boundary.size(); - - auto next_pos = body.find(crlf, pos); - if (next_pos == std::string::npos) { return false; } - - pos = next_pos + crlf.size(); - - while (pos < body.size()) { - next_pos = body.find(crlf, pos); - if (next_pos == std::string::npos) { return false; } - - std::string name; - MultipartFile file; - - auto header = body.substr(pos, (next_pos - pos)); - - while (pos != next_pos) { - std::smatch m; - if (std::regex_match(header, m, re_content_type)) { - file.content_type = m[1]; - } else if (std::regex_match(header, m, re_content_disposition)) { - name = m[1]; - file.filename = m[2]; - } - - pos = next_pos + crlf.size(); - - next_pos = body.find(crlf, pos); - if (next_pos == std::string::npos) { return false; } - - header = body.substr(pos, (next_pos - pos)); - } - - pos = next_pos + crlf.size(); - - next_pos = body.find(crlf + dash_boundary, pos); - - if (next_pos == std::string::npos) { return false; } - - file.offset = pos; - 
file.length = next_pos - pos; - - pos = next_pos + crlf.size() + dash_boundary.size(); - - next_pos = body.find(crlf, pos); - if (next_pos == std::string::npos) { return false; } - - files.emplace(name, file); - - pos = next_pos + crlf.size(); - } - - return true; -} - -inline bool parse_range_header(const std::string &s, Ranges &ranges) { - try { - static auto re_first_range = - std::regex(R"(bytes=(\d*-\d*(?:,\s*\d*-\d*)*))"); - std::smatch m; - if (std::regex_match(s, m, re_first_range)) { - auto pos = m.position(1); - auto len = m.length(1); - detail::split( - &s[pos], &s[pos + len], ',', [&](const char *b, const char *e) { - static auto re_another_range = std::regex(R"(\s*(\d*)-(\d*))"); - std::cmatch m; - if (std::regex_match(b, e, m, re_another_range)) { - ssize_t first = -1; - if (!m.str(1).empty()) { - first = static_cast(std::stoll(m.str(1))); - } - - ssize_t last = -1; - if (!m.str(2).empty()) { - last = static_cast(std::stoll(m.str(2))); - } - - if (first != -1 && last != -1 && first > last) { - throw std::runtime_error("invalid range error"); - } - ranges.emplace_back(std::make_pair(first, last)); - } - }); - return true; - } - return false; - } catch (...) 
{ return false; } -} - -inline std::string to_lower(const char *beg, const char *end) { - std::string out; - auto it = beg; - while (it != end) { - out += static_cast(::tolower(*it)); - it++; - } - return out; -} - -inline std::string make_multipart_data_boundary() { - static const char data[] = - "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; - - std::random_device seed_gen; - std::mt19937 engine(seed_gen()); - - std::string result = "--cpp-httplib-multipart-data-"; - - for (auto i = 0; i < 16; i++) { - result += data[engine() % (sizeof(data) - 1)]; - } - - return result; -} - -inline std::pair -get_range_offset_and_length(const Request &req, size_t content_length, - size_t index) { - auto r = req.ranges[index]; - - if (r.first == -1 && r.second == -1) { - return std::make_pair(0, content_length); - } - - if (r.first == -1) { - r.first = content_length - r.second; - r.second = content_length - 1; - } - - if (r.second == -1) { r.second = content_length - 1; } - - return std::make_pair(r.first, r.second - r.first + 1); -} - -inline std::string make_content_range_header_field(size_t offset, size_t length, - size_t content_length) { - std::string field = "bytes "; - field += std::to_string(offset); - field += "-"; - field += std::to_string(offset + length - 1); - field += "/"; - field += std::to_string(content_length); - return field; -} - -template -bool process_multipart_ranges_data(const Request &req, Response &res, - const std::string &boundary, - const std::string &content_type, - SToken stoken, CToken ctoken, - Content content) { - for (size_t i = 0; i < req.ranges.size(); i++) { - ctoken("--"); - stoken(boundary); - ctoken("\r\n"); - if (!content_type.empty()) { - ctoken("Content-Type: "); - stoken(content_type); - ctoken("\r\n"); - } - - auto offsets = detail::get_range_offset_and_length(req, res.body.size(), i); - auto offset = offsets.first; - auto length = offsets.second; - - ctoken("Content-Range: "); - 
stoken(make_content_range_header_field(offset, length, res.body.size())); - ctoken("\r\n"); - ctoken("\r\n"); - if (!content(offset, length)) { return false; } - ctoken("\r\n"); - } - - ctoken("--"); - stoken(boundary); - ctoken("--\r\n"); - - return true; -} - -inline std::string make_multipart_ranges_data(const Request &req, Response &res, - const std::string &boundary, - const std::string &content_type) { - std::string data; - - process_multipart_ranges_data( - req, res, boundary, content_type, - [&](const std::string &token) { data += token; }, - [&](const char *token) { data += token; }, - [&](size_t offset, size_t length) { - data += res.body.substr(offset, length); - return true; - }); - - return data; -} - -inline size_t -get_multipart_ranges_data_length(const Request &req, Response &res, - const std::string &boundary, - const std::string &content_type) { - size_t data_length = 0; - - process_multipart_ranges_data( - req, res, boundary, content_type, - [&](const std::string &token) { data_length += token.size(); }, - [&](const char *token) { data_length += strlen(token); }, - [&](size_t /*offset*/, size_t length) { - data_length += length; - return true; - }); - - return data_length; -} - -inline bool write_multipart_ranges_data(Stream &strm, const Request &req, - Response &res, - const std::string &boundary, - const std::string &content_type) { - return process_multipart_ranges_data( - req, res, boundary, content_type, - [&](const std::string &token) { strm.write(token); }, - [&](const char *token) { strm.write(token); }, - [&](size_t offset, size_t length) { - return detail::write_content(strm, res.content_provider, offset, - length) >= 0; - }); -} - -inline std::pair -get_range_offset_and_length(const Request &req, const Response &res, - size_t index) { - auto r = req.ranges[index]; - - if (r.second == -1) { r.second = res.content_length - 1; } - - return std::make_pair(r.first, r.second - r.first + 1); -} - -#ifdef _WIN32 -class WSInit { -public: - 
WSInit() { - WSADATA wsaData; - WSAStartup(0x0002, &wsaData); - } - - ~WSInit() { WSACleanup(); } -}; - -static WSInit wsinit_; -#endif - -} // namespace detail - -// Header utilities -inline std::pair make_range_header(Ranges ranges) { - std::string field = "bytes="; - auto i = 0; - for (auto r : ranges) { - if (i != 0) { field += ", "; } - if (r.first != -1) { field += std::to_string(r.first); } - field += '-'; - if (r.second != -1) { field += std::to_string(r.second); } - i++; - } - return std::make_pair("Range", field); -} - -inline std::pair -make_basic_authentication_header(const std::string &username, - const std::string &password) { - auto field = "Basic " + detail::base64_encode(username + ":" + password); - return std::make_pair("Authorization", field); -} - -// Request implementation -inline bool Request::has_header(const char *key) const { - return detail::has_header(headers, key); -} - -inline std::string Request::get_header_value(const char *key, size_t id) const { - return detail::get_header_value(headers, key, id, ""); -} - -inline size_t Request::get_header_value_count(const char *key) const { - auto r = headers.equal_range(key); - return std::distance(r.first, r.second); -} - -inline void Request::set_header(const char *key, const char *val) { - headers.emplace(key, val); -} - -inline void Request::set_header(const char *key, const std::string &val) { - headers.emplace(key, val); -} - -inline bool Request::has_param(const char *key) const { - return params.find(key) != params.end(); -} - -inline std::string Request::get_param_value(const char *key, size_t id) const { - auto it = params.find(key); - std::advance(it, id); - if (it != params.end()) { return it->second; } - return std::string(); -} - -inline size_t Request::get_param_value_count(const char *key) const { - auto r = params.equal_range(key); - return std::distance(r.first, r.second); -} - -inline bool Request::has_file(const char *key) const { - return files.find(key) != files.end(); -} 
- -inline MultipartFile Request::get_file_value(const char *key) const { - auto it = files.find(key); - if (it != files.end()) { return it->second; } - return MultipartFile(); -} - -// Response implementation -inline bool Response::has_header(const char *key) const { - return headers.find(key) != headers.end(); -} - -inline std::string Response::get_header_value(const char *key, - size_t id) const { - return detail::get_header_value(headers, key, id, ""); -} - -inline size_t Response::get_header_value_count(const char *key) const { - auto r = headers.equal_range(key); - return std::distance(r.first, r.second); -} - -inline void Response::set_header(const char *key, const char *val) { - headers.emplace(key, val); -} - -inline void Response::set_header(const char *key, const std::string &val) { - headers.emplace(key, val); -} - -inline void Response::set_redirect(const char *url) { - set_header("Location", url); - status = 302; -} - -inline void Response::set_content(const char *s, size_t n, - const char *content_type) { - body.assign(s, n); - set_header("Content-Type", content_type); -} - -inline void Response::set_content(const std::string &s, - const char *content_type) { - body = s; - set_header("Content-Type", content_type); -} - -inline void Response::set_content_provider( - size_t length, - std::function provider, - std::function resource_releaser) { - assert(length > 0); - content_length = length; - content_provider = [provider](size_t offset, size_t length, DataSink sink, - Done) { provider(offset, length, sink); }; - content_provider_resource_releaser = resource_releaser; -} - -inline void Response::set_chunked_content_provider( - std::function provider, - std::function resource_releaser) { - content_length = 0; - content_provider = [provider](size_t offset, size_t, DataSink sink, - Done done) { provider(offset, sink, done); }; - content_provider_resource_releaser = resource_releaser; -} - -// Rstream implementation -template -inline int 
Stream::write_format(const char *fmt, const Args &... args) { - std::array buf; - -#if defined(_MSC_VER) && _MSC_VER < 1900 - auto n = _snprintf_s(buf, bufsiz, buf.size() - 1, fmt, args...); -#else - auto n = snprintf(buf.data(), buf.size() - 1, fmt, args...); -#endif - if (n <= 0) { return n; } - - if (n >= static_cast(buf.size()) - 1) { - std::vector glowable_buf(buf.size()); - - while (n >= static_cast(glowable_buf.size() - 1)) { - glowable_buf.resize(glowable_buf.size() * 2); -#if defined(_MSC_VER) && _MSC_VER < 1900 - n = _snprintf_s(&glowable_buf[0], glowable_buf.size(), - glowable_buf.size() - 1, fmt, args...); -#else - n = snprintf(&glowable_buf[0], glowable_buf.size() - 1, fmt, args...); -#endif - } - return write(&glowable_buf[0], n); - } else { - return write(buf.data(), n); - } -} - -// Socket stream implementation -inline SocketStream::SocketStream(socket_t sock, time_t read_timeout_sec, - time_t read_timeout_usec) - : sock_(sock), read_timeout_sec_(read_timeout_sec), - read_timeout_usec_(read_timeout_usec) {} - -inline SocketStream::~SocketStream() {} - -inline int SocketStream::read(char *ptr, size_t size) { - if (detail::select_read(sock_, read_timeout_sec_, read_timeout_usec_) > 0) { - return recv(sock_, ptr, static_cast(size), 0); - } - return -1; -} - -inline int SocketStream::write(const char *ptr, size_t size) { - return send(sock_, ptr, static_cast(size), 0); -} - -inline int SocketStream::write(const char *ptr) { - return write(ptr, strlen(ptr)); -} - -inline int SocketStream::write(const std::string &s) { - return write(s.data(), s.size()); -} - -inline std::string SocketStream::get_remote_addr() const { - return detail::get_remote_addr(sock_); -} - -// Buffer stream implementation -inline int BufferStream::read(char *ptr, size_t size) { -#if defined(_MSC_VER) && _MSC_VER < 1900 - return static_cast(buffer._Copy_s(ptr, size, size)); -#else - return static_cast(buffer.copy(ptr, size)); -#endif -} - -inline int BufferStream::write(const char 
*ptr, size_t size) { - buffer.append(ptr, size); - return static_cast(size); -} - -inline int BufferStream::write(const char *ptr) { - return write(ptr, strlen(ptr)); -} - -inline int BufferStream::write(const std::string &s) { - return write(s.data(), s.size()); -} - -inline std::string BufferStream::get_remote_addr() const { return ""; } - -inline const std::string &BufferStream::get_buffer() const { return buffer; } - -// HTTP server implementation -inline Server::Server() - : keep_alive_max_count_(CPPHTTPLIB_KEEPALIVE_MAX_COUNT), - read_timeout_sec_(CPPHTTPLIB_READ_TIMEOUT_SECOND), - read_timeout_usec_(CPPHTTPLIB_READ_TIMEOUT_USECOND), - payload_max_length_(CPPHTTPLIB_PAYLOAD_MAX_LENGTH), is_running_(false), - svr_sock_(INVALID_SOCKET) { -#ifndef _WIN32 - signal(SIGPIPE, SIG_IGN); -#endif - new_task_queue = [] { -#if CPPHTTPLIB_THREAD_POOL_COUNT > 0 - return new ThreadPool(CPPHTTPLIB_THREAD_POOL_COUNT); -#elif CPPHTTPLIB_THREAD_POOL_COUNT == 0 - return new Threads(); -#else - return new NoThread(); -#endif - }; -} - -inline Server::~Server() {} - -inline Server &Server::Get(const char *pattern, Handler handler) { - get_handlers_.push_back(std::make_pair(std::regex(pattern), handler)); - return *this; -} - -inline Server &Server::Post(const char *pattern, Handler handler) { - post_handlers_.push_back(std::make_pair(std::regex(pattern), handler)); - return *this; -} - -inline Server &Server::Post(const char *pattern, - HandlerWithContentReader handler) { - post_handlers_for_content_reader.push_back( - std::make_pair(std::regex(pattern), handler)); - return *this; -} - -inline Server &Server::Put(const char *pattern, Handler handler) { - put_handlers_.push_back(std::make_pair(std::regex(pattern), handler)); - return *this; -} - -inline Server &Server::Put(const char *pattern, - HandlerWithContentReader handler) { - put_handlers_for_content_reader.push_back( - std::make_pair(std::regex(pattern), handler)); - return *this; -} - -inline Server &Server::Patch(const 
char *pattern, Handler handler) { - patch_handlers_.push_back(std::make_pair(std::regex(pattern), handler)); - return *this; -} - -inline Server &Server::Patch(const char *pattern, - HandlerWithContentReader handler) { - patch_handlers_for_content_reader.push_back( - std::make_pair(std::regex(pattern), handler)); - return *this; -} - -inline Server &Server::Delete(const char *pattern, Handler handler) { - delete_handlers_.push_back(std::make_pair(std::regex(pattern), handler)); - return *this; -} - -inline Server &Server::Options(const char *pattern, Handler handler) { - options_handlers_.push_back(std::make_pair(std::regex(pattern), handler)); - return *this; -} - -inline bool Server::set_base_dir(const char *dir, const char *mount_point) { - if (detail::is_dir(dir)) { - std::string mnt = mount_point ? mount_point : "/"; - if (!mnt.empty() && mnt[0] == '/') { - base_dirs_.emplace_back(mnt, dir); - return true; - } - } - return false; -} - -inline void Server::set_file_request_handler(Handler handler) { - file_request_handler_ = std::move(handler); -} - -inline void Server::set_error_handler(Handler handler) { - error_handler_ = std::move(handler); -} - -inline void Server::set_logger(Logger logger) { logger_ = std::move(logger); } - -inline void Server::set_keep_alive_max_count(size_t count) { - keep_alive_max_count_ = count; -} - -inline void Server::set_read_timeout(time_t sec, time_t usec) { - read_timeout_sec_ = sec; - read_timeout_usec_ = usec; -} - -inline void Server::set_payload_max_length(size_t length) { - payload_max_length_ = length; -} - -inline bool Server::bind_to_port(const char *host, int port, int socket_flags) { - if (bind_internal(host, port, socket_flags) < 0) return false; - return true; -} -inline int Server::bind_to_any_port(const char *host, int socket_flags) { - return bind_internal(host, 0, socket_flags); -} - -inline bool Server::listen_after_bind() { return listen_internal(); } - -inline bool Server::listen(const char *host, int port, 
int socket_flags) { - return bind_to_port(host, port, socket_flags) && listen_internal(); -} - -inline bool Server::is_running() const { return is_running_; } - -inline void Server::stop() { - if (is_running_) { - assert(svr_sock_ != INVALID_SOCKET); - std::atomic sock(svr_sock_.exchange(INVALID_SOCKET)); - detail::shutdown_socket(sock); - detail::close_socket(sock); - } -} - -inline bool Server::parse_request_line(const char *s, Request &req) { - static std::regex re( - "(GET|HEAD|POST|PUT|DELETE|CONNECT|OPTIONS|TRACE|PATCH|PRI) " - "(([^?]+)(?:\\?(.*?))?) (HTTP/1\\.[01])\r\n"); - - std::cmatch m; - if (std::regex_match(s, m, re)) { - req.version = std::string(m[5]); - req.method = std::string(m[1]); - req.target = std::string(m[2]); - req.path = detail::decode_url(m[3]); - - // Parse query text - auto len = std::distance(m[4].first, m[4].second); - if (len > 0) { detail::parse_query_text(m[4], req.params); } - - return true; - } - - return false; -} - -inline bool Server::write_response(Stream &strm, bool last_connection, - const Request &req, Response &res) { - assert(res.status != -1); - - if (400 <= res.status && error_handler_) { error_handler_(req, res); } - - // Response line - if (!strm.write_format("HTTP/1.1 %d %s\r\n", res.status, - detail::status_message(res.status))) { - return false; - } - - // Headers - if (last_connection || req.get_header_value("Connection") == "close") { - res.set_header("Connection", "close"); - } - - if (!last_connection && req.get_header_value("Connection") == "Keep-Alive") { - res.set_header("Connection", "Keep-Alive"); - } - - if (!res.has_header("Content-Type")) { - res.set_header("Content-Type", "text/plain"); - } - - if (!res.has_header("Accept-Ranges")) { - res.set_header("Accept-Ranges", "bytes"); - } - - std::string content_type; - std::string boundary; - - if (req.ranges.size() > 1) { - boundary = detail::make_multipart_data_boundary(); - - auto it = res.headers.find("Content-Type"); - if (it != res.headers.end()) { - 
content_type = it->second; - res.headers.erase(it); - } - - res.headers.emplace("Content-Type", - "multipart/byteranges; boundary=" + boundary); - } - - if (res.body.empty()) { - if (res.content_length > 0) { - size_t length = 0; - if (req.ranges.empty()) { - length = res.content_length; - } else if (req.ranges.size() == 1) { - auto offsets = - detail::get_range_offset_and_length(req, res.content_length, 0); - auto offset = offsets.first; - length = offsets.second; - auto content_range = detail::make_content_range_header_field( - offset, length, res.content_length); - res.set_header("Content-Range", content_range); - } else { - length = detail::get_multipart_ranges_data_length(req, res, boundary, - content_type); - } - res.set_header("Content-Length", std::to_string(length)); - } else { - if (res.content_provider) { - res.set_header("Transfer-Encoding", "chunked"); - } else { - res.set_header("Content-Length", "0"); - } - } - } else { - if (req.ranges.empty()) { - ; - } else if (req.ranges.size() == 1) { - auto offsets = - detail::get_range_offset_and_length(req, res.body.size(), 0); - auto offset = offsets.first; - auto length = offsets.second; - auto content_range = detail::make_content_range_header_field( - offset, length, res.body.size()); - res.set_header("Content-Range", content_range); - res.body = res.body.substr(offset, length); - } else { - res.body = - detail::make_multipart_ranges_data(req, res, boundary, content_type); - } - -#ifdef CPPHTTPLIB_ZLIB_SUPPORT - // TODO: 'Accpet-Encoding' has gzip, not gzip;q=0 - const auto &encodings = req.get_header_value("Accept-Encoding"); - if (encodings.find("gzip") != std::string::npos && - detail::can_compress(res.get_header_value("Content-Type"))) { - if (detail::compress(res.body)) { - res.set_header("Content-Encoding", "gzip"); - } - } -#endif - - auto length = std::to_string(res.body.size()); - res.set_header("Content-Length", length); - } - - if (!detail::write_headers(strm, res, Headers())) { return false; } 
- - // Body - if (req.method != "HEAD") { - if (!res.body.empty()) { - if (!strm.write(res.body)) { return false; } - } else if (res.content_provider) { - if (!write_content_with_provider(strm, req, res, boundary, - content_type)) { - return false; - } - } - } - - // Log - if (logger_) { logger_(req, res); } - - return true; -} - -inline bool -Server::write_content_with_provider(Stream &strm, const Request &req, - Response &res, const std::string &boundary, - const std::string &content_type) { - if (res.content_length) { - if (req.ranges.empty()) { - if (detail::write_content(strm, res.content_provider, 0, - res.content_length) < 0) { - return false; - } - } else if (req.ranges.size() == 1) { - auto offsets = - detail::get_range_offset_and_length(req, res.content_length, 0); - auto offset = offsets.first; - auto length = offsets.second; - if (detail::write_content(strm, res.content_provider, offset, length) < - 0) { - return false; - } - } else { - if (!detail::write_multipart_ranges_data(strm, req, res, boundary, - content_type)) { - return false; - } - } - } else { - if (detail::write_content_chunked(strm, res.content_provider) < 0) { - return false; - } - } - return true; -} - -inline bool Server::read_content(Stream &strm, bool last_connection, - Request &req, Response &res) { - if (!detail::read_content(strm, req, payload_max_length_, res.status, - Progress(), [&](const char *buf, size_t n) { - if (req.body.size() + n > req.body.max_size()) { - return false; - } - req.body.append(buf, n); - return true; - })) { - return write_response(strm, last_connection, req, res); - } - - const auto &content_type = req.get_header_value("Content-Type"); - - if (!content_type.find("application/x-www-form-urlencoded")) { - detail::parse_query_text(req.body, req.params); - } else if (!content_type.find("multipart/form-data")) { - std::string boundary; - if (!detail::parse_multipart_boundary(content_type, boundary) || - !detail::parse_multipart_formdata(boundary, req.body, 
req.files)) { - res.status = 400; - return write_response(strm, last_connection, req, res); - } - } - - return true; -} - -inline bool -Server::read_content_with_content_receiver(Stream &strm, bool last_connection, - Request &req, Response &res, - ContentReceiver receiver) { - if (!detail::read_content( - strm, req, payload_max_length_, res.status, Progress(), - [&](const char *buf, size_t n) { return receiver(buf, n); })) { - return write_response(strm, last_connection, req, res); - } - - return true; -} - -inline bool Server::handle_file_request(Request &req, Response &res) { - for (const auto& kv: base_dirs_) { - const auto& mount_point = kv.first; - const auto& base_dir = kv.second; - - // Prefix match - if (!req.path.find(mount_point)) { - std::string sub_path = "/" + req.path.substr(mount_point.size()); - if (detail::is_valid_path(sub_path)) { - auto path = base_dir + sub_path; - if (path.back() == '/') { path += "index.html"; } - - if (detail::is_file(path)) { - detail::read_file(path, res.body); - auto type = detail::find_content_type(path); - if (type) { res.set_header("Content-Type", type); } - res.status = 200; - if (file_request_handler_) { file_request_handler_(req, res); } - return true; - } - } - } - } - return false; -} - -inline socket_t Server::create_server_socket(const char *host, int port, - int socket_flags) const { - return detail::create_socket( - host, port, - [](socket_t sock, struct addrinfo &ai) -> bool { - if (::bind(sock, ai.ai_addr, static_cast(ai.ai_addrlen))) { - return false; - } - if (::listen(sock, 5)) { // Listen through 5 channels - return false; - } - return true; - }, - socket_flags); -} - -inline int Server::bind_internal(const char *host, int port, int socket_flags) { - if (!is_valid()) { return -1; } - - svr_sock_ = create_server_socket(host, port, socket_flags); - if (svr_sock_ == INVALID_SOCKET) { return -1; } - - if (port == 0) { - struct sockaddr_storage address; - socklen_t len = sizeof(address); - if 
(getsockname(svr_sock_, reinterpret_cast(&address), - &len) == -1) { - return -1; - } - if (address.ss_family == AF_INET) { - return ntohs(reinterpret_cast(&address)->sin_port); - } else if (address.ss_family == AF_INET6) { - return ntohs(reinterpret_cast(&address)->sin6_port); - } else { - return -1; - } - } else { - return port; - } -} - -inline bool Server::listen_internal() { - auto ret = true; - is_running_ = true; - - { - std::unique_ptr task_queue(new_task_queue()); - - for (;;) { - if (svr_sock_ == INVALID_SOCKET) { - // The server socket was closed by 'stop' method. - break; - } - - auto val = detail::select_read(svr_sock_, 0, 100000); - - if (val == 0) { // Timeout - continue; - } - - socket_t sock = accept(svr_sock_, nullptr, nullptr); - - if (sock == INVALID_SOCKET) { - if (errno == EMFILE) { - // The per-process limit of open file descriptors has been reached. - // Try to accept new connections after a short sleep. - std::this_thread::sleep_for(std::chrono::milliseconds(1)); - continue; - } - if (svr_sock_ != INVALID_SOCKET) { - detail::close_socket(svr_sock_); - ret = false; - } else { - ; // The server socket was closed by user. 
- } - break; - } - - task_queue->enqueue([=]() { process_and_close_socket(sock); }); - } - - task_queue->shutdown(); - } - - is_running_ = false; - return ret; -} - -inline bool Server::routing(Request &req, Response &res, Stream &strm, bool last_connection) { - // File handler - if (req.method == "GET" && handle_file_request(req, res)) { return true; } - - // Content reader handler - if (req.method == "POST" || req.method == "PUT" || req.method == "PATCH") { - ContentReader content_reader = [&](ContentReceiver receiver) { - return read_content_with_content_receiver(strm, last_connection, req, res, receiver); - }; - - if (req.method == "POST") { - if (dispatch_request_for_content_reader(req, res, content_reader, - post_handlers_for_content_reader)) { - return true; - } - } else if (req.method == "PUT") { - if (dispatch_request_for_content_reader(req, res, content_reader, - put_handlers_for_content_reader)) { - return true; - } - } else if (req.method == "PATCH") { - if (dispatch_request_for_content_reader( - req, res, content_reader, patch_handlers_for_content_reader)) { - return true; - } - } - } - - // Read content into `req.body` - if (req.method == "POST" || req.method == "PUT" || req.method == "PATCH" || req.method == "PRI") { - if (!read_content(strm, last_connection, req, res)) { - return false; - } - } - - // Regular handler - if (req.method == "GET" || req.method == "HEAD") { - return dispatch_request(req, res, get_handlers_); - } else if (req.method == "POST") { - return dispatch_request(req, res, post_handlers_); - } else if (req.method == "PUT") { - return dispatch_request(req, res, put_handlers_); - } else if (req.method == "DELETE") { - return dispatch_request(req, res, delete_handlers_); - } else if (req.method == "OPTIONS") { - return dispatch_request(req, res, options_handlers_); - } else if (req.method == "PATCH") { - return dispatch_request(req, res, patch_handlers_); - } - - res.status = 400; - return false; -} - -inline bool 
Server::dispatch_request(Request &req, Response &res, - Handlers &handlers) { - for (const auto &x : handlers) { - const auto &pattern = x.first; - const auto &handler = x.second; - - if (std::regex_match(req.path, req.matches, pattern)) { - handler(req, res); - return true; - } - } - return false; -} - -inline bool -Server::dispatch_request_for_content_reader(Request &req, Response &res, - ContentReader content_reader, - HandersForContentReader &handlers) { - for (const auto &x : handlers) { - const auto &pattern = x.first; - const auto &handler = x.second; - - if (std::regex_match(req.path, req.matches, pattern)) { - handler(req, res, content_reader); - return true; - } - } - return false; -} - -inline bool -Server::process_request(Stream &strm, bool last_connection, - bool &connection_close, - const std::function& setup_request) { - std::array buf{}; - - detail::stream_line_reader line_reader(strm, buf.data(), buf.size()); - - // Connection has been closed on client - if (!line_reader.getline()) { return false; } - - Request req; - Response res; - - res.version = "HTTP/1.1"; - - // Check if the request URI doesn't exceed the limit - if (line_reader.size() > CPPHTTPLIB_REQUEST_URI_MAX_LENGTH) { - Headers dummy; - detail::read_headers(strm, dummy); - res.status = 414; - return write_response(strm, last_connection, req, res); - } - - // Request line and headers - if (!parse_request_line(line_reader.ptr(), req) || - !detail::read_headers(strm, req.headers)) { - res.status = 400; - return write_response(strm, last_connection, req, res); - } - - if (req.get_header_value("Connection") == "close") { - connection_close = true; - } - - if (req.version == "HTTP/1.0" && - req.get_header_value("Connection") != "Keep-Alive") { - connection_close = true; - } - - req.set_header("REMOTE_ADDR", strm.get_remote_addr()); - - if (req.has_header("Range")) { - const auto &range_header_value = req.get_header_value("Range"); - if (!detail::parse_range_header(range_header_value, 
req.ranges)) { - // TODO: error - } - } - - if (setup_request) { setup_request(req); } - - // Rounting - if (routing(req, res, strm, last_connection)) { - if (res.status == -1) { res.status = req.ranges.empty() ? 200 : 206; } - } else { - if (res.status == -1) { res.status = 404; } - } - - return write_response(strm, last_connection, req, res); -} - -inline bool Server::is_valid() const { return true; } - -inline bool Server::process_and_close_socket(socket_t sock) { - return detail::process_and_close_socket( - false, sock, keep_alive_max_count_, read_timeout_sec_, read_timeout_usec_, - [this](Stream &strm, bool last_connection, bool &connection_close) { - return process_request(strm, last_connection, connection_close, - nullptr); - }); -} - -// HTTP client implementation -inline Client::Client(const char *host, int port, time_t timeout_sec) - : host_(host), port_(port), timeout_sec_(timeout_sec), - host_and_port_(host_ + ":" + std::to_string(port_)), - keep_alive_max_count_(CPPHTTPLIB_KEEPALIVE_MAX_COUNT), - read_timeout_sec_(CPPHTTPLIB_READ_TIMEOUT_SECOND), - read_timeout_usec_(CPPHTTPLIB_READ_TIMEOUT_USECOND), - follow_location_(false) {} - -inline Client::~Client() {} - -inline bool Client::is_valid() const { return true; } - -inline socket_t Client::create_client_socket() const { - return detail::create_socket( - host_.c_str(), port_, [=](socket_t sock, struct addrinfo &ai) -> bool { - detail::set_nonblocking(sock, true); - - auto ret = connect(sock, ai.ai_addr, static_cast(ai.ai_addrlen)); - if (ret < 0) { - if (detail::is_connection_error() || - !detail::wait_until_socket_is_ready(sock, timeout_sec_, 0)) { - detail::close_socket(sock); - return false; - } - } - - detail::set_nonblocking(sock, false); - return true; - }); -} - -inline bool Client::read_response_line(Stream &strm, Response &res) { - std::array buf; - - detail::stream_line_reader line_reader(strm, buf.data(), buf.size()); - - if (!line_reader.getline()) { return false; } - - const static 
std::regex re("(HTTP/1\\.[01]) (\\d+?) .*\r\n"); - - std::cmatch m; - if (std::regex_match(line_reader.ptr(), m, re)) { - res.version = std::string(m[1]); - res.status = std::stoi(std::string(m[2])); - } - - return true; -} - -inline bool Client::send(const Request &req, Response &res) { - if (req.path.empty()) { return false; } - - auto sock = create_client_socket(); - if (sock == INVALID_SOCKET) { return false; } - - auto ret = process_and_close_socket( - sock, 1, [&](Stream &strm, bool last_connection, bool &connection_close) { - return process_request(strm, req, res, last_connection, - connection_close); - }); - - if (ret && follow_location_ && (300 < res.status && res.status < 400)) { - ret = redirect(req, res); - } - - return ret; -} - -inline bool Client::send(const std::vector &requests, - std::vector &responses) { - size_t i = 0; - while (i < requests.size()) { - auto sock = create_client_socket(); - if (sock == INVALID_SOCKET) { return false; } - - if (!process_and_close_socket( - sock, requests.size() - i, - [&](Stream &strm, bool last_connection, - bool &connection_close) -> bool { - auto &req = requests[i]; - auto res = Response(); - i++; - - if (req.path.empty()) { return false; } - auto ret = process_request(strm, req, res, last_connection, - connection_close); - - if (ret && follow_location_ && - (300 < res.status && res.status < 400)) { - ret = redirect(req, res); - } - - if (ret) { responses.emplace_back(std::move(res)); } - - return ret; - })) { - return false; - } - } - - return true; -} - -inline bool Client::redirect(const Request &req, Response &res) { - if (req.redirect_count == 0) { return false; } - - auto location = res.get_header_value("location"); - if (location.empty()) { return false; } - - std::regex re( - R"(^(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*(?:\?[^#]*)?)(?:#.*)?)"); - - auto scheme = is_ssl() ? 
"https" : "http"; - - std::smatch m; - if (regex_match(location, m, re)) { - auto next_scheme = m[1].str(); - auto next_host = m[2].str(); - auto next_path = m[3].str(); - if (next_host.empty()) { next_host = host_; } - if (next_path.empty()) { next_path = "/"; } - - if (next_scheme == scheme && next_host == host_) { - return detail::redirect(*this, req, res, next_path); - } else { - if (next_scheme == "https") { -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT - SSLClient cli(next_host.c_str()); - cli.follow_location(true); - return detail::redirect(cli, req, res, next_path); -#else - return false; -#endif - } else { - Client cli(next_host.c_str()); - cli.follow_location(true); - return detail::redirect(cli, req, res, next_path); - } - } - } - return false; -} - -inline void Client::write_request(Stream &strm, const Request &req, - bool last_connection) { - BufferStream bstrm; - - // Request line - auto path = detail::encode_url(req.path); - - bstrm.write_format("%s %s HTTP/1.1\r\n", req.method.c_str(), path.c_str()); - - // Additonal headers - Headers headers; - if (last_connection) { headers.emplace("Connection", "close"); } - - if (!req.has_header("Host")) { - if (is_ssl()) { - if (port_ == 443) { - headers.emplace("Host", host_); - } else { - headers.emplace("Host", host_and_port_); - } - } else { - if (port_ == 80) { - headers.emplace("Host", host_); - } else { - headers.emplace("Host", host_and_port_); - } - } - } - - if (!req.has_header("Accept")) { headers.emplace("Accept", "*/*"); } - - if (!req.has_header("User-Agent")) { - headers.emplace("User-Agent", "cpp-httplib/0.2"); - } - - if (req.body.empty()) { - if (req.content_provider) { - auto length = std::to_string(req.content_length); - headers.emplace("Content-Length", length); - } else { - headers.emplace("Content-Length", "0"); - } - } else { - if (!req.has_header("Content-Type")) { - headers.emplace("Content-Type", "text/plain"); - } - - if (!req.has_header("Content-Length")) { - auto length = 
std::to_string(req.body.size()); - headers.emplace("Content-Length", length); - } - } - - detail::write_headers(bstrm, req, headers); - - // Flush buffer - auto &data = bstrm.get_buffer(); - strm.write(data.data(), data.size()); - - // Body - if (req.body.empty()) { - if (req.content_provider) { - size_t offset = 0; - size_t end_offset = req.content_length; - while (offset < end_offset) { - req.content_provider(offset, end_offset - offset, - [&](const char *d, size_t l) { - auto written_length = strm.write(d, l); - offset += written_length; - }); - } - } - } else { - strm.write(req.body); - } -} - -inline std::shared_ptr Client::send_with_content_provider( - const char *method, const char *path, const Headers &headers, - const std::string &body, size_t content_length, - ContentProvider content_provider, const char *content_type, bool compress) { -#ifndef CPPHTTPLIB_ZLIB_SUPPORT - (void)compress; -#endif - - Request req; - req.method = method; - req.headers = headers; - req.path = path; - - req.headers.emplace("Content-Type", content_type); - -#ifdef CPPHTTPLIB_ZLIB_SUPPORT - if (compress) { - if (content_provider) { - size_t offset = 0; - while (offset < content_length) { - content_provider(offset, content_length - offset, - [&](const char *data, size_t data_len) { - req.body.append(data, data_len); - offset += data_len; - }); - } - } else { - req.body = body; - } - - if (!detail::compress(req.body)) { return nullptr; } - req.headers.emplace("Content-Encoding", "gzip"); - } else -#endif - { - if (content_provider) { - req.content_length = content_length; - req.content_provider = content_provider; - } else { - req.body = body; - } - } - - auto res = std::make_shared(); - - return send(req, *res) ? 
res : nullptr; -} - -inline bool Client::process_request(Stream &strm, const Request &req, - Response &res, bool last_connection, - bool &connection_close) { - // Send request - write_request(strm, req, last_connection); - - // Receive response and headers - if (!read_response_line(strm, res) || - !detail::read_headers(strm, res.headers)) { - return false; - } - - if (res.get_header_value("Connection") == "close" || - res.version == "HTTP/1.0") { - connection_close = true; - } - - if (req.response_handler) { - if (!req.response_handler(res)) { return false; } - } - - // Body - if (req.method != "HEAD") { - ContentReceiver out = [&](const char *buf, size_t n) { - if (res.body.size() + n > res.body.max_size()) { return false; } - res.body.append(buf, n); - return true; - }; - - if (req.content_receiver) { - out = [&](const char *buf, size_t n) { - return req.content_receiver(buf, n); - }; - } - - int dummy_status; - if (!detail::read_content(strm, res, std::numeric_limits::max(), - dummy_status, req.progress, out)) { - return false; - } - } - - return true; -} - -inline bool Client::process_and_close_socket( - socket_t sock, size_t request_count, - std::function - callback) { - request_count = std::min(request_count, keep_alive_max_count_); - return detail::process_and_close_socket(true, sock, request_count, - read_timeout_sec_, read_timeout_usec_, - callback); -} - -inline bool Client::is_ssl() const { return false; } - -inline std::shared_ptr Client::Get(const char *path) { - Progress dummy; - return Get(path, Headers(), dummy); -} - -inline std::shared_ptr Client::Get(const char *path, - Progress progress) { - return Get(path, Headers(), std::move(progress)); -} - -inline std::shared_ptr Client::Get(const char *path, - const Headers &headers) { - Progress dummy; - return Get(path, headers, dummy); -} - -inline std::shared_ptr -Client::Get(const char *path, const Headers &headers, Progress progress) { - Request req; - req.method = "GET"; - req.path = path; - 
req.headers = headers; - req.progress = std::move(progress); - - auto res = std::make_shared(); - return send(req, *res) ? res : nullptr; -} - -inline std::shared_ptr Client::Get(const char *path, - ContentReceiver content_receiver) { - Progress dummy; - return Get(path, Headers(), nullptr, std::move(content_receiver), dummy); -} - -inline std::shared_ptr Client::Get(const char *path, - ContentReceiver content_receiver, - Progress progress) { - return Get(path, Headers(), nullptr, std::move(content_receiver), progress); -} - -inline std::shared_ptr Client::Get(const char *path, - const Headers &headers, - ContentReceiver content_receiver) { - Progress dummy; - return Get(path, headers, nullptr, std::move(content_receiver), dummy); -} - -inline std::shared_ptr Client::Get(const char *path, - const Headers &headers, - ContentReceiver content_receiver, - Progress progress) { - return Get(path, headers, nullptr, std::move(content_receiver), progress); -} - -inline std::shared_ptr Client::Get(const char *path, - const Headers &headers, - ResponseHandler response_handler, - ContentReceiver content_receiver) { - Progress dummy; - return Get(path, headers, std::move(response_handler), content_receiver, dummy); -} - -inline std::shared_ptr Client::Get(const char *path, - const Headers &headers, - ResponseHandler response_handler, - ContentReceiver content_receiver, - Progress progress) { - Request req; - req.method = "GET"; - req.path = path; - req.headers = headers; - req.response_handler = std::move(response_handler); - req.content_receiver = std::move(content_receiver); - req.progress = std::move(progress); - - auto res = std::make_shared(); - return send(req, *res) ? 
res : nullptr; -} - -inline std::shared_ptr Client::Head(const char *path) { - return Head(path, Headers()); -} - -inline std::shared_ptr Client::Head(const char *path, - const Headers &headers) { - Request req; - req.method = "HEAD"; - req.headers = headers; - req.path = path; - - auto res = std::make_shared(); - - return send(req, *res) ? res : nullptr; -} - -inline std::shared_ptr Client::Post(const char *path, - const std::string &body, - const char *content_type, - bool compress) { - return Post(path, Headers(), body, content_type, compress); -} - -inline std::shared_ptr -Client::Post(const char *path, const Headers &headers, const std::string &body, - const char *content_type, bool compress) { - return send_with_content_provider("POST", path, headers, body, 0, nullptr, - content_type, compress); -} - -inline std::shared_ptr -Client::Post(const char *path, const Params ¶ms, bool compress) { - return Post(path, Headers(), params, compress); -} - -inline std::shared_ptr Client::Post(const char *path, - size_t content_length, - ContentProvider content_provider, - const char *content_type, - bool compress) { - return Post(path, Headers(), content_length, content_provider, content_type, - compress); -} - -inline std::shared_ptr -Client::Post(const char *path, const Headers &headers, size_t content_length, - ContentProvider content_provider, const char *content_type, - bool compress) { - return send_with_content_provider("POST", path, headers, std::string(), - content_length, content_provider, - content_type, compress); -} - -inline std::shared_ptr Client::Post(const char *path, - const Headers &headers, - const Params ¶ms, - bool compress) { - std::string query; - for (auto it = params.begin(); it != params.end(); ++it) { - if (it != params.begin()) { query += "&"; } - query += it->first; - query += "="; - query += detail::encode_url(it->second); - } - - return Post(path, headers, query, "application/x-www-form-urlencoded", - compress); -} - -inline std::shared_ptr 
-Client::Post(const char *path, const MultipartFormDataItems &items, - bool compress) { - return Post(path, Headers(), items, compress); -} - -inline std::shared_ptr -Client::Post(const char *path, const Headers &headers, - const MultipartFormDataItems &items, bool compress) { - auto boundary = detail::make_multipart_data_boundary(); - - std::string body; - - for (const auto &item : items) { - body += "--" + boundary + "\r\n"; - body += "Content-Disposition: form-data; name=\"" + item.name + "\""; - if (!item.filename.empty()) { - body += "; filename=\"" + item.filename + "\""; - } - body += "\r\n"; - if (!item.content_type.empty()) { - body += "Content-Type: " + item.content_type + "\r\n"; - } - body += "\r\n"; - body += item.content + "\r\n"; - } - - body += "--" + boundary + "--\r\n"; - - std::string content_type = "multipart/form-data; boundary=" + boundary; - return Post(path, headers, body, content_type.c_str(), compress); -} - -inline std::shared_ptr Client::Put(const char *path, - const std::string &body, - const char *content_type, - bool compress) { - return Put(path, Headers(), body, content_type, compress); -} - -inline std::shared_ptr -Client::Put(const char *path, const Headers &headers, const std::string &body, - const char *content_type, bool compress) { - return send_with_content_provider("PUT", path, headers, body, 0, nullptr, - content_type, compress); -} - -inline std::shared_ptr Client::Put(const char *path, - size_t content_length, - ContentProvider content_provider, - const char *content_type, - bool compress) { - return Put(path, Headers(), content_length, content_provider, content_type, - compress); -} - -inline std::shared_ptr -Client::Put(const char *path, const Headers &headers, size_t content_length, - ContentProvider content_provider, const char *content_type, - bool compress) { - return send_with_content_provider("PUT", path, headers, std::string(), - content_length, content_provider, - content_type, compress); -} - -inline 
std::shared_ptr Client::Patch(const char *path, - const std::string &body, - const char *content_type, - bool compress) { - return Patch(path, Headers(), body, content_type, compress); -} - -inline std::shared_ptr -Client::Patch(const char *path, const Headers &headers, const std::string &body, - const char *content_type, bool compress) { - return send_with_content_provider("PATCH", path, headers, body, 0, nullptr, - content_type, compress); -} - -inline std::shared_ptr Client::Patch(const char *path, - size_t content_length, - ContentProvider content_provider, - const char *content_type, - bool compress) { - return Patch(path, Headers(), content_length, content_provider, content_type, - compress); -} - -inline std::shared_ptr -Client::Patch(const char *path, const Headers &headers, size_t content_length, - ContentProvider content_provider, const char *content_type, - bool compress) { - return send_with_content_provider("PATCH", path, headers, std::string(), - content_length, content_provider, - content_type, compress); -} - -inline std::shared_ptr Client::Delete(const char *path) { - return Delete(path, Headers(), std::string(), nullptr); -} - -inline std::shared_ptr Client::Delete(const char *path, - const std::string &body, - const char *content_type) { - return Delete(path, Headers(), body, content_type); -} - -inline std::shared_ptr Client::Delete(const char *path, - const Headers &headers) { - return Delete(path, headers, std::string(), nullptr); -} - -inline std::shared_ptr Client::Delete(const char *path, - const Headers &headers, - const std::string &body, - const char *content_type) { - Request req; - req.method = "DELETE"; - req.headers = headers; - req.path = path; - - if (content_type) { req.headers.emplace("Content-Type", content_type); } - req.body = body; - - auto res = std::make_shared(); - - return send(req, *res) ? 
res : nullptr; -} - -inline std::shared_ptr Client::Options(const char *path) { - return Options(path, Headers()); -} - -inline std::shared_ptr Client::Options(const char *path, - const Headers &headers) { - Request req; - req.method = "OPTIONS"; - req.path = path; - req.headers = headers; - - auto res = std::make_shared(); - - return send(req, *res) ? res : nullptr; -} - -inline void Client::set_keep_alive_max_count(size_t count) { - keep_alive_max_count_ = count; -} - -inline void Client::set_read_timeout(time_t sec, time_t usec) { - read_timeout_sec_ = sec; - read_timeout_usec_ = usec; -} - -inline void Client::follow_location(bool on) { follow_location_ = on; } - -/* - * SSL Implementation - */ -#ifdef CPPHTTPLIB_OPENSSL_SUPPORT -namespace detail { - -template -inline bool process_and_close_socket_ssl( - bool is_client_request, socket_t sock, size_t keep_alive_max_count, - time_t read_timeout_sec, time_t read_timeout_usec, SSL_CTX *ctx, - std::mutex &ctx_mutex, U SSL_connect_or_accept, V setup, T callback) { - assert(keep_alive_max_count > 0); - - SSL *ssl = nullptr; - { - std::lock_guard guard(ctx_mutex); - ssl = SSL_new(ctx); - } - - if (!ssl) { - close_socket(sock); - return false; - } - - auto bio = BIO_new_socket(static_cast(sock), BIO_NOCLOSE); - SSL_set_bio(ssl, bio, bio); - - if (!setup(ssl)) { - SSL_shutdown(ssl); - { - std::lock_guard guard(ctx_mutex); - SSL_free(ssl); - } - - close_socket(sock); - return false; - } - - bool ret = false; - - if (SSL_connect_or_accept(ssl) == 1) { - if (keep_alive_max_count > 1) { - auto count = keep_alive_max_count; - while (count > 0 && - (is_client_request || - detail::select_read(sock, CPPHTTPLIB_KEEPALIVE_TIMEOUT_SECOND, - CPPHTTPLIB_KEEPALIVE_TIMEOUT_USECOND) > 0)) { - SSLSocketStream strm(sock, ssl, read_timeout_sec, read_timeout_usec); - auto last_connection = count == 1; - auto connection_close = false; - - ret = callback(ssl, strm, last_connection, connection_close); - if (!ret || connection_close) { break; } 
- - count--; - } - } else { - SSLSocketStream strm(sock, ssl, read_timeout_sec, read_timeout_usec); - auto dummy_connection_close = false; - ret = callback(ssl, strm, true, dummy_connection_close); - } - } - - SSL_shutdown(ssl); - { - std::lock_guard guard(ctx_mutex); - SSL_free(ssl); - } - - close_socket(sock); - - return ret; -} - -#if OPENSSL_VERSION_NUMBER < 0x10100000L -static std::shared_ptr> openSSL_locks_; - -class SSLThreadLocks { -public: - SSLThreadLocks() { - openSSL_locks_ = - std::make_shared>(CRYPTO_num_locks()); - CRYPTO_set_locking_callback(locking_callback); - } - - ~SSLThreadLocks() { CRYPTO_set_locking_callback(nullptr); } - -private: - static void locking_callback(int mode, int type, const char * /*file*/, - int /*line*/) { - auto &locks = *openSSL_locks_; - if (mode & CRYPTO_LOCK) { - locks[type].lock(); - } else { - locks[type].unlock(); - } - } -}; - -#endif - -class SSLInit { -public: - SSLInit() { -#if OPENSSL_VERSION_NUMBER < 0x1010001fL - SSL_load_error_strings(); - SSL_library_init(); -#else - OPENSSL_init_ssl( - OPENSSL_INIT_LOAD_SSL_STRINGS | OPENSSL_INIT_LOAD_CRYPTO_STRINGS, NULL); -#endif - } - - ~SSLInit() { -#if OPENSSL_VERSION_NUMBER < 0x1010001fL - ERR_free_strings(); -#endif - } - -private: -#if OPENSSL_VERSION_NUMBER < 0x10100000L - SSLThreadLocks thread_init_; -#endif -}; - -static SSLInit sslinit_; - -} // namespace detail - -// SSL socket stream implementation -inline SSLSocketStream::SSLSocketStream(socket_t sock, SSL *ssl, - time_t read_timeout_sec, - time_t read_timeout_usec) - : sock_(sock), ssl_(ssl), read_timeout_sec_(read_timeout_sec), - read_timeout_usec_(read_timeout_usec) {} - -inline SSLSocketStream::~SSLSocketStream() {} - -inline int SSLSocketStream::read(char *ptr, size_t size) { - if (SSL_pending(ssl_) > 0 || - detail::select_read(sock_, read_timeout_sec_, read_timeout_usec_) > 0) { - return SSL_read(ssl_, ptr, static_cast(size)); - } - return -1; -} - -inline int SSLSocketStream::write(const char *ptr, 
size_t size) { - return SSL_write(ssl_, ptr, static_cast(size)); -} - -inline int SSLSocketStream::write(const char *ptr) { - return write(ptr, strlen(ptr)); -} - -inline int SSLSocketStream::write(const std::string &s) { - return write(s.data(), s.size()); -} - -inline std::string SSLSocketStream::get_remote_addr() const { - return detail::get_remote_addr(sock_); -} - -// SSL HTTP server implementation -inline SSLServer::SSLServer(const char *cert_path, const char *private_key_path, - const char *client_ca_cert_file_path, - const char *client_ca_cert_dir_path) { - ctx_ = SSL_CTX_new(SSLv23_server_method()); - - if (ctx_) { - SSL_CTX_set_options(ctx_, - SSL_OP_ALL | SSL_OP_NO_SSLv2 | SSL_OP_NO_SSLv3 | - SSL_OP_NO_COMPRESSION | - SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION); - - // auto ecdh = EC_KEY_new_by_curve_name(NID_X9_62_prime256v1); - // SSL_CTX_set_tmp_ecdh(ctx_, ecdh); - // EC_KEY_free(ecdh); - - if (SSL_CTX_use_certificate_chain_file(ctx_, cert_path) != 1 || - SSL_CTX_use_PrivateKey_file(ctx_, private_key_path, SSL_FILETYPE_PEM) != - 1) { - SSL_CTX_free(ctx_); - ctx_ = nullptr; - } else if (client_ca_cert_file_path || client_ca_cert_dir_path) { - // if (client_ca_cert_file_path) { - // auto list = SSL_load_client_CA_file(client_ca_cert_file_path); - // SSL_CTX_set_client_CA_list(ctx_, list); - // } - - SSL_CTX_load_verify_locations(ctx_, client_ca_cert_file_path, - client_ca_cert_dir_path); - - SSL_CTX_set_verify( - ctx_, - SSL_VERIFY_PEER | - SSL_VERIFY_FAIL_IF_NO_PEER_CERT, // SSL_VERIFY_CLIENT_ONCE, - nullptr); - } - } -} - -inline SSLServer::~SSLServer() { - if (ctx_) { SSL_CTX_free(ctx_); } -} - -inline bool SSLServer::is_valid() const { return ctx_; } - -inline bool SSLServer::process_and_close_socket(socket_t sock) { - return detail::process_and_close_socket_ssl( - false, sock, keep_alive_max_count_, read_timeout_sec_, read_timeout_usec_, - ctx_, ctx_mutex_, SSL_accept, [](SSL * /*ssl*/) { return true; }, - [this](SSL *ssl, Stream &strm, bool 
last_connection, - bool &connection_close) { - return process_request(strm, last_connection, connection_close, - [&](Request &req) { req.ssl = ssl; }); - }); -} - -// SSL HTTP client implementation -inline SSLClient::SSLClient(const char *host, int port, time_t timeout_sec, - const char *client_cert_path, - const char *client_key_path) - : Client(host, port, timeout_sec) { - ctx_ = SSL_CTX_new(SSLv23_client_method()); - - detail::split(&host_[0], &host_[host_.size()], '.', - [&](const char *b, const char *e) { - host_components_.emplace_back(std::string(b, e)); - }); - if (client_cert_path && client_key_path) { - if (SSL_CTX_use_certificate_file(ctx_, client_cert_path, - SSL_FILETYPE_PEM) != 1 || - SSL_CTX_use_PrivateKey_file(ctx_, client_key_path, SSL_FILETYPE_PEM) != - 1) { - SSL_CTX_free(ctx_); - ctx_ = nullptr; - } - } -} - -inline SSLClient::~SSLClient() { - if (ctx_) { SSL_CTX_free(ctx_); } -} - -inline bool SSLClient::is_valid() const { return ctx_; } - -inline void SSLClient::set_ca_cert_path(const char *ca_cert_file_path, - const char *ca_cert_dir_path) { - if (ca_cert_file_path) { ca_cert_file_path_ = ca_cert_file_path; } - if (ca_cert_dir_path) { ca_cert_dir_path_ = ca_cert_dir_path; } -} - -inline void SSLClient::enable_server_certificate_verification(bool enabled) { - server_certificate_verification_ = enabled; -} - -inline long SSLClient::get_openssl_verify_result() const { - return verify_result_; -} - -inline SSL_CTX *SSLClient::ssl_context() const noexcept { return ctx_; } - -inline bool SSLClient::process_and_close_socket( - socket_t sock, size_t request_count, - std::function - callback) { - - request_count = std::min(request_count, keep_alive_max_count_); - - return is_valid() && - detail::process_and_close_socket_ssl( - true, sock, request_count, read_timeout_sec_, read_timeout_usec_, - ctx_, ctx_mutex_, - [&](SSL *ssl) { - if (ca_cert_file_path_.empty()) { - SSL_CTX_set_verify(ctx_, SSL_VERIFY_NONE, nullptr); - } else { - if 
(!SSL_CTX_load_verify_locations( - ctx_, ca_cert_file_path_.c_str(), nullptr)) { - return false; - } - SSL_CTX_set_verify(ctx_, SSL_VERIFY_PEER, nullptr); - } - - if (SSL_connect(ssl) != 1) { return false; } - - if (server_certificate_verification_) { - verify_result_ = SSL_get_verify_result(ssl); - - if (verify_result_ != X509_V_OK) { return false; } - - auto server_cert = SSL_get_peer_certificate(ssl); - - if (server_cert == nullptr) { return false; } - - if (!verify_host(server_cert)) { - X509_free(server_cert); - return false; - } - X509_free(server_cert); - } - - return true; - }, - [&](SSL *ssl) { - SSL_set_tlsext_host_name(ssl, host_.c_str()); - return true; - }, - [&](SSL * /*ssl*/, Stream &strm, bool last_connection, - bool &connection_close) { - return callback(strm, last_connection, connection_close); - }); -} - -inline bool SSLClient::is_ssl() const { return true; } - -inline bool SSLClient::verify_host(X509 *server_cert) const { - /* Quote from RFC2818 section 3.1 "Server Identity" - - If a subjectAltName extension of type dNSName is present, that MUST - be used as the identity. Otherwise, the (most specific) Common Name - field in the Subject field of the certificate MUST be used. Although - the use of the Common Name is existing practice, it is deprecated and - Certification Authorities are encouraged to use the dNSName instead. - - Matching is performed using the matching rules specified by - [RFC2459]. If more than one identity of a given type is present in - the certificate (e.g., more than one dNSName name, a match in any one - of the set is considered acceptable.) Names may contain the wildcard - character * which is considered to match any single domain name - component or component fragment. E.g., *.a.com matches foo.a.com but - not bar.foo.a.com. f*.com matches foo.com but not bar.com. - - In some cases, the URI is specified as an IP address rather than a - hostname. 
In this case, the iPAddress subjectAltName must be present - in the certificate and must exactly match the IP in the URI. - - */ - return verify_host_with_subject_alt_name(server_cert) || - verify_host_with_common_name(server_cert); -} - -inline bool -SSLClient::verify_host_with_subject_alt_name(X509 *server_cert) const { - auto ret = false; - - auto type = GEN_DNS; - - struct in6_addr addr6; - struct in_addr addr; - size_t addr_len = 0; - -#ifndef __MINGW32__ - if (inet_pton(AF_INET6, host_.c_str(), &addr6)) { - type = GEN_IPADD; - addr_len = sizeof(struct in6_addr); - } else if (inet_pton(AF_INET, host_.c_str(), &addr)) { - type = GEN_IPADD; - addr_len = sizeof(struct in_addr); - } -#endif - - auto alt_names = static_cast( - X509_get_ext_d2i(server_cert, NID_subject_alt_name, nullptr, nullptr)); - - if (alt_names) { - auto dsn_matched = false; - auto ip_mached = false; - - auto count = sk_GENERAL_NAME_num(alt_names); - - for (auto i = 0; i < count && !dsn_matched; i++) { - auto val = sk_GENERAL_NAME_value(alt_names, i); - if (val->type == type) { - auto name = (const char *)ASN1_STRING_get0_data(val->d.ia5); - auto name_len = (size_t)ASN1_STRING_length(val->d.ia5); - - if (strlen(name) == name_len) { - switch (type) { - case GEN_DNS: dsn_matched = check_host_name(name, name_len); break; - - case GEN_IPADD: - if (!memcmp(&addr6, name, addr_len) || - !memcmp(&addr, name, addr_len)) { - ip_mached = true; - } - break; - } - } - } - } - - if (dsn_matched || ip_mached) { ret = true; } - } - - GENERAL_NAMES_free((STACK_OF(GENERAL_NAME) *)alt_names); - - return ret; -} - -inline bool SSLClient::verify_host_with_common_name(X509 *server_cert) const { - const auto subject_name = X509_get_subject_name(server_cert); - - if (subject_name != nullptr) { - char name[BUFSIZ]; - auto name_len = X509_NAME_get_text_by_NID(subject_name, NID_commonName, - name, sizeof(name)); - - if (name_len != -1) { return check_host_name(name, name_len); } - } - - return false; -} - -inline bool 
SSLClient::check_host_name(const char *pattern, - size_t pattern_len) const { - if (host_.size() == pattern_len && host_ == pattern) { return true; } - - // Wildcard match - // https://bugs.launchpad.net/ubuntu/+source/firefox-3.0/+bug/376484 - std::vector pattern_components; - detail::split(&pattern[0], &pattern[pattern_len], '.', - [&](const char *b, const char *e) { - pattern_components.emplace_back(std::string(b, e)); - }); - - if (host_components_.size() != pattern_components.size()) { return false; } - - auto itr = pattern_components.begin(); - for (const auto &h : host_components_) { - auto &p = *itr; - if (p != h && p != "*") { - auto partial_match = (p.size() > 0 && p[p.size() - 1] == '*' && - !p.compare(0, p.size() - 1, h)); - if (!partial_match) { return false; } - } - ++itr; - } - - return true; -} -#endif - -} // namespace httplib - -#endif // CPPHTTPLIB_HTTPLIB_H diff --git a/src/Detector/darknet/src/im2col.c b/src/Detector/darknet/src/im2col.c deleted file mode 100644 index 4951f8f90..000000000 --- a/src/Detector/darknet/src/im2col.c +++ /dev/null @@ -1,93 +0,0 @@ -#include "im2col.h" -#include -float im2col_get_pixel(float *im, int height, int width, int channels, - int row, int col, int channel, int pad) -{ - row -= pad; - col -= pad; - - if (row < 0 || col < 0 || - row >= height || col >= width) return 0; - return im[col + width*(row + height*channel)]; -} - -//From Berkeley Vision's Caffe! 
-//https://github.com/BVLC/caffe/blob/master/LICENSE -void im2col_cpu(float* data_im, - int channels, int height, int width, - int ksize, int stride, int pad, float* data_col) -{ - int c,h,w; - int height_col = (height + 2*pad - ksize) / stride + 1; - int width_col = (width + 2*pad - ksize) / stride + 1; - - int channels_col = channels * ksize * ksize; - for (c = 0; c < channels_col; ++c) { - int w_offset = c % ksize; - int h_offset = (c / ksize) % ksize; - int c_im = c / ksize / ksize; - for (h = 0; h < height_col; ++h) { - for (w = 0; w < width_col; ++w) { - int im_row = h_offset + h * stride; - int im_col = w_offset + w * stride; - int col_index = (c * height_col + h) * width_col + w; - data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, - im_row, im_col, c_im, pad); - } - } - } -} - - -// Function uses casting from int to unsigned to compare if value of -// parameter a is greater or equal to zero and lower than value of -// parameter b. The b parameter is of type signed and is always positive, -// therefore its value is always lower than 0x800... where casting -// negative value of a parameter converts it to value higher than 0x800... -// The casting allows to use one condition instead of two. 
-inline static int is_a_ge_zero_and_a_lt_b(int a, int b) { - return (unsigned)(a) < (unsigned)(b); -} - -// https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cpp -void im2col_cpu_ext(const float* data_im, const int channels, - const int height, const int width, const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, - const int stride_h, const int stride_w, - const int dilation_h, const int dilation_w, - float* data_col) -{ - const int output_h = (height + 2 * pad_h - - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; - const int output_w = (width + 2 * pad_w - - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; - const int channel_size = height * width; - int channel, kernel_row, kernel_col, output_rows, output_col; - for (channel = channels; channel--; data_im += channel_size) { - for (kernel_row = 0; kernel_row < kernel_h; kernel_row++) { - for (kernel_col = 0; kernel_col < kernel_w; kernel_col++) { - int input_row = -pad_h + kernel_row * dilation_h; - for (output_rows = output_h; output_rows; output_rows--) { - if (!is_a_ge_zero_and_a_lt_b(input_row, height)) { - for (output_col = output_w; output_col; output_col--) { - *(data_col++) = 0; - } - } - else { - int input_col = -pad_w + kernel_col * dilation_w; - for (output_col = output_w; output_col; output_col--) { - if (is_a_ge_zero_and_a_lt_b(input_col, width)) { - *(data_col++) = data_im[input_row * width + input_col]; - } - else { - *(data_col++) = 0; - } - input_col += stride_w; - } - } - input_row += stride_h; - } - } - } - } -} diff --git a/src/Detector/darknet/src/im2col.h b/src/Detector/darknet/src/im2col.h deleted file mode 100644 index 65dd6ec8a..000000000 --- a/src/Detector/darknet/src/im2col.h +++ /dev/null @@ -1,88 +0,0 @@ -#ifndef IM2COL_H -#define IM2COL_H - -#include -#include -#include "darknet.h" - -#ifdef __cplusplus -extern "C" { -#endif -void im2col_cpu(float* data_im, - int channels, int height, int width, - int ksize, int stride, int pad, float* 
data_col); -float im2col_get_pixel(float* im, int height, int width, int channels, - int row, int col, int channel, int pad); - -void im2col_cpu_ext(const float* data_im, const int channels, - const int height, const int width, const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, - const int stride_h, const int stride_w, - const int dilation_h, const int dilation_w, - float* data_col); - -#ifdef GPU - -void im2col_ongpu(float *im, - int channels, int height, int width, - int ksize, int stride, int pad,float *data_col); - -void im2col_gpu_ext(const float* data_im, const int channels, - const int height, const int width, const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, - const int stride_h, const int stride_w, - const int dilation_h, const int dilation_w, - float* data_col); - -void im2col_align_ongpu(float *im, - int channels, int height, int width, - int ksize, int stride, int pad, float *data_col, int bit_align); - -void im2col_align_bin_ongpu(float *im, - int channels, int height, int width, - int ksize, int stride, int pad, float *data_col, int bit_align); - -void float_to_bit_gpu(float *src, unsigned char *dst, size_t size); - -void transpose_bin_gpu(unsigned char *A, unsigned char *B, const int n, const int m, - const int lda, const int ldb, const int block_size); - -void transpose_uint32_gpu(uint32_t *src, uint32_t *dst, int src_h, int src_w, int src_align, int dst_align); - -void transpose_uint32_gpu_2(uint32_t *src, uint32_t *dst, int src_h, int src_w, int src_align, int dst_align); - -void repack_input_gpu(float *input, float *re_packed_input, int w, int h, int c); - -void repack_input_gpu_2(float *input, float *re_packed_input, int w, int h, int c); - -void repack_input_gpu_bin(float *input, uint32_t *re_packed_input_bin, int w, int h, int c); - -void fill_int8_gpu(unsigned char *src, unsigned char val, size_t size); - -// shared_memory + partial coalescing = GOOD -void 
gemm_nn_custom_bin_mean_transposed_gpu(int M, int N, int K, - unsigned char *A, int lda, - unsigned char *B, int ldb, - float *C, int ldc, float *mean_arr, float *bias, int leaky_activation, - float *shortcut_in_gpu, float *shortcut_out_gpu); - -// sequentially - BAD -void gemm_nn_custom_bin_mean_transposed_sequentially_gpu(int M, int N, int K, - unsigned char *A, int lda, - unsigned char *B, int ldb, - float *C, int ldc, float *mean_arr); - -void convolve_gpu(float *input, float *weights, float *output, int in_w, int in_h, int in_c, int n, int size, int pad); - -void convolve_bin_gpu(float *input, float *weights, float *output, int in_w, int in_h, int in_c, int n, int size, int pad, - int new_lda, float *mean_arr_gpu); - -//void convolve_bin_cpu(float *input, float *weights, float *output, int in_w, int in_h, int in_c, int n, int size, int pad, int new_lda, float *mean_arr_gpu); - -//void convolve_cpu(float *input, float *weights, float *output, int in_w, int in_h, int in_c, int n, int size, int pad); - -#endif -#ifdef __cplusplus -} -#endif -#endif diff --git a/src/Detector/darknet/src/im2col_kernels.cu b/src/Detector/darknet/src/im2col_kernels.cu deleted file mode 100644 index f924b3e5c..000000000 --- a/src/Detector/darknet/src/im2col_kernels.cu +++ /dev/null @@ -1,2288 +0,0 @@ -#include -#include -#include -#include - -#include "im2col.h" -#include "dark_cuda.h" - -#include -#include - - -template -__device__ inline T1 __shfl_custom(T1 val, T2 lane) { -#if CUDART_VERSION >= 9000 - return __shfl_sync(FULL_MASK, val, lane); -#else - return __shfl(val, lane); -#endif -} - -template -__device__ inline uint32_t __ballot_custom(T val) { -#if CUDART_VERSION >= 9000 - return __ballot_sync(FULL_MASK, val); -#else - return __ballot(val); -#endif -} - - -// src: https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu -// You may also want to read: https://github.com/BVLC/caffe/blob/master/LICENSE - -__global__ void im2col_gpu_kernel(const int n, const float* 
data_im, - const int height, const int width, const int ksize, - const int pad, - const int stride, - const int height_col, const int width_col, - float *data_col) { - int index = blockIdx.x*blockDim.x+threadIdx.x; - for(; index < n; index += blockDim.x*gridDim.x){ - int w_out = index % width_col; - int h_index = index / width_col; - int h_out = h_index % height_col; - int channel_in = h_index / height_col; - int channel_out = channel_in * ksize * ksize; - int h_in = h_out * stride - pad; - int w_in = w_out * stride - pad; - float* data_col_ptr = data_col; - data_col_ptr += (channel_out * height_col + h_out) * width_col + w_out; - const float* data_im_ptr = data_im; - data_im_ptr += (channel_in * height + h_in) * width + w_in; - for (int i = 0; i < ksize; ++i) { - for (int j = 0; j < ksize; ++j) { - int h = h_in + i; - int w = w_in + j; - - *data_col_ptr = (h >= 0 && w >= 0 && h < height && w < width) ? - data_im_ptr[i * width + j] : 0; - - //data_im[(channel_in * height + h_in) * width + w_in + i * width + j]; - //(*data_col_ptr) = data_im_ptr[ii * width + jj]; - - data_col_ptr += height_col * width_col; - } - } - } -} - -void im2col_ongpu(float *im, - int channels, int height, int width, - int ksize, int stride, int pad, float *data_col){ - // We are going to launch channels * height_col * width_col kernels, each - // kernel responsible for copying a single-channel grid. 
- int height_col = (height + 2 * pad - ksize) / stride + 1; - int width_col = (width + 2 * pad - ksize) / stride + 1; - int num_kernels = channels * height_col * width_col; - im2col_gpu_kernel<<<(num_kernels+BLOCK-1)/BLOCK, - BLOCK, 0, get_cuda_stream()>>>( - num_kernels, im, height, width, ksize, pad, - stride, height_col, - width_col, data_col); - - CHECK_CUDA(cudaPeekAtLastError()); -} -// -------------------------------- - -/* -__global__ void im2col_align_gpu_kernel(const int n, const float* data_im, - const int height, const int width, const int ksize, - const int pad, - const int stride, - const int height_col, const int width_col, - float *data_col, const int bit_align) -{ - //__shared__ float tmp_s[1]; - - int index = blockIdx.x*blockDim.x + threadIdx.x; - for (; index < n; index += blockDim.x*gridDim.x) { - int w_out = index % width_col; - int h_index = index / width_col; - int h_out = h_index % height_col; - int channel_in = h_index / height_col; - int channel_out = channel_in * ksize * ksize; - int h_in = h_out * stride - pad; - int w_in = w_out * stride - pad; - float* data_col_ptr = data_col; - //data_col_ptr += (channel_out * height_col + h_out) * width_col + w_out; - data_col_ptr += channel_out * bit_align + h_out * width_col + w_out; - float* data_col_ptr_32 = data_col + (channel_out * bit_align + h_out * width_col + w_out)/32; - const float* data_im_ptr = data_im; - data_im_ptr += (channel_in * height + h_in) * width + w_in; - for (int i = 0; i < ksize; ++i) { - for (int j = 0; j < ksize; ++j) { - int h = h_in + i; - int w = w_in + j; - - float val = (h >= 0 && w >= 0 && h < height && w < width) ? - data_im_ptr[i * width + j] : 0; - - *data_col_ptr = val; - //tmp_s[0] = val; - - //(*data_col_ptr) = (h >= 0 && w >= 0 && h < height && w < width) ? - // data_im_ptr[i * width + j] : 0; - - //float src_val = (h >= 0 && w >= 0 && h < height && w < width) ? 
data_im_ptr[i * width + j] : 0; - //unsigned int bit_mask = __ballot_sync(0xffffffff, src_val > 0); - //if (threadIdx.x % WARP_SIZE == 0) *((unsigned int*)data_col_ptr_32) = bit_mask; - // use atomicOr() // *dst_ptr |= (mask << (col_index % 8)); - //data_col_ptr_32 += bit_align / 32; - - //data_col_ptr += height_col * width_col; - data_col_ptr += bit_align; - } - } - } -} -*/ - -// float 32 -__global__ void im2col_align_gpu_kernel(const int n, const float* data_im, - const int height, const int width, const int ksize, - const int pad, - const int stride, - const int height_col, const int width_col, - float *data_col, const int bit_align) -{ - //__shared__ float tmp_s[1]; - - - int index = blockIdx.x*blockDim.x + threadIdx.x; - for (; index < n; index += blockDim.x*gridDim.x) { - int w_out = index % width_col; - int h_index = index / width_col; - int h_out = h_index % height_col; - int channel_in = h_index / height_col; - int channel_out = channel_in * ksize * ksize; - int h_in = h_out * stride - pad; - int w_in = w_out * stride - pad; - //float* data_col_ptr = data_col; - //float* data_col_ptr_32 = data_col + (channel_out * bit_align + h_out * width_col + w_out) / 32; - //data_col_ptr += (channel_out * height_col + h_out) * width_col + w_out; - //data_col_ptr += channel_out * bit_align + h_out * width_col + w_out; - float* data_col_ptr = &data_col[channel_out * bit_align + h_out * width_col + w_out]; - const float* data_im_ptr = data_im; - data_im_ptr += (channel_in * height + h_in) * width + w_in; - for (int i = 0; i < ksize; ++i) { - for (int j = 0; j < ksize; ++j) { - int h = h_in + i; - int w = w_in + j; - - float val = (h >= 0 && w >= 0 && h < height && w < width) ? 
- data_im_ptr[i * width + j] : 0; - - int pre_out_index = index % (width_col*height_col); - int out_index = (channel_out + i*ksize + j) * bit_align + pre_out_index;// h_out * width_col + w_out; - data_col[out_index] = val; - - //(*data_col_ptr) = val; - //dst_s[threadIdx.x] = val; - //tmp_s[0] = val; - - //(*data_col_ptr) = (h >= 0 && w >= 0 && h < height && w < width) ? - // data_im_ptr[i * width + j] : 0; - - //float src_val = (h >= 0 && w >= 0 && h < height && w < width) ? data_im_ptr[i * width + j] : 0; - //unsigned int bit_mask = __ballot_sync(0xffffffff, src_val > 0); - //if (threadIdx.x % WARP_SIZE == 0) *((unsigned int*)data_col_ptr_32) = bit_mask; - // use atomicOr() // *dst_ptr |= (mask << (col_index % 8)); - //data_col_ptr_32 += bit_align / 32; - - //data_col_ptr += height_col * width_col; - data_col_ptr += bit_align; - } - } - } -} - -void im2col_align_ongpu(float *im, - int channels, int height, int width, - int ksize, int stride, int pad, float *data_col, int bit_align) { - // We are going to launch channels * height_col * width_col kernels, each - // kernel responsible for copying a single-channel grid. 
- int height_col = (height + 2 * pad - ksize) / stride + 1; - int width_col = (width + 2 * pad - ksize) / stride + 1; - int num_kernels = channels * height_col * width_col; - im2col_align_gpu_kernel << <(num_kernels + BLOCK - 1) / BLOCK, - BLOCK, 0, get_cuda_stream() >> >( - num_kernels, im, height, width, ksize, pad, - stride, height_col, - width_col, data_col, bit_align); - - CHECK_CUDA(cudaPeekAtLastError()); -} - - -// -------------------------------- - - - -// binary im2col - stride=1 -__global__ void im2col_align_bin_gpu_kernel(const int n, const float* data_im, - const int height, const int width, const int ksize, const int channels, - const int pad, - const int stride, - const int height_col, const int width_col, - float *data_col, const int bit_align) -{ - //__shared__ float tmp_s[1]; - //__shared__ ulonglong4 tmp256_s[1]; - - - //#define SHRED_VALS ((BLOCK / 169) * ) - //__shared__ float dst_s[1024]; - //__shared__ float dst_s[1024]; - //__shared__ uint32_t bit_s[32]; - //__shared__ uint8_t bit_s[128]; - - int index = blockIdx.x*blockDim.x + threadIdx.x; - //for (; index < n; index += blockDim.x*gridDim.x) - { - int c_index = index; - int channel_in = c_index % channels; - - //int h_out = index % height_col; - //int c_index = index / height_col; - //int channel_in = c_index % channels; - - int channel_out = channel_in * ksize * ksize; - - int j_index = c_index / channels; - int j = j_index % ksize; - int i = j_index / ksize; - - int pre_out_index = (channel_out + i*ksize + j) * bit_align; - int j_pad = (j - pad); - int i_pad = (i - pad); - - for(int wh_index = 0; wh_index < (height_col*width_col); wh_index += 32) - //for (int h_out = 0; h_out < height_col; ++h_out) - { - - // the end of padding - //if(0) - //for (int w_out = 0; w_out < (width_col); w_out += 32) - { - const int w_out = wh_index % width_col; - const int h_out = wh_index / width_col; - - const int w = w_out + j_pad; - const int h = h_out + i_pad; - - int pre_in_index = channel_in * height * 
width; - int pre_in_wh_index = h * width + w; - - int send_wh_index = wh_index; - if (i >= ksize) send_wh_index = height_col*width_col; - - #pragma unroll - for (int t = 0; t < WARP_SIZE; ++t) - { - const int lane_id = threadIdx.x % WARP_SIZE; - - const int cur_wh_index = __shfl_custom(send_wh_index, t) + lane_id; - - if (cur_wh_index < (width_col*height_col))// && (cur_i_pad+pad) < ksize) - { - const int cur_pre_out_index = __shfl_custom(pre_out_index, t); - - const int cur_pre_in_index = __shfl_custom(pre_in_index, t); - const int cur_pre_in_wh_index = __shfl_custom(pre_in_wh_index, t) + lane_id; - - int w = cur_pre_in_wh_index % width; - int h = cur_pre_in_wh_index / width; - int in_index = cur_pre_in_index + cur_pre_in_wh_index; - - int out_index = cur_pre_out_index + cur_wh_index; - - float val = (w >= 0 && w < width && h >= 0 && h < height) ? - data_im[in_index] : float(); - - //data_col[out_index] = val; - //tmp_s[0] = val; - - uint32_t bit_mask = __ballot_custom(val > 0); - if (lane_id == 0) { - uint8_t *bit8_ptr = &(((uint8_t *)data_col)[out_index / 8]); - uint32_t *bit32_ptr = (uint32_t *)bit8_ptr; - *bit32_ptr = bit_mask; - } - } - - - } - - }// w_out - - } - } -} - - -void im2col_align_bin_ongpu(float *im, - int channels, int height, int width, - int ksize, int stride, int pad, float *data_col, int bit_align) { - // We are going to launch channels * height_col * width_col kernels, each - // kernel responsible for copying a single-channel grid. 
- int height_col = (height + 2 * pad - ksize) / stride + 1; - int width_col = (width + 2 * pad - ksize) / stride + 1; - //int num_kernels = channels * height_col * width_col * ksize * ksize; - //int num_kernels = channels * ksize * ksize * height_col; - int num_kernels = channels * ksize * ksize; - int num_blocks = num_kernels / BLOCK + 1; - - //im2col_align_bin_gpu_kernel << <(num_kernels + BLOCK - 1) / BLOCK, - im2col_align_bin_gpu_kernel << > >( - num_kernels, im, height, width, ksize, channels, pad, - stride, height_col, - width_col, data_col, bit_align); - - CHECK_CUDA(cudaPeekAtLastError()); -} -// -------------------------------- - -/* -__global__ void float_to_bit_gpu_kernel(float *src, unsigned char *dst, size_t size) -{ - //const int size_aligned = size + (WARP_SIZE - size % WARP_SIZE); - - int index = blockIdx.x*blockDim.x + threadIdx.x; - float src_val; - - //for (; index < size_aligned; index += blockDim.x*gridDim.x) - { - //src_val = src[index]; - if(index < size) src_val = src[index]; - else src_val = 0; - //unsigned int bit_mask = __ballot_sync(0xffffffff, src_val > 0); - unsigned int bit_mask = __ballot_custom(src_val > 0); - if (threadIdx.x % WARP_SIZE == 0) ((unsigned int*)dst)[index / 32] = bit_mask; - } -} -*/ - -/* -__global__ void float_to_bit_gpu_kernel(float *src, unsigned char *dst, size_t size) -{ - //const int size_aligned = size + (WARP_SIZE - size % WARP_SIZE); - __shared__ uint32_t tmp[WARP_SIZE]; - - int index = blockIdx.x*blockDim.x + threadIdx.x; - float src_val; - uint32_t *dst32_ptr = ((unsigned int*)dst); - - //for (; index < size_aligned; index += blockDim.x*gridDim.x) - { - //src_val = src[index]; - if (index < size) src_val = src[index]; - else src_val = 0; - //unsigned int bit_mask = __ballot_sync(0xffffffff, src_val > 0); - const int num_of_warps = blockDim.x / WARP_SIZE; - const int warp_id = threadIdx.x / WARP_SIZE; - const int lane_id = threadIdx.x % WARP_SIZE; - - uint32_t bit_mask = __ballot_custom(src_val > 0); - - if 
(lane_id == 0) tmp[warp_id] = bit_mask; - - __syncthreads(); - if (warp_id == 0) { - if (lane_id < num_of_warps) { - dst32_ptr[index / 32 + lane_id] = tmp[lane_id]; - } - } - __syncthreads(); - } -} -*/ - -__global__ void float_to_bit_gpu_kernel(float *src, unsigned char *dst, size_t size) -{ - __shared__ uint32_t tmp[WARP_SIZE*32]; - - int index = 32*blockIdx.x*blockDim.x + threadIdx.x; - float src_val; - uint32_t *dst32_ptr = ((unsigned int*)dst); - - int i; - for(i = 0; i < 32; ++i) - { - if ((index + i * 1024) < size) src_val = src[index + i*1024]; - else src_val = 0; - //unsigned int bit_mask = __ballot_sync(0xffffffff, src_val > 0); - //const int num_of_warps = blockDim.x / WARP_SIZE; - const int warp_id = threadIdx.x / WARP_SIZE; - const int lane_id = threadIdx.x % WARP_SIZE; - - uint32_t bit_mask = __ballot_custom(src_val > 0); - if (lane_id == 0) tmp[i * 32 + warp_id] = bit_mask; - } - __syncthreads(); - dst32_ptr[blockIdx.x*blockDim.x + threadIdx.x] = tmp[threadIdx.x]; -} - - -void float_to_bit_gpu(float *src, unsigned char *dst, size_t size) -{ - //const int num_blocks = size / 1024 + 1; - //const int num_blocks = size / (32*1024) + 1; - const int num_blocks = get_number_of_blocks(size, 32 * 1024); - float_to_bit_gpu_kernel<<>>(src, dst, size); - CHECK_CUDA(cudaPeekAtLastError()); -} -// -------------------------------- - -/* -__device__ __host__ static inline void remove_bit(unsigned char *const dst, size_t index) { - size_t dst_i = index / 8; - int dst_shift = index % 8; - dst[dst_i] &= ~(1 << dst_shift); -} - -__device__ __host__ static inline void set_bit(unsigned char *const dst, size_t index) { - size_t dst_i = index / 8; - int dst_shift = index % 8; - dst[dst_i] |= 1 << dst_shift; - //dst[dst_i] |= 1 << (8 - dst_shift); -} -*/ - -__device__ __host__ static inline unsigned char get_bit(unsigned char const*const src, size_t index) { - size_t src_i = index / 8; - int src_shift = index % 8; - unsigned char val = (src[src_i] & (1 << src_shift)) > 0; - 
//unsigned char val = (src[src_i] & (1 << (8 - src_shift))) > 0; - return val; -} - -// Intel CPUs and nVidia CUDA GPU are little endian -__device__ __host__ unsigned char reverse_byte(unsigned char a) -{ - return ((a & 0x1) << 7) | ((a & 0x2) << 5) | - ((a & 0x4) << 3) | ((a & 0x8) << 1) | - ((a & 0x10) >> 1) | ((a & 0x20) >> 3) | - ((a & 0x40) >> 5) | ((a & 0x80) >> 7); -} - -__device__ __host__ unsigned char reverse_byte_2(unsigned char a) -{ - return ((a * 0x0802LU & 0x22110LU) | (a * 0x8020LU & 0x88440LU)) * 0x10101LU >> 16; -} - -__device__ unsigned char reverse_byte_CUDA(unsigned char a) -{ - uint32_t tmp = __brev(a); - return tmp >> 24; -} - -__device__ void transpose8rS32_reversed_diagonale(unsigned char* A, unsigned char* B, int m, int n) -{ - unsigned x, y, t; - - // Load the array and pack it into x and y. - x = (A[0] << 24) | (A[m] << 16) | (A[2 * m] << 8) | A[3 * m]; - y = (A[4 * m] << 24) | (A[5 * m] << 16) | (A[6 * m] << 8) | A[7 * m]; - - t = (x ^ (x >> 7)) & 0x00AA00AA; x = x ^ t ^ (t << 7); - t = (y ^ (y >> 7)) & 0x00AA00AA; y = y ^ t ^ (t << 7); - - t = (x ^ (x >> 14)) & 0x0000CCCC; x = x ^ t ^ (t << 14); - t = (y ^ (y >> 14)) & 0x0000CCCC; y = y ^ t ^ (t << 14); - - t = (x & 0xF0F0F0F0) | ((y >> 4) & 0x0F0F0F0F); - y = ((x << 4) & 0xF0F0F0F0) | (y & 0x0F0F0F0F); - x = t; - - B[7 * n] = reverse_byte_CUDA(x >> 24); B[6 * n] = reverse_byte_CUDA(x >> 16); B[5 * n] = reverse_byte_CUDA(x >> 8); B[4 * n] = reverse_byte_CUDA(x); - B[3 * n] = reverse_byte_CUDA(y >> 24); B[2 * n] = reverse_byte_CUDA(y >> 16); B[1 * n] = reverse_byte_CUDA(y >> 8); B[0 * n] = reverse_byte_CUDA(y); - - //__device__ ​ unsigned int __brev(unsigned int x) - //Reverse the bit order of a 32 bit unsigned integer. 
- // https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH__INTRINSIC__INT.html -} - - -// transpose 8x8 bit -__global__ void transpose_bin_gpu_kernel(unsigned char *A, unsigned char *B, const int n, const int m, - const int lda, const int ldb, const int block_size) -{ - int i; - int index = blockIdx.x*blockDim.x + threadIdx.x; - - //for (i = 0; i < n; i += 8) - { - i = (index*8) % n; - int j; - //for (j = 0; j < m - 8; j += 8) - { - j = ((index * 8) / n) * 8; - if (j < m) { - int a_index = i*lda + j; - int b_index = j*ldb + i; - transpose8rS32_reversed_diagonale(&A[a_index / 8], &B[b_index / 8], lda / 8, ldb / 8); - } - //else if (j < m) { - // for (; j < m; ++j) { - // if (get_bit(A, i*lda + j)) set_bit(B, j*ldb + i); - // else remove_bit(B, j*ldb + i); - // } - //} - } - } -} - - - -__device__ __host__ uint8_t reverse_8_bit(uint8_t a) { - return ((a * 0x0802LU & 0x22110LU) | (a * 0x8020LU & 0x88440LU)) * 0x10101LU >> 16; -} - -__device__ uint32_t reverse_32_bit(uint32_t a) -{ - // __device__ ​ unsigned int __brev(unsigned int x) // CUDA - // unsigned int __rbit(unsigned int val) // for ARM //__asm__("rbit %0, %1\n" : "=r"(output) : "r"(input)); - return __brev(a); - //return (reverse_8_bit(a >> 24) << 0) | - // (reverse_8_bit(a >> 16) << 8) | - // (reverse_8_bit(a >> 8) << 16) | - // (reverse_8_bit(a >> 0) << 24); -} - -#define swap(a0, a1, j, m) t = (a0 ^ (a1 >>j)) & m; a0 = a0 ^ t; a1 = a1 ^ (t << j); - -__device__ void transpose32_optimized(uint32_t A[32]) { - int j, k; - unsigned m, t; - - //m = 0x0000FFFF; - //for (j = 16; j != 0; j = j >> 1, m = m ^ (m << j)) { - // for (k = 0; k < 32; k = (k + j + 1) & ~j) { - // t = (A[k] ^ (A[k + j] >> j)) & m; - // A[k] = A[k] ^ t; - // A[k + j] = A[k + j] ^ (t << j); - // } - //} - - j = 16; - m = 0x0000FFFF; - for (k = 0; k < 32; k = (k + j + 1) & ~j) { swap(A[k], A[k + j], j, m); } - - j = 8; - m = 0x00ff00ff; - for (k = 0; k < 32; k = (k + j + 1) & ~j) { swap(A[k], A[k + j], j, m); } - - j = 4; - m = 
0x0f0f0f0f; - for (k = 0; k < 32; k = (k + j + 1) & ~j) { swap(A[k], A[k + j], j, m); } - - j = 2; - m = 0x33333333; - for (k = 0; k < 32; k = (k + j + 1) & ~j) { swap(A[k], A[k + j], j, m); } - - j = 1; - m = 0x55555555; - for (k = 0; k < 32; k = (k + j + 1) & ~j) { swap(A[k], A[k + j], j, m); } - - // reverse Y - for (j = 0; j < 16; ++j) { - uint32_t tmp = A[j]; - A[j] = reverse_32_bit(A[31 - j]); - A[31 - j] = reverse_32_bit(tmp); - } -} - -extern "C" { -__device__ void transpose_32x32_bits_reversed_diagonale(uint32_t *A, uint32_t *B, int m, int n) -{ - //unsigned A_tmp[32]; - //int i; - //#pragma unroll - //for (i = 0; i < 32; ++i) A_tmp[i] = A[i * m]; - //transpose32_optimized(A_tmp); - //#pragma unroll - //for (i = 0; i < 32; ++i) B[i*n] = A_tmp[i]; - - __shared__ uint32_t A_shared[32 * BLOCK_TRANSPOSE32]; - uint32_t *A_tmp = &A_shared[32 * threadIdx.x]; - - int i; - #pragma unroll 32 - for (i = 0; i < 32; ++i) A_tmp[i] = A[i * m]; - transpose32_optimized(A_tmp); - #pragma unroll 32 - for (i = 0; i < 32; ++i) B[i*n] = A_tmp[i]; -} -} - -// transpose 32x32 bit -__global__ void transpose_bin_gpu_kernel_32(uint32_t *A, uint32_t *B, const int n, const int m, - const int lda, const int ldb, const int block_size) -{ - int i; - int index = (blockIdx.x*blockDim.x + threadIdx.x) * 32; - - //for (i = 0; i < n; i += 8) - { - i = index % n; - int j; - //for (j = 0; j < m - 8; j += 8) - { - j = (index / n) * 32; - if (j < m) { - int a_index = i*lda + j; - int b_index = j*ldb + i; - transpose_32x32_bits_reversed_diagonale(&A[a_index / 32], &B[b_index / 32], lda / 32, ldb / 32); - } - } - } -} - -void transpose_bin_gpu(unsigned char *A, unsigned char *B, const int n, const int m, - const int lda, const int ldb, const int block_size) -{ - //int size = n*m/ (8*8) + 1; - int size32 = n*m / (32*32) + 1; - //const int num_blocks = size / BLOCK + 1; - const int num_blocks32 = size32 / BLOCK_TRANSPOSE32 + 1; - transpose_bin_gpu_kernel_32 << > >((uint32_t *)A, (uint32_t *)B, n, m, 
lda, ldb, block_size); - //transpose_bin_gpu_kernel << > >(A, B, n, m, lda, ldb, block_size); - CHECK_CUDA(cudaPeekAtLastError()); -} -// -------------------------------- - -__global__ void transpose_uint32_kernel(uint32_t *src, uint32_t *dst, int src_h, int src_w, int src_align, int dst_align) -{ - //l.bit_align - algined (n) by 32 - //new_ldb - aligned (k) by 256 - int index = blockIdx.x*blockDim.x + threadIdx.x; - - //for (i = 0; i < src_h; i += 1) - int i = index % src_h; // l.size*l.size*l.c; - { - //for (j = 0; j < src_w; j += 1) - int j = index / src_h; // out_h*out_w; - if(j < src_w) - { - ((uint32_t *)dst)[j*dst_align / 32 + i] = ((uint32_t *)src)[i*src_align + j]; - } - } -} - -void transpose_uint32_gpu(uint32_t *src, uint32_t *dst, int src_h, int src_w, int src_align, int dst_align) -{ - int size = src_w * src_h; - const int num_blocks = size / BLOCK + 1; - transpose_uint32_kernel << > >(src, dst, src_h, src_w, src_align, dst_align); - CHECK_CUDA(cudaPeekAtLastError()); -} -// -------------------------------- - -//#define TRANS_LOOP 10 - -__global__ void transpose_uint32_kernel_2(uint32_t *src, uint32_t *dst, int src_h, int src_w, int src_align, int dst_align) -{ - __shared__ uint32_t tmp[33 * 32]; // misaligned_array[32x32] - const int w_align = 33; - //const int shared_size = w_align * 32; - - //l.bit_align - algined (n) by 32 - //new_ldb - aligned (k) by 256 - - const int src_w_align = src_w + (32 - src_w % 32); - //const int src_h_align = src_h + (32 - src_h % 32); - - const int warps_in_width = src_w_align / 32; - //const int warps_in_height = src_h_align / 32; - - - - const int local_x = threadIdx.x % 32; // index % 32; - const int local_x_index = threadIdx.x / 32; // index / 32; - const int local_y = local_x_index % 32; - -//#pragma unroll TRANS_LOOP - //for (int i = 0; i < TRANS_LOOP; ++i) - { - const int global_index = blockIdx.x;// blockIdx.x*TRANS_LOOP + i;// local_x_index / 32; - const int global_x_index = global_index % warps_in_width; - 
const int global_y_index = global_index / warps_in_width; - - const int global_x = global_x_index * 32 + local_x; - const int global_y = global_y_index * 32 + local_y; - - uint32_t val = 0; - if (global_x < src_w && global_y < src_h) { - val = src[global_y * src_align + global_x]; - } - //dst[global_x * dst_align / 32 + global_y] = val; - //tmp[local_y * 32 + local_x] = val; - - tmp[local_x * w_align + local_y] = val; - __syncthreads(); - val = tmp[local_y * w_align + local_x]; - - const int new_global_x = global_y_index * 32 + local_x; - const int new_global_y = global_x_index * 32 + local_y; - - if (new_global_x < src_h && new_global_y < src_w) { - dst[new_global_y * (dst_align / 32) + new_global_x] = val; - } - } -} - -#define TRANS_BLOCK 1024 -void transpose_uint32_gpu_2(uint32_t *src, uint32_t *dst, int src_h, int src_w, int src_align, int dst_align) -{ - int src_w_align = src_w + (32 - src_w % 32); - int src_h_align = src_h + (32 - src_h % 32); - - int size = src_w_align * src_h_align; - int num_blocks = size / TRANS_BLOCK; - transpose_uint32_kernel_2 << > >(src, dst, src_h, src_w, src_align, dst_align); - CHECK_CUDA(cudaPeekAtLastError()); -} -// -------------------------------- - - -// 32 channels -> 1 channel (with 32 floats) -// 256 channels -> 8 channels (with 32 floats) -__global__ void repack_input_kernel(float *input, float *re_packed_input, int w, int h, int c) -{ - int index = blockIdx.x*blockDim.x + threadIdx.x; - - const int items_per_channel = w * h; - - int c_pack = index % 32; - int chan_index = index / 32; - int chan = (chan_index * 32) % c; - int i = (chan_index * 32) / c; - - //for (chan = 0; chan < c; chan += 32) - { - //for (i = 0; i < items_per_channel; ++i) - if(i < items_per_channel) - { - //for (c_pack = 0; c_pack < 32; ++c_pack) - { - float src = input[(chan + c_pack)*items_per_channel + i]; - - re_packed_input[chan*items_per_channel + i * 32 + c_pack] = src; - } - } - } -} - -void repack_input_gpu(float *input, float 
*re_packed_input, int w, int h, int c) -{ - int size = w * h * c; - const int num_blocks = size / BLOCK + 1; - repack_input_kernel << > >(input, re_packed_input, w, h, c); - CHECK_CUDA(cudaPeekAtLastError()); -} -// -------------------------------- - - -// 32 channels -> 1 channel (with 32 floats) -// 256 channels -> 8 channels (with 32 floats) -__global__ void repack_input_kernel_2(float *input, float *re_packed_input, int w, int h, int c) -{ - //__shared__ uint32_t tmp[33 * 32]; // 33x32 is misaligned 32 x 32 to avoid bank conflicts - - int index = blockIdx.x*blockDim.x + threadIdx.x; - - const int items_per_channel = w * h; - - int c_pack = index % 32; - int chan_index = index / 32; - int chan = (chan_index * 32) % c; - int i = (chan_index * 32) / c; - - //for (chan = 0; chan < c; chan += 32) - { - //for (i = 0; i < items_per_channel; ++i) - if (i < items_per_channel) - { - //for (c_pack = 0; c_pack < 32; ++c_pack) - { - float src = input[(chan + c_pack)*items_per_channel + i]; - - re_packed_input[chan*items_per_channel + i * 32 + c_pack] = src; - } - } - } -} - -void repack_input_gpu_2(float *input, float *re_packed_input, int w, int h, int c) -{ - int size = w * h * c; - const int num_blocks = size / BLOCK + 1; - repack_input_kernel_2 << > >(input, re_packed_input, w, h, c); - CHECK_CUDA(cudaPeekAtLastError()); -} -// -------------------------------- - - -// 32 channels -> 1 channel (with 32 floats) -// 256 channels -> 8 channels (with 32 floats) -__global__ void repack_input_kernel_bin(float *input, uint32_t *re_packed_input_bin, int w, int h, int c) -{ - //__shared__ uint32_t tmp[32]; - const int index = blockIdx.x*blockDim.x + threadIdx.x; - - const int global_warp_id = index / WARP_SIZE; - const int lane_id = threadIdx.x % WARP_SIZE; - - const int items_per_channel = w * h; - const int items_per_channel_aligned = items_per_channel + WARP_SIZE - (items_per_channel % WARP_SIZE); - - int i = 32 * (global_warp_id % (items_per_channel_aligned / WARP_SIZE)); - 
int chan = 32 * (global_warp_id / (items_per_channel_aligned / WARP_SIZE)); - - if (chan < c) - { - uint32_t result_bits = 0; - - for (int c_pack = 0; c_pack < 32; ++c_pack) - { - float src = 0; - if ((i + lane_id) < items_per_channel) { - src = input[(chan + c_pack)*items_per_channel + (i + lane_id)]; - } - uint32_t bit_mask = __ballot_custom(src > 0); - - uint32_t cur_bit = (bit_mask >> lane_id) & uint32_t(1); - - result_bits |= (cur_bit << c_pack); - } - if ((i + lane_id) < items_per_channel) { - re_packed_input_bin[chan*items_per_channel / 32 + (i + lane_id)] = result_bits; - } - } -} - -void repack_input_gpu_bin(float *input, uint32_t *re_packed_input_bin, int w, int h, int c) -{ - int size = (w * h * c) / 32 + 1; - const int block_size = BLOCK; - const int num_blocks = get_number_of_blocks(size, block_size); - //printf("\n num_blocks = %d, num_blocks/32 = %d, block_size = %d \n", num_blocks, num_blocks / 32, block_size); - repack_input_kernel_bin << > >(input, re_packed_input_bin, w, h, c); - CHECK_CUDA(cudaPeekAtLastError()); -} - -/* -// 32 channels -> 1 channel (with 32 floats) -// 256 channels -> 8 channels (with 32 floats) -__global__ void repack_input_kernel_bin(float *input, uint32_t *re_packed_input_bin, int w, int h, int c) -{ - //__shared__ uint32_t tmp[32]; - int index = blockIdx.x*blockDim.x + threadIdx.x; - - //const int num_of_warps = blockDim.x / WARP_SIZE; - //const int warp_id = threadIdx.x / WARP_SIZE; - //const int lane_id = threadIdx.x % WARP_SIZE; - - const int items_per_channel = w * h; - - int c_pack = index % 32; - int chan_index = index / 32; - //int chan = (chan_index * 32) % c; - //int i = (chan_index * 32) / c; - - int i = (chan_index) % items_per_channel; - int chan = ((chan_index ) / items_per_channel)*32; - - - //for (chan = 0; chan < c; chan += 32) - if(chan < c) - { - //for (i = 0; i < items_per_channel; ++i) - //if (i < items_per_channel) - { - //for (c_pack = 0; c_pack < 32; ++c_pack) - { - float src = input[(chan + 
c_pack)*items_per_channel + i]; - - uint32_t bit_mask = __ballot_custom(src > 0); - if (threadIdx.x % 32 == 0) - re_packed_input_bin[chan*items_per_channel / 32 + i] = bit_mask; - } - } - } -} - -void repack_input_gpu_bin(float *input, uint32_t *re_packed_input_bin, int w, int h, int c) -{ - int size = w * h * c; - const int block_size = 256;// 128; - const int num_blocks = get_number_of_blocks(size, block_size); - printf("\n num_blocks = %d, num_blocks/32 = %d, block_size = %d \n", num_blocks, num_blocks/32, block_size); - repack_input_kernel_bin << > >(input, re_packed_input_bin, w, h, c); - CHECK_CUDA(cudaPeekAtLastError()); -} -*/ - - - -__global__ void fill_int8_gpu_kernel(unsigned char *src, unsigned char val, size_t size) { - int index = blockIdx.x*blockDim.x + threadIdx.x; - if(index < size) src[index] = 0; -} - -void fill_int8_gpu(unsigned char *src, unsigned char val, size_t size) { - const int num_blocks = size / BLOCK + 1; - fill_int8_gpu_kernel<<>>(src, val, size); - CHECK_CUDA(cudaPeekAtLastError()); -} -// -------------------------------- - -//typedef unsigned long long int uint64_t; -//typedef unsigned int uint32_t; -//typedef unsigned char uint8_t; -//typedef char int8_t; -/* -__device__ __host__ static inline uint64_t broadcast_bit_1_to_64(uint8_t src) { - return (src > 0) ? 
0xFFFFFFFFFFFFFFFF : 0; -} -*/ -__device__ __host__ static inline uint8_t xnor_bit1(uint8_t a, uint8_t b) { - return ~(a^b) & 0b1; -} -/* -__device__ __host__ static inline uint32_t xnor_int32(uint32_t a, uint32_t b) { - return ~(a^b); -} - -__device__ __host__ static inline uint64_t xnor_int64(uint64_t a, uint64_t b) { - return ~(a^b); -} - -__device__ __host__ static inline uint4 xnor_int128(uint4 a, uint4 b) { - uint4 res; - res.w = ~(a.w^b.w); - res.x = ~(a.x^b.x); - res.y = ~(a.y^b.y); - res.z = ~(a.z^b.z); - return res; -} - -__device__ __host__ static inline ulonglong4 xnor_int256(ulonglong4 a, ulonglong4 b) { - ulonglong4 res; - res.w = ~(a.w^b.w); - res.x = ~(a.x^b.x); - res.y = ~(a.y^b.y); - res.z = ~(a.z^b.z); - return res; -} -*/ -//------- -/* -__device__ __host__ static inline uint8_t xor_bit1(uint8_t a, uint8_t b) { - return (a^b) & 0b1; -} -*/ -__device__ __host__ static inline uint32_t xor_int32(uint32_t a, uint32_t b) { - return (a^b); -} - -__device__ __host__ static inline uint64_t xor_int64(uint64_t a, uint64_t b) { - return (a^b); -} -/* -__device__ __host__ static inline uint4 xor_int128(uint4 a, uint4 b) { - uint4 res; - res.w = (a.w^b.w); - res.x = (a.x^b.x); - res.y = (a.y^b.y); - res.z = (a.z^b.z); - return res; -} -*/ -__device__ __host__ static inline ulonglong4 xor_int256(ulonglong4 a, ulonglong4 b) { - ulonglong4 res; - res.w = (a.w^b.w); - res.x = (a.x^b.x); - res.y = (a.y^b.y); - res.z = (a.z^b.z); - return res; -} - -/* -__device__ static inline int popcnt_256(ulonglong4 a) { - return __popcll(a.w) + __popcll(a.x) + __popcll(a.y) + __popcll(a.z); -} - -__global__ void gemm_nn_custom_bin_mean_transposed_gpu_kernel(int M, int N, int K, - unsigned char *A, int lda, - unsigned char *B, int ldb, - float *C, int ldc, float *mean_arr) -{ - int index = blockIdx.x*blockDim.x + threadIdx.x; - - //if (index == 0) - { - int i, j, k, h; - - //#pragma omp parallel for - //for (i = 0; i < M; ++i) - i = index % M; - //if(i < M) - { // l.n - 
filters [16 - 55 - 1024] - float mean_val = mean_arr[i]; - - //for (j = 0; j < N; ++j) - j = index / M; - if(j < N) - { // out_h*out_w - one channel output size [169 - 173056] - int count = 0; - - for (k = 0; k < K; k += 64) { // l.size*l.size*l.c - one filter size [27 - 9216] - uint64_t a_bit64 = *((uint64_t *)(A + (i*lda + k) / 8)); - uint64_t b_bit64 = *((uint64_t *)(B + (j*ldb + k) / 8)); - uint64_t c_bit64 = xnor_int64(a_bit64, b_bit64); - - int tmp_count = __popcll(c_bit64); - - if (K - k < 64) tmp_count = tmp_count - (64 - (K - k)); // remove extra bits - count += tmp_count; - //binary_int64_printf(c_bit64); - //printf(", count = %d \n\n", tmp_count); - } - - C[i*ldc + j] = (2 * count - K) * mean_val; - } - } - } -} -*/ - - -/* -// B (input) in the shared_memory -__global__ void gemm_nn_custom_bin_mean_transposed_gpu_kernel(int M, int N, int K, - unsigned char *A, int lda, - unsigned char *B, int ldb, - float *C, int ldc, float *mean_arr) -{ - - __shared__ uint64_t B_s[4096]; // 32 KB // [ldb x N`] // max = 262 144 bits - - int start_j = blockIdx.x*blockDim.x / M; - { - int end_j = (blockIdx.x*blockDim.x + blockDim.x) / M + 1; - - size_t shared_size = ldb * (end_j - start_j); - - //float tmp_shared_size = ldb * (blockDim.x / M); - //int passes = (4096 * 64) / tmp_shared_size - 1; - //size_t shared_size = tmp_shared_size * passes; - - int k; - for (int k = threadIdx.x * 256; k < shared_size; k += blockDim.x * 256) { - int x = start_j*ldb + k; - if (x < (N*ldb)) *((ulonglong4 *)(B_s + k / 8)) = *((ulonglong4 *)(B + x / 8)); - } - - ////if (j_cur < N && (index % M == 0 || threadIdx.x == 0)) { - //// for (int k = 0; k < K; k += 64) { // l.size*l.size*l.c - one filter size [27 - 9216] - //// *((uint64_t *)(B_s + (local_j*ldb + k) / 8)) = *((uint64_t *)(B + (j_cur*ldb + k) / 8)); // input - ////} - ////} - } - __syncthreads(); - - int index = blockIdx.x*blockDim.x + threadIdx.x; - - - //if (index == 0) - //for(int in_tmp = threadIdx.x; in_tmp < 1*blockDim.x; 
in_tmp += blockDim.x) - { - //int index = blockIdx.x*blockDim.x*1 + in_tmp; - - int j_cur = index / M; - int local_j = j_cur - start_j; - - int i, j, h; - - //#pragma omp parallel for - //for (i = 0; i < M; ++i) - i = index % M; - //if(i < M) - { // l.n - filters [16 - 55 - 1024] - // further improvements: for (l.n == 1024) iterate several (j) - float mean_val = mean_arr[i]; - - //for (j = 0; j < N; ++j) - j = index / M; - if (j < N) - { // out_h*out_w - one channel output size [169 - 173056] - const int bit_step = 256; - int count = 0; - int k = 0; - for (k = 0; k < K; k += bit_step) { // l.size*l.size*l.c - one filter size [27 - 144 - 9216] - ulonglong4 a_bit256 = *((ulonglong4 *)(A + (i*lda + k) / 8)); // weights - //ulonglong4 b_bit256 = *((ulonglong4 *)(B + (j*ldb + k) / 8)); - ulonglong4 b_bit256 = *((ulonglong4 *)(B_s + (local_j*ldb + k) / 8)); // input - ulonglong4 c_bit256 = xnor_int256(a_bit256, b_bit256); - - count += __popcll(c_bit256.w) + __popcll(c_bit256.x) + - __popcll(c_bit256.y) + __popcll(c_bit256.z); - } - - int f1 = (K % bit_step == 0) ? 
0 : (bit_step - (K % bit_step)); - //C[i*ldc + j] += 2 * count*mean_val; - //C[i*ldc + j] += -2 * f1*mean_val; - //C[i*ldc + j] += - K*mean_val; - - count = count - f1; // remove extra bits (from empty space for align only) - C[i*ldc + j] = (2 * count - K) * mean_val; - - //B_s[0] = (2 * count - K) * mean_val; - } - } - } -} -*/ - -/* -// A (weights) in the shared_memory -__global__ void gemm_nn_custom_bin_mean_transposed_gpu_kernel(int M, int N, int K, - unsigned char *A, int lda, - unsigned char *B, int ldb, - float *C, int ldc, float *mean_arr) -{ - int index = blockIdx.x*blockDim.x + threadIdx.x; - - __shared__ uint64_t A_s[6144]; // 48 KB // [lda x M`] - //__shared__ uint8_t A_s[6144*8]; // 48 KB // [lda x M`] - - int start_i = blockIdx.x*blockDim.x / N; - int end_i = (blockIdx.x*blockDim.x + blockDim.x) / N + 1; - - size_t shared_size = lda * (end_i - start_i); - - int i_cur = index / N; - int local_i = i_cur - start_i; - - for (int k = threadIdx.x * 64; k < shared_size; k += blockDim.x * 64) { - int x = start_i*lda + k; - if (x < (M*lda)) *((uint64_t *)(A_s + k / 8)) = *((uint64_t *)(A + x / 8)); - } - - //if (i_cur < M && (index % N == 0 || threadIdx.x == 0)) { - //for (int k = 0; k < K; k += 64) { // l.size*l.size*l.c - one filter size [27 - 9216] - //(*(uint64_t *)(A_s + (local_i*lda + k) / 8)) = *((uint64_t *)(A + (i_cur*lda + k) / 8)); // weights - // } - //} - - __syncthreads(); - - int i, j, k, h; - - j = index % N; - { // out_h*out_w - one channel output size [169 - 173056] - i = index / N; - if (i < M) // l.n - filters [16 - 55 - 1024] - { - float mean_val = mean_arr[i]; - int count = 0; - - for (k = 0; k < K; k += 64) { // l.size*l.size*l.c - one filter size [27 - 9216] - //uint64_t a_bit64 = *((uint64_t *)(A + (i*lda + k) / 8)); // weights - uint64_t a_bit64 = *((uint64_t *)(A_s + (local_i*lda + k) / 8)); // weights - uint64_t b_bit64 = *((uint64_t *)(B + (j*ldb + k) / 8)); // input - uint64_t c_bit64 = xnor_int64(a_bit64, b_bit64); - - int 
tmp_count = __popcll(c_bit64); - - if (K - k < 64) tmp_count = tmp_count - (64 - (K - k)); // remove extra bits - count += tmp_count; - } - - C[i*ldc + j] = (2 * count - K) * mean_val; - } - } -} -*/ - -__inline__ __device__ -int warpAllReduceSum(int val) { - for (int mask = WARP_SIZE / 2; mask > 0; mask /= 2) -#if CUDART_VERSION >= 9000 - val += __shfl_xor_sync(FULL_MASK, val, mask); -#else - val += __shfl_xor(val, mask); -#endif - - return val; -} - -// Tensor Cores binary (CC >= 7.3 && CUDA >= 10.0) - __CUDA_SUBBYTE_IMMA__ -#if CUDART_VERSION >= 10000 -#include - -#define WMMA_M 8 -#define WMMA_N 8 -#define WMMA_K 128 -#define WMMA_K32 (WMMA_K/32) - -#define WMMA_Nx2 (WMMA_N*2) - -// Tensor Cores are used for XOR-GEMM -__global__ void gemm_nn_custom_bin_mean_transposed_tensor_kernel(int M, int N, int K, - unsigned char *A, int lda, - unsigned char *B, int ldb, - float *C, int ldc, float *mean_arr, float *bias_arr, int leaky_activation, - float *shortcut_in_gpu, float *shortcut_out_gpu) -{ - // total 57% - int index = blockIdx.x*blockDim.x + threadIdx.x; - - __shared__ int C_s[WMMA_N * WMMA_M * 32 * 2]; // 2 * 8 KB - Temprorary result of GEMM WMMA for 32 warps - - const int lane_id = threadIdx.x % 32; - const int warp_id = threadIdx.x / 32; - const int global_warp_id = index / 32; - - const int N_aligned = N + WMMA_Nx2 - (N % WMMA_Nx2); - - /* - __syncthreads(); - __shared__ uint32_t A_s[8 * 512]; // 8x512 = 8 x 16384 bits, instead of 8x4 - const int start_global_warp_id = blockIdx.x*blockDim.x / 32; - int start_i = start_global_warp_id / (N_aligned / WMMA_N); - start_i = start_i * WMMA_M; - if (start_i + WMMA_M > M) start_i = M - WMMA_M; // must be: i+7 < M - for (int tmp_index = threadIdx.x; tmp_index < (8 * 512); tmp_index += blockDim.x) - { - int k_tmp = tmp_index % 512; - int local_i = tmp_index / 512; - - uint32_t a_val = ((uint32_t *)(A))[(start_i + local_i)*lda/32 + k_tmp]; - A_s[local_i * 512 + k_tmp] = a_val; - } - __syncthreads(); - */ - - - int i, j, 
k;//, h; - // 47% = 29 + 10 + 8 - j = global_warp_id % (N_aligned / WMMA_Nx2); - j = j * WMMA_Nx2; - { // out_h*out_w - one channel output size [169 - 173056] - i = global_warp_id / (N_aligned / WMMA_Nx2); - i = i * WMMA_M; - - //int count = 0; - k = 0; - - if (i < M) //if (i < M) // l.n - filters [16 - 55 - 1024] - { - if (j + WMMA_Nx2 > N) j = N - WMMA_Nx2; // must be: j+7 < N - if (i + WMMA_M > M) i = M - WMMA_M; // must be: i+7 < M - -#if __CUDA_ARCH__ >= 730 - // Tensor Cores - using namespace nvcuda; - - wmma::fragment a_frag; - wmma::fragment b_frag; - wmma::fragment c1_frag, c2_frag; - wmma::fill_fragment(c1_frag, 0); // !!!! XOR isn't XNOR !!!!!!!!!! - wmma::fill_fragment(c2_frag, 0); // !!!! XOR isn't XNOR !!!!!!!!!! - - // 8 x 8 x 4 (uint32_t, 4 * 32 = 128 bit) - for (; k < K; k += 128) // l.size*l.size*l.c - one filter size [27 - 144 - 9216] - { - int64_t A_cur_index = (i*lda + k) / 8; // index in bits - int64_t B1_cur_index = (j*ldb + k) / 8; // index in bits - int64_t B2_cur_index = ((j + 8)*ldb + k) / 8; // index in bits - - // try to use A that is cached in shared memory - poor performance - //if (i == start_i) wmma::load_matrix_sync(a_frag, &A_s[k / 32], (512 * 32)); // lda = (128*32) bits - //else wmma::load_matrix_sync(a_frag, (uint32_t *)(A + A_cur_index), lda); // lda = M - - // lda, ldb - are in bits - wmma::load_matrix_sync(a_frag, (uint32_t *)(A + A_cur_index), lda); // lda = M - - wmma::load_matrix_sync(b_frag, (uint32_t *)(B + B1_cur_index), ldb); // ldb = K - wmma::bmma_sync(c1_frag, a_frag, b_frag, c1_frag); // XOR-GEMM - - wmma::load_matrix_sync(b_frag, (uint32_t *)(B + B2_cur_index), ldb); // ldb = K - wmma::bmma_sync(c2_frag, a_frag, b_frag, c2_frag); // XOR-GEMM - } - // C[i*ldc + j] - wmma::store_matrix_sync(&C_s[warp_id*WMMA_M*WMMA_N], c1_frag, WMMA_N, wmma::mem_row_major); - wmma::store_matrix_sync(&C_s[warp_id*WMMA_M*WMMA_N + WMMA_M*WMMA_N*32], c2_frag, WMMA_N, wmma::mem_row_major); -#else // __CUDA_ARCH__ >= 730 - - // Custom 
XOR-GEMM - int k_d = lane_id % 4; - int i_d = lane_id / 4; - //int j_d = lane_id / 4; - - int32_t accum_c_val[8*2]; // wmma::fill_fragment(c_frag, 0); - for (int local_j = 0; local_j < 8*2; ++local_j) { - accum_c_val[local_j] = 0; - } - - // 8 x 8 x 4 (uint32_t, 4 * 32 = 128 bit) - for (; k < K; k += 128) // l.size*l.size*l.c - one filter size [27 - 144 - 9216] - { - //int64_t A_cur_index = (i*lda + k) / 8; - //int64_t A_cur_index = (local_i*lda + k) / 8; - //int64_t B_cur_index = (j*ldb + k) / 8; - - // lda, ldb - are in bits - // 8*4 = 32 - // 8*8 = 64 - int k_d = lane_id % 4; - int i_d = lane_id / 4; - int j_d = lane_id / 4; - uint32_t a_val = *(uint32_t *)(A + ((i + i_d)*lda + (k + k_d*32)) / 8); // wmma::load_matrix_sync(a_frag, (uint32_t *)(A + A_cur_index), lda); - - for (int c_x = 0; c_x < 2; c_x++) - { - uint32_t b_val = *(uint32_t *)(B + ((c_x * 8 + j + j_d)*ldb + (k + k_d * 32)) / 8); // wmma::load_matrix_sync(b_frag, (uint32_t *)(B + B_cur_index), ldb); - - // wmma::bmma_sync(c_frag, a_frag, b_frag, c_frag); - int32_t c_val[8]; // 8 x 32 threads = 256 - #pragma UNROLL - for (int local_j = 0; local_j < 8; ++local_j) - { - uint32_t b_val_cur = __shfl_custom(b_val, local_j * 4 + k_d); - c_val[local_j] = __popc(xor_int32(a_val, b_val_cur)); - } - - #pragma UNROLL - for (int local_j = 0; local_j < 8; ++local_j) - { - #pragma UNROLL - for (int local_k = 0; local_k < 4; ++local_k) { - accum_c_val[local_j + c_x*8] += __shfl_custom(c_val[local_j], i_d * 4 + local_k); - } - } - } - } - - // only the first 8 threads (i) contain 8 good values each, in c_val[8] (j) = 8 x 8 =64 - // wmma::store_matrix_sync(&C_s[warp_id*WMMA_M*WMMA_N], c_frag, WMMA_N, wmma::mem_row_major); - if (k_d == 0) { - for (int c_x = 0; c_x < 2; c_x++) - { - for (int local_j = 0; local_j < 8; ++local_j) - { - C_s[warp_id*WMMA_M*WMMA_N + i_d*WMMA_N + local_j + WMMA_M*WMMA_N*32 * c_x] = accum_c_val[local_j + c_x*8]; - } - } - } -#endif // __CUDA_ARCH__ >= 730 - - for(int c_x = 0; c_x < 2; c_x++) 
- { - int j_d = lane_id % WMMA_N; - { - #pragma UNROLL - for (int i_d = lane_id / WMMA_N; i_d < WMMA_M; i_d += WMMA_M / 2) - { - int count = C_s[warp_id*WMMA_M*WMMA_N + i_d*WMMA_N + j_d + WMMA_M*WMMA_N*32*c_x]; - - const int bit_step = 128; - int f1 = (K % bit_step == 0) ? 0 : (bit_step - (K % bit_step)); - count = count - f1; // remove extra bits (from empty space for align only) - - count = (2 * count - K); - - float mean_val = mean_arr[i + i_d]; - float bias_val = bias_arr[i + i_d]; - float dst_val = count *mean_val + bias_val; - if (leaky_activation) - dst_val = (dst_val >= 0) ? (dst_val) : (0.1f*dst_val); // Leaky activation - - size_t out_index = (i + i_d)*ldc + (c_x * 8 + j + j_d); - C[out_index] = dst_val; - - if (shortcut_out_gpu) { - shortcut_out_gpu[out_index] = shortcut_in_gpu[out_index] + dst_val; - } - } - - } - } - } - } -} -#endif // CUDART_VERSION >= 10000 - -/* -// Tensor Cores are used for XOR-GEMM -__global__ void gemm_nn_custom_bin_mean_transposed_tensor_kernel(int M, int N, int K, - unsigned char *A, int lda, - unsigned char *B, int ldb, - float *C, int ldc, float *mean_arr, float *bias_arr, int leaky_activation) -{ - // total 57% - int index = blockIdx.x*blockDim.x + threadIdx.x; - - __shared__ int C_s[8*8 * 32]; // Temprorary result of GEMM WMMA - - const int lane_id = threadIdx.x % 32; - const int warp_id = threadIdx.x / 32; - const int global_warp_id = index / 32; - - const int N_aligned = N + WMMA_N - (N % WMMA_N); - - int i, j, k, h; - // 47% = 29 + 10 + 8 - j = global_warp_id % (N_aligned / WMMA_N); - j = j * WMMA_N; - { // out_h*out_w - one channel output size [169 - 173056] - i = global_warp_id / (N_aligned / WMMA_N); - i = i * WMMA_M; - - int count = 0; - k = 0; - - if (i < M) //if (i < M) // l.n - filters [16 - 55 - 1024] - { - if (j + WMMA_N > N) j = N - WMMA_N; // must be: j+7 < N - if (i + WMMA_M > M) i = M - WMMA_M; // must be: i+7 < M - -#if __CUDA_ARCH__ >= 730 - // Tensor Cores - using namespace nvcuda; - - wmma::fragment 
a_frag; - wmma::fragment b_frag; - wmma::fragment c_frag; - wmma::fill_fragment(c_frag, 0); // !!!! XOR isn't XNOR !!!!!!!!!! - - // 8 x 8 x 4 (uint32_t, 4 * 32 = 128 bit) - for (; k < K; k += 128) // l.size*l.size*l.c - one filter size [27 - 144 - 9216] - { - int64_t A_cur_index = (i*lda + k) / 8; - //int64_t A_cur_index = (local_i*lda + k) / 8; - int64_t B_cur_index = (j*ldb + k) / 8; - - // lda, ldb - are in bits - wmma::load_matrix_sync(a_frag, (uint32_t *)(A + A_cur_index), lda); // lda = M - wmma::load_matrix_sync(b_frag, (uint32_t *)(B + B_cur_index), ldb); // ldb = K - - wmma::bmma_sync(c_frag, a_frag, b_frag, c_frag); // XOR-GEMM - } - // C[i*ldc + j] - wmma::store_matrix_sync(&C_s[warp_id*WMMA_M*WMMA_N], c_frag, WMMA_N, wmma::mem_row_major); -#else // __CUDA_ARCH__ >= 730 - - // Custom XOR-GEMM - int k_d = lane_id % 4; - int i_d = lane_id / 4; - int j_d = lane_id / 4; - - int32_t accum_c_val[8]; // wmma::fill_fragment(c_frag, 0); - for (int local_j = 0; local_j < 8; ++local_j) { - accum_c_val[local_j] = 0; - } - - // 8 x 8 x 4 (uint32_t, 4 * 32 = 128 bit) - for (; k < K; k += 128) // l.size*l.size*l.c - one filter size [27 - 144 - 9216] - { - int64_t A_cur_index = (i*lda + k) / 8; - //int64_t A_cur_index = (local_i*lda + k) / 8; - int64_t B_cur_index = (j*ldb + k) / 8; - - // lda, ldb - are in bits - // 8*4 = 32 - // 8*8 = 64 - int k_d = lane_id % 4; - int i_d = lane_id / 4; - int j_d = lane_id / 4; - uint32_t a_val = *(uint32_t *)(A + ((i + i_d)*lda + (k + k_d*32)) / 8); // wmma::load_matrix_sync(a_frag, (uint32_t *)(A + A_cur_index), lda); - uint32_t b_val = *(uint32_t *)(B + ((j + j_d)*ldb + (k + k_d*32)) / 8); // wmma::load_matrix_sync(b_frag, (uint32_t *)(B + B_cur_index), ldb); - - // wmma::bmma_sync(c_frag, a_frag, b_frag, c_frag); - int32_t c_val[8]; // 8 x 32 threads = 256 - #pragma UNROLL - for (int local_j = 0; local_j < 8; ++local_j) - { - uint32_t b_val_cur = __shfl_custom(b_val, local_j *4 + k_d); - c_val[local_j] = __popc(xor_int32(a_val, 
b_val_cur)); - } - - #pragma UNROLL - for (int local_j = 0; local_j < 8; ++local_j) - { - #pragma UNROLL - for (int local_k = 0; local_k < 4; ++local_k) { - accum_c_val[local_j] += __shfl_custom(c_val[local_j], i_d * 4 + local_k); - } - } - } - - // only the first 8 threads (i) contain 8 good values each, in c_val[8] (j) = 8 x 8 =64 - // wmma::store_matrix_sync(&C_s[warp_id*WMMA_M*WMMA_N], c_frag, WMMA_N, wmma::mem_row_major); - if (k_d == 0) { - for (int local_j = 0; local_j < 8; ++local_j) - { - C_s[warp_id*WMMA_M*WMMA_N + i_d*WMMA_N + local_j] = accum_c_val[local_j]; - } - } -#endif // __CUDA_ARCH__ >= 730 - - { - int i_d = lane_id % WMMA_M; - { - - for (int j_d = lane_id / WMMA_M; j_d < WMMA_N; j_d += WMMA_N / 2) - { - int count = C_s[warp_id*WMMA_M*WMMA_N + i_d*WMMA_N + j_d]; - - const int bit_step = 128; - int f1 = (K % bit_step == 0) ? 0 : (bit_step - (K % bit_step)); - count = count - f1; // remove extra bits (from empty space for align only) - - count = (2 * count - K); - - float mean_val = mean_arr[i + i_d]; - float bias_val = bias_arr[i + i_d]; - float dst_val = count *mean_val + bias_val; - if (leaky_activation) - dst_val = (dst_val > 0) ? 
(dst_val) : (0.1f*dst_val); // Leaky activation - - C[(i + i_d)*ldc + (j + j_d)] = dst_val; - } - - } - } - } - } -} -*/ - - -// Coalescing -// A (weights) in the shared_memory - GOOD -__global__ void gemm_nn_custom_bin_mean_transposed_gpu_kernel(int M, int N, int K, - unsigned char *A, int lda, - unsigned char *B, int ldb, - float *C, int ldc, float *mean_arr, float *bias_arr, int leaky_activation, - float *shortcut_in_gpu, float *shortcut_out_gpu) -{ - // total 57% - int index = blockIdx.x*blockDim.x + threadIdx.x; - - __shared__ uint8_t A_s[6144*8/4]; - //__shared__ uint64_t A_s[6144]; // 48 KB // [lda x M`] - //__shared__ uint8_t A_s[6144*8]; // 48 KB // [lda x M`] - - int start_i = blockIdx.x*blockDim.x / N; - int end_i = (blockIdx.x*blockDim.x + blockDim.x) / N + 1; - - size_t shared_size = lda * (end_i - start_i); - - int i_cur = index / N; - int local_i = i_cur - start_i; - // ~10% - for (int k = threadIdx.x * 64; k < shared_size; k += blockDim.x * 64) { - int x = start_i*lda + k; - if (x < (M*lda)) *((uint64_t *)(A_s + k / 8)) = *((uint64_t *)(A + x / 8)); - } - __syncthreads(); - - int i, j, k; //, h; - // 47% = 29 + 10 + 8 - j = index % N; - { // out_h*out_w - one channel output size [169 - 173056] - i = index / N; - //if (i < M) // l.n - filters [16 - 55 - 1024] - { - int count = 0; - k = 0; - -#ifdef NOT_USED - // 32 thread X 256 bit = 8192 bit - for (; k < (K - 8192); k += 8192) { // l.size*l.size*l.c - one filter size [27 - 9216] - ulonglong4 c_bit256; - - //int64_t A_cur_index = (i*lda + k) / 8; - int64_t A_cur_index = (local_i*lda + k) / 8; - int64_t B_cur_index = (j*ldb + k) / 8; - if (i >= M) A_cur_index = 0; - -#pragma unroll - for (int t = 0; t < WARP_SIZE; ++t) { - const int lane_id = threadIdx.x % WARP_SIZE; - - const int64_t A_i = __shfl_custom(A_cur_index, t) + 32 * lane_id; - const int64_t B_i = __shfl_custom(B_cur_index, t) + 32 * lane_id; - - { - //ulonglong4 a_bit256 = *((ulonglong4 *)(A + A_i)); // weights - ulonglong4 a_bit256 = 
*((ulonglong4 *)(A_s + A_i)); // weights - ulonglong4 b_bit256 = *((ulonglong4 *)(B + B_i)); // input - c_bit256 = xor_int256(a_bit256, b_bit256); - int tmp_count = __popcll(c_bit256.w) + __popcll(c_bit256.x) + - __popcll(c_bit256.y) + __popcll(c_bit256.z); - - int sum_count = warpAllReduceSum(tmp_count); - if (lane_id == t) count += sum_count; - } - } - } -#endif - - -//#ifdef NOT_USED - // 32 thread X 64 bit = 2048 bit // 29% - for (; k < (K - 2048); k += 2048) { // l.size*l.size*l.c - one filter size [27 - 9216] - uint64_t c_bit64; - - //int64_t A_cur_index = (i*lda + k) / 8; - int64_t A_cur_index = (local_i*lda + k) / 8; - int64_t B_cur_index = (j*ldb + k) / 8; - if (i >= M) A_cur_index = 0; - - #pragma unroll - for (int t = 0; t < WARP_SIZE; ++t) { - const int lane_id = threadIdx.x % WARP_SIZE; - - const int64_t A_i = __shfl_custom(A_cur_index, t) + 8 * lane_id; - const int64_t B_i = __shfl_custom(B_cur_index, t) + 8 * lane_id; - - { - //uint64_t a_bit64 = *((uint64_t *)(A + A_i)); // weights - uint64_t a_bit64 = *((uint64_t *)(A_s + A_i)); // weights - uint64_t b_bit64 = *((uint64_t *)(B + B_i)); // input - c_bit64 = xor_int64(a_bit64, b_bit64); - int tmp_count = __popcll(c_bit64); - - int sum_count = warpAllReduceSum(tmp_count); - if (lane_id == t) count += sum_count; - } - } - } -//#endif - -//#ifdef NOT_USED - // 32 thread X 32 bit = 1024 bit // 10% - for (; k < (K - 1024); k += 1024) { // l.size*l.size*l.c - one filter size [27 - 9216] - - //int64_t A_cur_index = (i*lda + k) / 8; - int64_t A_cur_index = (local_i*lda + k) / 8; - int64_t B_cur_index = (j*ldb + k) / 8; - if (i >= M) A_cur_index = 0; - - #pragma unroll - for (int t = 0; t < WARP_SIZE; ++t) { - const int lane_id = threadIdx.x % WARP_SIZE; - - const int64_t A_i = __shfl_custom(A_cur_index, t) + 4 * lane_id; - const int64_t B_i = __shfl_custom(B_cur_index, t) + 4 * lane_id; - - { - //uint64_t a_bit64 = *((uint64_t *)(A + A_i)); // weights - uint32_t a_bit32 = *((uint32_t *)(A_s + A_i)); // 
weights - uint32_t b_bit32 = *((uint32_t *)(B + B_i)); // input - uint32_t c_bit32 = xor_int32(a_bit32, b_bit32); - int tmp_count = __popc(c_bit32); - - int sum_count = warpAllReduceSum(tmp_count); - if (lane_id == t) count += sum_count; - } - } - } -//#endif - - if (i < M) - { - float mean_val = mean_arr[i]; - float bias_val = bias_arr[i]; - -//#ifdef NOT_USED - // 8% - for (; k < K; k += 256) { // l.size*l.size*l.c - one filter size [27 - 144 - 9216] - //ulonglong4 a_bit256 = *((ulonglong4 *)(A + (i*lda + k) / 8)); // weights - ulonglong4 a_bit256 = *((ulonglong4 *)(A_s + (local_i*lda + k) / 8)); // weights - ulonglong4 b_bit256 = *((ulonglong4 *)(B + (j*ldb + k) / 8)); // input - ulonglong4 c_bit256 = xor_int256(a_bit256, b_bit256); - - count += __popcll(c_bit256.w) + __popcll(c_bit256.x) + - __popcll(c_bit256.y) + __popcll(c_bit256.z); - } -//#endif - -#ifdef NOT_USED - for (; k < K; k += 64) { // l.size*l.size*l.c - one filter size [27 - 9216] - //uint64_t a_bit64 = *((uint64_t *)(A + (i*lda + k) / 8)); // weights - uint64_t a_bit64 = *((uint64_t *)(A_s + (local_i*lda + k) / 8)); // weights - uint64_t b_bit64 = *((uint64_t *)(B + (j*ldb + k) / 8)); // input - uint64_t c_bit64 = xor_int64(a_bit64, b_bit64); - - count += __popcll(c_bit64); - } -#endif - - const int bit_step = 256; - int f1 = (K % bit_step == 0) ? 0 : (bit_step - (K % bit_step)); - count = count - f1; // remove extra bits (from empty space for align only) - float dst_val = (2 * count - K) *mean_val + bias_val; - if(leaky_activation) - dst_val = (dst_val >= 0) ? 
(dst_val) : (0.1f*dst_val); // Leaky activation - size_t out_index = i*ldc + j; - C[out_index] = dst_val; - - if (shortcut_out_gpu) { - shortcut_out_gpu[out_index] = shortcut_in_gpu[out_index] + dst_val; - } - } - } - } -} - - -// further optimization - use WMMA GEMM for using Tensor Cores -// https://github.com/NVIDIA-developer-blog/code-samples/blob/master/posts/tensor-cores/simpleTensorCoreGEMM.cu -// https://github.com/NVIDIA/cuda-samples/blob/master/Samples/cudaTensorCoreGemm/cudaTensorCoreGemm.cu -// https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#wmma-subbyte -// nvcuda::wmma::col_major -> cutlass::MatrixLayout::kColumnMajor (matrix is not transposed) - -// Matrix A Matrix B Accumulator Matrix Size (m-n-k) -// precision::b1 precision::b1 int 8x8x128 - -// The only dimensions currently supported by WMMA for XNOR -// const int WMMA_M = 8; -// const int WMMA_N = 8; -// const int WMMA_K = 128; - - -// GOOD -void gemm_nn_custom_bin_mean_transposed_gpu(int M, int N, int K, - unsigned char *A, int lda, - unsigned char *B, int ldb, - float *C, int ldc, float *mean_arr, float *bias, int leaky_activation, - float *shortcut_in_gpu, float *shortcut_out_gpu) -{ - int size = M*N; - const int num_blocks = get_number_of_blocks(size, BLOCK); - - //printf("\n M = %d, N = %d, M %% 8 = %d, N %% 8 = %d \n", M, N, M % 8, N % 8); - - /* - printf("\n gemm_bin size = %d, num_blocks = %d, M*K = %d KB, N*K = %d KB \n (w) M*K/num_blocks = %d KB, (i) N*K/num_blocks = %d KB \n", - size, num_blocks, M*K / 1024, N*K / 1024, M*lda / num_blocks / 1024, N*ldb / num_blocks / 1024); - printf(" M / 512 = %d, N / 512 = %d, M*lda / 512 = %d, N*ldb / 512 = %d \n", M / 512, N / 512, M*lda/512, N*ldb/512); - */ - //printf(" shared_memory: (w) lda*BLOCK/N = %d, (i) ldb*BLOCK/M = %d, \t lda = %d \n\n", lda*BLOCK / N, ldb*BLOCK / M, lda); - - - //if (M % 8 == 0 && N % 8 == 0 && M == 128) - //if (M >= 32) // l.n >= 32 -#if CUDART_VERSION >= 10000 - if (1) - { - const int M_aligned = M + 
(8 - (M % 8)); - const int N_aligned = N + (16 - (N % 16)); - int size = (M_aligned / 8)*(N_aligned / 16)*WARP_SIZE; - const int num_blocks = get_number_of_blocks(size, BLOCK); - - //printf(" lda = %d, ldb = %d, ldc = %d, lda/32 = %d, ldb/32 = %d, ldc/32 = %d \n", lda, ldb, ldc, lda / 32, ldb / 32, ldc / 32); - //printf(" l.c (K/9) = %d, M (l.n) = %d \n", (K%9 == 0)? K / 9: K, M); - gemm_nn_custom_bin_mean_transposed_tensor_kernel << > > ( - M, N, K, - A, lda, - B, ldb, - C, ldc, - mean_arr, bias, leaky_activation, - shortcut_in_gpu, shortcut_out_gpu); - - //cudaDeviceSynchronize(); - //getchar(); - } - else -#endif //# CUDART_VERSION >= 10000 - { - gemm_nn_custom_bin_mean_transposed_gpu_kernel << > > ( - M, N, K, - A, lda, - B, ldb, - C, ldc, - mean_arr, bias, leaky_activation, - shortcut_in_gpu, shortcut_out_gpu); - } - CHECK_CUDA(cudaPeekAtLastError()); -} -// -------------------------------- - -/* -void convolve_cpu(float *input, float *weights, float *output, int in_w, int in_h, int in_c, int n, int size, int pad) -{ - int fil; - // filter index -#pragma omp parallel for // "omp parallel for" - automatic parallelization of loop by using OpenMP - for (fil = 0; fil < n; ++fil) { - int chan, y, x, f_y, f_x; - // channel index - for (chan = 0; chan < in_c; ++chan) - // input - y - for (y = 0; y < in_h; ++y) - // input - x - for (x = 0; x < in_w; ++x) - { - int const output_index = fil*in_w*in_h + y*in_w + x; - int const weights_pre_index = fil*in_c*size*size + chan*size*size; - int const input_pre_index = chan*in_w*in_h; - float sum = 0; - - // filter - y - for (f_y = 0; f_y < size; ++f_y) - { - int input_y = y + f_y - pad; - // filter - x - for (f_x = 0; f_x < size; ++f_x) - { - int input_x = x + f_x - pad; - if (input_y < 0 || input_x < 0 || input_y >= in_h || input_x >= in_w) continue; - - int input_index = input_pre_index + input_y*in_w + input_x; - int weights_index = weights_pre_index + f_y*size + f_x; - - sum += input[input_index] * weights[weights_index]; 
- } - } - // l.output[filters][width][height] += - // state.input[channels][width][height] * - // l.weights[filters][channels][filter_width][filter_height]; - output[output_index] += sum; - } - } - - -} -// -------------------------------- - - -void convolve_bin_cpu(float *input, float *weights, float *output, int in_w, int in_h, int in_c, int n, - int size, int pad, int new_lda, float *mean_arr_gpu) -{ - int fil; - // filter index -#pragma omp parallel for // "omp parallel for" - automatic parallelization of loop by using OpenMP - for (fil = 0; fil < n; ++fil) { - float mean_val = mean_arr_gpu[fil]; - int chan, y, x, f_y, f_x; - // channel index - for (chan = 0; chan < in_c; ++chan) - // input - y - for (y = 0; y < in_h; ++y) - // input - x - for (x = 0; x < in_w; ++x) - { - int const output_index = fil*in_w*in_h + y*in_w + x; - int const weights_pre_index = fil*in_c*size*size + chan*size*size; - int const input_pre_index = chan*in_w*in_h; - int sum = 0; - int good_val = 0; - - // filter - y - for (f_y = 0; f_y < size; ++f_y) - { - int input_y = y + f_y - pad; - // filter - x - for (f_x = 0; f_x < size; ++f_x) - { - int input_x = x + f_x - pad; - if (input_y < 0 || input_x < 0 || input_y >= in_h || input_x >= in_w) continue; - - int input_index = input_pre_index + input_y*in_w + input_x; - //int weights_index = weights_pre_index + f_y*size + f_x; - //int weights_index = fil*in_c*size*size + chan*size*size + f_y*size + f_x; - int weights_index = fil*new_lda + chan*size*size + f_y*size + f_x; - - //sum += input[input_index] * weights[weights_index]; - - int8_t in_bit = get_bit((uint8_t *)input, input_index); - int8_t w_bit = get_bit((uint8_t *)weights, weights_index); - int res = xnor_bit1(in_bit, w_bit); - sum += res; - good_val++; - //sum += (res > 0) ? 1 : -1; - //in_bit = (in_bit > 0) ? 1 : -1; - //w_bit = (w_bit > 0) ? 
1 : -1; - //int8_t res = in_bit*w_bit; - //sum += res; - //printf("\n i: %d x w: %d = res: %d \t sum: %d \t mean = %f \n", in_bit, w_bit, res, sum, mean_val); - } - } - //printf("sum = %d, ", sum); - sum = sum - (good_val - sum); - //printf(" size = %d, sum = %d \n", size, sum); - - // l.output[filters][width][height] += - // state.input[channels][width][height] * - // l.weights[filters][channels][filter_width][filter_height]; - output[output_index] += sum*mean_val; - } - } -} -*/ -// -------------------------------- - -__global__ void convolve_gpu_kernel(float *input, float *weights, float *output, int in_w, int in_h, int in_c, int n, int size, int pad) -{ - int index = blockIdx.x*blockDim.x + threadIdx.x; - - int fil; - // filter index - //for (fil = 0; fil < n; ++fil) - int chan, y, x, f_y, f_x; - // channel index - //for (chan = 0; chan < in_c; ++chan) - // input - y - //for (y = 0; y < in_h; ++y) - // input - x - //for (x = 0; x < in_w; ++x) - x = index % in_w; - int index2 = index / in_w; - y = index2 % in_h; - fil = index2 / in_h; - if (fil < n) - { - - int const output_index = fil*in_w*in_h + y*in_w + x; - float sum = 0; - - for (chan = 0; chan < in_c; ++chan) - { - int const weights_pre_index = fil*in_c*size*size + chan*size*size; - int const input_pre_index = chan*in_w*in_h; - - // filter - y - for (f_y = 0; f_y < size; ++f_y) - { - int input_y = y + f_y - pad; - // filter - x - for (f_x = 0; f_x < size; ++f_x) - { - int input_x = x + f_x - pad; - if (input_y < 0 || input_x < 0 || input_y >= in_h || input_x >= in_w) continue; - - int input_index = input_pre_index + input_y*in_w + input_x; - int weights_index = weights_pre_index + f_y*size + f_x; - - sum += input[input_index] * weights[weights_index]; - - } - } - // l.output[filters][width][height] += - // state.input[channels][width][height] * - // l.weights[filters][channels][filter_width][filter_height]; - //output[output_index] += sum; - } - output[output_index] = sum; - } - -} - -void 
convolve_gpu(float *input, float *weights, float *output, int in_w, int in_h, int in_c, int n, int size, int pad) -{ - int array_size = in_w*in_h*n; // width X height X filters - const int num_blocks = array_size / BLOCK + 1; - //printf("\n array_size = %d, num_blocks = %d, w = %d, h = %d, n = %d, c = %d, pad = %d \n", array_size, num_blocks, in_w, in_h, n, in_c, pad); - - convolve_gpu_kernel << > > (input, weights, output, in_w, in_h, in_c, n, size, pad); - CHECK_CUDA(cudaPeekAtLastError()); -} - -// -------------------------------- - -/* -__global__ void convolve_bin_gpu_kernel(float *input, float *weights, float *output, int in_w, int in_h, int in_c, int n, - int size, int pad, int new_lda, float *mean_arr_gpu) -{ - int index = blockIdx.x*blockDim.x + threadIdx.x; - - int fil; - // filter index - //for (fil = 0; fil < n; ++fil) - int chan, y, x, f_y, f_x; - // channel index - //for (chan = 0; chan < in_c; ++chan) - // input - y - //for (y = 0; y < in_h; ++y) - // input - x - //for (x = 0; x < in_w; ++x) - x = index % in_w; - int index2 = index / in_w; - y = index2 % in_h; - fil = index2 / in_h; - if (fil < n) // (1-6 for one BLOCK) - { - //float mean_val = mean_arr_gpu[fil]; - int const output_index = fil*in_w*in_h + y*in_w + x; - int sum = 0; - int good_val = 0; - - for (chan = 0; chan < in_c; ++chan) - { - //int const weights_pre_index = fil*in_c*size*size + chan*size*size; - int const weights_pre_index = fil*new_lda + chan*size*size; - int const input_pre_index = chan*in_w*in_h; - - // filter - y - for (f_y = 0; f_y < size; ++f_y) - { - int input_y = y + f_y - pad; - // filter - x - for (f_x = 0; f_x < size; ++f_x) - { - int input_x = x + f_x - pad; - if (input_y < 0 || input_x < 0 || input_y >= in_h || input_x >= in_w) continue; - - int input_index = input_pre_index + input_y*in_w + input_x; - int weights_index = weights_pre_index + f_y*size + f_x; - //int weights_index = fil*in_c*size*size + chan*size*size + f_y*size + f_x; - //int weights_index = 
fil*new_lda + chan*size*size + f_y*size + f_x; - - uint8_t in_bit = get_bit((uint8_t *)input, input_index); - uint8_t w_bit = get_bit((uint8_t *)weights, weights_index); - int res = xnor_bit1(in_bit, w_bit); - sum += res; - good_val++; - - //sum += input[input_index] *weights[weights_index]; - - } - } - // l.output[filters][width][height] += - // state.input[channels][width][height] * - // l.weights[filters][channels][filter_width][filter_height]; - //output[output_index] += sum; - } - sum = sum - (good_val - sum); - output[output_index] = sum * mean_arr_gpu[fil]; // atoimcAdd for inter-BLOCK sum - } - -} -*/ - -__global__ void convolve_bin_gpu_kernel(float *input, float *weights, float *output, int in_w, int in_h, int in_c, int n, - int size, int pad, int new_lda, float *mean_arr_gpu) -{ - int index = blockIdx.x*blockDim.x + threadIdx.x; - - int fil; - // filter index - //for (fil = 0; fil < n; ++fil) - int chan, y, x, f_y, f_x; - // channel index - //for (chan = 0; chan < in_c; ++chan) - // input - y - //for (y = 0; y < in_h; ++y) - // input - x - //for (x = 0; x < in_w; ++x) - x = index % in_w; - int index2 = index / in_w; - y = index2 % in_h; - fil = index2 / in_h; - //if (fil < n) // (1-6 for one BLOCK) - { - //float mean_val = mean_arr_gpu[fil]; - int const output_index = fil*in_w*in_h + y*in_w + x; - int sum = 0; - int good_val = 0; - - int min_index = blockIdx.x*blockDim.x; - int min_fil = (min_index / in_w) / in_h; - int max_index = (blockIdx.x+1)*blockDim.x - 1; - int max_fil = (max_index / in_w) / in_h; - - __shared__ uint32_t weights_shared[3*3*1024*6/32 + 1]; // 7 KB (6 filters) - use (new_lda) for size calculation - //const int weights_size = size*size*in_c/8; - const int weights_size = size*size*in_c / 32 + 1; - - for (int tmp_fil = min_fil; tmp_fil <= max_fil; tmp_fil++) { - for (int s = threadIdx.x; s < weights_size; s += blockDim.x) { - //weights_shared[s + (tmp_fil - min_fil)*new_lda / 8] = ((uint8_t *)weights)[tmp_fil*new_lda / 8 + s]; - 
weights_shared[s + (tmp_fil - min_fil)*new_lda/32] = ((uint32_t *)weights)[tmp_fil*new_lda / 32 + s]; - } - } - __syncthreads(); - - for (chan = 0; chan < in_c; ++chan) - { - //int const weights_pre_index = fil*in_c*size*size + chan*size*size; - //int const weights_pre_index = fil*new_lda + chan*size*size; - int const input_pre_index = chan*in_w*in_h; - - __shared__ uint32_t input_shared[416*416/32 + 1]; // 21.2 KB bytes (for input size 832x832) - const int input_shared_size = in_w*in_h / 32 + 1; - const int add_input_index = input_pre_index % 32; - __syncthreads(); // why??? but is required - - for (int s = threadIdx.x; s < input_shared_size; s += blockDim.x) { - input_shared[s] = ((uint32_t *)input)[input_pre_index / 32 + s]; - } - __syncthreads(); - - /* - __shared__ uint8_t input_shared[208 * 208 / 8 + 1]; // 5.4 KB bytes (for input size 416x416) - const int input_shared_size = in_w*in_h / 8 + 1; - const int add_input_index = input_pre_index % 8; - __syncthreads(); - - for (int s = threadIdx.x; s < input_shared_size; s += blockDim.x) { - ((uint8_t *)input_shared)[s] = ((uint8_t *)input)[input_pre_index / 8 + s]; - } - __syncthreads(); - */ - //int src_index = -1; - //uint32_t input_byte; - - if (fil < n) // (1-6 for one BLOCK) - { - // filter - y - for (f_y = 0; f_y < size; ++f_y) - { - int input_y = y + f_y - pad; - // filter - x - for (f_x = 0; f_x < size; ++f_x) - { - int input_x = x + f_x - pad; - if (input_y < 0 || input_x < 0 || input_y >= in_h || input_x >= in_w) continue; - - //int input_index = input_pre_index + input_y*in_w + input_x; - //int weights_index = weights_pre_index + f_y*size + f_x; - //int weights_index = fil*in_c*size*size + chan*size*size + f_y*size + f_x; - //int weights_index = fil*new_lda + chan*size*size + f_y*size + f_x; - - //uint8_t in_bit = get_bit((uint8_t *)input, input_index); - //uint8_t w_bit = get_bit((uint8_t *)weights, weights_index); - - //int weights_index = fil*in_c*size*size + chan*size*size + f_y*size + f_x; - int 
weights_shared_index = (fil - min_fil)*new_lda + chan*size*size + f_y*size + f_x; - //uint8_t in_bit = get_bit((uint8_t *)weights_shared, weights_shared_index); - uint8_t w_bit = get_bit((uint8_t *)weights_shared, weights_shared_index); - - //int input_index = input_pre_index + input_y*in_w + input_x; - int input_shared_index = /*input_pre_index +*/ input_y*in_w + input_x + add_input_index; - uint8_t in_bit = get_bit((uint8_t *)input_shared, input_shared_index); - /* - int new_src_index = input_shared_index / 32; - int src_shift = input_shared_index % 32; - //if (new_src_index != src_index) - { - src_index = new_src_index; - input_byte = ((uint32_t *)input_shared)[src_index]; - } - uint8_t in_bit = (input_byte & (1 << src_shift)) >> src_shift; - */ - - int res = xnor_bit1(in_bit, w_bit); - sum += res; - good_val++; - - //sum += input[input_index] *weights[weights_index]; - - } - } - } - // l.output[filters][width][height] += - // state.input[channels][width][height] * - // l.weights[filters][channels][filter_width][filter_height]; - //output[output_index] += sum; - } - sum = sum - (good_val - sum); - //output[output_index] = sum * mean_arr_gpu[fil]; // atoimcAdd for inter-BLOCK sum - atomicAdd(&output[output_index], sum * mean_arr_gpu[fil]); - } - -} - -void convolve_bin_gpu(float *input, float *weights, float *output, int in_w, int in_h, int in_c, int n, - int size, int pad, int new_lda, float *mean_arr_gpu) -{ - int array_size = in_w*in_h*n; // width X height X filters - const int num_blocks = array_size / BLOCK + 1; - //printf("\n array_size = %d, num_blocks = %d, w = %d, h = %d, n = %d, c = %d, pad = %d \n", array_size, num_blocks, in_w, in_h, n, in_c, pad); - - convolve_bin_gpu_kernel << > > (input, weights, output, in_w, in_h, in_c, n, size, pad, new_lda, mean_arr_gpu); - CHECK_CUDA(cudaPeekAtLastError()); -} - -// -------------------------------- - -// CUDA: use 512 threads per block -const int CAFFE_CUDA_NUM_THREADS = 512; - -// CUDA: number of blocks for 
threads. -inline int CAFFE_GET_BLOCKS(const int N) { - return (N + CAFFE_CUDA_NUM_THREADS - 1) / CAFFE_CUDA_NUM_THREADS; -} - -// CUDA: grid stride looping -#define CUDA_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; \ - i < (n); \ - i += blockDim.x * gridDim.x) - -// https://github.com/BVLC/caffe/blob/master/src/caffe/util/im2col.cu -__global__ void im2col_gpu_kernel_ext(const int n, const float* data_im, - const int height, const int width, const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, - const int stride_h, const int stride_w, - const int dilation_h, const int dilation_w, - const int height_col, const int width_col, - float* data_col) { - CUDA_KERNEL_LOOP(index, n) { - const int h_index = index / width_col; - const int h_col = h_index % height_col; - const int w_col = index % width_col; - const int c_im = h_index / height_col; - const int c_col = c_im * kernel_h * kernel_w; - const int h_offset = h_col * stride_h - pad_h; - const int w_offset = w_col * stride_w - pad_w; - float* data_col_ptr = data_col; - data_col_ptr += (c_col * height_col + h_col) * width_col + w_col; - const float* data_im_ptr = data_im; - data_im_ptr += (c_im * height + h_offset) * width + w_offset; - for (int i = 0; i < kernel_h; ++i) { - for (int j = 0; j < kernel_w; ++j) { - int h_im = h_offset + i * dilation_h; - int w_im = w_offset + j * dilation_w; - *data_col_ptr = - (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) ? 
- data_im_ptr[i * dilation_h * width + j * dilation_w] : 0; - data_col_ptr += height_col * width_col; - } - } - } -} - - -void im2col_gpu_ext(const float* data_im, const int channels, - const int height, const int width, const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, - const int stride_h, const int stride_w, - const int dilation_h, const int dilation_w, - float* data_col) -{ - // We are going to launch channels * height_col * width_col kernels, each - // kernel responsible for copying a single-channel grid. - int height_col = (height + 2 * pad_h - - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; - int width_col = (width + 2 * pad_w - - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; - int num_kernels = channels * height_col * width_col; - // NOLINT_NEXT_LINE(whitespace/operators) - im2col_gpu_kernel_ext << > >( - num_kernels, data_im, height, width, kernel_h, kernel_w, pad_h, - pad_w, stride_h, stride_w, dilation_h, dilation_w, height_col, - width_col, data_col); - - CHECK_CUDA(cudaPeekAtLastError()); -} \ No newline at end of file diff --git a/src/Detector/darknet/src/image.c b/src/Detector/darknet/src/image.c deleted file mode 100644 index 07942c71e..000000000 --- a/src/Detector/darknet/src/image.c +++ /dev/null @@ -1,1706 +0,0 @@ -#ifndef _GNU_SOURCE -#define _GNU_SOURCE -#endif -#include "image.h" -#include "utils.h" -#include "blas.h" -#include "dark_cuda.h" -#include -#ifndef _USE_MATH_DEFINES -#define _USE_MATH_DEFINES -#endif -#include - -#ifndef STB_IMAGE_IMPLEMENTATION -#define STB_IMAGE_IMPLEMENTATION -#include "stb_image.h" -#endif -#ifndef STB_IMAGE_WRITE_IMPLEMENTATION -#define STB_IMAGE_WRITE_IMPLEMENTATION -#include "stb_image_write.h" -#endif - -extern int check_mistakes; -//int windows = 0; - -float colors[6][3] = { {1,0,1}, {0,0,1},{0,1,1},{0,1,0},{1,1,0},{1,0,0} }; - -float get_color(int c, int x, int max) -{ - float ratio = ((float)x/max)*5; - int i = floor(ratio); - int j = ceil(ratio); - ratio -= i; - float 
r = (1-ratio) * colors[i][c] + ratio*colors[j][c]; - //printf("%f\n", r); - return r; -} - -static float get_pixel(image m, int x, int y, int c) -{ - assert(x < m.w && y < m.h && c < m.c); - return m.data[c*m.h*m.w + y*m.w + x]; -} -static float get_pixel_extend(image m, int x, int y, int c) -{ - if (x < 0 || x >= m.w || y < 0 || y >= m.h) return 0; - /* - if(x < 0) x = 0; - if(x >= m.w) x = m.w-1; - if(y < 0) y = 0; - if(y >= m.h) y = m.h-1; - */ - if (c < 0 || c >= m.c) return 0; - return get_pixel(m, x, y, c); -} -static void set_pixel(image m, int x, int y, int c, float val) -{ - if (x < 0 || y < 0 || c < 0 || x >= m.w || y >= m.h || c >= m.c) return; - assert(x < m.w && y < m.h && c < m.c); - m.data[c*m.h*m.w + y*m.w + x] = val; -} -static void add_pixel(image m, int x, int y, int c, float val) -{ - assert(x < m.w && y < m.h && c < m.c); - m.data[c*m.h*m.w + y*m.w + x] += val; -} - -void composite_image(image source, image dest, int dx, int dy) -{ - int x,y,k; - for(k = 0; k < source.c; ++k){ - for(y = 0; y < source.h; ++y){ - for(x = 0; x < source.w; ++x){ - float val = get_pixel(source, x, y, k); - float val2 = get_pixel_extend(dest, dx+x, dy+y, k); - set_pixel(dest, dx+x, dy+y, k, val * val2); - } - } - } -} - -image border_image(image a, int border) -{ - image b = make_image(a.w + 2*border, a.h + 2*border, a.c); - int x,y,k; - for(k = 0; k < b.c; ++k){ - for(y = 0; y < b.h; ++y){ - for(x = 0; x < b.w; ++x){ - float val = get_pixel_extend(a, x - border, y - border, k); - if(x - border < 0 || x - border >= a.w || y - border < 0 || y - border >= a.h) val = 1; - set_pixel(b, x, y, k, val); - } - } - } - return b; -} - -image tile_images(image a, image b, int dx) -{ - if(a.w == 0) return copy_image(b); - image c = make_image(a.w + b.w + dx, (a.h > b.h) ? a.h : b.h, (a.c > b.c) ? 
a.c : b.c); - fill_cpu(c.w*c.h*c.c, 1, c.data, 1); - embed_image(a, c, 0, 0); - composite_image(b, c, a.w + dx, 0); - return c; -} - -image get_label(image **characters, char *string, int size) -{ - if(size > 7) size = 7; - image label = make_empty_image(0,0,0); - while(*string){ - image l = characters[size][(int)*string]; - image n = tile_images(label, l, -size - 1 + (size+1)/2); - free_image(label); - label = n; - ++string; - } - image b = border_image(label, label.h*.25); - free_image(label); - return b; -} - -image get_label_v3(image **characters, char *string, int size) -{ - size = size / 10; - if (size > 7) size = 7; - image label = make_empty_image(0, 0, 0); - while (*string) { - image l = characters[size][(int)*string]; - image n = tile_images(label, l, -size - 1 + (size + 1) / 2); - free_image(label); - label = n; - ++string; - } - image b = border_image(label, label.h*.05); - free_image(label); - return b; -} - -void draw_label(image a, int r, int c, image label, const float *rgb) -{ - int w = label.w; - int h = label.h; - if (r - h >= 0) r = r - h; - - int i, j, k; - for(j = 0; j < h && j + r < a.h; ++j){ - for(i = 0; i < w && i + c < a.w; ++i){ - for(k = 0; k < label.c; ++k){ - float val = get_pixel(label, i, j, k); - set_pixel(a, i+c, j+r, k, rgb[k] * val); - } - } - } -} - -void draw_weighted_label(image a, int r, int c, image label, const float *rgb, const float alpha) -{ - int w = label.w; - int h = label.h; - if (r - h >= 0) r = r - h; - - int i, j, k; - for (j = 0; j < h && j + r < a.h; ++j) { - for (i = 0; i < w && i + c < a.w; ++i) { - for (k = 0; k < label.c; ++k) { - float val1 = get_pixel(label, i, j, k); - float val2 = get_pixel(a, i + c, j + r, k); - float val_dst = val1 * rgb[k] * alpha + val2 * (1 - alpha); - set_pixel(a, i + c, j + r, k, val_dst); - } - } - } -} - -void draw_box_bw(image a, int x1, int y1, int x2, int y2, float brightness) -{ - //normalize_image(a); - int i; - if (x1 < 0) x1 = 0; - if (x1 >= a.w) x1 = a.w - 1; - if (x2 < 
0) x2 = 0; - if (x2 >= a.w) x2 = a.w - 1; - - if (y1 < 0) y1 = 0; - if (y1 >= a.h) y1 = a.h - 1; - if (y2 < 0) y2 = 0; - if (y2 >= a.h) y2 = a.h - 1; - - for (i = x1; i <= x2; ++i) { - a.data[i + y1*a.w + 0 * a.w*a.h] = brightness; - a.data[i + y2*a.w + 0 * a.w*a.h] = brightness; - } - for (i = y1; i <= y2; ++i) { - a.data[x1 + i*a.w + 0 * a.w*a.h] = brightness; - a.data[x2 + i*a.w + 0 * a.w*a.h] = brightness; - } -} - -void draw_box_width_bw(image a, int x1, int y1, int x2, int y2, int w, float brightness) -{ - int i; - for (i = 0; i < w; ++i) { - float alternate_color = (w % 2) ? (brightness) : (1.0 - brightness); - draw_box_bw(a, x1 + i, y1 + i, x2 - i, y2 - i, alternate_color); - } -} - -void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b) -{ - //normalize_image(a); - int i; - if(x1 < 0) x1 = 0; - if(x1 >= a.w) x1 = a.w-1; - if(x2 < 0) x2 = 0; - if(x2 >= a.w) x2 = a.w-1; - - if(y1 < 0) y1 = 0; - if(y1 >= a.h) y1 = a.h-1; - if(y2 < 0) y2 = 0; - if(y2 >= a.h) y2 = a.h-1; - - for(i = x1; i <= x2; ++i){ - a.data[i + y1*a.w + 0*a.w*a.h] = r; - a.data[i + y2*a.w + 0*a.w*a.h] = r; - - a.data[i + y1*a.w + 1*a.w*a.h] = g; - a.data[i + y2*a.w + 1*a.w*a.h] = g; - - a.data[i + y1*a.w + 2*a.w*a.h] = b; - a.data[i + y2*a.w + 2*a.w*a.h] = b; - } - for(i = y1; i <= y2; ++i){ - a.data[x1 + i*a.w + 0*a.w*a.h] = r; - a.data[x2 + i*a.w + 0*a.w*a.h] = r; - - a.data[x1 + i*a.w + 1*a.w*a.h] = g; - a.data[x2 + i*a.w + 1*a.w*a.h] = g; - - a.data[x1 + i*a.w + 2*a.w*a.h] = b; - a.data[x2 + i*a.w + 2*a.w*a.h] = b; - } -} - -void draw_box_width(image a, int x1, int y1, int x2, int y2, int w, float r, float g, float b) -{ - int i; - for(i = 0; i < w; ++i){ - draw_box(a, x1+i, y1+i, x2-i, y2-i, r, g, b); - } -} - -void draw_bbox(image a, box bbox, int w, float r, float g, float b) -{ - int left = (bbox.x-bbox.w/2)*a.w; - int right = (bbox.x+bbox.w/2)*a.w; - int top = (bbox.y-bbox.h/2)*a.h; - int bot = (bbox.y+bbox.h/2)*a.h; - - int i; - for(i = 0; i < w; ++i){ - 
draw_box(a, left+i, top+i, right-i, bot-i, r, g, b); - } -} - -image **load_alphabet() -{ - int i, j; - const int nsize = 8; - image** alphabets = (image**)xcalloc(nsize, sizeof(image*)); - for(j = 0; j < nsize; ++j){ - alphabets[j] = (image*)xcalloc(128, sizeof(image)); - for(i = 32; i < 127; ++i){ - char buff[256]; - sprintf(buff, "data/labels/%d_%d.png", i, j); - alphabets[j][i] = load_image_color(buff, 0, 0); - } - } - return alphabets; -} - - - -// Creates array of detections with prob > thresh and fills best_class for them -detection_with_class* get_actual_detections(detection *dets, int dets_num, float thresh, int* selected_detections_num, char **names) -{ - int selected_num = 0; - detection_with_class* result_arr = (detection_with_class*)xcalloc(dets_num, sizeof(detection_with_class)); - int i; - for (i = 0; i < dets_num; ++i) { - int best_class = -1; - float best_class_prob = thresh; - int j; - for (j = 0; j < dets[i].classes; ++j) { - int show = strncmp(names[j], "dont_show", 9); - if (dets[i].prob[j] > best_class_prob && show) { - best_class = j; - best_class_prob = dets[i].prob[j]; - } - } - if (best_class >= 0) { - result_arr[selected_num].det = dets[i]; - result_arr[selected_num].best_class = best_class; - ++selected_num; - } - } - if (selected_detections_num) - *selected_detections_num = selected_num; - return result_arr; -} - -// compare to sort detection** by bbox.x -int compare_by_lefts(const void *a_ptr, const void *b_ptr) { - const detection_with_class* a = (detection_with_class*)a_ptr; - const detection_with_class* b = (detection_with_class*)b_ptr; - const float delta = (a->det.bbox.x - a->det.bbox.w/2) - (b->det.bbox.x - b->det.bbox.w/2); - return delta < 0 ? -1 : delta > 0 ? 
1 : 0; -} - -// compare to sort detection** by best_class probability -int compare_by_probs(const void *a_ptr, const void *b_ptr) { - const detection_with_class* a = (detection_with_class*)a_ptr; - const detection_with_class* b = (detection_with_class*)b_ptr; - float delta = a->det.prob[a->best_class] - b->det.prob[b->best_class]; - return delta < 0 ? -1 : delta > 0 ? 1 : 0; -} - -void draw_detections_v3(image im, detection *dets, int num, float thresh, char **names, image **alphabet, int classes, int ext_output) -{ - static int frame_id = 0; - frame_id++; - - int selected_detections_num; - detection_with_class* selected_detections = get_actual_detections(dets, num, thresh, &selected_detections_num, names); - - // text output - qsort(selected_detections, selected_detections_num, sizeof(*selected_detections), compare_by_lefts); - int i; - for (i = 0; i < selected_detections_num; ++i) { - const int best_class = selected_detections[i].best_class; - printf("%s: %.0f%%", names[best_class], selected_detections[i].det.prob[best_class] * 100); - if (ext_output) - printf("\t(left_x: %4.0f top_y: %4.0f width: %4.0f height: %4.0f)\n", - round((selected_detections[i].det.bbox.x - selected_detections[i].det.bbox.w / 2)*im.w), - round((selected_detections[i].det.bbox.y - selected_detections[i].det.bbox.h / 2)*im.h), - round(selected_detections[i].det.bbox.w*im.w), round(selected_detections[i].det.bbox.h*im.h)); - else - printf("\n"); - int j; - for (j = 0; j < classes; ++j) { - if (selected_detections[i].det.prob[j] > thresh && j != best_class) { - printf("%s: %.0f%%", names[j], selected_detections[i].det.prob[j] * 100); - - if (ext_output) - printf("\t(left_x: %4.0f top_y: %4.0f width: %4.0f height: %4.0f)\n", - round((selected_detections[i].det.bbox.x - selected_detections[i].det.bbox.w / 2)*im.w), - round((selected_detections[i].det.bbox.y - selected_detections[i].det.bbox.h / 2)*im.h), - round(selected_detections[i].det.bbox.w*im.w), 
round(selected_detections[i].det.bbox.h*im.h)); - else - printf("\n"); - } - } - } - - // image output - qsort(selected_detections, selected_detections_num, sizeof(*selected_detections), compare_by_probs); - for (i = 0; i < selected_detections_num; ++i) { - int width = im.h * .002; - if (width < 1) - width = 1; - - /* - if(0){ - width = pow(prob, 1./2.)*10+1; - alphabet = 0; - } - */ - - //printf("%d %s: %.0f%%\n", i, names[selected_detections[i].best_class], prob*100); - int offset = selected_detections[i].best_class * 123457 % classes; - float red = get_color(2, offset, classes); - float green = get_color(1, offset, classes); - float blue = get_color(0, offset, classes); - float rgb[3]; - - //width = prob*20+2; - - rgb[0] = red; - rgb[1] = green; - rgb[2] = blue; - box b = selected_detections[i].det.bbox; - //printf("%f %f %f %f\n", b.x, b.y, b.w, b.h); - - int left = (b.x - b.w / 2.)*im.w; - int right = (b.x + b.w / 2.)*im.w; - int top = (b.y - b.h / 2.)*im.h; - int bot = (b.y + b.h / 2.)*im.h; - - if (left < 0) left = 0; - if (right > im.w - 1) right = im.w - 1; - if (top < 0) top = 0; - if (bot > im.h - 1) bot = im.h - 1; - - //int b_x_center = (left + right) / 2; - //int b_y_center = (top + bot) / 2; - //int b_width = right - left; - //int b_height = bot - top; - //sprintf(labelstr, "%d x %d - w: %d, h: %d", b_x_center, b_y_center, b_width, b_height); - - // you should create directory: result_img - //static int copied_frame_id = -1; - //static image copy_img; - //if (copied_frame_id != frame_id) { - // copied_frame_id = frame_id; - // if (copy_img.data) free_image(copy_img); - // copy_img = copy_image(im); - //} - //image cropped_im = crop_image(copy_img, left, top, right - left, bot - top); - //static int img_id = 0; - //img_id++; - //char image_name[1024]; - //int best_class_id = selected_detections[i].best_class; - //sprintf(image_name, "result_img/img_%d_%d_%d_%s.jpg", frame_id, img_id, best_class_id, names[best_class_id]); - //save_image(cropped_im, 
image_name); - //free_image(cropped_im); - - if (im.c == 1) { - draw_box_width_bw(im, left, top, right, bot, width, 0.8); // 1 channel Black-White - } - else { - draw_box_width(im, left, top, right, bot, width, red, green, blue); // 3 channels RGB - } - if (alphabet) { - char labelstr[4096] = { 0 }; - strcat(labelstr, names[selected_detections[i].best_class]); - char prob_str[10]; - sprintf(prob_str, ": %.2f", selected_detections[i].det.prob[selected_detections[i].best_class]); - strcat(labelstr, prob_str); - int j; - for (j = 0; j < classes; ++j) { - if (selected_detections[i].det.prob[j] > thresh && j != selected_detections[i].best_class) { - strcat(labelstr, ", "); - strcat(labelstr, names[j]); - } - } - image label = get_label_v3(alphabet, labelstr, (im.h*.02)); - //draw_label(im, top + width, left, label, rgb); - draw_weighted_label(im, top + width, left, label, rgb, 0.7); - free_image(label); - } - if (selected_detections[i].det.mask) { - image mask = float_to_image(14, 14, 1, selected_detections[i].det.mask); - image resized_mask = resize_image(mask, b.w*im.w, b.h*im.h); - image tmask = threshold_image(resized_mask, .5); - embed_image(tmask, im, left, top); - free_image(mask); - free_image(resized_mask); - free_image(tmask); - } - } - free(selected_detections); -} - -void draw_detections(image im, int num, float thresh, box *boxes, float **probs, char **names, image **alphabet, int classes) -{ - int i; - - for(i = 0; i < num; ++i){ - int class_id = max_index(probs[i], classes); - float prob = probs[i][class_id]; - if(prob > thresh){ - - //// for comparison with OpenCV version of DNN Darknet Yolo v2 - //printf("\n %f, %f, %f, %f, ", boxes[i].x, boxes[i].y, boxes[i].w, boxes[i].h); - // int k; - //for (k = 0; k < classes; ++k) { - // printf("%f, ", probs[i][k]); - //} - //printf("\n"); - - int width = im.h * .012; - - if(0){ - width = pow(prob, 1./2.)*10+1; - alphabet = 0; - } - - int offset = class_id*123457 % classes; - float red = 
get_color(2,offset,classes); - float green = get_color(1,offset,classes); - float blue = get_color(0,offset,classes); - float rgb[3]; - - //width = prob*20+2; - - rgb[0] = red; - rgb[1] = green; - rgb[2] = blue; - box b = boxes[i]; - - int left = (b.x-b.w/2.)*im.w; - int right = (b.x+b.w/2.)*im.w; - int top = (b.y-b.h/2.)*im.h; - int bot = (b.y+b.h/2.)*im.h; - - if(left < 0) left = 0; - if(right > im.w-1) right = im.w-1; - if(top < 0) top = 0; - if(bot > im.h-1) bot = im.h-1; - printf("%s: %.0f%%", names[class_id], prob * 100); - - //printf(" - id: %d, x_center: %d, y_center: %d, width: %d, height: %d", - // class_id, (right + left) / 2, (bot - top) / 2, right - left, bot - top); - - printf("\n"); - draw_box_width(im, left, top, right, bot, width, red, green, blue); - if (alphabet) { - image label = get_label(alphabet, names[class_id], (im.h*.03)/10); - draw_label(im, top + width, left, label, rgb); - } - } - } -} - -void transpose_image(image im) -{ - assert(im.w == im.h); - int n, m; - int c; - for(c = 0; c < im.c; ++c){ - for(n = 0; n < im.w-1; ++n){ - for(m = n + 1; m < im.w; ++m){ - float swap = im.data[m + im.w*(n + im.h*c)]; - im.data[m + im.w*(n + im.h*c)] = im.data[n + im.w*(m + im.h*c)]; - im.data[n + im.w*(m + im.h*c)] = swap; - } - } - } -} - -void rotate_image_cw(image im, int times) -{ - assert(im.w == im.h); - times = (times + 400) % 4; - int i, x, y, c; - int n = im.w; - for(i = 0; i < times; ++i){ - for(c = 0; c < im.c; ++c){ - for(x = 0; x < n/2; ++x){ - for(y = 0; y < (n-1)/2 + 1; ++y){ - float temp = im.data[y + im.w*(x + im.h*c)]; - im.data[y + im.w*(x + im.h*c)] = im.data[n-1-x + im.w*(y + im.h*c)]; - im.data[n-1-x + im.w*(y + im.h*c)] = im.data[n-1-y + im.w*(n-1-x + im.h*c)]; - im.data[n-1-y + im.w*(n-1-x + im.h*c)] = im.data[x + im.w*(n-1-y + im.h*c)]; - im.data[x + im.w*(n-1-y + im.h*c)] = temp; - } - } - } - } -} - -void flip_image(image a) -{ - int i,j,k; - for(k = 0; k < a.c; ++k){ - for(i = 0; i < a.h; ++i){ - for(j = 0; j < a.w/2; 
++j){ - int index = j + a.w*(i + a.h*(k)); - int flip = (a.w - j - 1) + a.w*(i + a.h*(k)); - float swap = a.data[flip]; - a.data[flip] = a.data[index]; - a.data[index] = swap; - } - } - } -} - -image image_distance(image a, image b) -{ - int i,j; - image dist = make_image(a.w, a.h, 1); - for(i = 0; i < a.c; ++i){ - for(j = 0; j < a.h*a.w; ++j){ - dist.data[j] += pow(a.data[i*a.h*a.w+j]-b.data[i*a.h*a.w+j],2); - } - } - for(j = 0; j < a.h*a.w; ++j){ - dist.data[j] = sqrt(dist.data[j]); - } - return dist; -} - -void embed_image(image source, image dest, int dx, int dy) -{ - int x,y,k; - for(k = 0; k < source.c; ++k){ - for(y = 0; y < source.h; ++y){ - for(x = 0; x < source.w; ++x){ - float val = get_pixel(source, x,y,k); - set_pixel(dest, dx+x, dy+y, k, val); - } - } - } -} - -image collapse_image_layers(image source, int border) -{ - int h = source.h; - h = (h+border)*source.c - border; - image dest = make_image(source.w, h, 1); - int i; - for(i = 0; i < source.c; ++i){ - image layer = get_image_layer(source, i); - int h_offset = i*(source.h+border); - embed_image(layer, dest, 0, h_offset); - free_image(layer); - } - return dest; -} - -void constrain_image(image im) -{ - int i; - for(i = 0; i < im.w*im.h*im.c; ++i){ - if(im.data[i] < 0) im.data[i] = 0; - if(im.data[i] > 1) im.data[i] = 1; - } -} - -void normalize_image(image p) -{ - int i; - float min = 9999999; - float max = -999999; - - for(i = 0; i < p.h*p.w*p.c; ++i){ - float v = p.data[i]; - if(v < min) min = v; - if(v > max) max = v; - } - if(max - min < .000000001){ - min = 0; - max = 1; - } - for(i = 0; i < p.c*p.w*p.h; ++i){ - p.data[i] = (p.data[i] - min)/(max-min); - } -} - -void normalize_image2(image p) -{ - float* min = (float*)xcalloc(p.c, sizeof(float)); - float* max = (float*)xcalloc(p.c, sizeof(float)); - int i,j; - for(i = 0; i < p.c; ++i) min[i] = max[i] = p.data[i*p.h*p.w]; - - for(j = 0; j < p.c; ++j){ - for(i = 0; i < p.h*p.w; ++i){ - float v = p.data[i+j*p.h*p.w]; - if(v < min[j]) min[j] = v; 
- if(v > max[j]) max[j] = v; - } - } - for(i = 0; i < p.c; ++i){ - if(max[i] - min[i] < .000000001){ - min[i] = 0; - max[i] = 1; - } - } - for(j = 0; j < p.c; ++j){ - for(i = 0; i < p.w*p.h; ++i){ - p.data[i+j*p.h*p.w] = (p.data[i+j*p.h*p.w] - min[j])/(max[j]-min[j]); - } - } - free(min); - free(max); -} - -void copy_image_inplace(image src, image dst) -{ - memcpy(dst.data, src.data, src.h*src.w*src.c * sizeof(float)); -} - -image copy_image(image p) -{ - image copy = p; - copy.data = (float*)xcalloc(p.h * p.w * p.c, sizeof(float)); - memcpy(copy.data, p.data, p.h*p.w*p.c*sizeof(float)); - return copy; -} - -void rgbgr_image(image im) -{ - int i; - for(i = 0; i < im.w*im.h; ++i){ - float swap = im.data[i]; - im.data[i] = im.data[i+im.w*im.h*2]; - im.data[i+im.w*im.h*2] = swap; - } -} - -void show_image(image p, const char *name) -{ -#ifdef OPENCV - show_image_cv(p, name); -#else - fprintf(stderr, "Not compiled with OpenCV, saving to %s.png instead\n", name); - save_image(p, name); -#endif // OPENCV -} - -void save_image_png(image im, const char *name) -{ - char buff[256]; - //sprintf(buff, "%s (%d)", name, windows); - sprintf(buff, "%s.png", name); - unsigned char* data = (unsigned char*)xcalloc(im.w * im.h * im.c, sizeof(unsigned char)); - int i,k; - for(k = 0; k < im.c; ++k){ - for(i = 0; i < im.w*im.h; ++i){ - data[i*im.c+k] = (unsigned char) (255*im.data[i + k*im.w*im.h]); - } - } - int success = stbi_write_png(buff, im.w, im.h, im.c, data, im.w*im.c); - free(data); - if(!success) fprintf(stderr, "Failed to write image %s\n", buff); -} - -void save_image_options(image im, const char *name, IMTYPE f, int quality) -{ - char buff[256]; - //sprintf(buff, "%s (%d)", name, windows); - if (f == PNG) sprintf(buff, "%s.png", name); - else if (f == BMP) sprintf(buff, "%s.bmp", name); - else if (f == TGA) sprintf(buff, "%s.tga", name); - else if (f == JPG) sprintf(buff, "%s.jpg", name); - else sprintf(buff, "%s.png", name); - unsigned char* data = (unsigned 
char*)xcalloc(im.w * im.h * im.c, sizeof(unsigned char)); - int i, k; - for (k = 0; k < im.c; ++k) { - for (i = 0; i < im.w*im.h; ++i) { - data[i*im.c + k] = (unsigned char)(255 * im.data[i + k*im.w*im.h]); - } - } - int success = 0; - if (f == PNG) success = stbi_write_png(buff, im.w, im.h, im.c, data, im.w*im.c); - else if (f == BMP) success = stbi_write_bmp(buff, im.w, im.h, im.c, data); - else if (f == TGA) success = stbi_write_tga(buff, im.w, im.h, im.c, data); - else if (f == JPG) success = stbi_write_jpg(buff, im.w, im.h, im.c, data, quality); - free(data); - if (!success) fprintf(stderr, "Failed to write image %s\n", buff); -} - -void save_image(image im, const char *name) -{ - save_image_options(im, name, JPG, 80); -} - -void save_image_jpg(image p, const char *name) -{ - save_image_options(p, name, JPG, 80); -} - -void show_image_layers(image p, char *name) -{ - int i; - char buff[256]; - for(i = 0; i < p.c; ++i){ - sprintf(buff, "%s - Layer %d", name, i); - image layer = get_image_layer(p, i); - show_image(layer, buff); - free_image(layer); - } -} - -void show_image_collapsed(image p, char *name) -{ - image c = collapse_image_layers(p, 1); - show_image(c, name); - free_image(c); -} - -image make_empty_image(int w, int h, int c) -{ - image out; - out.data = 0; - out.h = h; - out.w = w; - out.c = c; - return out; -} - -image make_image(int w, int h, int c) -{ - image out = make_empty_image(w,h,c); - out.data = (float*)xcalloc(h * w * c, sizeof(float)); - return out; -} - -image make_random_image(int w, int h, int c) -{ - image out = make_empty_image(w,h,c); - out.data = (float*)xcalloc(h * w * c, sizeof(float)); - int i; - for(i = 0; i < w*h*c; ++i){ - out.data[i] = (rand_normal() * .25) + .5; - } - return out; -} - -image float_to_image_scaled(int w, int h, int c, float *data) -{ - image out = make_image(w, h, c); - int abs_max = 0; - int i = 0; - for (i = 0; i < w*h*c; ++i) { - if (fabs(data[i]) > abs_max) abs_max = fabs(data[i]); - } - for (i = 0; i < 
w*h*c; ++i) { - out.data[i] = data[i] / abs_max; - } - return out; -} - -image float_to_image(int w, int h, int c, float *data) -{ - image out = make_empty_image(w,h,c); - out.data = data; - return out; -} - - -image rotate_crop_image(image im, float rad, float s, int w, int h, float dx, float dy, float aspect) -{ - int x, y, c; - float cx = im.w/2.; - float cy = im.h/2.; - image rot = make_image(w, h, im.c); - for(c = 0; c < im.c; ++c){ - for(y = 0; y < h; ++y){ - for(x = 0; x < w; ++x){ - float rx = cos(rad)*((x - w/2.)/s*aspect + dx/s*aspect) - sin(rad)*((y - h/2.)/s + dy/s) + cx; - float ry = sin(rad)*((x - w/2.)/s*aspect + dx/s*aspect) + cos(rad)*((y - h/2.)/s + dy/s) + cy; - float val = bilinear_interpolate(im, rx, ry, c); - set_pixel(rot, x, y, c, val); - } - } - } - return rot; -} - -image rotate_image(image im, float rad) -{ - int x, y, c; - float cx = im.w/2.; - float cy = im.h/2.; - image rot = make_image(im.w, im.h, im.c); - for(c = 0; c < im.c; ++c){ - for(y = 0; y < im.h; ++y){ - for(x = 0; x < im.w; ++x){ - float rx = cos(rad)*(x-cx) - sin(rad)*(y-cy) + cx; - float ry = sin(rad)*(x-cx) + cos(rad)*(y-cy) + cy; - float val = bilinear_interpolate(im, rx, ry, c); - set_pixel(rot, x, y, c, val); - } - } - } - return rot; -} - -void translate_image(image m, float s) -{ - int i; - for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] += s; -} - -void scale_image(image m, float s) -{ - int i; - for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] *= s; -} - -image crop_image(image im, int dx, int dy, int w, int h) -{ - image cropped = make_image(w, h, im.c); - int i, j, k; - for(k = 0; k < im.c; ++k){ - for(j = 0; j < h; ++j){ - for(i = 0; i < w; ++i){ - int r = j + dy; - int c = i + dx; - float val = 0; - r = constrain_int(r, 0, im.h-1); - c = constrain_int(c, 0, im.w-1); - if (r >= 0 && r < im.h && c >= 0 && c < im.w) { - val = get_pixel(im, c, r, k); - } - set_pixel(cropped, i, j, k, val); - } - } - } - return cropped; -} - -int best_3d_shift_r(image a, image b, int min, int 
max) -{ - if(min == max) return min; - int mid = floor((min + max) / 2.); - image c1 = crop_image(b, 0, mid, b.w, b.h); - image c2 = crop_image(b, 0, mid+1, b.w, b.h); - float d1 = dist_array(c1.data, a.data, a.w*a.h*a.c, 10); - float d2 = dist_array(c2.data, a.data, a.w*a.h*a.c, 10); - free_image(c1); - free_image(c2); - if(d1 < d2) return best_3d_shift_r(a, b, min, mid); - else return best_3d_shift_r(a, b, mid+1, max); -} - -int best_3d_shift(image a, image b, int min, int max) -{ - int i; - int best = 0; - float best_distance = FLT_MAX; - for(i = min; i <= max; i += 2){ - image c = crop_image(b, 0, i, b.w, b.h); - float d = dist_array(c.data, a.data, a.w*a.h*a.c, 100); - if(d < best_distance){ - best_distance = d; - best = i; - } - printf("%d %f\n", i, d); - free_image(c); - } - return best; -} - -void composite_3d(char *f1, char *f2, char *out, int delta) -{ - if(!out) out = "out"; - image a = load_image(f1, 0,0,0); - image b = load_image(f2, 0,0,0); - int shift = best_3d_shift_r(a, b, -a.h/100, a.h/100); - - image c1 = crop_image(b, 10, shift, b.w, b.h); - float d1 = dist_array(c1.data, a.data, a.w*a.h*a.c, 100); - image c2 = crop_image(b, -10, shift, b.w, b.h); - float d2 = dist_array(c2.data, a.data, a.w*a.h*a.c, 100); - - if(d2 < d1 && 0){ - image swap = a; - a = b; - b = swap; - shift = -shift; - printf("swapped, %d\n", shift); - } - else{ - printf("%d\n", shift); - } - - image c = crop_image(b, delta, shift, a.w, a.h); - int i; - for(i = 0; i < c.w*c.h; ++i){ - c.data[i] = a.data[i]; - } -#ifdef OPENCV - save_image_jpg(c, out); -#else - save_image(c, out); -#endif -} - -void fill_image(image m, float s) -{ - int i; - for (i = 0; i < m.h*m.w*m.c; ++i) m.data[i] = s; -} - -void letterbox_image_into(image im, int w, int h, image boxed) -{ - int new_w = im.w; - int new_h = im.h; - if (((float)w / im.w) < ((float)h / im.h)) { - new_w = w; - new_h = (im.h * w) / im.w; - } - else { - new_h = h; - new_w = (im.w * h) / im.h; - } - image resized = resize_image(im, 
new_w, new_h); - embed_image(resized, boxed, (w - new_w) / 2, (h - new_h) / 2); - free_image(resized); -} - -image letterbox_image(image im, int w, int h) -{ - int new_w = im.w; - int new_h = im.h; - if (((float)w / im.w) < ((float)h / im.h)) { - new_w = w; - new_h = (im.h * w) / im.w; - } - else { - new_h = h; - new_w = (im.w * h) / im.h; - } - image resized = resize_image(im, new_w, new_h); - image boxed = make_image(w, h, im.c); - fill_image(boxed, .5); - //int i; - //for(i = 0; i < boxed.w*boxed.h*boxed.c; ++i) boxed.data[i] = 0; - embed_image(resized, boxed, (w - new_w) / 2, (h - new_h) / 2); - free_image(resized); - return boxed; -} - -image resize_max(image im, int max) -{ - int w = im.w; - int h = im.h; - if(w > h){ - h = (h * max) / w; - w = max; - } else { - w = (w * max) / h; - h = max; - } - if(w == im.w && h == im.h) return im; - image resized = resize_image(im, w, h); - return resized; -} - -image resize_min(image im, int min) -{ - int w = im.w; - int h = im.h; - if(w < h){ - h = (h * min) / w; - w = min; - } else { - w = (w * min) / h; - h = min; - } - if(w == im.w && h == im.h) return im; - image resized = resize_image(im, w, h); - return resized; -} - -image random_crop_image(image im, int w, int h) -{ - int dx = rand_int(0, im.w - w); - int dy = rand_int(0, im.h - h); - image crop = crop_image(im, dx, dy, w, h); - return crop; -} - -image random_augment_image(image im, float angle, float aspect, int low, int high, int size) -{ - aspect = rand_scale(aspect); - int r = rand_int(low, high); - int min = (im.h < im.w*aspect) ? 
im.h : im.w*aspect; - float scale = (float)r / min; - - float rad = rand_uniform(-angle, angle) * 2.0 * M_PI / 360.; - - float dx = (im.w*scale/aspect - size) / 2.; - float dy = (im.h*scale - size) / 2.; - if(dx < 0) dx = 0; - if(dy < 0) dy = 0; - dx = rand_uniform(-dx, dx); - dy = rand_uniform(-dy, dy); - - image crop = rotate_crop_image(im, rad, scale, size, size, dx, dy, aspect); - - return crop; -} - -float three_way_max(float a, float b, float c) -{ - return (a > b) ? ( (a > c) ? a : c) : ( (b > c) ? b : c) ; -} - -float three_way_min(float a, float b, float c) -{ - return (a < b) ? ( (a < c) ? a : c) : ( (b < c) ? b : c) ; -} - -// http://www.cs.rit.edu/~ncs/color/t_convert.html -void rgb_to_hsv(image im) -{ - assert(im.c == 3); - int i, j; - float r, g, b; - float h, s, v; - for(j = 0; j < im.h; ++j){ - for(i = 0; i < im.w; ++i){ - r = get_pixel(im, i , j, 0); - g = get_pixel(im, i , j, 1); - b = get_pixel(im, i , j, 2); - float max = three_way_max(r,g,b); - float min = three_way_min(r,g,b); - float delta = max - min; - v = max; - if(max == 0){ - s = 0; - h = 0; - }else{ - s = delta/max; - if(r == max){ - h = (g - b) / delta; - } else if (g == max) { - h = 2 + (b - r) / delta; - } else { - h = 4 + (r - g) / delta; - } - if (h < 0) h += 6; - h = h/6.; - } - set_pixel(im, i, j, 0, h); - set_pixel(im, i, j, 1, s); - set_pixel(im, i, j, 2, v); - } - } -} - -void hsv_to_rgb(image im) -{ - assert(im.c == 3); - int i, j; - float r, g, b; - float h, s, v; - float f, p, q, t; - for(j = 0; j < im.h; ++j){ - for(i = 0; i < im.w; ++i){ - h = 6 * get_pixel(im, i , j, 0); - s = get_pixel(im, i , j, 1); - v = get_pixel(im, i , j, 2); - if (s == 0) { - r = g = b = v; - } else { - int index = floor(h); - f = h - index; - p = v*(1-s); - q = v*(1-s*f); - t = v*(1-s*(1-f)); - if(index == 0){ - r = v; g = t; b = p; - } else if(index == 1){ - r = q; g = v; b = p; - } else if(index == 2){ - r = p; g = v; b = t; - } else if(index == 3){ - r = p; g = q; b = v; - } else if(index == 
4){ - r = t; g = p; b = v; - } else { - r = v; g = p; b = q; - } - } - set_pixel(im, i, j, 0, r); - set_pixel(im, i, j, 1, g); - set_pixel(im, i, j, 2, b); - } - } -} - -image grayscale_image(image im) -{ - assert(im.c == 3); - int i, j, k; - image gray = make_image(im.w, im.h, 1); - float scale[] = {0.587, 0.299, 0.114}; - for(k = 0; k < im.c; ++k){ - for(j = 0; j < im.h; ++j){ - for(i = 0; i < im.w; ++i){ - gray.data[i+im.w*j] += scale[k]*get_pixel(im, i, j, k); - } - } - } - return gray; -} - -image threshold_image(image im, float thresh) -{ - int i; - image t = make_image(im.w, im.h, im.c); - for(i = 0; i < im.w*im.h*im.c; ++i){ - t.data[i] = im.data[i]>thresh ? 1 : 0; - } - return t; -} - -image blend_image(image fore, image back, float alpha) -{ - assert(fore.w == back.w && fore.h == back.h && fore.c == back.c); - image blend = make_image(fore.w, fore.h, fore.c); - int i, j, k; - for(k = 0; k < fore.c; ++k){ - for(j = 0; j < fore.h; ++j){ - for(i = 0; i < fore.w; ++i){ - float val = alpha * get_pixel(fore, i, j, k) + - (1 - alpha)* get_pixel(back, i, j, k); - set_pixel(blend, i, j, k, val); - } - } - } - return blend; -} - -void scale_image_channel(image im, int c, float v) -{ - int i, j; - for(j = 0; j < im.h; ++j){ - for(i = 0; i < im.w; ++i){ - float pix = get_pixel(im, i, j, c); - pix = pix*v; - set_pixel(im, i, j, c, pix); - } - } -} - -void translate_image_channel(image im, int c, float v) -{ - int i, j; - for(j = 0; j < im.h; ++j){ - for(i = 0; i < im.w; ++i){ - float pix = get_pixel(im, i, j, c); - pix = pix+v; - set_pixel(im, i, j, c, pix); - } - } -} - -image binarize_image(image im) -{ - image c = copy_image(im); - int i; - for(i = 0; i < im.w * im.h * im.c; ++i){ - if(c.data[i] > .5) c.data[i] = 1; - else c.data[i] = 0; - } - return c; -} - -void saturate_image(image im, float sat) -{ - rgb_to_hsv(im); - scale_image_channel(im, 1, sat); - hsv_to_rgb(im); - constrain_image(im); -} - -void hue_image(image im, float hue) -{ - rgb_to_hsv(im); - int i; 
- for(i = 0; i < im.w*im.h; ++i){ - im.data[i] = im.data[i] + hue; - if (im.data[i] > 1) im.data[i] -= 1; - if (im.data[i] < 0) im.data[i] += 1; - } - hsv_to_rgb(im); - constrain_image(im); -} - -void exposure_image(image im, float sat) -{ - rgb_to_hsv(im); - scale_image_channel(im, 2, sat); - hsv_to_rgb(im); - constrain_image(im); -} - -void distort_image(image im, float hue, float sat, float val) -{ - if (im.c >= 3) - { - rgb_to_hsv(im); - scale_image_channel(im, 1, sat); - scale_image_channel(im, 2, val); - int i; - for(i = 0; i < im.w*im.h; ++i){ - im.data[i] = im.data[i] + hue; - if (im.data[i] > 1) im.data[i] -= 1; - if (im.data[i] < 0) im.data[i] += 1; - } - hsv_to_rgb(im); - } - else - { - scale_image_channel(im, 0, val); - } - constrain_image(im); -} - -void random_distort_image(image im, float hue, float saturation, float exposure) -{ - float dhue = rand_uniform_strong(-hue, hue); - float dsat = rand_scale(saturation); - float dexp = rand_scale(exposure); - distort_image(im, dhue, dsat, dexp); -} - -void saturate_exposure_image(image im, float sat, float exposure) -{ - rgb_to_hsv(im); - scale_image_channel(im, 1, sat); - scale_image_channel(im, 2, exposure); - hsv_to_rgb(im); - constrain_image(im); -} - -float bilinear_interpolate(image im, float x, float y, int c) -{ - int ix = (int) floorf(x); - int iy = (int) floorf(y); - - float dx = x - ix; - float dy = y - iy; - - float val = (1-dy) * (1-dx) * get_pixel_extend(im, ix, iy, c) + - dy * (1-dx) * get_pixel_extend(im, ix, iy+1, c) + - (1-dy) * dx * get_pixel_extend(im, ix+1, iy, c) + - dy * dx * get_pixel_extend(im, ix+1, iy+1, c); - return val; -} - -void quantize_image(image im) -{ - int size = im.c * im.w * im.h; - int i; - for (i = 0; i < size; ++i) im.data[i] = (int)(im.data[i] * 255) / 255. 
+ (0.5/255); -} - -void make_image_red(image im) -{ - int r, c, k; - for (r = 0; r < im.h; ++r) { - for (c = 0; c < im.w; ++c) { - float val = 0; - for (k = 0; k < im.c; ++k) { - val += get_pixel(im, c, r, k); - set_pixel(im, c, r, k, 0); - } - for (k = 0; k < im.c; ++k) { - //set_pixel(im, c, r, k, val); - } - set_pixel(im, c, r, 0, val); - } - } -} - -image make_attention_image(int img_size, float *original_delta_cpu, float *original_input_cpu, int w, int h, int c) -{ - image attention_img; - attention_img.w = w; - attention_img.h = h; - attention_img.c = c; - attention_img.data = original_delta_cpu; - make_image_red(attention_img); - - int k; - float min_val = 999999, mean_val = 0, max_val = -999999; - for (k = 0; k < img_size; ++k) { - if (original_delta_cpu[k] < min_val) min_val = original_delta_cpu[k]; - if (original_delta_cpu[k] > max_val) max_val = original_delta_cpu[k]; - mean_val += original_delta_cpu[k]; - } - mean_val = mean_val / img_size; - float range = max_val - min_val; - - for (k = 0; k < img_size; ++k) { - float val = original_delta_cpu[k]; - val = fabs(mean_val - val) / range; - original_delta_cpu[k] = val * 4; - } - - image resized = resize_image(attention_img, w / 4, h / 4); - attention_img = resize_image(resized, w, h); - free_image(resized); - for (k = 0; k < img_size; ++k) attention_img.data[k] += original_input_cpu[k]; - - //normalize_image(attention_img); - //show_image(attention_img, "delta"); - return attention_img; -} - -image resize_image(image im, int w, int h) -{ - if (im.w == w && im.h == h) return copy_image(im); - - image resized = make_image(w, h, im.c); - image part = make_image(w, im.h, im.c); - int r, c, k; - float w_scale = (float)(im.w - 1) / (w - 1); - float h_scale = (float)(im.h - 1) / (h - 1); - for(k = 0; k < im.c; ++k){ - for(r = 0; r < im.h; ++r){ - for(c = 0; c < w; ++c){ - float val = 0; - if(c == w-1 || im.w == 1){ - val = get_pixel(im, im.w-1, r, k); - } else { - float sx = c*w_scale; - int ix = (int) sx; - float 
dx = sx - ix; - val = (1 - dx) * get_pixel(im, ix, r, k) + dx * get_pixel(im, ix+1, r, k); - } - set_pixel(part, c, r, k, val); - } - } - } - for(k = 0; k < im.c; ++k){ - for(r = 0; r < h; ++r){ - float sy = r*h_scale; - int iy = (int) sy; - float dy = sy - iy; - for(c = 0; c < w; ++c){ - float val = (1-dy) * get_pixel(part, c, iy, k); - set_pixel(resized, c, r, k, val); - } - if(r == h-1 || im.h == 1) continue; - for(c = 0; c < w; ++c){ - float val = dy * get_pixel(part, c, iy+1, k); - add_pixel(resized, c, r, k, val); - } - } - } - - free_image(part); - return resized; -} - - -void test_resize(char *filename) -{ - image im = load_image(filename, 0,0, 3); - float mag = mag_array(im.data, im.w*im.h*im.c); - printf("L2 Norm: %f\n", mag); - image gray = grayscale_image(im); - - image c1 = copy_image(im); - image c2 = copy_image(im); - image c3 = copy_image(im); - image c4 = copy_image(im); - distort_image(c1, .1, 1.5, 1.5); - distort_image(c2, -.1, .66666, .66666); - distort_image(c3, .1, 1.5, .66666); - distort_image(c4, .1, .66666, 1.5); - - - show_image(im, "Original"); - show_image(gray, "Gray"); - show_image(c1, "C1"); - show_image(c2, "C2"); - show_image(c3, "C3"); - show_image(c4, "C4"); - -#ifdef OPENCV - while(1){ - image aug = random_augment_image(im, 0, .75, 320, 448, 320); - show_image(aug, "aug"); - free_image(aug); - - - float exposure = 1.15; - float saturation = 1.15; - float hue = .05; - - image c = copy_image(im); - - float dexp = rand_scale(exposure); - float dsat = rand_scale(saturation); - float dhue = rand_uniform(-hue, hue); - - distort_image(c, dhue, dsat, dexp); - show_image(c, "rand"); - printf("%f %f %f\n", dhue, dsat, dexp); - free_image(c); - wait_until_press_key_cv(); - } -#endif -} - - -image load_image_stb(char *filename, int channels) -{ - int w, h, c; - unsigned char *data = stbi_load(filename, &w, &h, &c, channels); - if (!data) { - char shrinked_filename[1024]; - if (strlen(filename) >= 1024) sprintf(shrinked_filename, "name is too 
long"); - else sprintf(shrinked_filename, "%s", filename); - fprintf(stderr, "Cannot load image \"%s\"\nSTB Reason: %s\n", shrinked_filename, stbi_failure_reason()); - FILE* fw = fopen("bad.list", "a"); - fwrite(shrinked_filename, sizeof(char), strlen(shrinked_filename), fw); - char *new_line = "\n"; - fwrite(new_line, sizeof(char), strlen(new_line), fw); - fclose(fw); - if (check_mistakes) { - printf("\n Error in load_image_stb() \n"); - getchar(); - } - return make_image(10, 10, 3); - //exit(EXIT_FAILURE); - } - if(channels) c = channels; - int i,j,k; - image im = make_image(w, h, c); - for(k = 0; k < c; ++k){ - for(j = 0; j < h; ++j){ - for(i = 0; i < w; ++i){ - int dst_index = i + w*j + w*h*k; - int src_index = k + c*i + c*w*j; - im.data[dst_index] = (float)data[src_index]/255.; - } - } - } - free(data); - return im; -} - -image load_image_stb_resize(char *filename, int w, int h, int c) -{ - image out = load_image_stb(filename, c); // without OpenCV - - if ((h && w) && (h != out.h || w != out.w)) { - image resized = resize_image(out, w, h); - free_image(out); - out = resized; - } - return out; -} - -image load_image(char *filename, int w, int h, int c) -{ -#ifdef OPENCV - //image out = load_image_stb(filename, c); - image out = load_image_cv(filename, c); -#else - image out = load_image_stb(filename, c); // without OpenCV -#endif // OPENCV - - if((h && w) && (h != out.h || w != out.w)){ - image resized = resize_image(out, w, h); - free_image(out); - out = resized; - } - return out; -} - -image load_image_color(char *filename, int w, int h) -{ - return load_image(filename, w, h, 3); -} - -image get_image_layer(image m, int l) -{ - image out = make_image(m.w, m.h, 1); - int i; - for(i = 0; i < m.h*m.w; ++i){ - out.data[i] = m.data[i+l*m.h*m.w]; - } - return out; -} - -void print_image(image m) -{ - int i, j, k; - for(i =0 ; i < m.c; ++i){ - for(j =0 ; j < m.h; ++j){ - for(k = 0; k < m.w; ++k){ - printf("%.2lf, ", m.data[i*m.h*m.w + j*m.w + k]); - if(k > 30) 
break; - } - printf("\n"); - if(j > 30) break; - } - printf("\n"); - } - printf("\n"); -} - -image collapse_images_vert(image *ims, int n) -{ - int color = 1; - int border = 1; - int h,w,c; - w = ims[0].w; - h = (ims[0].h + border) * n - border; - c = ims[0].c; - if(c != 3 || !color){ - w = (w+border)*c - border; - c = 1; - } - - image filters = make_image(w, h, c); - int i,j; - for(i = 0; i < n; ++i){ - int h_offset = i*(ims[0].h+border); - image copy = copy_image(ims[i]); - //normalize_image(copy); - if(c == 3 && color){ - embed_image(copy, filters, 0, h_offset); - } - else{ - for(j = 0; j < copy.c; ++j){ - int w_offset = j*(ims[0].w+border); - image layer = get_image_layer(copy, j); - embed_image(layer, filters, w_offset, h_offset); - free_image(layer); - } - } - free_image(copy); - } - return filters; -} - -image collapse_images_horz(image *ims, int n) -{ - int color = 1; - int border = 1; - int h,w,c; - int size = ims[0].h; - h = size; - w = (ims[0].w + border) * n - border; - c = ims[0].c; - if(c != 3 || !color){ - h = (h+border)*c - border; - c = 1; - } - - image filters = make_image(w, h, c); - int i,j; - for(i = 0; i < n; ++i){ - int w_offset = i*(size+border); - image copy = copy_image(ims[i]); - //normalize_image(copy); - if(c == 3 && color){ - embed_image(copy, filters, w_offset, 0); - } - else{ - for(j = 0; j < copy.c; ++j){ - int h_offset = j*(size+border); - image layer = get_image_layer(copy, j); - embed_image(layer, filters, w_offset, h_offset); - free_image(layer); - } - } - free_image(copy); - } - return filters; -} - -void show_image_normalized(image im, const char *name) -{ - image c = copy_image(im); - normalize_image(c); - show_image(c, name); - free_image(c); -} - -void show_images(image *ims, int n, char *window) -{ - image m = collapse_images_vert(ims, n); - /* - int w = 448; - int h = ((float)m.h/m.w) * 448; - if(h > 896){ - h = 896; - w = ((float)m.w/m.h) * 896; - } - image sized = resize_image(m, w, h); - */ - normalize_image(m); - 
save_image(m, window); - show_image(m, window); - free_image(m); -} - -void free_image(image m) -{ - if(m.data){ - free(m.data); - } -} - -// Fast copy data from a contiguous byte array into the image. -LIB_API void copy_image_from_bytes(image im, char *pdata) -{ - unsigned char *data = (unsigned char*)pdata; - int i, k, j; - int w = im.w; - int h = im.h; - int c = im.c; - for (k = 0; k < c; ++k) { - for (j = 0; j < h; ++j) { - for (i = 0; i < w; ++i) { - int dst_index = i + w * j + w * h*k; - int src_index = k + c * i + c * w*j; - im.data[dst_index] = (float)data[src_index] / 255.; - } - } - } -} diff --git a/src/Detector/darknet/src/image.h b/src/Detector/darknet/src/image.h deleted file mode 100644 index 90e6a0486..000000000 --- a/src/Detector/darknet/src/image.h +++ /dev/null @@ -1,106 +0,0 @@ -#ifndef IMAGE_H -#define IMAGE_H -#include "darknet.h" - -#include -#include -#include -#include -#include - -#include "image_opencv.h" - -#include "box.h" -#ifdef __cplusplus -extern "C" { -#endif -/* -typedef struct { - int w; - int h; - int c; - float *data; -} image; -*/ -float get_color(int c, int x, int max); -void flip_image(image a); -void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b); -void draw_box_width(image a, int x1, int y1, int x2, int y2, int w, float r, float g, float b); -void draw_bbox(image a, box bbox, int w, float r, float g, float b); -void draw_label(image a, int r, int c, image label, const float *rgb); -void draw_weighted_label(image a, int r, int c, image label, const float *rgb, const float alpha); -void write_label(image a, int r, int c, image *characters, char *string, float *rgb); -void draw_detections(image im, int num, float thresh, box *boxes, float **probs, char **names, image **labels, int classes); -void draw_detections_v3(image im, detection *dets, int num, float thresh, char **names, image **alphabet, int classes, int ext_output); -image image_distance(image a, image b); -void scale_image(image m, 
float s); -// image crop_image(image im, int dx, int dy, int w, int h); -image random_crop_image(image im, int w, int h); -image random_augment_image(image im, float angle, float aspect, int low, int high, int size); -void random_distort_image(image im, float hue, float saturation, float exposure); -//LIB_API image resize_image(image im, int w, int h); -//LIB_API void copy_image_from_bytes(image im, char *pdata); -void fill_image(image m, float s); -void letterbox_image_into(image im, int w, int h, image boxed); -//LIB_API image letterbox_image(image im, int w, int h); -// image resize_min(image im, int min); -image resize_max(image im, int max); -void translate_image(image m, float s); -void normalize_image(image p); -image rotate_image(image m, float rad); -void rotate_image_cw(image im, int times); -void embed_image(image source, image dest, int dx, int dy); -void saturate_image(image im, float sat); -void exposure_image(image im, float sat); -void distort_image(image im, float hue, float sat, float val); -void saturate_exposure_image(image im, float sat, float exposure); -void hsv_to_rgb(image im); -//LIB_API void rgbgr_image(image im); -void constrain_image(image im); -void composite_3d(char *f1, char *f2, char *out, int delta); -int best_3d_shift_r(image a, image b, int min, int max); - -image grayscale_image(image im); -image threshold_image(image im, float thresh); - -image collapse_image_layers(image source, int border); -image collapse_images_horz(image *ims, int n); -image collapse_images_vert(image *ims, int n); - -void show_image(image p, const char *name); -void show_image_normalized(image im, const char *name); -void save_image_png(image im, const char *name); -void save_image(image p, const char *name); -void show_images(image *ims, int n, char *window); -void show_image_layers(image p, char *name); -void show_image_collapsed(image p, char *name); - -void print_image(image m); - -//LIB_API image make_image(int w, int h, int c); -image 
make_random_image(int w, int h, int c); -image make_empty_image(int w, int h, int c); -image float_to_image_scaled(int w, int h, int c, float *data); -image float_to_image(int w, int h, int c, float *data); -image copy_image(image p); -void copy_image_inplace(image src, image dst); -image load_image(char *filename, int w, int h, int c); -image load_image_stb_resize(char *filename, int w, int h, int c); -//LIB_API image load_image_color(char *filename, int w, int h); -image **load_alphabet(); - -//float get_pixel(image m, int x, int y, int c); -//float get_pixel_extend(image m, int x, int y, int c); -//void set_pixel(image m, int x, int y, int c, float val); -//void add_pixel(image m, int x, int y, int c, float val); -float bilinear_interpolate(image im, float x, float y, int c); - -image get_image_layer(image m, int l); - -//LIB_API void free_image(image m); -void test_resize(char *filename); -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/Detector/darknet/src/image_opencv.cpp b/src/Detector/darknet/src/image_opencv.cpp deleted file mode 100644 index add27c96e..000000000 --- a/src/Detector/darknet/src/image_opencv.cpp +++ /dev/null @@ -1,1549 +0,0 @@ -#include "image_opencv.h" -#include - -#ifdef OPENCV -#include "utils.h" - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include - -// includes for OpenCV >= 3.x -#ifndef CV_VERSION_EPOCH -#include -#include -#include -#endif - -// OpenCV includes for OpenCV 2.x -#ifdef CV_VERSION_EPOCH -#include -#include -#include -#include -#endif - -//using namespace cv; - -using std::cerr; -using std::endl; - -#ifdef DEBUG -#define OCV_D "d" -#else -#define OCV_D -#endif//DEBUG - - -// OpenCV libraries -#ifndef CV_VERSION_EPOCH -#define OPENCV_VERSION CVAUX_STR(CV_VERSION_MAJOR)"" CVAUX_STR(CV_VERSION_MINOR)"" CVAUX_STR(CV_VERSION_REVISION) OCV_D -#ifndef USE_CMAKE_LIBS -#pragma comment(lib, "opencv_world" OPENCV_VERSION ".lib") 
-#endif // USE_CMAKE_LIBS -#else // CV_VERSION_EPOCH -#define OPENCV_VERSION CVAUX_STR(CV_VERSION_EPOCH)"" CVAUX_STR(CV_VERSION_MAJOR)"" CVAUX_STR(CV_VERSION_MINOR) OCV_D -#ifndef USE_CMAKE_LIBS -#pragma comment(lib, "opencv_core" OPENCV_VERSION ".lib") -#pragma comment(lib, "opencv_imgproc" OPENCV_VERSION ".lib") -#pragma comment(lib, "opencv_highgui" OPENCV_VERSION ".lib") -#endif // USE_CMAKE_LIBS -#endif // CV_VERSION_EPOCH - -#include "http_stream.h" - -#ifndef CV_RGB -#define CV_RGB(r, g, b) cvScalar( (b), (g), (r), 0 ) -#endif - -#ifndef CV_FILLED -#define CV_FILLED cv::FILLED -#endif - -#ifndef CV_AA -#define CV_AA cv::LINE_AA -#endif - -extern "C" { - - //struct mat_cv : cv::Mat { }; - //struct cap_cv : cv::VideoCapture { }; - //struct write_cv : cv::VideoWriter { }; - - //struct mat_cv : cv::Mat { int a[0]; }; - //struct cap_cv : cv::VideoCapture { int a[0]; }; - //struct write_cv : cv::VideoWriter { int a[0]; }; - -// ==================================================================== -// cv::Mat -// ==================================================================== - image mat_to_image(cv::Mat mat); - cv::Mat image_to_mat(image img); -// image ipl_to_image(mat_cv* src); -// mat_cv *image_to_ipl(image img); -// cv::Mat ipl_to_mat(IplImage *ipl); -// IplImage *mat_to_ipl(cv::Mat mat); - - -extern "C" mat_cv *load_image_mat_cv(const char *filename, int flag) -{ - cv::Mat *mat_ptr = NULL; - try { - cv::Mat mat = cv::imread(filename, flag); - if (mat.empty()) - { - std::string shrinked_filename = filename; - if (shrinked_filename.length() > 1024) { - shrinked_filename.resize(1024); - shrinked_filename = std::string("name is too long: ") + shrinked_filename; - } - cerr << "Cannot load image " << shrinked_filename << std::endl; - std::ofstream bad_list("bad.list", std::ios::out | std::ios::app); - bad_list << shrinked_filename << std::endl; - //if (check_mistakes) getchar(); - return NULL; - } - cv::Mat dst; - if (mat.channels() == 3) cv::cvtColor(mat, dst, 
cv::COLOR_RGB2BGR); - else if (mat.channels() == 4) cv::cvtColor(mat, dst, cv::COLOR_RGBA2BGRA); - else dst = mat; - - mat_ptr = new cv::Mat(dst); - - return (mat_cv *)mat_ptr; - } - catch (...) { - cerr << "OpenCV exception: load_image_mat_cv \n"; - } - if (mat_ptr) delete mat_ptr; - return NULL; -} -// ---------------------------------------- - -cv::Mat load_image_mat(char *filename, int channels) -{ - int flag = cv::IMREAD_UNCHANGED; - if (channels == 0) flag = cv::IMREAD_COLOR; - else if (channels == 1) flag = cv::IMREAD_GRAYSCALE; - else if (channels == 3) flag = cv::IMREAD_COLOR; - else { - fprintf(stderr, "OpenCV can't force load with %d channels\n", channels); - } - //flag |= IMREAD_IGNORE_ORIENTATION; // un-comment it if you want - - cv::Mat *mat_ptr = (cv::Mat *)load_image_mat_cv(filename, flag); - - if (mat_ptr == NULL) { - return cv::Mat(); - } - cv::Mat mat = *mat_ptr; - delete mat_ptr; - - return mat; -} -// ---------------------------------------- - -extern "C" image load_image_cv(char *filename, int channels) -{ - cv::Mat mat = load_image_mat(filename, channels); - - if (mat.empty()) { - return make_image(10, 10, channels); - } - return mat_to_image(mat); -} -// ---------------------------------------- - -extern "C" image load_image_resize(char *filename, int w, int h, int c, image *im) -{ - image out; - try { - cv::Mat loaded_image = load_image_mat(filename, c); - - *im = mat_to_image(loaded_image); - - cv::Mat resized(h, w, CV_8UC3); - cv::resize(loaded_image, resized, cv::Size(w, h), 0, 0, cv::INTER_LINEAR); - out = mat_to_image(resized); - } - catch (...) 
{ - cerr << " OpenCV exception: load_image_resize() can't load image %s " << filename << " \n"; - out = make_image(w, h, c); - *im = make_image(w, h, c); - } - return out; -} -// ---------------------------------------- - -extern "C" int get_width_mat(mat_cv *mat) -{ - if (mat == NULL) { - cerr << " Pointer is NULL in get_width_mat() \n"; - return 0; - } - return ((cv::Mat *)mat)->cols; -} -// ---------------------------------------- - -extern "C" int get_height_mat(mat_cv *mat) -{ - if (mat == NULL) { - cerr << " Pointer is NULL in get_height_mat() \n"; - return 0; - } - return ((cv::Mat *)mat)->rows; -} -// ---------------------------------------- - -extern "C" void release_mat(mat_cv **mat) -{ - try { - cv::Mat **mat_ptr = (cv::Mat **)mat; - if (*mat_ptr) delete *mat_ptr; - *mat_ptr = NULL; - } - catch (...) { - cerr << "OpenCV exception: release_mat \n"; - } -} - -// ==================================================================== -// IplImage -// ==================================================================== -/* -extern "C" int get_width_cv(mat_cv *ipl_src) -{ - IplImage *ipl = (IplImage *)ipl_src; - return ipl->width; -} -// ---------------------------------------- - -extern "C" int get_height_cv(mat_cv *ipl_src) -{ - IplImage *ipl = (IplImage *)ipl_src; - return ipl->height; -} -// ---------------------------------------- - -extern "C" void release_ipl(mat_cv **ipl) -{ - IplImage **ipl_img = (IplImage **)ipl; - if (*ipl_img) cvReleaseImage(ipl_img); - *ipl_img = NULL; -} -// ---------------------------------------- - -// ==================================================================== -// image-to-ipl, ipl-to-image, image_to_mat, mat_to_image -// ==================================================================== - -extern "C" mat_cv *image_to_ipl(image im) -{ - int x, y, c; - IplImage *disp = cvCreateImage(cvSize(im.w, im.h), IPL_DEPTH_8U, im.c); - int step = disp->widthStep; - for (y = 0; y < im.h; ++y) { - for (x = 0; x < im.w; ++x) { - for 
(c = 0; c < im.c; ++c) { - float val = im.data[c*im.h*im.w + y*im.w + x]; - disp->imageData[y*step + x*im.c + c] = (unsigned char)(val * 255); - } - } - } - return (mat_cv *)disp; -} -// ---------------------------------------- - -extern "C" image ipl_to_image(mat_cv* src_ptr) -{ - IplImage* src = (IplImage*)src_ptr; - int h = src->height; - int w = src->width; - int c = src->nChannels; - image im = make_image(w, h, c); - unsigned char *data = (unsigned char *)src->imageData; - int step = src->widthStep; - int i, j, k; - - for (i = 0; i < h; ++i) { - for (k = 0; k < c; ++k) { - for (j = 0; j < w; ++j) { - im.data[k*w*h + i*w + j] = data[i*step + j*c + k] / 255.; - } - } - } - return im; -} -// ---------------------------------------- - -cv::Mat ipl_to_mat(IplImage *ipl) -{ - Mat m = cvarrToMat(ipl, true); - return m; -} -// ---------------------------------------- - -IplImage *mat_to_ipl(cv::Mat mat) -{ - IplImage *ipl = new IplImage; - *ipl = mat; - return ipl; -} -// ---------------------------------------- -*/ - -extern "C" cv::Mat image_to_mat(image img) -{ - int channels = img.c; - int width = img.w; - int height = img.h; - cv::Mat mat = cv::Mat(height, width, CV_8UC(channels)); - int step = mat.step; - - for (int y = 0; y < img.h; ++y) { - for (int x = 0; x < img.w; ++x) { - for (int c = 0; c < img.c; ++c) { - float val = img.data[c*img.h*img.w + y*img.w + x]; - mat.data[y*step + x*img.c + c] = (unsigned char)(val * 255); - } - } - } - return mat; -} -// ---------------------------------------- - -extern "C" image mat_to_image(cv::Mat mat) -{ - int w = mat.cols; - int h = mat.rows; - int c = mat.channels(); - image im = make_image(w, h, c); - unsigned char *data = (unsigned char *)mat.data; - int step = mat.step; - for (int y = 0; y < h; ++y) { - for (int k = 0; k < c; ++k) { - for (int x = 0; x < w; ++x) { - //uint8_t val = mat.ptr(y)[c * x + k]; - //uint8_t val = mat.at(y, x).val[k]; - //im.data[k*w*h + y*w + x] = val / 255.0f; - - im.data[k*w*h + y*w + x] 
= data[y*step + x*c + k] / 255.0f; - } - } - } - return im; -} - -image mat_to_image_cv(mat_cv *mat) -{ - return mat_to_image(*(cv::Mat*)mat); -} - -// ==================================================================== -// Window -// ==================================================================== -extern "C" void create_window_cv(char const* window_name, int full_screen, int width, int height) -{ - try { - int window_type = cv::WINDOW_NORMAL; -#ifdef CV_VERSION_EPOCH // OpenCV 2.x - if (full_screen) window_type = CV_WINDOW_FULLSCREEN; -#else - if (full_screen) window_type = cv::WINDOW_FULLSCREEN; -#endif - cv::namedWindow(window_name, window_type); - cv::moveWindow(window_name, 0, 0); - cv::resizeWindow(window_name, width, height); - } - catch (...) { - cerr << "OpenCV exception: create_window_cv \n"; - } -} -// ---------------------------------------- - -extern "C" void resize_window_cv(char const* window_name, int width, int height) -{ - try { - cv::resizeWindow(window_name, width, height); - } - catch (...) { - cerr << "OpenCV exception: create_window_cv \n"; - } -} -// ---------------------------------------- - -extern "C" void destroy_all_windows_cv() -{ - try { - cv::destroyAllWindows(); - } - catch (...) { - cerr << "OpenCV exception: destroy_all_windows_cv \n"; - } -} -// ---------------------------------------- - -extern "C" int wait_key_cv(int delay) -{ - try { - return cv::waitKey(delay); - } - catch (...) 
{ - cerr << "OpenCV exception: wait_key_cv \n"; - } - return -1; -} -// ---------------------------------------- - -extern "C" int wait_until_press_key_cv() -{ - return wait_key_cv(0); -} -// ---------------------------------------- - -extern "C" void make_window(char *name, int w, int h, int fullscreen) -{ - try { - cv::namedWindow(name, cv::WINDOW_NORMAL); - if (fullscreen) { -#ifdef CV_VERSION_EPOCH // OpenCV 2.x - cv::setWindowProperty(name, cv::WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN); -#else - cv::setWindowProperty(name, cv::WND_PROP_FULLSCREEN, cv::WINDOW_FULLSCREEN); -#endif - } - else { - cv::resizeWindow(name, w, h); - if (strcmp(name, "Demo") == 0) cv::moveWindow(name, 0, 0); - } - } - catch (...) { - cerr << "OpenCV exception: make_window \n"; - } -} -// ---------------------------------------- - -static float get_pixel(image m, int x, int y, int c) -{ - assert(x < m.w && y < m.h && c < m.c); - return m.data[c*m.h*m.w + y*m.w + x]; -} -// ---------------------------------------- - -extern "C" void show_image_cv(image p, const char *name) -{ - try { - image copy = copy_image(p); - constrain_image(copy); - - cv::Mat mat = image_to_mat(copy); - if (mat.channels() == 3) cv::cvtColor(mat, mat, cv::COLOR_RGB2BGR); - else if (mat.channels() == 4) cv::cvtColor(mat, mat, cv::COLOR_RGBA2BGR); - cv::namedWindow(name, cv::WINDOW_NORMAL); - cv::imshow(name, mat); - free_image(copy); - } - catch (...) 
{ - cerr << "OpenCV exception: show_image_cv \n"; - } -} -// ---------------------------------------- - -/* -extern "C" void show_image_cv_ipl(mat_cv *disp, const char *name) -{ - if (disp == NULL) return; - char buff[256]; - sprintf(buff, "%s", name); - cv::namedWindow(buff, WINDOW_NORMAL); - cvShowImage(buff, disp); -} -// ---------------------------------------- -*/ - -extern "C" void show_image_mat(mat_cv *mat_ptr, const char *name) -{ - try { - if (mat_ptr == NULL) return; - cv::Mat &mat = *(cv::Mat *)mat_ptr; - cv::namedWindow(name, cv::WINDOW_NORMAL); - cv::imshow(name, mat); - } - catch (...) { - cerr << "OpenCV exception: show_image_mat \n"; - } -} - -// ==================================================================== -// Video Writer -// ==================================================================== -extern "C" write_cv *create_video_writer(char *out_filename, char c1, char c2, char c3, char c4, int fps, int width, int height, int is_color) -{ - try { - cv::VideoWriter * output_video_writer = -#ifdef CV_VERSION_EPOCH - new cv::VideoWriter(out_filename, CV_FOURCC(c1, c2, c3, c4), fps, cv::Size(width, height), is_color); -#else - new cv::VideoWriter(out_filename, cv::VideoWriter::fourcc(c1, c2, c3, c4), fps, cv::Size(width, height), is_color); -#endif - - return (write_cv *)output_video_writer; - } - catch (...) { - cerr << "OpenCV exception: create_video_writer \n"; - } - return NULL; -} - -extern "C" void write_frame_cv(write_cv *output_video_writer, mat_cv *mat) -{ - try { - cv::VideoWriter *out = (cv::VideoWriter *)output_video_writer; - out->write(*(cv::Mat*)mat); - } - catch (...) 
{ - cerr << "OpenCV exception: write_frame_cv \n"; - } -} - -extern "C" void release_video_writer(write_cv **output_video_writer) -{ - try { - if (output_video_writer) { - std::cout << " closing..."; - cv::VideoWriter *out = *(cv::VideoWriter **)output_video_writer; - out->release(); - delete out; - output_video_writer = NULL; - std::cout << " closed!"; - } - else { - cerr << "OpenCV exception: output_video_writer isn't created \n"; - } - } - catch (...) { - cerr << "OpenCV exception: release_video_writer \n"; - } -} - -/* -extern "C" void *open_video_stream(const char *f, int c, int w, int h, int fps) -{ - VideoCapture *cap; - if(f) cap = new VideoCapture(f); - else cap = new VideoCapture(c); - if(!cap->isOpened()) return 0; - if(w) cap->set(CV_CAP_PROP_FRAME_WIDTH, w); - if(h) cap->set(CV_CAP_PROP_FRAME_HEIGHT, w); - if(fps) cap->set(CV_CAP_PROP_FPS, w); - return (void *) cap; -} - - -extern "C" image get_image_from_stream(void *p) -{ - VideoCapture *cap = (VideoCapture *)p; - Mat m; - *cap >> m; - if(m.empty()) return make_empty_image(0,0,0); - return mat_to_image(m); -} - -extern "C" int show_image_cv(image im, const char* name, int ms) -{ - Mat m = image_to_mat(im); - imshow(name, m); - int c = waitKey(ms); - if (c != -1) c = c%256; - return c; -} -*/ - - -// ==================================================================== -// Video Capture -// ==================================================================== - -extern "C" cap_cv* get_capture_video_stream(const char *path) { - cv::VideoCapture* cap = NULL; - try { - cap = new cv::VideoCapture(path); - } - catch (...) { - cerr << " OpenCV exception: video-stream " << path << " can't be opened! 
\n"; - } - return (cap_cv*)cap; -} -// ---------------------------------------- - -extern "C" cap_cv* get_capture_webcam(int index) -{ - cv::VideoCapture* cap = NULL; - try { - cap = new cv::VideoCapture(index); - //cap->set(CV_CAP_PROP_FRAME_WIDTH, 1280); - //cap->set(CV_CAP_PROP_FRAME_HEIGHT, 960); - } - catch (...) { - cerr << " OpenCV exception: Web-camera " << index << " can't be opened! \n"; - } - return (cap_cv*)cap; -} -// ---------------------------------------- - -extern "C" void release_capture(cap_cv* cap) -{ - try { - cv::VideoCapture *cpp_cap = (cv::VideoCapture *)cap; - delete cpp_cap; - } - catch (...) { - cerr << " OpenCV exception: cv::VideoCapture " << cap << " can't be released! \n"; - } -} -// ---------------------------------------- - -extern "C" mat_cv* get_capture_frame_cv(cap_cv *cap) { - cv::Mat *mat = NULL; - try { - mat = new cv::Mat(); - if (cap) { - cv::VideoCapture &cpp_cap = *(cv::VideoCapture *)cap; - if (cpp_cap.isOpened()) - { - cpp_cap >> *mat; - } - else std::cout << " Video-stream stopped! \n"; - } - else cerr << " cv::VideoCapture isn't created \n"; - } - catch (...) { - std::cout << " OpenCV exception: Video-stream stoped! \n"; - } - return (mat_cv *)mat; -} -// ---------------------------------------- - -extern "C" int get_stream_fps_cpp_cv(cap_cv *cap) -{ - int fps = 25; - try { - cv::VideoCapture &cpp_cap = *(cv::VideoCapture *)cap; -#ifndef CV_VERSION_EPOCH // OpenCV 3.x - fps = cpp_cap.get(cv::CAP_PROP_FPS); -#else // OpenCV 2.x - fps = cpp_cap.get(CV_CAP_PROP_FPS); -#endif - } - catch (...) { - cerr << " Can't get FPS of source videofile. For output video FPS = 25 by default. \n"; - } - return fps; -} -// ---------------------------------------- - -extern "C" double get_capture_property_cv(cap_cv *cap, int property_id) -{ - try { - cv::VideoCapture &cpp_cap = *(cv::VideoCapture *)cap; - return cpp_cap.get(property_id); - } - catch (...) { - cerr << " OpenCV exception: Can't get property of source video-stream. 
\n"; - } - return 0; -} -// ---------------------------------------- - -extern "C" double get_capture_frame_count_cv(cap_cv *cap) -{ - try { - cv::VideoCapture &cpp_cap = *(cv::VideoCapture *)cap; -#ifndef CV_VERSION_EPOCH // OpenCV 3.x - return cpp_cap.get(cv::CAP_PROP_FRAME_COUNT); -#else // OpenCV 2.x - return cpp_cap.get(CV_CAP_PROP_FRAME_COUNT); -#endif - } - catch (...) { - cerr << " OpenCV exception: Can't get CAP_PROP_FRAME_COUNT of source videofile. \n"; - } - return 0; -} -// ---------------------------------------- - -extern "C" int set_capture_property_cv(cap_cv *cap, int property_id, double value) -{ - try { - cv::VideoCapture &cpp_cap = *(cv::VideoCapture *)cap; - return cpp_cap.set(property_id, value); - } - catch (...) { - cerr << " Can't set property of source video-stream. \n"; - } - return false; -} -// ---------------------------------------- - -extern "C" int set_capture_position_frame_cv(cap_cv *cap, int index) -{ - try { - cv::VideoCapture &cpp_cap = *(cv::VideoCapture *)cap; -#ifndef CV_VERSION_EPOCH // OpenCV 3.x - return cpp_cap.set(cv::CAP_PROP_POS_FRAMES, index); -#else // OpenCV 2.x - return cpp_cap.set(CV_CAP_PROP_POS_FRAMES, index); -#endif - } - catch (...) { - cerr << " Can't set CAP_PROP_POS_FRAMES of source videofile. \n"; - } - return false; -} -// ---------------------------------------- - - - -// ==================================================================== -// ... 
Video Capture -// ==================================================================== - -extern "C" image get_image_from_stream_cpp(cap_cv *cap) -{ - cv::Mat *src = NULL; - static int once = 1; - if (once) { - once = 0; - do { - if (src) delete src; - src = (cv::Mat*)get_capture_frame_cv(cap); - if (!src) return make_empty_image(0, 0, 0); - } while (src->cols < 1 || src->rows < 1 || src->channels() < 1); - printf("Video stream: %d x %d \n", src->cols, src->rows); - } - else - src = (cv::Mat*)get_capture_frame_cv(cap); - - if (!src) return make_empty_image(0, 0, 0); - image im = mat_to_image(*src); - rgbgr_image(im); - if (src) delete src; - return im; -} -// ---------------------------------------- - -extern "C" int wait_for_stream(cap_cv *cap, cv::Mat* src, int dont_close) -{ - if (!src) { - if (dont_close) src = new cv::Mat(416, 416, CV_8UC(3)); // cvCreateImage(cvSize(416, 416), IPL_DEPTH_8U, 3); - else return 0; - } - if (src->cols < 1 || src->rows < 1 || src->channels() < 1) { - if (dont_close) { - delete src;// cvReleaseImage(&src); - int z = 0; - for (z = 0; z < 20; ++z) { - src = (cv::Mat*)get_capture_frame_cv(cap); - delete src;// cvReleaseImage(&src); - } - src = new cv::Mat(416, 416, CV_8UC(3)); // cvCreateImage(cvSize(416, 416), IPL_DEPTH_8U, 3); - } - else return 0; - } - return 1; -} -// ---------------------------------------- - -extern "C" image get_image_from_stream_resize(cap_cv *cap, int w, int h, int c, mat_cv** in_img, int dont_close) -{ - c = c ? 
c : 3; - cv::Mat *src = NULL; - - static int once = 1; - if (once) { - once = 0; - do { - if (src) delete src; - src = (cv::Mat*)get_capture_frame_cv(cap); - if (!src) return make_empty_image(0, 0, 0); - } while (src->cols < 1 || src->rows < 1 || src->channels() < 1); - printf("Video stream: %d x %d \n", src->cols, src->rows); - } - else - src = (cv::Mat*)get_capture_frame_cv(cap); - - if (!wait_for_stream(cap, src, dont_close)) return make_empty_image(0, 0, 0); - - *(cv::Mat **)in_img = src; - - cv::Mat new_img = cv::Mat(h, w, CV_8UC(c)); - cv::resize(*src, new_img, new_img.size(), 0, 0, cv::INTER_LINEAR); - if (c>1) cv::cvtColor(new_img, new_img, cv::COLOR_RGB2BGR); - image im = mat_to_image(new_img); - - //show_image_cv(im, "im"); - //show_image_mat(*in_img, "in_img"); - return im; -} -// ---------------------------------------- - -extern "C" image get_image_from_stream_letterbox(cap_cv *cap, int w, int h, int c, mat_cv** in_img, int dont_close) -{ - c = c ? c : 3; - cv::Mat *src = NULL; - static int once = 1; - if (once) { - once = 0; - do { - if (src) delete src; - src = (cv::Mat*)get_capture_frame_cv(cap); - if (!src) return make_empty_image(0, 0, 0); - } while (src->cols < 1 || src->rows < 1 || src->channels() < 1); - printf("Video stream: %d x %d \n", src->cols, src->rows); - } - else - src = (cv::Mat*)get_capture_frame_cv(cap); - - if (!wait_for_stream(cap, src, dont_close)) return make_empty_image(0, 0, 0); // passes (cv::Mat *)src while should be (cv::Mat **)src - - *in_img = (mat_cv *)new cv::Mat(src->rows, src->cols, CV_8UC(c)); - cv::resize(*src, **(cv::Mat**)in_img, (*(cv::Mat**)in_img)->size(), 0, 0, cv::INTER_LINEAR); - - if (c>1) cv::cvtColor(*src, *src, cv::COLOR_RGB2BGR); - image tmp = mat_to_image(*src); - image im = letterbox_image(tmp, w, h); - free_image(tmp); - release_mat((mat_cv **)&src); - - //show_image_cv(im, "im"); - //show_image_mat(*in_img, "in_img"); - return im; -} -// ---------------------------------------- - -// 
==================================================================== -// Image Saving -// ==================================================================== -extern int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes); -extern int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality); - -extern "C" void save_mat_png(cv::Mat img_src, const char *name) -{ - cv::Mat img_rgb; - if (img_src.channels() >= 3) cv::cvtColor(img_src, img_rgb, cv::COLOR_RGB2BGR); - stbi_write_png(name, img_rgb.cols, img_rgb.rows, 3, (char *)img_rgb.data, 0); -} -// ---------------------------------------- - -extern "C" void save_mat_jpg(cv::Mat img_src, const char *name) -{ - cv::Mat img_rgb; - if (img_src.channels() >= 3) cv::cvtColor(img_src, img_rgb, cv::COLOR_RGB2BGR); - stbi_write_jpg(name, img_rgb.cols, img_rgb.rows, 3, (char *)img_rgb.data, 80); -} -// ---------------------------------------- - - -extern "C" void save_cv_png(mat_cv *img_src, const char *name) -{ - cv::Mat* img = (cv::Mat* )img_src; - save_mat_png(*img, name); -} -// ---------------------------------------- - -extern "C" void save_cv_jpg(mat_cv *img_src, const char *name) -{ - cv::Mat* img = (cv::Mat*)img_src; - save_mat_jpg(*img, name); -} -// ---------------------------------------- - - -// ==================================================================== -// Draw Detection -// ==================================================================== -extern "C" void draw_detections_cv_v3(mat_cv* mat, detection *dets, int num, float thresh, char **names, image **alphabet, int classes, int ext_output) -{ - try { - cv::Mat *show_img = (cv::Mat*)mat; - int i, j; - if (!show_img) return; - static int frame_id = 0; - frame_id++; - - for (i = 0; i < num; ++i) { - char labelstr[4096] = { 0 }; - int class_id = -1; - for (j = 0; j < classes; ++j) { - int show = strncmp(names[j], "dont_show", 9); - if (dets[i].prob[j] > thresh && show) { - 
if (class_id < 0) { - strcat(labelstr, names[j]); - class_id = j; - char buff[20]; - if (dets[i].track_id) { - sprintf(buff, " (id: %d)", dets[i].track_id); - strcat(labelstr, buff); - } - sprintf(buff, " (%2.0f%%)", dets[i].prob[j] * 100); - strcat(labelstr, buff); - printf("%s: %.0f%% ", names[j], dets[i].prob[j] * 100); - if (dets[i].track_id) printf("(track = %d, sim = %f) ", dets[i].track_id, dets[i].sim); - } - else { - strcat(labelstr, ", "); - strcat(labelstr, names[j]); - printf(", %s: %.0f%% ", names[j], dets[i].prob[j] * 100); - } - } - } - if (class_id >= 0) { - int width = std::max(1.0f, show_img->rows * .002f); - - //if(0){ - //width = pow(prob, 1./2.)*10+1; - //alphabet = 0; - //} - - //printf("%d %s: %.0f%%\n", i, names[class_id], prob*100); - int offset = class_id * 123457 % classes; - float red = get_color(2, offset, classes); - float green = get_color(1, offset, classes); - float blue = get_color(0, offset, classes); - float rgb[3]; - - //width = prob*20+2; - - rgb[0] = red; - rgb[1] = green; - rgb[2] = blue; - box b = dets[i].bbox; - if (std::isnan(b.w) || std::isinf(b.w)) b.w = 0.5; - if (std::isnan(b.h) || std::isinf(b.h)) b.h = 0.5; - if (std::isnan(b.x) || std::isinf(b.x)) b.x = 0.5; - if (std::isnan(b.y) || std::isinf(b.y)) b.y = 0.5; - b.w = (b.w < 1) ? b.w : 1; - b.h = (b.h < 1) ? b.h : 1; - b.x = (b.x < 1) ? b.x : 1; - b.y = (b.y < 1) ? 
b.y : 1; - //printf("%f %f %f %f\n", b.x, b.y, b.w, b.h); - - int left = (b.x - b.w / 2.)*show_img->cols; - int right = (b.x + b.w / 2.)*show_img->cols; - int top = (b.y - b.h / 2.)*show_img->rows; - int bot = (b.y + b.h / 2.)*show_img->rows; - - if (left < 0) left = 0; - if (right > show_img->cols - 1) right = show_img->cols - 1; - if (top < 0) top = 0; - if (bot > show_img->rows - 1) bot = show_img->rows - 1; - - //int b_x_center = (left + right) / 2; - //int b_y_center = (top + bot) / 2; - //int b_width = right - left; - //int b_height = bot - top; - //sprintf(labelstr, "%d x %d - w: %d, h: %d", b_x_center, b_y_center, b_width, b_height); - - float const font_size = show_img->rows / 1000.F; - cv::Size const text_size = cv::getTextSize(labelstr, cv::FONT_HERSHEY_COMPLEX_SMALL, font_size, 1, 0); - cv::Point pt1, pt2, pt_text, pt_text_bg1, pt_text_bg2; - pt1.x = left; - pt1.y = top; - pt2.x = right; - pt2.y = bot; - pt_text.x = left; - pt_text.y = top - 4;// 12; - pt_text_bg1.x = left; - pt_text_bg1.y = top - (3 + 18 * font_size); - pt_text_bg2.x = right; - if ((right - left) < text_size.width) pt_text_bg2.x = left + text_size.width; - pt_text_bg2.y = top; - cv::Scalar color; - color.val[0] = red * 256; - color.val[1] = green * 256; - color.val[2] = blue * 256; - - // you should create directory: result_img - //static int copied_frame_id = -1; - //static IplImage* copy_img = NULL; - //if (copied_frame_id != frame_id) { - // copied_frame_id = frame_id; - // if(copy_img == NULL) copy_img = cvCreateImage(cvSize(show_img->width, show_img->height), show_img->depth, show_img->nChannels); - // cvCopy(show_img, copy_img, 0); - //} - //static int img_id = 0; - //img_id++; - //char image_name[1024]; - //sprintf(image_name, "result_img/img_%d_%d_%d_%s.jpg", frame_id, img_id, class_id, names[class_id]); - //CvRect rect = cvRect(pt1.x, pt1.y, pt2.x - pt1.x, pt2.y - pt1.y); - //cvSetImageROI(copy_img, rect); - //cvSaveImage(image_name, copy_img, 0); - 
//cvResetImageROI(copy_img); - - cv::rectangle(*show_img, pt1, pt2, color, width, 8, 0); - if (ext_output) - printf("\t(left_x: %4.0f top_y: %4.0f width: %4.0f height: %4.0f)\n", - (float)left, (float)top, b.w*show_img->cols, b.h*show_img->rows); - else - printf("\n"); - - cv::rectangle(*show_img, pt_text_bg1, pt_text_bg2, color, width, 8, 0); - cv::rectangle(*show_img, pt_text_bg1, pt_text_bg2, color, CV_FILLED, 8, 0); // filled - cv::Scalar black_color = CV_RGB(0, 0, 0); - cv::putText(*show_img, labelstr, pt_text, cv::FONT_HERSHEY_COMPLEX_SMALL, font_size, black_color, 2 * font_size, CV_AA); - // cv::FONT_HERSHEY_COMPLEX_SMALL, cv::FONT_HERSHEY_SIMPLEX - } - } - if (ext_output) { - fflush(stdout); - } - } - catch (...) { - cerr << "OpenCV exception: draw_detections_cv_v3() \n"; - } -} -// ---------------------------------------- - -// ==================================================================== -// Draw Loss & Accuracy chart -// ==================================================================== -extern "C" mat_cv* draw_train_chart(char *windows_name, float max_img_loss, int max_batches, int number_of_lines, int img_size, int dont_show, char* chart_path) -{ - int img_offset = 60; - int draw_size = img_size - img_offset; - cv::Mat *img_ptr = new cv::Mat(img_size, img_size, CV_8UC3, CV_RGB(255, 255, 255)); - cv::Mat &img = *img_ptr; - cv::Point pt1, pt2, pt_text; - - try { - // load chart from file - if (chart_path != NULL && chart_path[0] != '\0') { - *img_ptr = cv::imread(chart_path); - } - else { - // draw new chart - char char_buff[100]; - int i; - // vertical lines - pt1.x = img_offset; pt2.x = img_size, pt_text.x = 30; - for (i = 1; i <= number_of_lines; ++i) { - pt1.y = pt2.y = (float)i * draw_size / number_of_lines; - cv::line(img, pt1, pt2, CV_RGB(224, 224, 224), 1, 8, 0); - if (i % 10 == 0) { - sprintf(char_buff, "%2.1f", max_img_loss*(number_of_lines - i) / number_of_lines); - pt_text.y = pt1.y + 3; - - cv::putText(img, char_buff, pt_text, 
cv::FONT_HERSHEY_COMPLEX_SMALL, 0.7, CV_RGB(0, 0, 0), 1, CV_AA); - cv::line(img, pt1, pt2, CV_RGB(128, 128, 128), 1, 8, 0); - } - } - // horizontal lines - pt1.y = draw_size; pt2.y = 0, pt_text.y = draw_size + 15; - for (i = 0; i <= number_of_lines; ++i) { - pt1.x = pt2.x = img_offset + (float)i * draw_size / number_of_lines; - cv::line(img, pt1, pt2, CV_RGB(224, 224, 224), 1, 8, 0); - if (i % 10 == 0) { - sprintf(char_buff, "%d", max_batches * i / number_of_lines); - pt_text.x = pt1.x - 20; - cv::putText(img, char_buff, pt_text, cv::FONT_HERSHEY_COMPLEX_SMALL, 0.7, CV_RGB(0, 0, 0), 1, CV_AA); - cv::line(img, pt1, pt2, CV_RGB(128, 128, 128), 1, 8, 0); - } - } - - cv::putText(img, "Loss", cv::Point(10, 60), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.7, CV_RGB(0, 0, 255), 1, CV_AA); - cv::putText(img, "Iteration number", cv::Point(draw_size / 2, img_size - 10), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.7, CV_RGB(0, 0, 0), 1, CV_AA); - char max_batches_buff[100]; - sprintf(max_batches_buff, "in cfg max_batches=%d", max_batches); - cv::putText(img, max_batches_buff, cv::Point(draw_size - 195, img_size - 10), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.7, CV_RGB(0, 0, 0), 1, CV_AA); - cv::putText(img, "Press 's' to save : chart.png", cv::Point(5, img_size - 10), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.7, CV_RGB(0, 0, 0), 1, CV_AA); - } - - if (!dont_show) { - printf(" If error occurs - run training with flag: -dont_show \n"); - cv::namedWindow(windows_name, cv::WINDOW_NORMAL); - cv::moveWindow(windows_name, 0, 0); - cv::resizeWindow(windows_name, img_size, img_size); - cv::imshow(windows_name, img); - cv::waitKey(20); - } - } - catch (...) 
{ - cerr << "OpenCV exception: draw_train_chart() \n"; - } - return (mat_cv*)img_ptr; -} -// ---------------------------------------- - -extern "C" void draw_train_loss(char *windows_name, mat_cv* img_src, int img_size, float avg_loss, float max_img_loss, int current_batch, int max_batches, - float precision, int draw_precision, char *accuracy_name, float contr_acc, int dont_show, int mjpeg_port, double time_remaining) -{ - try { - cv::Mat &img = *(cv::Mat*)img_src; - int img_offset = 60; - int draw_size = img_size - img_offset; - char char_buff[100]; - cv::Point pt1, pt2; - pt1.x = img_offset + draw_size * (float)current_batch / max_batches; - pt1.y = draw_size * (1 - avg_loss / max_img_loss); - if (pt1.y < 0) pt1.y = 1; - cv::circle(img, pt1, 1, CV_RGB(0, 0, 255), CV_FILLED, 8, 0); - - // contrastive accuracy - if (contr_acc >= 0) { - static float old_contr_acc = 0; - - if (current_batch > 0) { - cv::line(img, - cv::Point(img_offset + draw_size * (float)(current_batch - 1) / max_batches, draw_size * (1 - old_contr_acc)), - cv::Point(img_offset + draw_size * (float)current_batch / max_batches, draw_size * (1 - contr_acc)), - CV_RGB(0, 150, 70), 1, 8, 0); - } - old_contr_acc = contr_acc; - - sprintf(char_buff, "C:%2.1f%% ", contr_acc * 100); - cv::putText(img, char_buff, cv::Point(1, 45), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.7, CV_RGB(255, 255, 255), 5, CV_AA); - cv::putText(img, char_buff, cv::Point(1, 45), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.7, CV_RGB(0, 150, 70), 1, CV_AA); - } - - // precision - if (draw_precision) { - static float old_precision = 0; - static float max_precision = 0; - static int iteration_old = 0; - static int text_iteration_old = 0; - if (iteration_old == 0) - cv::putText(img, accuracy_name, cv::Point(10, 12), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.7, CV_RGB(255, 0, 0), 1, CV_AA); - - if (iteration_old != 0){ - cv::line(img, - cv::Point(img_offset + draw_size * (float)iteration_old / max_batches, draw_size * (1 - old_precision)), - cv::Point(img_offset 
+ draw_size * (float)current_batch / max_batches, draw_size * (1 - precision)), - CV_RGB(255, 0, 0), 1, 8, 0); - } - - sprintf(char_buff, "%2.1f%% ", precision * 100); - cv::putText(img, char_buff, cv::Point(10, 28), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.7, CV_RGB(255, 255, 255), 5, CV_AA); - cv::putText(img, char_buff, cv::Point(10, 28), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.7, CV_RGB(200, 0, 0), 1, CV_AA); - - if ((std::fabs(old_precision - precision) > 0.1) || (max_precision < precision) || (current_batch - text_iteration_old) >= max_batches / 10) { - text_iteration_old = current_batch; - max_precision = std::max(max_precision, precision); - sprintf(char_buff, "%2.0f%% ", precision * 100); - cv::putText(img, char_buff, cv::Point(pt1.x - 30, draw_size * (1 - precision) + 15), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.7, CV_RGB(255, 255, 255), 5, CV_AA); - cv::putText(img, char_buff, cv::Point(pt1.x - 30, draw_size * (1 - precision) + 15), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.7, CV_RGB(200, 0, 0), 1, CV_AA); - } - old_precision = precision; - iteration_old = current_batch; - } - sprintf(char_buff, "current avg loss = %2.4f iteration = %d approx. 
time left = %2.2f hours", avg_loss, current_batch, time_remaining); - pt1.x = 15, pt1.y = draw_size + 18; - pt2.x = pt1.x + 800, pt2.y = pt1.y + 20; - cv::rectangle(img, pt1, pt2, CV_RGB(255, 255, 255), CV_FILLED, 8, 0); - pt1.y += 15; - cv::putText(img, char_buff, pt1, cv::FONT_HERSHEY_COMPLEX_SMALL, 0.7, CV_RGB(0, 0, 100), 1, CV_AA); - - int k = 0; - if (!dont_show) { - cv::imshow(windows_name, img); - k = cv::waitKey(20); - } - static int old_batch = 0; - if (k == 's' || current_batch == (max_batches - 1) || (current_batch / 100 > old_batch / 100)) { - old_batch = current_batch; - save_mat_png(img, "chart.png"); - save_mat_png(img, windows_name); - cv::putText(img, "- Saved", cv::Point(260, img_size - 10), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.7, CV_RGB(255, 0, 0), 1, CV_AA); - } - else - cv::putText(img, "- Saved", cv::Point(260, img_size - 10), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.7, CV_RGB(255, 255, 255), 1, CV_AA); - - if (mjpeg_port > 0) send_mjpeg((mat_cv *)&img, mjpeg_port, 500000, 70); - } - catch (...) 
{ - cerr << "OpenCV exception: draw_train_loss() \n"; - } -} -// ---------------------------------------- - - -// ==================================================================== -// Data augmentation -// ==================================================================== - -extern "C" image image_data_augmentation(mat_cv* mat, int w, int h, - int pleft, int ptop, int swidth, int sheight, int flip, - float dhue, float dsat, float dexp, - int gaussian_noise, int blur, int num_boxes, int truth_size, float *truth) -{ - image out; - try { - cv::Mat img = *(cv::Mat *)mat; - - // crop - cv::Rect src_rect(pleft, ptop, swidth, sheight); - cv::Rect img_rect(cv::Point2i(0, 0), img.size()); - cv::Rect new_src_rect = src_rect & img_rect; - - cv::Rect dst_rect(cv::Point2i(std::max(0, -pleft), std::max(0, -ptop)), new_src_rect.size()); - cv::Mat sized; - - if (src_rect.x == 0 && src_rect.y == 0 && src_rect.size() == img.size()) { - cv::resize(img, sized, cv::Size(w, h), 0, 0, cv::INTER_LINEAR); - } - else { - cv::Mat cropped(src_rect.size(), img.type()); - //cropped.setTo(cv::Scalar::all(0)); - cropped.setTo(cv::mean(img)); - - img(new_src_rect).copyTo(cropped(dst_rect)); - - // resize - cv::resize(cropped, sized, cv::Size(w, h), 0, 0, cv::INTER_LINEAR); - } - - // flip - if (flip) { - cv::Mat cropped; - cv::flip(sized, cropped, 1); // 0 - x-axis, 1 - y-axis, -1 - both axes (x & y) - sized = cropped.clone(); - } - - // HSV augmentation - // cv::COLOR_BGR2HSV, cv::COLOR_RGB2HSV, cv::COLOR_HSV2BGR, cv::COLOR_HSV2RGB - if (dsat != 1 || dexp != 1 || dhue != 0) { - if (img.channels() >= 3) - { - cv::Mat hsv_src; - cvtColor(sized, hsv_src, cv::COLOR_RGB2HSV); // RGB to HSV - - std::vector hsv; - cv::split(hsv_src, hsv); - - hsv[1] *= dsat; - hsv[2] *= dexp; - hsv[0] += 179 * dhue; - - cv::merge(hsv, hsv_src); - - cvtColor(hsv_src, sized, cv::COLOR_HSV2RGB); // HSV to RGB (the same as previous) - } - else - { - sized *= dexp; - } - } - - //std::stringstream window_name; - 
//window_name << "augmentation - " << ipl; - //cv::imshow(window_name.str(), sized); - //cv::waitKey(0); - - if (blur) { - cv::Mat dst(sized.size(), sized.type()); - if (blur == 1) { - cv::GaussianBlur(sized, dst, cv::Size(17, 17), 0); - //cv::bilateralFilter(sized, dst, 17, 75, 75); - } - else { - int ksize = (blur / 2) * 2 + 1; - cv::Size kernel_size = cv::Size(ksize, ksize); - cv::GaussianBlur(sized, dst, kernel_size, 0); - //cv::medianBlur(sized, dst, ksize); - //cv::bilateralFilter(sized, dst, ksize, 75, 75); - - // sharpen - //cv::Mat img_tmp; - //cv::GaussianBlur(dst, img_tmp, cv::Size(), 3); - //cv::addWeighted(dst, 1.5, img_tmp, -0.5, 0, img_tmp); - //dst = img_tmp; - } - //std::cout << " blur num_boxes = " << num_boxes << std::endl; - - if (blur == 1) { - cv::Rect img_rect(0, 0, sized.cols, sized.rows); - int t; - for (t = 0; t < num_boxes; ++t) { - box b = float_to_box_stride(truth + t*truth_size, 1); - if (!b.x) break; - int left = (b.x - b.w / 2.)*sized.cols; - int width = b.w*sized.cols; - int top = (b.y - b.h / 2.)*sized.rows; - int height = b.h*sized.rows; - cv::Rect roi(left, top, width, height); - roi = roi & img_rect; - - sized(roi).copyTo(dst(roi)); - } - } - dst.copyTo(sized); - } - - if (gaussian_noise) { - cv::Mat noise = cv::Mat(sized.size(), sized.type()); - gaussian_noise = std::min(gaussian_noise, 127); - gaussian_noise = std::max(gaussian_noise, 0); - cv::randn(noise, 0, gaussian_noise); //mean and variance - cv::Mat sized_norm = sized + noise; - //cv::normalize(sized_norm, sized_norm, 0.0, 255.0, cv::NORM_MINMAX, sized.type()); - //cv::imshow("source", sized); - //cv::imshow("gaussian noise", sized_norm); - //cv::waitKey(0); - sized = sized_norm; - } - - //char txt[100]; - //sprintf(txt, "blur = %d", blur); - //cv::putText(sized, txt, cv::Point(100, 100), cv::FONT_HERSHEY_COMPLEX_SMALL, 1.7, CV_RGB(255, 0, 0), 1, CV_AA); - - // Mat -> image - out = mat_to_image(sized); - } - catch (...) 
{ - cerr << "OpenCV can't augment image: " << w << " x " << h << " \n"; - out = mat_to_image(*(cv::Mat*)mat); - } - return out; -} - -// blend two images with (alpha and beta) -extern "C" void blend_images_cv(image new_img, float alpha, image old_img, float beta) -{ - cv::Mat new_mat(cv::Size(new_img.w, new_img.h), CV_32FC(new_img.c), new_img.data);// , size_t step = AUTO_STEP) - cv::Mat old_mat(cv::Size(old_img.w, old_img.h), CV_32FC(old_img.c), old_img.data); - cv::addWeighted(new_mat, alpha, old_mat, beta, 0.0, new_mat); -} - -// bilateralFilter bluring -extern "C" image blur_image(image src_img, int ksize) -{ - cv::Mat src = image_to_mat(src_img); - cv::Mat dst; - cv::Size kernel_size = cv::Size(ksize, ksize); - cv::GaussianBlur(src, dst, kernel_size, 0); - //cv::bilateralFilter(src, dst, ksize, 75, 75); - image dst_img = mat_to_image(dst); - return dst_img; -} - -// ==================================================================== -// Draw object - adversarial attack dnn -// ==================================================================== - -std::atomic x_start, y_start; -std::atomic x_end, y_end; -std::atomic x_size, y_size; -std::atomic draw_select, selected; - -void callback_mouse_click(int event, int x, int y, int flags, void* user_data) -{ - if (event == cv::EVENT_LBUTTONDOWN) - { - draw_select = true; - selected = false; - x_start = x; - y_start = y; - - //if (prev_img_rect.contains(Point2i(x, y))) add_id_img = -1; - //else if (next_img_rect.contains(Point2i(x, y))) add_id_img = 1; - //else add_id_img = 0; - //std::cout << "cv::EVENT_LBUTTONDOWN \n"; - } - else if (event == cv::EVENT_LBUTTONUP) - { - x_size = abs(x - x_start); - y_size = abs(y - y_start); - x_end = std::max(x, 0); - y_end = std::max(y, 0); - draw_select = false; - selected = true; - //std::cout << "cv::EVENT_LBUTTONUP \n"; - } - else if (event == cv::EVENT_MOUSEMOVE) - { - x_size = abs(x - x_start); - y_size = abs(y - y_start); - x_end = std::max(x, 0); - y_end = std::max(y, 0); - 
} -} - -extern "C" void cv_draw_object(image sized, float *truth_cpu, int max_boxes, int num_truth, int *it_num_set, float *lr_set, int *boxonly, int classes, char **names) -{ - cv::Mat frame = image_to_mat(sized); - if(frame.channels() == 3) cv::cvtColor(frame, frame, cv::COLOR_RGB2BGR); - cv::Mat frame_clone = frame.clone(); - - - std::string const window_name = "Marking image"; - cv::namedWindow(window_name, cv::WINDOW_NORMAL); - cv::resizeWindow(window_name, 1280, 720); - cv::imshow(window_name, frame); - cv::moveWindow(window_name, 0, 0); - cv::setMouseCallback(window_name, callback_mouse_click); - - - int it_trackbar_value = 200; - std::string const it_trackbar_name = "iterations"; - int it_tb_res = cv::createTrackbar(it_trackbar_name, window_name, &it_trackbar_value, 1000); - - int lr_trackbar_value = 10; - std::string const lr_trackbar_name = "learning_rate exp"; - int lr_tb_res = cv::createTrackbar(lr_trackbar_name, window_name, &lr_trackbar_value, 20); - - int cl_trackbar_value = 0; - std::string const cl_trackbar_name = "class_id"; - int cl_tb_res = cv::createTrackbar(cl_trackbar_name, window_name, &cl_trackbar_value, classes-1); - - std::string const bo_trackbar_name = "box-only"; - int bo_tb_res = cv::createTrackbar(bo_trackbar_name, window_name, boxonly, 1); - - int i = 0; - - while (!selected) { -#ifndef CV_VERSION_EPOCH - int pressed_key = cv::waitKeyEx(20); // OpenCV 3.x -#else - int pressed_key = cv::waitKey(20); // OpenCV 2.x -#endif - if (pressed_key == 27 || pressed_key == 1048603) break;// break; // ESC - save & exit - - frame_clone = frame.clone(); - char buff[100]; - std::string lr_value = "learning_rate = " + std::to_string(1.0 / pow(2, lr_trackbar_value)); - cv::putText(frame_clone, lr_value, cv::Point2i(10, 20), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(10, 50, 10), 3); - cv::putText(frame_clone, lr_value, cv::Point2i(10, 20), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(20, 120, 60), 2); - cv::putText(frame_clone, lr_value, cv::Point2i(10, 
20), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(50, 200, 100), 1); - - if (names) { - std::string obj_name = names[cl_trackbar_value]; - cv::putText(frame_clone, obj_name, cv::Point2i(10, 40), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(10, 50, 10), 3); - cv::putText(frame_clone, obj_name, cv::Point2i(10, 40), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(20, 120, 60), 2); - cv::putText(frame_clone, obj_name, cv::Point2i(10, 40), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(50, 200, 100), 1); - } - - if (draw_select) { - cv::Rect selected_rect( - cv::Point2i((int)min(x_start, x_end), (int)min(y_start, y_end)), - cv::Size(x_size, y_size)); - - rectangle(frame_clone, selected_rect, cv::Scalar(150, 200, 150)); - } - - - cv::imshow(window_name, frame_clone); - } - - if (selected) { - cv::Rect selected_rect( - cv::Point2i((int)min(x_start, x_end), (int)min(y_start, y_end)), - cv::Size(x_size, y_size)); - - printf(" x_start = %d, y_start = %d, x_size = %d, y_size = %d \n", - x_start.load(), y_start.load(), x_size.load(), y_size.load()); - - rectangle(frame, selected_rect, cv::Scalar(150, 200, 150)); - cv::imshow(window_name, frame); - cv::waitKey(100); - - float width = x_end - x_start; - float height = y_end - y_start; - - float const relative_center_x = (float)(x_start + width / 2) / frame.cols; - float const relative_center_y = (float)(y_start + height / 2) / frame.rows; - float const relative_width = (float)width / frame.cols; - float const relative_height = (float)height / frame.rows; - - truth_cpu[i * 5 + 0] = relative_center_x; - truth_cpu[i * 5 + 1] = relative_center_y; - truth_cpu[i * 5 + 2] = relative_width; - truth_cpu[i * 5 + 3] = relative_height; - truth_cpu[i * 5 + 4] = cl_trackbar_value; - } - - *it_num_set = it_trackbar_value; - *lr_set = 1.0 / pow(2, lr_trackbar_value); -} - -// ==================================================================== -// Show Anchors -// ==================================================================== -extern "C" void 
show_acnhors(int number_of_boxes, int num_of_clusters, float *rel_width_height_array, model anchors_data, int width, int height) -{ - cv::Mat labels = cv::Mat(number_of_boxes, 1, CV_32SC1); - cv::Mat points = cv::Mat(number_of_boxes, 2, CV_32FC1); - cv::Mat centers = cv::Mat(num_of_clusters, 2, CV_32FC1); - - for (int i = 0; i < number_of_boxes; ++i) { - points.at(i, 0) = rel_width_height_array[i * 2]; - points.at(i, 1) = rel_width_height_array[i * 2 + 1]; - } - - for (int i = 0; i < num_of_clusters; ++i) { - centers.at(i, 0) = anchors_data.centers.vals[i][0]; - centers.at(i, 1) = anchors_data.centers.vals[i][1]; - } - - for (int i = 0; i < number_of_boxes; ++i) { - labels.at(i, 0) = anchors_data.assignments[i]; - } - - size_t img_size = 700; - cv::Mat img = cv::Mat(img_size, img_size, CV_8UC3); - - for (int i = 0; i < number_of_boxes; ++i) { - cv::Point pt; - pt.x = points.at(i, 0) * img_size / width; - pt.y = points.at(i, 1) * img_size / height; - int cluster_idx = labels.at(i, 0); - int red_id = (cluster_idx * (uint64_t)123 + 55) % 255; - int green_id = (cluster_idx * (uint64_t)321 + 33) % 255; - int blue_id = (cluster_idx * (uint64_t)11 + 99) % 255; - cv::circle(img, pt, 1, CV_RGB(red_id, green_id, blue_id), CV_FILLED, 8, 0); - //if(pt.x > img_size || pt.y > img_size) printf("\n pt.x = %d, pt.y = %d \n", pt.x, pt.y); - } - - for (int j = 0; j < num_of_clusters; ++j) { - cv::Point pt1, pt2; - pt1.x = pt1.y = 0; - pt2.x = centers.at(j, 0) * img_size / width; - pt2.y = centers.at(j, 1) * img_size / height; - cv::rectangle(img, pt1, pt2, CV_RGB(255, 255, 255), 1, 8, 0); - } - save_mat_png(img, "cloud.png"); - cv::imshow("clusters", img); - cv::waitKey(0); - cv::destroyAllWindows(); -} - -void show_opencv_info() -{ - std::cerr << " OpenCV version: " << CV_VERSION_MAJOR << "." << CV_VERSION_MINOR << "." 
<< CVAUX_STR(CV_VERSION_REVISION) OCV_D - << std::endl; -} - - - -} // extern "C" -#else // OPENCV -extern "C" void show_opencv_info() -{ - std::cerr << " OpenCV isn't used - data augmentation will be slow \n"; -} -extern "C" int wait_key_cv(int delay) { return 0; } -extern "C" int wait_until_press_key_cv() { return 0; } -extern "C" void destroy_all_windows_cv() {} -extern "C" void resize_window_cv(char const* window_name, int width, int height) {} -#endif // OPENCV diff --git a/src/Detector/darknet/src/image_opencv.h b/src/Detector/darknet/src/image_opencv.h deleted file mode 100644 index 6fa6cb5c6..000000000 --- a/src/Detector/darknet/src/image_opencv.h +++ /dev/null @@ -1,134 +0,0 @@ -#ifndef IMAGE_OPENCV_H -#define IMAGE_OPENCV_H - -#include "image.h" -#include "matrix.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef OPENCV - -// declaration -typedef void* mat_cv; -typedef void* cap_cv; -typedef void* write_cv; - -//typedef struct mat_cv mat_cv; -//typedef struct cap_cv cap_cv; -//typedef struct write_cv write_cv; - -// cv::Mat -mat_cv *load_image_mat_cv(const char *filename, int flag); -image load_image_cv(char *filename, int channels); -image load_image_resize(char *filename, int w, int h, int c, image *im); -int get_width_mat(mat_cv *mat); -int get_height_mat(mat_cv *mat); -void release_mat(mat_cv **mat); - -// IplImage - to delete -//int get_width_cv(mat_cv *ipl); -//int get_height_cv(mat_cv *ipl); -//void release_ipl(mat_cv **ipl); - -// image-to-ipl, ipl-to-image, image_to_mat, mat_to_image -//mat_cv *image_to_ipl(image im); // to delete -//image ipl_to_image(mat_cv* src_ptr); // to delete - - -// mat_cv *image_to_ipl(image im) -// image ipl_to_image(mat_cv* src_ptr) -// cv::Mat ipl_to_mat(IplImage *ipl) -// IplImage *mat_to_ipl(cv::Mat mat) -// Mat image_to_mat(image img) -// image mat_to_image(cv::Mat mat) -image mat_to_image_cv(mat_cv *mat); - -// Window -void create_window_cv(char const* window_name, int full_screen, int width, int height); 
-void resize_window_cv(char const* window_name, int width, int height); -void destroy_all_windows_cv(); -int wait_key_cv(int delay); -int wait_until_press_key_cv(); -void make_window(char *name, int w, int h, int fullscreen); -void show_image_cv(image p, const char *name); -//void show_image_cv_ipl(mat_cv *disp, const char *name); -void show_image_mat(mat_cv *mat_ptr, const char *name); - -// Video Writer -write_cv *create_video_writer(char *out_filename, char c1, char c2, char c3, char c4, int fps, int width, int height, int is_color); -void write_frame_cv(write_cv *output_video_writer, mat_cv *mat); -void release_video_writer(write_cv **output_video_writer); - - -//void *open_video_stream(const char *f, int c, int w, int h, int fps); -//image get_image_from_stream(void *p); -//image load_image_cv(char *filename, int channels); -//int show_image_cv(image im, const char* name, int ms); - -// Video Capture -cap_cv* get_capture_video_stream(const char *path); -cap_cv* get_capture_webcam(int index); -void release_capture(cap_cv* cap); - -mat_cv* get_capture_frame_cv(cap_cv *cap); -int get_stream_fps_cpp_cv(cap_cv *cap); -double get_capture_property_cv(cap_cv *cap, int property_id); -double get_capture_frame_count_cv(cap_cv *cap); -int set_capture_property_cv(cap_cv *cap, int property_id, double value); -int set_capture_position_frame_cv(cap_cv *cap, int index); - -// ... 
Video Capture -image get_image_from_stream_cpp(cap_cv *cap); -image get_image_from_stream_resize(cap_cv *cap, int w, int h, int c, mat_cv** in_img, int dont_close); -image get_image_from_stream_letterbox(cap_cv *cap, int w, int h, int c, mat_cv** in_img, int dont_close); - - -// Image Saving -void save_cv_png(mat_cv *img, const char *name); -void save_cv_jpg(mat_cv *img, const char *name); - -// Draw Detection -void draw_detections_cv_v3(mat_cv* show_img, detection *dets, int num, float thresh, char **names, image **alphabet, int classes, int ext_output); - -// Draw Loss & Accuracy chart -mat_cv* draw_train_chart(char *windows_name, float max_img_loss, int max_batches, int number_of_lines, int img_size, int dont_show, char* chart_path); -void draw_train_loss(char *windows_name, mat_cv* img, int img_size, float avg_loss, float max_img_loss, int current_batch, int max_batches, - float precision, int draw_precision, char *accuracy_name, float contr_acc, int dont_show, int mjpeg_port, double time_remaining); - -// Data augmentation -image image_data_augmentation(mat_cv* mat, int w, int h, - int pleft, int ptop, int swidth, int sheight, int flip, - float dhue, float dsat, float dexp, - int gaussian_noise, int blur, int num_boxes, int truth_size, float *truth); - -// blend two images with (alpha and beta) -void blend_images_cv(image new_img, float alpha, image old_img, float beta); - -// bilateralFilter bluring -image blur_image(image src_img, int ksize); - -// draw objects for Adversarial attacks -void cv_draw_object(image sized, float *truth_cpu, int max_boxes, int num_truth, int *it_num_set, float *lr_set, int *boxonly, int classes, char **names); - -// Show Anchors -void show_acnhors(int number_of_boxes, int num_of_clusters, float *rel_width_height_array, model anchors_data, int width, int height); - -void show_opencv_info(); - -#else // OPENCV - -void show_opencv_info(); -int wait_key_cv(int delay); -int wait_until_press_key_cv(); -void destroy_all_windows_cv(); 
-void resize_window_cv(char const* window_name, int width, int height); - -#endif // OPENCV - -#ifdef __cplusplus -} -#endif - -#endif // IMAGE_OPENCV_H diff --git a/src/Detector/darknet/src/layer.c b/src/Detector/darknet/src/layer.c deleted file mode 100644 index 032a24e0f..000000000 --- a/src/Detector/darknet/src/layer.c +++ /dev/null @@ -1,268 +0,0 @@ -#include "layer.h" -#include "dark_cuda.h" -#include - -void free_sublayer(layer *l) -{ - if (l) { - free_layer(*l); - free(l); - } -} - -void free_layer(layer l) -{ - free_layer_custom(l, 0); -} - -void free_layer_custom(layer l, int keep_cudnn_desc) -{ - if (l.share_layer != NULL) return; // don't free shared layers - if (l.antialiasing) { - free_sublayer(l.input_layer); - } - if (l.type == CONV_LSTM) { - if (l.peephole) { - free_sublayer(l.vf); - free_sublayer(l.vi); - free_sublayer(l.vo); - } - else { - free(l.vf); - free(l.vi); - free(l.vo); - } - free_sublayer(l.wf); - if (!l.bottleneck) { - free_sublayer(l.wi); - free_sublayer(l.wg); - free_sublayer(l.wo); - } - free_sublayer(l.uf); - free_sublayer(l.ui); - free_sublayer(l.ug); - free_sublayer(l.uo); - } - if (l.type == CRNN) { - free_sublayer(l.input_layer); - free_sublayer(l.self_layer); - free_sublayer(l.output_layer); - l.output = NULL; - l.delta = NULL; -#ifdef GPU - l.output_gpu = NULL; - l.delta_gpu = NULL; -#endif // GPU - } - if (l.type == DROPOUT) { - if (l.rand) free(l.rand); -#ifdef GPU - if (l.rand_gpu) cuda_free(l.rand_gpu); - if (l.drop_blocks_scale) cuda_free_host(l.drop_blocks_scale); - if (l.drop_blocks_scale_gpu) cuda_free(l.drop_blocks_scale_gpu); -#endif - return; - } - if (l.mask) free(l.mask); - if (l.classes_multipliers)free(l.classes_multipliers); - if (l.cweights) free(l.cweights); - if (l.indexes) free(l.indexes); - if (l.input_layers) free(l.input_layers); - if (l.input_sizes) free(l.input_sizes); - if (l.layers_output) free(l.layers_output); - if (l.layers_delta) free(l.layers_delta); - if (l.map) free(l.map); - if (l.rand) 
free(l.rand); - if (l.cost) free(l.cost); - if (l.labels && !l.detection) free(l.labels); - if (l.class_ids && !l.detection) free(l.class_ids); - if (l.cos_sim) free(l.cos_sim); - if (l.exp_cos_sim) free(l.exp_cos_sim); - if (l.p_constrastive) free(l.p_constrastive); - if (l.embedding_output) free(l.embedding_output); - if (l.state) free(l.state); - if (l.prev_state) free(l.prev_state); - if (l.forgot_state) free(l.forgot_state); - if (l.forgot_delta) free(l.forgot_delta); - if (l.state_delta) free(l.state_delta); - if (l.concat) free(l.concat); - if (l.concat_delta) free(l.concat_delta); - if (l.binary_weights) free(l.binary_weights); - if (l.biases) free(l.biases), l.biases = NULL; - if (l.bias_updates) free(l.bias_updates), l.bias_updates = NULL; - if (l.scales) free(l.scales), l.scales = NULL; - if (l.scale_updates) free(l.scale_updates), l.scale_updates = NULL; - if (l.biases_ema) free(l.biases_ema), l.biases = NULL; - if (l.scales_ema) free(l.scales_ema), l.scales = NULL; - if (l.weights_ema) free(l.weights_ema), l.weights = NULL; - if (l.weights) free(l.weights), l.weights = NULL; - if (l.weight_updates) free(l.weight_updates), l.weight_updates = NULL; - if (l.align_bit_weights) free(l.align_bit_weights); - if (l.mean_arr) free(l.mean_arr); -#ifdef GPU - if (l.delta && l.delta_pinned) { - cudaFreeHost(l.delta); - l.delta = NULL; - } - if (l.output && l.output_pinned) { - cudaFreeHost(l.output); - l.output = NULL; - } -#endif // GPU - if (l.delta) free(l.delta), l.delta = NULL; - if (l.output) free(l.output), l.output = NULL; - if (l.activation_input) free(l.activation_input), l.activation_input = NULL; - if (l.squared) free(l.squared); - if (l.norms) free(l.norms); - if (l.spatial_mean) free(l.spatial_mean); - if (l.mean) free(l.mean), l.mean = NULL; - if (l.variance) free(l.variance), l.variance = NULL; - if (l.mean_delta) free(l.mean_delta), l.mean_delta = NULL; - if (l.variance_delta) free(l.variance_delta), l.variance_delta = NULL; - if (l.rolling_mean) 
free(l.rolling_mean), l.rolling_mean = NULL; - if (l.rolling_variance) free(l.rolling_variance), l.rolling_variance = NULL; - if (l.x) free(l.x); - if (l.x_norm) free(l.x_norm); - if (l.m) free(l.m); - if (l.v) free(l.v); - if (l.z_cpu) free(l.z_cpu); - if (l.r_cpu) free(l.r_cpu); - if (l.binary_input) free(l.binary_input); - if (l.bin_re_packed_input) free(l.bin_re_packed_input); - if (l.t_bit_input) free(l.t_bit_input); - if (l.loss) free(l.loss); - - // CONV-LSTM - if (l.f_cpu) free(l.f_cpu); - if (l.i_cpu) free(l.i_cpu); - if (l.g_cpu) free(l.g_cpu); - if (l.o_cpu) free(l.o_cpu); - if (l.c_cpu) free(l.c_cpu); - if (l.h_cpu) free(l.h_cpu); - if (l.temp_cpu) free(l.temp_cpu); - if (l.temp2_cpu) free(l.temp2_cpu); - if (l.temp3_cpu) free(l.temp3_cpu); - if (l.dc_cpu) free(l.dc_cpu); - if (l.dh_cpu) free(l.dh_cpu); - if (l.prev_state_cpu) free(l.prev_state_cpu); - if (l.prev_cell_cpu) free(l.prev_cell_cpu); - if (l.stored_c_cpu) free(l.stored_c_cpu); - if (l.stored_h_cpu) free(l.stored_h_cpu); - if (l.cell_cpu) free(l.cell_cpu); - -#ifdef GPU - if (l.indexes_gpu) cuda_free((float *)l.indexes_gpu); - - if (l.contrast_p_gpu) cuda_free((float *)l.contrast_p_gpu); - if (l.z_gpu) cuda_free(l.z_gpu); - if (l.r_gpu) cuda_free(l.r_gpu); - if (l.m_gpu) cuda_free(l.m_gpu); - if (l.v_gpu) cuda_free(l.v_gpu); - if (l.forgot_state_gpu) cuda_free(l.forgot_state_gpu); - if (l.forgot_delta_gpu) cuda_free(l.forgot_delta_gpu); - if (l.state_gpu) cuda_free(l.state_gpu); - if (l.state_delta_gpu) cuda_free(l.state_delta_gpu); - if (l.gate_gpu) cuda_free(l.gate_gpu); - if (l.gate_delta_gpu) cuda_free(l.gate_delta_gpu); - if (l.save_gpu) cuda_free(l.save_gpu); - if (l.save_delta_gpu) cuda_free(l.save_delta_gpu); - if (l.concat_gpu) cuda_free(l.concat_gpu); - if (l.concat_delta_gpu) cuda_free(l.concat_delta_gpu); - if (l.binary_input_gpu) cuda_free(l.binary_input_gpu); - if (l.binary_weights_gpu) cuda_free(l.binary_weights_gpu); - if (l.mean_gpu) cuda_free(l.mean_gpu), l.mean_gpu = NULL; 
- if (l.variance_gpu) cuda_free(l.variance_gpu), l.variance_gpu = NULL; - if (l.m_cbn_avg_gpu) cuda_free(l.m_cbn_avg_gpu), l.m_cbn_avg_gpu = NULL; - if (l.v_cbn_avg_gpu) cuda_free(l.v_cbn_avg_gpu), l.v_cbn_avg_gpu = NULL; - if (l.rolling_mean_gpu) cuda_free(l.rolling_mean_gpu), l.rolling_mean_gpu = NULL; - if (l.rolling_variance_gpu) cuda_free(l.rolling_variance_gpu), l.rolling_variance_gpu = NULL; - if (l.variance_delta_gpu) cuda_free(l.variance_delta_gpu), l.variance_delta_gpu = NULL; - if (l.mean_delta_gpu) cuda_free(l.mean_delta_gpu), l.mean_delta_gpu = NULL; - if (l.x_norm_gpu) cuda_free(l.x_norm_gpu); - - // assisted excitation - if (l.gt_gpu) cuda_free(l.gt_gpu); - if (l.a_avg_gpu) cuda_free(l.a_avg_gpu); - - if (l.align_bit_weights_gpu) cuda_free((float *)l.align_bit_weights_gpu); - if (l.mean_arr_gpu) cuda_free(l.mean_arr_gpu); - if (l.align_workspace_gpu) cuda_free(l.align_workspace_gpu); - if (l.transposed_align_workspace_gpu) cuda_free(l.transposed_align_workspace_gpu); - - if (l.weights_gpu) cuda_free(l.weights_gpu), l.weights_gpu = NULL; - if (l.weight_updates_gpu) cuda_free(l.weight_updates_gpu), l.weight_updates_gpu = NULL; - if (l.weight_deform_gpu) cuda_free(l.weight_deform_gpu), l.weight_deform_gpu = NULL; - if (l.weights_gpu16) cuda_free(l.weights_gpu16), l.weights_gpu16 = NULL; - if (l.weight_updates_gpu16) cuda_free(l.weight_updates_gpu16), l.weight_updates_gpu16 = NULL; - if (l.biases_gpu) cuda_free(l.biases_gpu), l.biases_gpu = NULL; - if (l.bias_updates_gpu) cuda_free(l.bias_updates_gpu), l.bias_updates_gpu = NULL; - if (l.scales_gpu) cuda_free(l.scales_gpu), l.scales_gpu = NULL; - if (l.scale_updates_gpu) cuda_free(l.scale_updates_gpu), l.scale_updates_gpu = NULL; - if (l.input_antialiasing_gpu) cuda_free(l.input_antialiasing_gpu), l.input_antialiasing_gpu = NULL; - if (l.optimized_memory < 2) { - if (l.x_gpu) cuda_free(l.x_gpu), l.x_gpu = NULL; - if (l.output_gpu) cuda_free(l.output_gpu), l.output_gpu = NULL; - if (l.output_avg_gpu) 
cuda_free(l.output_avg_gpu), l.output_avg_gpu = NULL; - if (l.activation_input_gpu) cuda_free(l.activation_input_gpu), l.activation_input_gpu = NULL; - } - if (l.delta_gpu && (l.optimized_memory < 1 || l.keep_delta_gpu && l.optimized_memory < 3)) cuda_free(l.delta_gpu), l.delta_gpu = NULL; - if (l.cos_sim_gpu) cuda_free(l.cos_sim_gpu); - if (l.rand_gpu) cuda_free(l.rand_gpu); - if (l.squared_gpu) cuda_free(l.squared_gpu); - if (l.norms_gpu) cuda_free(l.norms_gpu); - if (l.input_sizes_gpu) cuda_free((float*)l.input_sizes_gpu); - if (l.layers_output_gpu) cuda_free((float*)l.layers_output_gpu); - if (l.layers_delta_gpu) cuda_free((float*)l.layers_delta_gpu); - - // CONV-LSTM - if (l.f_gpu) cuda_free(l.f_gpu); - if (l.i_gpu) cuda_free(l.i_gpu); - if (l.g_gpu) cuda_free(l.g_gpu); - if (l.o_gpu) cuda_free(l.o_gpu); - if (l.c_gpu) cuda_free(l.c_gpu); - if (l.h_gpu) cuda_free(l.h_gpu); - if (l.bottelneck_hi_gpu) cuda_free(l.bottelneck_hi_gpu); - if (l.bottelneck_delta_gpu) cuda_free(l.bottelneck_delta_gpu); - if (l.temp_gpu) cuda_free(l.temp_gpu); - if (l.temp2_gpu) cuda_free(l.temp2_gpu); - if (l.temp3_gpu) cuda_free(l.temp3_gpu); - if (l.dc_gpu) cuda_free(l.dc_gpu); - if (l.dh_gpu) cuda_free(l.dh_gpu); - if (l.prev_state_gpu) cuda_free(l.prev_state_gpu); - if (l.prev_cell_gpu) cuda_free(l.prev_cell_gpu); - if (l.stored_c_gpu) cuda_free(l.stored_c_gpu); - if (l.stored_h_gpu) cuda_free(l.stored_h_gpu); - if (l.last_prev_state_gpu) cuda_free(l.last_prev_state_gpu); - if (l.last_prev_cell_gpu) cuda_free(l.last_prev_cell_gpu); - if (l.cell_gpu) cuda_free(l.cell_gpu); -#ifdef CUDNN // shouldn't be used for -map - if (!keep_cudnn_desc) { - if (l.srcTensorDesc) CHECK_CUDNN(cudnnDestroyTensorDescriptor(l.srcTensorDesc)); - if (l.dstTensorDesc) CHECK_CUDNN(cudnnDestroyTensorDescriptor(l.dstTensorDesc)); - if (l.srcTensorDesc16) CHECK_CUDNN(cudnnDestroyTensorDescriptor(l.srcTensorDesc16)); - if (l.dstTensorDesc16) CHECK_CUDNN(cudnnDestroyTensorDescriptor(l.dstTensorDesc16)); - if 
(l.dsrcTensorDesc) CHECK_CUDNN(cudnnDestroyTensorDescriptor(l.dsrcTensorDesc)); - if (l.ddstTensorDesc) CHECK_CUDNN(cudnnDestroyTensorDescriptor(l.ddstTensorDesc)); - if (l.dsrcTensorDesc16) CHECK_CUDNN(cudnnDestroyTensorDescriptor(l.dsrcTensorDesc16)); - if (l.ddstTensorDesc16) CHECK_CUDNN(cudnnDestroyTensorDescriptor(l.ddstTensorDesc16)); - if (l.normTensorDesc) CHECK_CUDNN(cudnnDestroyTensorDescriptor(l.normTensorDesc)); - if (l.normDstTensorDesc) CHECK_CUDNN(cudnnDestroyTensorDescriptor(l.normDstTensorDesc)); - if (l.normDstTensorDescF16) CHECK_CUDNN(cudnnDestroyTensorDescriptor(l.normDstTensorDescF16)); - - if (l.weightDesc) CHECK_CUDNN(cudnnDestroyFilterDescriptor(l.weightDesc)); - if (l.weightDesc16) CHECK_CUDNN(cudnnDestroyFilterDescriptor(l.weightDesc16)); - if (l.dweightDesc) CHECK_CUDNN(cudnnDestroyFilterDescriptor(l.dweightDesc)); - if (l.dweightDesc16) CHECK_CUDNN(cudnnDestroyFilterDescriptor(l.dweightDesc16)); - - if (l.convDesc) CHECK_CUDNN(cudnnDestroyConvolutionDescriptor(l.convDesc)); - - if (l.poolingDesc) CHECK_CUDNN(cudnnDestroyPoolingDescriptor(l.poolingDesc)); - - //cudnnConvolutionFwdAlgo_t fw_algo, fw_algo16; - //cudnnConvolutionBwdDataAlgo_t bd_algo, bd_algo16; - //cudnnConvolutionBwdFilterAlgo_t bf_algo, bf_algo16; - } -#endif // CUDNN - -#endif // GPU -} diff --git a/src/Detector/darknet/src/layer.h b/src/Detector/darknet/src/layer.h deleted file mode 100644 index e92d3b4a9..000000000 --- a/src/Detector/darknet/src/layer.h +++ /dev/null @@ -1,338 +0,0 @@ -#ifndef BASE_LAYER_H -#define BASE_LAYER_H - -#include "activations.h" -#include "stddef.h" -#include "tree.h" -#ifdef __cplusplus -extern "C" { -#endif - -//struct network_state; - -//struct layer; -//typedef struct layer layer; - -//typedef enum { -// CONVOLUTIONAL, -// DECONVOLUTIONAL, -// CONNECTED, -// MAXPOOL, -// SOFTMAX, -// DETECTION, -// DROPOUT, -// CROP, -// ROUTE, -// COST, -// NORMALIZATION, -// AVGPOOL, -// LOCAL, -// SHORTCUT, -// ACTIVE, -// RNN, -// GRU, -// CRNN, -// 
BATCHNORM, -// NETWORK, -// XNOR, -// REGION, -// YOLO, -// REORG, -// UPSAMPLE, -// REORG_OLD, -// BLANK -//} LAYER_TYPE; - -//typedef enum{ -// SSE, MASKED, SMOOTH -//} COST_TYPE; - -//typedef struct { -// int batch; -// float learning_rate; -// float momentum; -// float decay; -// int adam; -// float B1; -// float B2; -// float eps; -// int t; -//} update_args; - -/* -struct layer{ - LAYER_TYPE type; - ACTIVATION activation; - COST_TYPE cost_type; - void (*forward) (struct layer, struct network_state); - void (*backward) (struct layer, struct network_state); - void (*update) (struct layer, int, float, float, float); - void (*forward_gpu) (struct layer, struct network_state); - void (*backward_gpu) (struct layer, struct network_state); - void (*update_gpu) (struct layer, int, float, float, float); - int batch_normalize; - int shortcut; - int batch; - int forced; - int flipped; - int inputs; - int outputs; - int truths; - int h,w,c; - int out_h, out_w, out_c; - int n; - int max_boxes; - int groups; - int size; - int side; - int stride; - int reverse; - int spatial; - int pad; - int sqrt; - int flip; - int index; - int binary; - int xnor; - int use_bin_output; - int steps; - int hidden; - float dot; - float angle; - float jitter; - float saturation; - float exposure; - float shift; - float ratio; - float learning_rate_scale; - int focal_loss; - int noloss; - int softmax; - int classes; - int coords; - int background; - int rescore; - int objectness; - int does_cost; - int joint; - int noadjust; - int reorg; - int log; - int tanh; - int *mask; - int total; - float bflops; - - int adam; - float B1; - float B2; - float eps; - - int t; - float *m; - float *v; - float * bias_m; - float * bias_v; - float * scale_m; - float * scale_v; - - tree *softmax_tree; - int *map; - - float alpha; - float beta; - float kappa; - - float coord_scale; - float object_scale; - float noobject_scale; - float mask_scale; - float class_scale; - int bias_match; - int random; - float 
ignore_thresh; - float truth_thresh; - float thresh; - float focus; - int classfix; - int absolute; - - int onlyforward; - int stopbackward; - int dontload; - int dontloadscales; - - float temperature; - float probability; - float scale; - - int *indexes; - float *rand; - float *cost; - char *cweights; - float *state; - float *prev_state; - float *forgot_state; - float *forgot_delta; - float *state_delta; - - float *concat; - float *concat_delta; - - float *binary_weights; - - float *biases; - float *bias_updates; - - float *scales; - float *scale_updates; - - float *weights; - float *weight_updates; - - char *align_bit_weights_gpu; - float *mean_arr_gpu; - float *align_workspace_gpu; - float *transposed_align_workspace_gpu; - int align_workspace_size; - - char *align_bit_weights; - float *mean_arr; - int align_bit_weights_size; - int lda_align; - int new_lda; - int bit_align; - - float *col_image; - int * input_layers; - int * input_sizes; - float * delta; - float * output; - float * loss; - float * squared; - float * norms; - - float * spatial_mean; - float * mean; - float * variance; - - float * mean_delta; - float * variance_delta; - - float * rolling_mean; - float * rolling_variance; - - float * x; - float * x_norm; - - struct layer *input_layer; - struct layer *self_layer; - struct layer *output_layer; - - struct layer *input_gate_layer; - struct layer *state_gate_layer; - struct layer *input_save_layer; - struct layer *state_save_layer; - struct layer *input_state_layer; - struct layer *state_state_layer; - - struct layer *input_z_layer; - struct layer *state_z_layer; - - struct layer *input_r_layer; - struct layer *state_r_layer; - - struct layer *input_h_layer; - struct layer *state_h_layer; - - float *z_cpu; - float *r_cpu; - float *h_cpu; - - float *binary_input; - - size_t workspace_size; - -#ifdef GPU - float *z_gpu; - float *r_gpu; - float *h_gpu; - - int *indexes_gpu; - float * prev_state_gpu; - float * forgot_state_gpu; - float * forgot_delta_gpu; - 
float * state_gpu; - float * state_delta_gpu; - float * gate_gpu; - float * gate_delta_gpu; - float * save_gpu; - float * save_delta_gpu; - float * concat_gpu; - float * concat_delta_gpu; - - // adam - float *m_gpu; - float *v_gpu; - float *bias_m_gpu; - float *scale_m_gpu; - float *bias_v_gpu; - float *scale_v_gpu; - - float *binary_input_gpu; - float *binary_weights_gpu; - - float * mean_gpu; - float * variance_gpu; - - float * rolling_mean_gpu; - float * rolling_variance_gpu; - - float * variance_delta_gpu; - float * mean_delta_gpu; - - float * col_image_gpu; - - float * x_gpu; - float * x_norm_gpu; - float * weights_gpu; - float * weight_updates_gpu; - - float * weights_gpu16; - float * weight_updates_gpu16; - - float * biases_gpu; - float * bias_updates_gpu; - - float * scales_gpu; - float * scale_updates_gpu; - - float * output_gpu; - float * loss_gpu; - float * delta_gpu; - float * rand_gpu; - float * squared_gpu; - float * norms_gpu; - #ifdef CUDNN - cudnnTensorDescriptor_t srcTensorDesc, dstTensorDesc; - cudnnTensorDescriptor_t srcTensorDesc16, dstTensorDesc16; - cudnnTensorDescriptor_t dsrcTensorDesc, ddstTensorDesc; - cudnnTensorDescriptor_t dsrcTensorDesc16, ddstTensorDesc16; - cudnnTensorDescriptor_t normTensorDesc, normDstTensorDesc, normDstTensorDescF16; - cudnnFilterDescriptor_t weightDesc, weightDesc16; - cudnnFilterDescriptor_t dweightDesc, dweightDesc16; - cudnnConvolutionDescriptor_t convDesc; - cudnnConvolutionFwdAlgo_t fw_algo, fw_algo16; - cudnnConvolutionBwdDataAlgo_t bd_algo, bd_algo16; - cudnnConvolutionBwdFilterAlgo_t bf_algo, bf_algo16; - cudnnPoolingDescriptor_t poolingDesc; - #endif // CUDNN -#endif // GPU -}; -*/ -//void free_layer(layer); - -#ifdef __cplusplus -} -#endif -#endif diff --git a/src/Detector/darknet/src/list.c b/src/Detector/darknet/src/list.c deleted file mode 100644 index dbc70c3ae..000000000 --- a/src/Detector/darknet/src/list.c +++ /dev/null @@ -1,116 +0,0 @@ -#include -#include -#include "list.h" -#include "utils.h" 
-#include "option_list.h" - -list *make_list() -{ - list* l = (list*)xmalloc(sizeof(list)); - l->size = 0; - l->front = 0; - l->back = 0; - return l; -} - -/* -void transfer_node(list *s, list *d, node *n) -{ - node *prev, *next; - prev = n->prev; - next = n->next; - if(prev) prev->next = next; - if(next) next->prev = prev; - --s->size; - if(s->front == n) s->front = next; - if(s->back == n) s->back = prev; -} -*/ - -void *list_pop(list *l){ - if(!l->back) return 0; - node *b = l->back; - void *val = b->val; - l->back = b->prev; - if(l->back) l->back->next = 0; - free(b); - --l->size; - - return val; -} - -void list_insert(list *l, void *val) -{ - node* newnode = (node*)xmalloc(sizeof(node)); - newnode->val = val; - newnode->next = 0; - - if(!l->back){ - l->front = newnode; - newnode->prev = 0; - }else{ - l->back->next = newnode; - newnode->prev = l->back; - } - l->back = newnode; - ++l->size; -} - -void free_node(node *n) -{ - node *next; - while(n) { - next = n->next; - free(n); - n = next; - } -} - -void free_list_val(list *l) -{ - node *n = l->front; - node *next; - while (n) { - next = n->next; - free(n->val); - n = next; - } -} - -void free_list(list *l) -{ - free_node(l->front); - free(l); -} - -void free_list_contents(list *l) -{ - node *n = l->front; - while(n){ - free(n->val); - n = n->next; - } -} - -void free_list_contents_kvp(list *l) -{ - node *n = l->front; - while (n) { - kvp* p = (kvp*)n->val; - free(p->key); - free(n->val); - n = n->next; - } -} - -void **list_to_array(list *l) -{ - void** a = (void**)xcalloc(l->size, sizeof(void*)); - int count = 0; - node *n = l->front; - while(n){ - a[count++] = n->val; - n = n->next; - } - return a; -} diff --git a/src/Detector/darknet/src/list.h b/src/Detector/darknet/src/list.h deleted file mode 100644 index 182648f73..000000000 --- a/src/Detector/darknet/src/list.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef LIST_H -#define LIST_H - -typedef struct node{ - void *val; - struct node *next; - struct node *prev; -} 
node; - -typedef struct list{ - int size; - node *front; - node *back; -} list; - -#ifdef __cplusplus -extern "C" { -#endif -list *make_list(); -int list_find(list *l, void *val); - -void list_insert(list *, void *); - -void **list_to_array(list *l); - -void free_list_val(list *l); -void free_list(list *l); -void free_list_contents(list *l); -void free_list_contents_kvp(list *l); - -#ifdef __cplusplus -} -#endif -#endif diff --git a/src/Detector/darknet/src/local_layer.c b/src/Detector/darknet/src/local_layer.c deleted file mode 100644 index 88c7b1252..000000000 --- a/src/Detector/darknet/src/local_layer.c +++ /dev/null @@ -1,283 +0,0 @@ -#include "local_layer.h" -#include "utils.h" -#include "im2col.h" -#include "col2im.h" -#include "blas.h" -#include "gemm.h" -#include -#include - -int local_out_height(local_layer l) -{ - int h = l.h; - if (!l.pad) h -= l.size; - else h -= 1; - return h/l.stride + 1; -} - -int local_out_width(local_layer l) -{ - int w = l.w; - if (!l.pad) w -= l.size; - else w -= 1; - return w/l.stride + 1; -} - -local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation) -{ - int i; - local_layer l = { (LAYER_TYPE)0 }; - l.type = LOCAL; - - l.h = h; - l.w = w; - l.c = c; - l.n = n; - l.batch = batch; - l.stride = stride; - l.size = size; - l.pad = pad; - - int out_h = local_out_height(l); - int out_w = local_out_width(l); - int locations = out_h*out_w; - l.out_h = out_h; - l.out_w = out_w; - l.out_c = n; - l.outputs = l.out_h * l.out_w * l.out_c; - l.inputs = l.w * l.h * l.c; - - l.weights = (float*)xcalloc(c * n * size * size * locations, sizeof(float)); - l.weight_updates = (float*)xcalloc(c * n * size * size * locations, sizeof(float)); - - l.biases = (float*)xcalloc(l.outputs, sizeof(float)); - l.bias_updates = (float*)xcalloc(l.outputs, sizeof(float)); - - // float scale = 1./sqrt(size*size*c); - float scale = sqrt(2./(size*size*c)); - for(i = 0; i < c*n*size*size; ++i) 
l.weights[i] = scale*rand_uniform(-1,1); - - l.col_image = (float*)xcalloc(out_h * out_w * size * size * c, sizeof(float)); - l.output = (float*)xcalloc(l.batch * out_h * out_w * n, sizeof(float)); - l.delta = (float*)xcalloc(l.batch * out_h * out_w * n, sizeof(float)); - - l.forward = forward_local_layer; - l.backward = backward_local_layer; - l.update = update_local_layer; - -#ifdef GPU - l.forward_gpu = forward_local_layer_gpu; - l.backward_gpu = backward_local_layer_gpu; - l.update_gpu = update_local_layer_gpu; - - l.weights_gpu = cuda_make_array(l.weights, c*n*size*size*locations); - l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size*locations); - - l.biases_gpu = cuda_make_array(l.biases, l.outputs); - l.bias_updates_gpu = cuda_make_array(l.bias_updates, l.outputs); - - l.col_image_gpu = cuda_make_array(l.col_image, out_h*out_w*size*size*c); - l.delta_gpu = cuda_make_array(l.delta, l.batch*out_h*out_w*n); - l.output_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n); - -#endif - l.activation = activation; - - fprintf(stderr, "Local Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n); - - return l; -} - -void forward_local_layer(const local_layer l, network_state state) -{ - int out_h = local_out_height(l); - int out_w = local_out_width(l); - int i, j; - int locations = out_h * out_w; - - for(i = 0; i < l.batch; ++i){ - copy_cpu(l.outputs, l.biases, 1, l.output + i*l.outputs, 1); - } - - for(i = 0; i < l.batch; ++i){ - float *input = state.input + i*l.w*l.h*l.c; - im2col_cpu(input, l.c, l.h, l.w, - l.size, l.stride, l.pad, l.col_image); - float *output = l.output + i*l.outputs; - for(j = 0; j < locations; ++j){ - float *a = l.weights + j*l.size*l.size*l.c*l.n; - float *b = l.col_image + j; - float *c = output + j; - - int m = l.n; - int n = 1; - int k = l.size*l.size*l.c; - - gemm(0,0,m,n,k,1,a,k,b,locations,1,c,locations); - } - } - activate_array(l.output, l.outputs*l.batch, l.activation); -} - 
-void backward_local_layer(local_layer l, network_state state) -{ - int i, j; - int locations = l.out_w*l.out_h; - - gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); - - for(i = 0; i < l.batch; ++i){ - axpy_cpu(l.outputs, 1, l.delta + i*l.outputs, 1, l.bias_updates, 1); - } - - for(i = 0; i < l.batch; ++i){ - float *input = state.input + i*l.w*l.h*l.c; - im2col_cpu(input, l.c, l.h, l.w, - l.size, l.stride, l.pad, l.col_image); - - for(j = 0; j < locations; ++j){ - float *a = l.delta + i*l.outputs + j; - float *b = l.col_image + j; - float *c = l.weight_updates + j*l.size*l.size*l.c*l.n; - int m = l.n; - int n = l.size*l.size*l.c; - int k = 1; - - gemm(0,1,m,n,k,1,a,locations,b,locations,1,c,n); - } - - if(state.delta){ - for(j = 0; j < locations; ++j){ - float *a = l.weights + j*l.size*l.size*l.c*l.n; - float *b = l.delta + i*l.outputs + j; - float *c = l.col_image + j; - - int m = l.size*l.size*l.c; - int n = 1; - int k = l.n; - - gemm(1,0,m,n,k,1,a,m,b,locations,0,c,locations); - } - - col2im_cpu(l.col_image, l.c, l.h, l.w, l.size, l.stride, l.pad, state.delta+i*l.c*l.h*l.w); - } - } -} - -void update_local_layer(local_layer l, int batch, float learning_rate, float momentum, float decay) -{ - int locations = l.out_w*l.out_h; - int size = l.size*l.size*l.c*l.n*locations; - axpy_cpu(l.outputs, learning_rate/batch, l.bias_updates, 1, l.biases, 1); - scal_cpu(l.outputs, momentum, l.bias_updates, 1); - - axpy_cpu(size, -decay*batch, l.weights, 1, l.weight_updates, 1); - axpy_cpu(size, learning_rate/batch, l.weight_updates, 1, l.weights, 1); - scal_cpu(size, momentum, l.weight_updates, 1); -} - -#ifdef GPU - -void forward_local_layer_gpu(const local_layer l, network_state state) -{ - int out_h = local_out_height(l); - int out_w = local_out_width(l); - int i, j; - int locations = out_h * out_w; - - for(i = 0; i < l.batch; ++i){ - copy_ongpu(l.outputs, l.biases_gpu, 1, l.output_gpu + i*l.outputs, 1); - } - - for(i = 0; i < l.batch; ++i){ - float *input 
= state.input + i*l.w*l.h*l.c; - im2col_ongpu(input, l.c, l.h, l.w, - l.size, l.stride, l.pad, l.col_image_gpu); - float *output = l.output_gpu + i*l.outputs; - for(j = 0; j < locations; ++j){ - float *a = l.weights_gpu + j*l.size*l.size*l.c*l.n; - float *b = l.col_image_gpu + j; - float *c = output + j; - - int m = l.n; - int n = 1; - int k = l.size*l.size*l.c; - - gemm_ongpu(0,0,m,n,k,1,a,k,b,locations,1,c,locations); - } - } - activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation); -} - -void backward_local_layer_gpu(local_layer l, network_state state) -{ - int i, j; - int locations = l.out_w*l.out_h; - - gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); - for(i = 0; i < l.batch; ++i){ - axpy_ongpu(l.outputs, 1, l.delta_gpu + i*l.outputs, 1, l.bias_updates_gpu, 1); - } - - for(i = 0; i < l.batch; ++i){ - float *input = state.input + i*l.w*l.h*l.c; - im2col_ongpu(input, l.c, l.h, l.w, - l.size, l.stride, l.pad, l.col_image_gpu); - - for(j = 0; j < locations; ++j){ - float *a = l.delta_gpu + i*l.outputs + j; - float *b = l.col_image_gpu + j; - float *c = l.weight_updates_gpu + j*l.size*l.size*l.c*l.n; - int m = l.n; - int n = l.size*l.size*l.c; - int k = 1; - - gemm_ongpu(0,1,m,n,k,1,a,locations,b,locations,1,c,n); - } - - if(state.delta){ - for(j = 0; j < locations; ++j){ - float *a = l.weights_gpu + j*l.size*l.size*l.c*l.n; - float *b = l.delta_gpu + i*l.outputs + j; - float *c = l.col_image_gpu + j; - - int m = l.size*l.size*l.c; - int n = 1; - int k = l.n; - - gemm_ongpu(1,0,m,n,k,1,a,m,b,locations,0,c,locations); - } - - col2im_ongpu(l.col_image_gpu, l.c, l.h, l.w, l.size, l.stride, l.pad, state.delta+i*l.c*l.h*l.w); - } - } -} - -void update_local_layer_gpu(local_layer l, int batch, float learning_rate, float momentum, float decay, float loss_scale) -{ - int locations = l.out_w*l.out_h; - int size = l.size*l.size*l.c*l.n*locations; - axpy_ongpu(l.outputs, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 
1); - scal_ongpu(l.outputs, momentum, l.bias_updates_gpu, 1); - - axpy_ongpu(size, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1); - axpy_ongpu(size, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1); - scal_ongpu(size, momentum, l.weight_updates_gpu, 1); -} - -void pull_local_layer(local_layer l) -{ - int locations = l.out_w*l.out_h; - int size = l.size*l.size*l.c*l.n*locations; - cuda_pull_array(l.weights_gpu, l.weights, size); - cuda_pull_array(l.biases_gpu, l.biases, l.outputs); -} - -void push_local_layer(local_layer l) -{ - int locations = l.out_w*l.out_h; - int size = l.size*l.size*l.c*l.n*locations; - cuda_push_array(l.weights_gpu, l.weights, size); - cuda_push_array(l.biases_gpu, l.biases, l.outputs); -} -#endif diff --git a/src/Detector/darknet/src/local_layer.h b/src/Detector/darknet/src/local_layer.h deleted file mode 100644 index 45e02a5c0..000000000 --- a/src/Detector/darknet/src/local_layer.h +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef LOCAL_LAYER_H -#define LOCAL_LAYER_H - -#include "dark_cuda.h" -#include "image.h" -#include "activations.h" -#include "layer.h" -#include "network.h" - -typedef layer local_layer; - -#ifdef __cplusplus -extern "C" { -#endif -#ifdef GPU -void forward_local_layer_gpu(local_layer layer, network_state state); -void backward_local_layer_gpu(local_layer layer, network_state state); -void update_local_layer_gpu(local_layer layer, int batch, float learning_rate, float momentum, float decay, float loss_scale); - -void push_local_layer(local_layer layer); -void pull_local_layer(local_layer layer); -#endif - -local_layer make_local_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation); - -void forward_local_layer(const local_layer layer, network_state state); -void backward_local_layer(local_layer layer, network_state state); -void update_local_layer(local_layer layer, int batch, float learning_rate, float momentum, float decay); - -void bias_output(float *output, 
float *biases, int batch, int n, int size); -void backward_bias(float *bias_updates, float *delta, int batch, int n, int size); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/Detector/darknet/src/lstm_layer.c b/src/Detector/darknet/src/lstm_layer.c deleted file mode 100644 index a79455618..000000000 --- a/src/Detector/darknet/src/lstm_layer.c +++ /dev/null @@ -1,646 +0,0 @@ -#include "lstm_layer.h" -#include "connected_layer.h" -#include "utils.h" -#include "dark_cuda.h" -#include "blas.h" -#include "gemm.h" - -#include -#include -#include -#include - -static void increment_layer(layer *l, int steps) -{ - int num = l->outputs*l->batch*steps; - l->output += num; - l->delta += num; - l->x += num; - l->x_norm += num; - -#ifdef GPU - l->output_gpu += num; - l->delta_gpu += num; - l->x_gpu += num; - l->x_norm_gpu += num; -#endif -} - -layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_normalize) -{ - fprintf(stderr, "LSTM Layer: %d inputs, %d outputs\n", inputs, outputs); - batch = batch / steps; - layer l = { (LAYER_TYPE)0 }; - l.batch = batch; - l.type = LSTM; - l.steps = steps; - l.inputs = inputs; - l.out_w = 1; - l.out_h = 1; - l.out_c = outputs; - - l.uf = (layer*)xcalloc(1, sizeof(layer)); - fprintf(stderr, "\t\t"); - *(l.uf) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize); - l.uf->batch = batch; - if (l.workspace_size < l.uf->workspace_size) l.workspace_size = l.uf->workspace_size; - - l.ui = (layer*)xcalloc(1, sizeof(layer)); - fprintf(stderr, "\t\t"); - *(l.ui) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize); - l.ui->batch = batch; - if (l.workspace_size < l.ui->workspace_size) l.workspace_size = l.ui->workspace_size; - - l.ug = (layer*)xcalloc(1, sizeof(layer)); - fprintf(stderr, "\t\t"); - *(l.ug) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize); - l.ug->batch = batch; - if (l.workspace_size < l.ug->workspace_size) 
l.workspace_size = l.ug->workspace_size; - - l.uo = (layer*)xcalloc(1, sizeof(layer)); - fprintf(stderr, "\t\t"); - *(l.uo) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize); - l.uo->batch = batch; - if (l.workspace_size < l.uo->workspace_size) l.workspace_size = l.uo->workspace_size; - - l.wf = (layer*)xcalloc(1, sizeof(layer)); - fprintf(stderr, "\t\t"); - *(l.wf) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize); - l.wf->batch = batch; - if (l.workspace_size < l.wf->workspace_size) l.workspace_size = l.wf->workspace_size; - - l.wi = (layer*)xcalloc(1, sizeof(layer)); - fprintf(stderr, "\t\t"); - *(l.wi) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize); - l.wi->batch = batch; - if (l.workspace_size < l.wi->workspace_size) l.workspace_size = l.wi->workspace_size; - - l.wg = (layer*)xcalloc(1, sizeof(layer)); - fprintf(stderr, "\t\t"); - *(l.wg) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize); - l.wg->batch = batch; - if (l.workspace_size < l.wg->workspace_size) l.workspace_size = l.wg->workspace_size; - - l.wo = (layer*)xcalloc(1, sizeof(layer)); - fprintf(stderr, "\t\t"); - *(l.wo) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize); - l.wo->batch = batch; - if (l.workspace_size < l.wo->workspace_size) l.workspace_size = l.wo->workspace_size; - - l.batch_normalize = batch_normalize; - l.outputs = outputs; - - l.output = (float*)xcalloc(outputs * batch * steps, sizeof(float)); - l.state = (float*)xcalloc(outputs * batch, sizeof(float)); - - l.forward = forward_lstm_layer; - l.update = update_lstm_layer; - l.backward = backward_lstm_layer; - - l.prev_state_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); - l.prev_cell_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); - l.cell_cpu = (float*)xcalloc(batch*outputs*steps, sizeof(float)); - - l.f_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); - l.i_cpu = 
(float*)xcalloc(batch*outputs, sizeof(float)); - l.g_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); - l.o_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); - l.c_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); - l.h_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); - l.temp_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); - l.temp2_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); - l.temp3_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); - l.dc_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); - l.dh_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); - -#ifdef GPU - l.forward_gpu = forward_lstm_layer_gpu; - l.backward_gpu = backward_lstm_layer_gpu; - l.update_gpu = update_lstm_layer_gpu; - - //l.state_gpu = cuda_make_array(l.state, batch*l.outputs); - - l.output_gpu = cuda_make_array(0, batch*outputs*steps); - l.delta_gpu = cuda_make_array(0, batch*l.outputs*steps); - - l.prev_state_gpu = cuda_make_array(0, batch*outputs); - l.prev_cell_gpu = cuda_make_array(0, batch*outputs); - l.cell_gpu = cuda_make_array(0, batch*outputs*steps); - - l.f_gpu = cuda_make_array(0, batch*outputs); - l.i_gpu = cuda_make_array(0, batch*outputs); - l.g_gpu = cuda_make_array(0, batch*outputs); - l.o_gpu = cuda_make_array(0, batch*outputs); - l.c_gpu = cuda_make_array(0, batch*outputs); - l.h_gpu = cuda_make_array(0, batch*outputs); - l.temp_gpu = cuda_make_array(0, batch*outputs); - l.temp2_gpu = cuda_make_array(0, batch*outputs); - l.temp3_gpu = cuda_make_array(0, batch*outputs); - l.dc_gpu = cuda_make_array(0, batch*outputs); - l.dh_gpu = cuda_make_array(0, batch*outputs); -#ifdef CUDNN - /* - cudnnSetTensor4dDescriptor(l.wf->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wf->out_c, l.wf->out_h, l.wf->out_w); - cudnnSetTensor4dDescriptor(l.wi->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wi->out_c, l.wi->out_h, l.wi->out_w); - cudnnSetTensor4dDescriptor(l.wg->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, 
l.wg->out_c, l.wg->out_h, l.wg->out_w); - cudnnSetTensor4dDescriptor(l.wo->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.wo->out_c, l.wo->out_h, l.wo->out_w); - - cudnnSetTensor4dDescriptor(l.uf->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.uf->out_c, l.uf->out_h, l.uf->out_w); - cudnnSetTensor4dDescriptor(l.ui->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.ui->out_c, l.ui->out_h, l.ui->out_w); - cudnnSetTensor4dDescriptor(l.ug->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.ug->out_c, l.ug->out_h, l.ug->out_w); - cudnnSetTensor4dDescriptor(l.uo->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, batch, l.uo->out_c, l.uo->out_h, l.uo->out_w); - */ -#endif - -#endif - - return l; -} - -void update_lstm_layer(layer l, int batch, float learning_rate, float momentum, float decay) -{ - update_connected_layer(*(l.wf), batch, learning_rate, momentum, decay); - update_connected_layer(*(l.wi), batch, learning_rate, momentum, decay); - update_connected_layer(*(l.wg), batch, learning_rate, momentum, decay); - update_connected_layer(*(l.wo), batch, learning_rate, momentum, decay); - update_connected_layer(*(l.uf), batch, learning_rate, momentum, decay); - update_connected_layer(*(l.ui), batch, learning_rate, momentum, decay); - update_connected_layer(*(l.ug), batch, learning_rate, momentum, decay); - update_connected_layer(*(l.uo), batch, learning_rate, momentum, decay); -} - -void forward_lstm_layer(layer l, network_state state) -{ - network_state s = { 0 }; - s.train = state.train; - s.workspace = state.workspace; - int i; - layer wf = *(l.wf); - layer wi = *(l.wi); - layer wg = *(l.wg); - layer wo = *(l.wo); - - layer uf = *(l.uf); - layer ui = *(l.ui); - layer ug = *(l.ug); - layer uo = *(l.uo); - - fill_cpu(l.outputs * l.batch * l.steps, 0, wf.delta, 1); - fill_cpu(l.outputs * l.batch * l.steps, 0, wi.delta, 1); - fill_cpu(l.outputs * l.batch * l.steps, 0, wg.delta, 1); - fill_cpu(l.outputs * l.batch * l.steps, 
0, wo.delta, 1); - - fill_cpu(l.outputs * l.batch * l.steps, 0, uf.delta, 1); - fill_cpu(l.outputs * l.batch * l.steps, 0, ui.delta, 1); - fill_cpu(l.outputs * l.batch * l.steps, 0, ug.delta, 1); - fill_cpu(l.outputs * l.batch * l.steps, 0, uo.delta, 1); - if (state.train) { - fill_cpu(l.outputs * l.batch * l.steps, 0, l.delta, 1); - } - - for (i = 0; i < l.steps; ++i) { - s.input = l.h_cpu; - forward_connected_layer(wf, s); - forward_connected_layer(wi, s); - forward_connected_layer(wg, s); - forward_connected_layer(wo, s); - - s.input = state.input; - forward_connected_layer(uf, s); - forward_connected_layer(ui, s); - forward_connected_layer(ug, s); - forward_connected_layer(uo, s); - - copy_cpu(l.outputs*l.batch, wf.output, 1, l.f_cpu, 1); - axpy_cpu(l.outputs*l.batch, 1, uf.output, 1, l.f_cpu, 1); - - copy_cpu(l.outputs*l.batch, wi.output, 1, l.i_cpu, 1); - axpy_cpu(l.outputs*l.batch, 1, ui.output, 1, l.i_cpu, 1); - - copy_cpu(l.outputs*l.batch, wg.output, 1, l.g_cpu, 1); - axpy_cpu(l.outputs*l.batch, 1, ug.output, 1, l.g_cpu, 1); - - copy_cpu(l.outputs*l.batch, wo.output, 1, l.o_cpu, 1); - axpy_cpu(l.outputs*l.batch, 1, uo.output, 1, l.o_cpu, 1); - - activate_array(l.f_cpu, l.outputs*l.batch, LOGISTIC); - activate_array(l.i_cpu, l.outputs*l.batch, LOGISTIC); - activate_array(l.g_cpu, l.outputs*l.batch, TANH); - activate_array(l.o_cpu, l.outputs*l.batch, LOGISTIC); - - copy_cpu(l.outputs*l.batch, l.i_cpu, 1, l.temp_cpu, 1); - mul_cpu(l.outputs*l.batch, l.g_cpu, 1, l.temp_cpu, 1); - mul_cpu(l.outputs*l.batch, l.f_cpu, 1, l.c_cpu, 1); - axpy_cpu(l.outputs*l.batch, 1, l.temp_cpu, 1, l.c_cpu, 1); - - copy_cpu(l.outputs*l.batch, l.c_cpu, 1, l.h_cpu, 1); - activate_array(l.h_cpu, l.outputs*l.batch, TANH); - mul_cpu(l.outputs*l.batch, l.o_cpu, 1, l.h_cpu, 1); - - copy_cpu(l.outputs*l.batch, l.c_cpu, 1, l.cell_cpu, 1); - copy_cpu(l.outputs*l.batch, l.h_cpu, 1, l.output, 1); - - state.input += l.inputs*l.batch; - l.output += l.outputs*l.batch; - l.cell_cpu += 
l.outputs*l.batch; - - increment_layer(&wf, 1); - increment_layer(&wi, 1); - increment_layer(&wg, 1); - increment_layer(&wo, 1); - - increment_layer(&uf, 1); - increment_layer(&ui, 1); - increment_layer(&ug, 1); - increment_layer(&uo, 1); - } -} - -void backward_lstm_layer(layer l, network_state state) -{ - network_state s = { 0 }; - s.train = state.train; - s.workspace = state.workspace; - int i; - layer wf = *(l.wf); - layer wi = *(l.wi); - layer wg = *(l.wg); - layer wo = *(l.wo); - - layer uf = *(l.uf); - layer ui = *(l.ui); - layer ug = *(l.ug); - layer uo = *(l.uo); - - increment_layer(&wf, l.steps - 1); - increment_layer(&wi, l.steps - 1); - increment_layer(&wg, l.steps - 1); - increment_layer(&wo, l.steps - 1); - - increment_layer(&uf, l.steps - 1); - increment_layer(&ui, l.steps - 1); - increment_layer(&ug, l.steps - 1); - increment_layer(&uo, l.steps - 1); - - state.input += l.inputs*l.batch*(l.steps - 1); - if (state.delta) state.delta += l.inputs*l.batch*(l.steps - 1); - - l.output += l.outputs*l.batch*(l.steps - 1); - l.cell_cpu += l.outputs*l.batch*(l.steps - 1); - l.delta += l.outputs*l.batch*(l.steps - 1); - - for (i = l.steps - 1; i >= 0; --i) { - if (i != 0) copy_cpu(l.outputs*l.batch, l.cell_cpu - l.outputs*l.batch, 1, l.prev_cell_cpu, 1); - copy_cpu(l.outputs*l.batch, l.cell_cpu, 1, l.c_cpu, 1); - if (i != 0) copy_cpu(l.outputs*l.batch, l.output - l.outputs*l.batch, 1, l.prev_state_cpu, 1); - copy_cpu(l.outputs*l.batch, l.output, 1, l.h_cpu, 1); - - l.dh_cpu = (i == 0) ? 
0 : l.delta - l.outputs*l.batch; - - copy_cpu(l.outputs*l.batch, wf.output, 1, l.f_cpu, 1); - axpy_cpu(l.outputs*l.batch, 1, uf.output, 1, l.f_cpu, 1); - - copy_cpu(l.outputs*l.batch, wi.output, 1, l.i_cpu, 1); - axpy_cpu(l.outputs*l.batch, 1, ui.output, 1, l.i_cpu, 1); - - copy_cpu(l.outputs*l.batch, wg.output, 1, l.g_cpu, 1); - axpy_cpu(l.outputs*l.batch, 1, ug.output, 1, l.g_cpu, 1); - - copy_cpu(l.outputs*l.batch, wo.output, 1, l.o_cpu, 1); - axpy_cpu(l.outputs*l.batch, 1, uo.output, 1, l.o_cpu, 1); - - activate_array(l.f_cpu, l.outputs*l.batch, LOGISTIC); - activate_array(l.i_cpu, l.outputs*l.batch, LOGISTIC); - activate_array(l.g_cpu, l.outputs*l.batch, TANH); - activate_array(l.o_cpu, l.outputs*l.batch, LOGISTIC); - - copy_cpu(l.outputs*l.batch, l.delta, 1, l.temp3_cpu, 1); - - copy_cpu(l.outputs*l.batch, l.c_cpu, 1, l.temp_cpu, 1); - activate_array(l.temp_cpu, l.outputs*l.batch, TANH); - - copy_cpu(l.outputs*l.batch, l.temp3_cpu, 1, l.temp2_cpu, 1); - mul_cpu(l.outputs*l.batch, l.o_cpu, 1, l.temp2_cpu, 1); - - gradient_array(l.temp_cpu, l.outputs*l.batch, TANH, l.temp2_cpu); - axpy_cpu(l.outputs*l.batch, 1, l.dc_cpu, 1, l.temp2_cpu, 1); - - copy_cpu(l.outputs*l.batch, l.c_cpu, 1, l.temp_cpu, 1); - activate_array(l.temp_cpu, l.outputs*l.batch, TANH); - mul_cpu(l.outputs*l.batch, l.temp3_cpu, 1, l.temp_cpu, 1); - gradient_array(l.o_cpu, l.outputs*l.batch, LOGISTIC, l.temp_cpu); - copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, wo.delta, 1); - s.input = l.prev_state_cpu; - s.delta = l.dh_cpu; - backward_connected_layer(wo, s); - - copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, uo.delta, 1); - s.input = state.input; - s.delta = state.delta; - backward_connected_layer(uo, s); - - copy_cpu(l.outputs*l.batch, l.temp2_cpu, 1, l.temp_cpu, 1); - mul_cpu(l.outputs*l.batch, l.i_cpu, 1, l.temp_cpu, 1); - gradient_array(l.g_cpu, l.outputs*l.batch, TANH, l.temp_cpu); - copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, wg.delta, 1); - s.input = l.prev_state_cpu; - s.delta = l.dh_cpu; - 
backward_connected_layer(wg, s); - - copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, ug.delta, 1); - s.input = state.input; - s.delta = state.delta; - backward_connected_layer(ug, s); - - copy_cpu(l.outputs*l.batch, l.temp2_cpu, 1, l.temp_cpu, 1); - mul_cpu(l.outputs*l.batch, l.g_cpu, 1, l.temp_cpu, 1); - gradient_array(l.i_cpu, l.outputs*l.batch, LOGISTIC, l.temp_cpu); - copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, wi.delta, 1); - s.input = l.prev_state_cpu; - s.delta = l.dh_cpu; - backward_connected_layer(wi, s); - - copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, ui.delta, 1); - s.input = state.input; - s.delta = state.delta; - backward_connected_layer(ui, s); - - copy_cpu(l.outputs*l.batch, l.temp2_cpu, 1, l.temp_cpu, 1); - mul_cpu(l.outputs*l.batch, l.prev_cell_cpu, 1, l.temp_cpu, 1); - gradient_array(l.f_cpu, l.outputs*l.batch, LOGISTIC, l.temp_cpu); - copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, wf.delta, 1); - s.input = l.prev_state_cpu; - s.delta = l.dh_cpu; - backward_connected_layer(wf, s); - - copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, uf.delta, 1); - s.input = state.input; - s.delta = state.delta; - backward_connected_layer(uf, s); - - copy_cpu(l.outputs*l.batch, l.temp2_cpu, 1, l.temp_cpu, 1); - mul_cpu(l.outputs*l.batch, l.f_cpu, 1, l.temp_cpu, 1); - copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, l.dc_cpu, 1); - - state.input -= l.inputs*l.batch; - if (state.delta) state.delta -= l.inputs*l.batch; - l.output -= l.outputs*l.batch; - l.cell_cpu -= l.outputs*l.batch; - l.delta -= l.outputs*l.batch; - - increment_layer(&wf, -1); - increment_layer(&wi, -1); - increment_layer(&wg, -1); - increment_layer(&wo, -1); - - increment_layer(&uf, -1); - increment_layer(&ui, -1); - increment_layer(&ug, -1); - increment_layer(&uo, -1); - } -} - -#ifdef GPU -void update_lstm_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay, float loss_scale) -{ - update_connected_layer_gpu(*(l.wf), batch, learning_rate, momentum, decay, loss_scale); - 
update_connected_layer_gpu(*(l.wi), batch, learning_rate, momentum, decay, loss_scale); - update_connected_layer_gpu(*(l.wg), batch, learning_rate, momentum, decay, loss_scale); - update_connected_layer_gpu(*(l.wo), batch, learning_rate, momentum, decay, loss_scale); - update_connected_layer_gpu(*(l.uf), batch, learning_rate, momentum, decay, loss_scale); - update_connected_layer_gpu(*(l.ui), batch, learning_rate, momentum, decay, loss_scale); - update_connected_layer_gpu(*(l.ug), batch, learning_rate, momentum, decay, loss_scale); - update_connected_layer_gpu(*(l.uo), batch, learning_rate, momentum, decay, loss_scale); -} - -void forward_lstm_layer_gpu(layer l, network_state state) -{ - network_state s = { 0 }; - s.train = state.train; - s.workspace = state.workspace; - int i; - layer wf = *(l.wf); - layer wi = *(l.wi); - layer wg = *(l.wg); - layer wo = *(l.wo); - - layer uf = *(l.uf); - layer ui = *(l.ui); - layer ug = *(l.ug); - layer uo = *(l.uo); - - fill_ongpu(l.outputs * l.batch * l.steps, 0, wf.delta_gpu, 1); - fill_ongpu(l.outputs * l.batch * l.steps, 0, wi.delta_gpu, 1); - fill_ongpu(l.outputs * l.batch * l.steps, 0, wg.delta_gpu, 1); - fill_ongpu(l.outputs * l.batch * l.steps, 0, wo.delta_gpu, 1); - - fill_ongpu(l.outputs * l.batch * l.steps, 0, uf.delta_gpu, 1); - fill_ongpu(l.outputs * l.batch * l.steps, 0, ui.delta_gpu, 1); - fill_ongpu(l.outputs * l.batch * l.steps, 0, ug.delta_gpu, 1); - fill_ongpu(l.outputs * l.batch * l.steps, 0, uo.delta_gpu, 1); - if (state.train) { - fill_ongpu(l.outputs * l.batch * l.steps, 0, l.delta_gpu, 1); - } - - for (i = 0; i < l.steps; ++i) { - s.input = l.h_gpu; - forward_connected_layer_gpu(wf, s); - forward_connected_layer_gpu(wi, s); - forward_connected_layer_gpu(wg, s); - forward_connected_layer_gpu(wo, s); - - s.input = state.input; - forward_connected_layer_gpu(uf, s); - forward_connected_layer_gpu(ui, s); - forward_connected_layer_gpu(ug, s); - forward_connected_layer_gpu(uo, s); - - 
copy_ongpu(l.outputs*l.batch, wf.output_gpu, 1, l.f_gpu, 1); - axpy_ongpu(l.outputs*l.batch, 1, uf.output_gpu, 1, l.f_gpu, 1); - - copy_ongpu(l.outputs*l.batch, wi.output_gpu, 1, l.i_gpu, 1); - axpy_ongpu(l.outputs*l.batch, 1, ui.output_gpu, 1, l.i_gpu, 1); - - copy_ongpu(l.outputs*l.batch, wg.output_gpu, 1, l.g_gpu, 1); - axpy_ongpu(l.outputs*l.batch, 1, ug.output_gpu, 1, l.g_gpu, 1); - - copy_ongpu(l.outputs*l.batch, wo.output_gpu, 1, l.o_gpu, 1); - axpy_ongpu(l.outputs*l.batch, 1, uo.output_gpu, 1, l.o_gpu, 1); - - activate_array_ongpu(l.f_gpu, l.outputs*l.batch, LOGISTIC); - activate_array_ongpu(l.i_gpu, l.outputs*l.batch, LOGISTIC); - activate_array_ongpu(l.g_gpu, l.outputs*l.batch, TANH); - activate_array_ongpu(l.o_gpu, l.outputs*l.batch, LOGISTIC); - - copy_ongpu(l.outputs*l.batch, l.i_gpu, 1, l.temp_gpu, 1); - mul_ongpu(l.outputs*l.batch, l.g_gpu, 1, l.temp_gpu, 1); - mul_ongpu(l.outputs*l.batch, l.f_gpu, 1, l.c_gpu, 1); - axpy_ongpu(l.outputs*l.batch, 1, l.temp_gpu, 1, l.c_gpu, 1); - - copy_ongpu(l.outputs*l.batch, l.c_gpu, 1, l.h_gpu, 1); - activate_array_ongpu(l.h_gpu, l.outputs*l.batch, TANH); - mul_ongpu(l.outputs*l.batch, l.o_gpu, 1, l.h_gpu, 1); - - copy_ongpu(l.outputs*l.batch, l.c_gpu, 1, l.cell_gpu, 1); - copy_ongpu(l.outputs*l.batch, l.h_gpu, 1, l.output_gpu, 1); - - state.input += l.inputs*l.batch; - l.output_gpu += l.outputs*l.batch; - l.cell_gpu += l.outputs*l.batch; - - increment_layer(&wf, 1); - increment_layer(&wi, 1); - increment_layer(&wg, 1); - increment_layer(&wo, 1); - - increment_layer(&uf, 1); - increment_layer(&ui, 1); - increment_layer(&ug, 1); - increment_layer(&uo, 1); - } -} - -void backward_lstm_layer_gpu(layer l, network_state state) -{ - network_state s = { 0 }; - s.train = state.train; - s.workspace = state.workspace; - int i; - layer wf = *(l.wf); - layer wi = *(l.wi); - layer wg = *(l.wg); - layer wo = *(l.wo); - - layer uf = *(l.uf); - layer ui = *(l.ui); - layer ug = *(l.ug); - layer uo = *(l.uo); - - 
increment_layer(&wf, l.steps - 1); - increment_layer(&wi, l.steps - 1); - increment_layer(&wg, l.steps - 1); - increment_layer(&wo, l.steps - 1); - - increment_layer(&uf, l.steps - 1); - increment_layer(&ui, l.steps - 1); - increment_layer(&ug, l.steps - 1); - increment_layer(&uo, l.steps - 1); - - state.input += l.inputs*l.batch*(l.steps - 1); - if (state.delta) state.delta += l.inputs*l.batch*(l.steps - 1); - - l.output_gpu += l.outputs*l.batch*(l.steps - 1); - l.cell_gpu += l.outputs*l.batch*(l.steps - 1); - l.delta_gpu += l.outputs*l.batch*(l.steps - 1); - - for (i = l.steps - 1; i >= 0; --i) { - if (i != 0) copy_ongpu(l.outputs*l.batch, l.cell_gpu - l.outputs*l.batch, 1, l.prev_cell_gpu, 1); - copy_ongpu(l.outputs*l.batch, l.cell_gpu, 1, l.c_gpu, 1); - if (i != 0) copy_ongpu(l.outputs*l.batch, l.output_gpu - l.outputs*l.batch, 1, l.prev_state_gpu, 1); - copy_ongpu(l.outputs*l.batch, l.output_gpu, 1, l.h_gpu, 1); - - l.dh_gpu = (i == 0) ? 0 : l.delta_gpu - l.outputs*l.batch; - - copy_ongpu(l.outputs*l.batch, wf.output_gpu, 1, l.f_gpu, 1); - axpy_ongpu(l.outputs*l.batch, 1, uf.output_gpu, 1, l.f_gpu, 1); - - copy_ongpu(l.outputs*l.batch, wi.output_gpu, 1, l.i_gpu, 1); - axpy_ongpu(l.outputs*l.batch, 1, ui.output_gpu, 1, l.i_gpu, 1); - - copy_ongpu(l.outputs*l.batch, wg.output_gpu, 1, l.g_gpu, 1); - axpy_ongpu(l.outputs*l.batch, 1, ug.output_gpu, 1, l.g_gpu, 1); - - copy_ongpu(l.outputs*l.batch, wo.output_gpu, 1, l.o_gpu, 1); - axpy_ongpu(l.outputs*l.batch, 1, uo.output_gpu, 1, l.o_gpu, 1); - - activate_array_ongpu(l.f_gpu, l.outputs*l.batch, LOGISTIC); - activate_array_ongpu(l.i_gpu, l.outputs*l.batch, LOGISTIC); - activate_array_ongpu(l.g_gpu, l.outputs*l.batch, TANH); - activate_array_ongpu(l.o_gpu, l.outputs*l.batch, LOGISTIC); - - copy_ongpu(l.outputs*l.batch, l.delta_gpu, 1, l.temp3_gpu, 1); - - copy_ongpu(l.outputs*l.batch, l.c_gpu, 1, l.temp_gpu, 1); - activate_array_ongpu(l.temp_gpu, l.outputs*l.batch, TANH); - - copy_ongpu(l.outputs*l.batch, 
l.temp3_gpu, 1, l.temp2_gpu, 1); - mul_ongpu(l.outputs*l.batch, l.o_gpu, 1, l.temp2_gpu, 1); - - gradient_array_ongpu(l.temp_gpu, l.outputs*l.batch, TANH, l.temp2_gpu); - axpy_ongpu(l.outputs*l.batch, 1, l.dc_gpu, 1, l.temp2_gpu, 1); - - copy_ongpu(l.outputs*l.batch, l.c_gpu, 1, l.temp_gpu, 1); - activate_array_ongpu(l.temp_gpu, l.outputs*l.batch, TANH); - mul_ongpu(l.outputs*l.batch, l.temp3_gpu, 1, l.temp_gpu, 1); - gradient_array_ongpu(l.o_gpu, l.outputs*l.batch, LOGISTIC, l.temp_gpu); - copy_ongpu(l.outputs*l.batch, l.temp_gpu, 1, wo.delta_gpu, 1); - s.input = l.prev_state_gpu; - s.delta = l.dh_gpu; - backward_connected_layer_gpu(wo, s); - - copy_ongpu(l.outputs*l.batch, l.temp_gpu, 1, uo.delta_gpu, 1); - s.input = state.input; - s.delta = state.delta; - backward_connected_layer_gpu(uo, s); - - copy_ongpu(l.outputs*l.batch, l.temp2_gpu, 1, l.temp_gpu, 1); - mul_ongpu(l.outputs*l.batch, l.i_gpu, 1, l.temp_gpu, 1); - gradient_array_ongpu(l.g_gpu, l.outputs*l.batch, TANH, l.temp_gpu); - copy_ongpu(l.outputs*l.batch, l.temp_gpu, 1, wg.delta_gpu, 1); - s.input = l.prev_state_gpu; - s.delta = l.dh_gpu; - backward_connected_layer_gpu(wg, s); - - copy_ongpu(l.outputs*l.batch, l.temp_gpu, 1, ug.delta_gpu, 1); - s.input = state.input; - s.delta = state.delta; - backward_connected_layer_gpu(ug, s); - - copy_ongpu(l.outputs*l.batch, l.temp2_gpu, 1, l.temp_gpu, 1); - mul_ongpu(l.outputs*l.batch, l.g_gpu, 1, l.temp_gpu, 1); - gradient_array_ongpu(l.i_gpu, l.outputs*l.batch, LOGISTIC, l.temp_gpu); - copy_ongpu(l.outputs*l.batch, l.temp_gpu, 1, wi.delta_gpu, 1); - s.input = l.prev_state_gpu; - s.delta = l.dh_gpu; - backward_connected_layer_gpu(wi, s); - - copy_ongpu(l.outputs*l.batch, l.temp_gpu, 1, ui.delta_gpu, 1); - s.input = state.input; - s.delta = state.delta; - backward_connected_layer_gpu(ui, s); - - copy_ongpu(l.outputs*l.batch, l.temp2_gpu, 1, l.temp_gpu, 1); - mul_ongpu(l.outputs*l.batch, l.prev_cell_gpu, 1, l.temp_gpu, 1); - gradient_array_ongpu(l.f_gpu, 
l.outputs*l.batch, LOGISTIC, l.temp_gpu); - copy_ongpu(l.outputs*l.batch, l.temp_gpu, 1, wf.delta_gpu, 1); - s.input = l.prev_state_gpu; - s.delta = l.dh_gpu; - backward_connected_layer_gpu(wf, s); - - copy_ongpu(l.outputs*l.batch, l.temp_gpu, 1, uf.delta_gpu, 1); - s.input = state.input; - s.delta = state.delta; - backward_connected_layer_gpu(uf, s); - - copy_ongpu(l.outputs*l.batch, l.temp2_gpu, 1, l.temp_gpu, 1); - mul_ongpu(l.outputs*l.batch, l.f_gpu, 1, l.temp_gpu, 1); - copy_ongpu(l.outputs*l.batch, l.temp_gpu, 1, l.dc_gpu, 1); - - state.input -= l.inputs*l.batch; - if (state.delta) state.delta -= l.inputs*l.batch; - l.output_gpu -= l.outputs*l.batch; - l.cell_gpu -= l.outputs*l.batch; - l.delta_gpu -= l.outputs*l.batch; - - increment_layer(&wf, -1); - increment_layer(&wi, -1); - increment_layer(&wg, -1); - increment_layer(&wo, -1); - - increment_layer(&uf, -1); - increment_layer(&ui, -1); - increment_layer(&ug, -1); - increment_layer(&uo, -1); - } -} -#endif diff --git a/src/Detector/darknet/src/lstm_layer.h b/src/Detector/darknet/src/lstm_layer.h deleted file mode 100644 index a116a83d6..000000000 --- a/src/Detector/darknet/src/lstm_layer.h +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef LSTM_LAYER_H -#define LSTM_LAYER_H - -#include "activations.h" -#include "layer.h" -#include "network.h" -#define USET - -#ifdef __cplusplus -extern "C" { -#endif -layer make_lstm_layer(int batch, int inputs, int outputs, int steps, int batch_normalize); - -void forward_lstm_layer(layer l, network_state state); -void backward_lstm_layer(layer l, network_state state); -void update_lstm_layer(layer l, int batch, float learning_rate, float momentum, float decay); - -#ifdef GPU -void forward_lstm_layer_gpu(layer l, network_state state); -void backward_lstm_layer_gpu(layer l, network_state state); -void update_lstm_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay, float loss_scale); -#endif - -#ifdef __cplusplus -} -#endif -#endif diff --git 
a/src/Detector/darknet/src/matrix.c b/src/Detector/darknet/src/matrix.c deleted file mode 100644 index 715ee80c9..000000000 --- a/src/Detector/darknet/src/matrix.c +++ /dev/null @@ -1,332 +0,0 @@ -#include "matrix.h" -#include "utils.h" -#include -#include -#include -#include -#include - -void free_matrix(matrix m) -{ - int i; - for(i = 0; i < m.rows; ++i) free(m.vals[i]); - free(m.vals); -} - -float matrix_topk_accuracy(matrix truth, matrix guess, int k) -{ - int* indexes = (int*)xcalloc(k, sizeof(int)); - int n = truth.cols; - int i,j; - int correct = 0; - for(i = 0; i < truth.rows; ++i){ - top_k(guess.vals[i], n, k, indexes); - for(j = 0; j < k; ++j){ - int class_id = indexes[j]; - if(truth.vals[i][class_id]){ - ++correct; - break; - } - } - } - free(indexes); - return (float)correct/truth.rows; -} - -void scale_matrix(matrix m, float scale) -{ - int i,j; - for(i = 0; i < m.rows; ++i){ - for(j = 0; j < m.cols; ++j){ - m.vals[i][j] *= scale; - } - } -} - -matrix resize_matrix(matrix m, int size) -{ - int i; - if (m.rows == size) return m; - if (m.rows < size) { - m.vals = (float**)xrealloc(m.vals, size * sizeof(float*)); - for (i = m.rows; i < size; ++i) { - m.vals[i] = (float*)xcalloc(m.cols, sizeof(float)); - } - } else if (m.rows > size) { - for (i = size; i < m.rows; ++i) { - free(m.vals[i]); - } - m.vals = (float**)xrealloc(m.vals, size * sizeof(float*)); - } - m.rows = size; - return m; -} - -void matrix_add_matrix(matrix from, matrix to) -{ - assert(from.rows == to.rows && from.cols == to.cols); - int i,j; - for(i = 0; i < from.rows; ++i){ - for(j = 0; j < from.cols; ++j){ - to.vals[i][j] += from.vals[i][j]; - } - } -} - -matrix make_matrix(int rows, int cols) -{ - int i; - matrix m; - m.rows = rows; - m.cols = cols; - m.vals = (float**)xcalloc(m.rows, sizeof(float*)); - for(i = 0; i < m.rows; ++i){ - m.vals[i] = (float*)xcalloc(m.cols, sizeof(float)); - } - return m; -} - -matrix hold_out_matrix(matrix *m, int n) -{ - int i; - matrix h; - h.rows = n; - 
h.cols = m->cols; - h.vals = (float**)xcalloc(h.rows, sizeof(float*)); - for(i = 0; i < n; ++i){ - int index = rand()%m->rows; - h.vals[i] = m->vals[index]; - m->vals[index] = m->vals[--(m->rows)]; - } - return h; -} - -float *pop_column(matrix *m, int c) -{ - float* col = (float*)xcalloc(m->rows, sizeof(float)); - int i, j; - for(i = 0; i < m->rows; ++i){ - col[i] = m->vals[i][c]; - for(j = c; j < m->cols-1; ++j){ - m->vals[i][j] = m->vals[i][j+1]; - } - } - --m->cols; - return col; -} - -matrix csv_to_matrix(char *filename) -{ - FILE *fp = fopen(filename, "r"); - if(!fp) file_error(filename); - - matrix m; - m.cols = -1; - - char *line; - - int n = 0; - int size = 1024; - m.vals = (float**)xcalloc(size, sizeof(float*)); - while((line = fgetl(fp))){ - if(m.cols == -1) m.cols = count_fields(line); - if(n == size){ - size *= 2; - m.vals = (float**)xrealloc(m.vals, size * sizeof(float*)); - } - m.vals[n] = parse_fields(line, m.cols); - free(line); - ++n; - } - m.vals = (float**)xrealloc(m.vals, n * sizeof(float*)); - m.rows = n; - return m; -} - -void matrix_to_csv(matrix m) -{ - int i, j; - - for(i = 0; i < m.rows; ++i){ - for(j = 0; j < m.cols; ++j){ - if(j > 0) printf(","); - printf("%.17g", m.vals[i][j]); - } - printf("\n"); - } -} - -void print_matrix(matrix m) -{ - int i, j; - printf("%d X %d Matrix:\n",m.rows, m.cols); - printf(" __"); - for(j = 0; j < 16*m.cols-1; ++j) printf(" "); - printf("__ \n"); - - printf("| "); - for(j = 0; j < 16*m.cols-1; ++j) printf(" "); - printf(" |\n"); - - for(i = 0; i < m.rows; ++i){ - printf("| "); - for(j = 0; j < m.cols; ++j){ - printf("%15.7f ", m.vals[i][j]); - } - printf(" |\n"); - } - printf("|__"); - for(j = 0; j < 16*m.cols-1; ++j) printf(" "); - printf("__|\n"); -} - - -matrix make_matrix(int rows, int cols); - -void copy(float *x, float *y, int n); -float dist(float *x, float *y, int n); -int *sample(int n); - -int closest_center(float *datum, matrix centers) -{ - int j; - int best = 0; - float best_dist = 
dist(datum, centers.vals[best], centers.cols); - for (j = 0; j < centers.rows; ++j) { - float new_dist = dist(datum, centers.vals[j], centers.cols); - if (new_dist < best_dist) { - best_dist = new_dist; - best = j; - } - } - return best; -} - -float dist_to_closest_center(float *datum, matrix centers) -{ - int ci = closest_center(datum, centers); - return dist(datum, centers.vals[ci], centers.cols); -} - -int kmeans_expectation(matrix data, int *assignments, matrix centers) -{ - int i; - int converged = 1; - for (i = 0; i < data.rows; ++i) { - int closest = closest_center(data.vals[i], centers); - if (closest != assignments[i]) converged = 0; - assignments[i] = closest; - } - return converged; -} - -void kmeans_maximization(matrix data, int *assignments, matrix centers) -{ - matrix old_centers = make_matrix(centers.rows, centers.cols); - - int i, j; - int *counts = (int*)xcalloc(centers.rows, sizeof(int)); - for (i = 0; i < centers.rows; ++i) { - for (j = 0; j < centers.cols; ++j) { - old_centers.vals[i][j] = centers.vals[i][j]; - centers.vals[i][j] = 0; - } - } - for (i = 0; i < data.rows; ++i) { - ++counts[assignments[i]]; - for (j = 0; j < data.cols; ++j) { - centers.vals[assignments[i]][j] += data.vals[i][j]; - } - } - for (i = 0; i < centers.rows; ++i) { - if (counts[i]) { - for (j = 0; j < centers.cols; ++j) { - centers.vals[i][j] /= counts[i]; - } - } - } - - for (i = 0; i < centers.rows; ++i) { - for (j = 0; j < centers.cols; ++j) { - if(centers.vals[i][j] == 0) centers.vals[i][j] = old_centers.vals[i][j]; - } - } - free(counts); - free_matrix(old_centers); -} - - - -void random_centers(matrix data, matrix centers) { - int i; - int *s = sample(data.rows); - for (i = 0; i < centers.rows; ++i) { - copy(data.vals[s[i]], centers.vals[i], data.cols); - } - free(s); -} - -int *sample(int n) -{ - int i; - int* s = (int*)xcalloc(n, sizeof(int)); - for (i = 0; i < n; ++i) s[i] = i; - for (i = n - 1; i >= 0; --i) { - int swap = s[i]; - int index = rand() % (i + 1); - 
s[i] = s[index]; - s[index] = swap; - } - return s; -} - -float dist(float *x, float *y, int n) -{ - //printf(" x0 = %f, x1 = %f, y0 = %f, y1 = %f \n", x[0], x[1], y[0], y[1]); - float mw = (x[0] < y[0]) ? x[0] : y[0]; - float mh = (x[1] < y[1]) ? x[1] : y[1]; - float inter = mw*mh; - float sum = x[0] * x[1] + y[0] * y[1]; - float un = sum - inter; - float iou = inter / un; - return 1 - iou; -} - -void copy(float *x, float *y, int n) -{ - int i; - for (i = 0; i < n; ++i) y[i] = x[i]; -} - -model do_kmeans(matrix data, int k) -{ - matrix centers = make_matrix(k, data.cols); - int* assignments = (int*)xcalloc(data.rows, sizeof(int)); - //smart_centers(data, centers); - random_centers(data, centers); // IoU = 67.31% after kmeans - - /* - // IoU = 63.29%, anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 - centers.vals[0][0] = 10; centers.vals[0][1] = 13; - centers.vals[1][0] = 16; centers.vals[1][1] = 30; - centers.vals[2][0] = 33; centers.vals[2][1] = 23; - centers.vals[3][0] = 30; centers.vals[3][1] = 61; - centers.vals[4][0] = 62; centers.vals[4][1] = 45; - centers.vals[5][0] = 59; centers.vals[5][1] = 119; - centers.vals[6][0] = 116; centers.vals[6][1] = 90; - centers.vals[7][0] = 156; centers.vals[7][1] = 198; - centers.vals[8][0] = 373; centers.vals[8][1] = 326; - */ - - // range centers [min - max] using exp graph or Pyth example - if (k == 1) kmeans_maximization(data, assignments, centers); - int i; - for(i = 0; i < 1000 && !kmeans_expectation(data, assignments, centers); ++i) { - kmeans_maximization(data, assignments, centers); - } - printf("\n iterations = %d \n", i); - model m; - m.assignments = assignments; - m.centers = centers; - return m; -} diff --git a/src/Detector/darknet/src/matrix.h b/src/Detector/darknet/src/matrix.h deleted file mode 100644 index d565722cd..000000000 --- a/src/Detector/darknet/src/matrix.h +++ /dev/null @@ -1,37 +0,0 @@ -#ifndef MATRIX_H -#define MATRIX_H -#include "darknet.h" - -//typedef struct 
matrix{ -// int rows, cols; -// float **vals; -//} matrix; - -typedef struct { - int *assignments; - matrix centers; -} model; - -#ifdef __cplusplus -extern "C" { -#endif - -model do_kmeans(matrix data, int k); -matrix make_matrix(int rows, int cols); -void free_matrix(matrix m); -void print_matrix(matrix m); - -matrix csv_to_matrix(char *filename); -void matrix_to_csv(matrix m); -matrix hold_out_matrix(matrix *m, int n); -float matrix_topk_accuracy(matrix truth, matrix guess, int k); -void matrix_add_matrix(matrix from, matrix to); -void scale_matrix(matrix m, float scale); -matrix resize_matrix(matrix m, int size); - -float *pop_column(matrix *m, int c); - -#ifdef __cplusplus -} -#endif -#endif diff --git a/src/Detector/darknet/src/maxpool_layer.c b/src/Detector/darknet/src/maxpool_layer.c deleted file mode 100644 index 89ae55d46..000000000 --- a/src/Detector/darknet/src/maxpool_layer.c +++ /dev/null @@ -1,414 +0,0 @@ -#include "maxpool_layer.h" -#include "convolutional_layer.h" -#include "dark_cuda.h" -#include "utils.h" -#include "gemm.h" -#include - -image get_maxpool_image(maxpool_layer l) -{ - int h = l.out_h; - int w = l.out_w; - int c = l.c; - return float_to_image(w,h,c,l.output); -} - -image get_maxpool_delta(maxpool_layer l) -{ - int h = l.out_h; - int w = l.out_w; - int c = l.c; - return float_to_image(w,h,c,l.delta); -} - -void create_maxpool_cudnn_tensors(layer *l) -{ -#ifdef CUDNN - CHECK_CUDNN(cudnnCreatePoolingDescriptor(&l->poolingDesc)); - CHECK_CUDNN(cudnnCreateTensorDescriptor(&l->srcTensorDesc)); - CHECK_CUDNN(cudnnCreateTensorDescriptor(&l->dstTensorDesc)); -#endif // CUDNN -} - -void cudnn_maxpool_setup(layer *l) -{ -#ifdef CUDNN - CHECK_CUDNN(cudnnSetPooling2dDescriptor( - l->poolingDesc, - CUDNN_POOLING_MAX, - CUDNN_NOT_PROPAGATE_NAN, // CUDNN_PROPAGATE_NAN, CUDNN_NOT_PROPAGATE_NAN - l->size, - l->size, - l->pad/2, //0, //l.pad, - l->pad/2, //0, //l.pad, - l->stride_x, - l->stride_y)); - - 
CHECK_CUDNN(cudnnSetTensor4dDescriptor(l->srcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w)); - CHECK_CUDNN(cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w)); -#endif // CUDNN -} - - -void cudnn_local_avgpool_setup(layer *l) -{ -#ifdef CUDNN - CHECK_CUDNN(cudnnSetPooling2dDescriptor( - l->poolingDesc, - CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING, - CUDNN_NOT_PROPAGATE_NAN, // CUDNN_PROPAGATE_NAN, CUDNN_NOT_PROPAGATE_NAN - l->size, - l->size, - l->pad / 2, //0, //l.pad, - l->pad / 2, //0, //l.pad, - l->stride_x, - l->stride_y)); - - CHECK_CUDNN(cudnnSetTensor4dDescriptor(l->srcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w)); - CHECK_CUDNN(cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w)); -#endif // CUDNN -} - -maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride_x, int stride_y, int padding, int maxpool_depth, int out_channels, int antialiasing, int avgpool, int train) -{ - maxpool_layer l = { (LAYER_TYPE)0 }; - l.avgpool = avgpool; - if (avgpool) l.type = LOCAL_AVGPOOL; - else l.type = MAXPOOL; - l.train = train; - - const int blur_stride_x = stride_x; - const int blur_stride_y = stride_y; - l.antialiasing = antialiasing; - if (antialiasing) { - stride_x = stride_y = l.stride = l.stride_x = l.stride_y = 1; // use stride=1 in host-layer - } - - l.batch = batch; - l.h = h; - l.w = w; - l.c = c; - l.pad = padding; - l.maxpool_depth = maxpool_depth; - l.out_channels = out_channels; - if (maxpool_depth) { - l.out_c = out_channels; - l.out_w = l.w; - l.out_h = l.h; - } - else { - l.out_w = (w + padding - size) / stride_x + 1; - l.out_h = (h + padding - size) / stride_y + 1; - l.out_c = c; - } - l.outputs = l.out_h * l.out_w * l.out_c; - l.inputs = h*w*c; - l.size = size; - l.stride = stride_x; - l.stride_x = stride_x; - l.stride_y = 
stride_y; - int output_size = l.out_h * l.out_w * l.out_c * batch; - - if (train) { - if (!avgpool) l.indexes = (int*)xcalloc(output_size, sizeof(int)); - l.delta = (float*)xcalloc(output_size, sizeof(float)); - } - l.output = (float*)xcalloc(output_size, sizeof(float)); - if (avgpool) { - l.forward = forward_local_avgpool_layer; - l.backward = backward_local_avgpool_layer; - } - else { - l.forward = forward_maxpool_layer; - l.backward = backward_maxpool_layer; - } -#ifdef GPU - if (avgpool) { - l.forward_gpu = forward_local_avgpool_layer_gpu; - l.backward_gpu = backward_local_avgpool_layer_gpu; - } - else { - l.forward_gpu = forward_maxpool_layer_gpu; - l.backward_gpu = backward_maxpool_layer_gpu; - } - - if (train) { - if (!avgpool) l.indexes_gpu = cuda_make_int_array(output_size); - l.delta_gpu = cuda_make_array(l.delta, output_size); - } - l.output_gpu = cuda_make_array(l.output, output_size); - create_maxpool_cudnn_tensors(&l); - if (avgpool) cudnn_local_avgpool_setup(&l); - else cudnn_maxpool_setup(&l); - -#endif // GPU - l.bflops = (l.size*l.size*l.c * l.out_h*l.out_w) / 1000000000.; - if (avgpool) { - if (stride_x == stride_y) - fprintf(stderr, "avg %2dx%2d/%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", size, size, stride_x, w, h, c, l.out_w, l.out_h, l.out_c, l.bflops); - else - fprintf(stderr, "avg %2dx%2d/%2dx%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", size, size, stride_x, stride_y, w, h, c, l.out_w, l.out_h, l.out_c, l.bflops); - } - else { - if (maxpool_depth) - fprintf(stderr, "max-depth %2dx%2d/%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", size, size, stride_x, w, h, c, l.out_w, l.out_h, l.out_c, l.bflops); - else if (stride_x == stride_y) - fprintf(stderr, "max %2dx%2d/%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", size, size, stride_x, w, h, c, l.out_w, l.out_h, l.out_c, l.bflops); - else - fprintf(stderr, "max %2dx%2d/%2dx%2d %4d x%4d x%4d -> %4d x%4d x%4d %5.3f BF\n", size, size, stride_x, stride_y, w, h, c, l.out_w, l.out_h, l.out_c, 
l.bflops); - } - - if (l.antialiasing) { - printf("AA: "); - l.input_layer = (layer*)calloc(1, sizeof(layer)); - int blur_size = 3; - int blur_pad = blur_size / 2; - if (l.antialiasing == 2) { - blur_size = 2; - blur_pad = 0; - } - *(l.input_layer) = make_convolutional_layer(batch, 1, l.out_h, l.out_w, l.out_c, l.out_c, l.out_c, blur_size, blur_stride_x, blur_stride_y, 1, blur_pad, LINEAR, 0, 0, 0, 0, 0, 1, 0, NULL, 0, 0, train); - const int blur_nweights = l.out_c * blur_size * blur_size; // (n / n) * n * blur_size * blur_size; - int i; - if (blur_size == 2) { - for (i = 0; i < blur_nweights; i += (blur_size*blur_size)) { - l.input_layer->weights[i + 0] = 1 / 4.f; - l.input_layer->weights[i + 1] = 1 / 4.f; - l.input_layer->weights[i + 2] = 1 / 4.f; - l.input_layer->weights[i + 3] = 1 / 4.f; - } - } - else { - for (i = 0; i < blur_nweights; i += (blur_size*blur_size)) { - l.input_layer->weights[i + 0] = 1 / 16.f; - l.input_layer->weights[i + 1] = 2 / 16.f; - l.input_layer->weights[i + 2] = 1 / 16.f; - - l.input_layer->weights[i + 3] = 2 / 16.f; - l.input_layer->weights[i + 4] = 4 / 16.f; - l.input_layer->weights[i + 5] = 2 / 16.f; - - l.input_layer->weights[i + 6] = 1 / 16.f; - l.input_layer->weights[i + 7] = 2 / 16.f; - l.input_layer->weights[i + 8] = 1 / 16.f; - } - } - for (i = 0; i < l.out_c; ++i) l.input_layer->biases[i] = 0; -#ifdef GPU - if (gpu_index >= 0) { - if (l.antialiasing) l.input_antialiasing_gpu = cuda_make_array(NULL, l.batch*l.outputs); - push_convolutional_layer(*(l.input_layer)); - } -#endif // GPU - } - - return l; -} - -void resize_maxpool_layer(maxpool_layer *l, int w, int h) -{ - l->h = h; - l->w = w; - l->inputs = h*w*l->c; - - l->out_w = (w + l->pad - l->size) / l->stride_x + 1; - l->out_h = (h + l->pad - l->size) / l->stride_y + 1; - l->outputs = l->out_w * l->out_h * l->out_c; - int output_size = l->outputs * l->batch; - - if (l->train) { - if (!l->avgpool) l->indexes = (int*)xrealloc(l->indexes, output_size * sizeof(int)); - l->delta = 
(float*)xrealloc(l->delta, output_size * sizeof(float)); - } - l->output = (float*)xrealloc(l->output, output_size * sizeof(float)); - -#ifdef GPU - CHECK_CUDA(cudaFree(l->output_gpu)); - l->output_gpu = cuda_make_array(l->output, output_size); - - if (l->train) { - if (!l->avgpool) { - CHECK_CUDA(cudaFree((float *)l->indexes_gpu)); - l->indexes_gpu = cuda_make_int_array(output_size); - } - CHECK_CUDA(cudaFree(l->delta_gpu)); - l->delta_gpu = cuda_make_array(l->delta, output_size); - } - - if(l->avgpool) cudnn_local_avgpool_setup(l); - else cudnn_maxpool_setup(l); -#endif -} - -void forward_maxpool_layer(const maxpool_layer l, network_state state) -{ - if (l.maxpool_depth) - { - int b, i, j, k, g; - for (b = 0; b < l.batch; ++b) { - #pragma omp parallel for - for (i = 0; i < l.h; ++i) { - for (j = 0; j < l.w; ++j) { - for (g = 0; g < l.out_c; ++g) - { - int out_index = j + l.w*(i + l.h*(g + l.out_c*b)); - float max = -FLT_MAX; - int max_i = -1; - - for (k = g; k < l.c; k += l.out_c) - { - int in_index = j + l.w*(i + l.h*(k + l.c*b)); - float val = state.input[in_index]; - - max_i = (val > max) ? in_index : max_i; - max = (val > max) ? 
val : max; - } - l.output[out_index] = max; - if (l.indexes) l.indexes[out_index] = max_i; - } - } - } - } - return; - } - - - if (!state.train && l.stride_x == l.stride_y) { - forward_maxpool_layer_avx(state.input, l.output, l.indexes, l.size, l.w, l.h, l.out_w, l.out_h, l.c, l.pad, l.stride, l.batch); - } - else - { - - int b, i, j, k, m, n; - int w_offset = -l.pad / 2; - int h_offset = -l.pad / 2; - - int h = l.out_h; - int w = l.out_w; - int c = l.c; - - for (b = 0; b < l.batch; ++b) { - for (k = 0; k < c; ++k) { - for (i = 0; i < h; ++i) { - for (j = 0; j < w; ++j) { - int out_index = j + w*(i + h*(k + c*b)); - float max = -FLT_MAX; - int max_i = -1; - for (n = 0; n < l.size; ++n) { - for (m = 0; m < l.size; ++m) { - int cur_h = h_offset + i*l.stride_y + n; - int cur_w = w_offset + j*l.stride_x + m; - int index = cur_w + l.w*(cur_h + l.h*(k + b*l.c)); - int valid = (cur_h >= 0 && cur_h < l.h && - cur_w >= 0 && cur_w < l.w); - float val = (valid != 0) ? state.input[index] : -FLT_MAX; - max_i = (val > max) ? index : max_i; - max = (val > max) ? 
val : max; - } - } - l.output[out_index] = max; - if (l.indexes) l.indexes[out_index] = max_i; - } - } - } - } - } - - if (l.antialiasing) { - network_state s = { 0 }; - s.train = state.train; - s.workspace = state.workspace; - s.net = state.net; - s.input = l.output; - forward_convolutional_layer(*(l.input_layer), s); - //simple_copy_ongpu(l.outputs*l.batch, l.output, l.input_antialiasing); - memcpy(l.output, l.input_layer->output, l.input_layer->outputs * l.input_layer->batch * sizeof(float)); - } -} - -void backward_maxpool_layer(const maxpool_layer l, network_state state) -{ - int i; - int h = l.out_h; - int w = l.out_w; - int c = l.out_c; - #pragma omp parallel for - for(i = 0; i < h*w*c*l.batch; ++i){ - int index = l.indexes[i]; - state.delta[index] += l.delta[i]; - } -} - - -void forward_local_avgpool_layer(const maxpool_layer l, network_state state) -{ - int b, i, j, k, m, n; - int w_offset = -l.pad / 2; - int h_offset = -l.pad / 2; - - int h = l.out_h; - int w = l.out_w; - int c = l.c; - - for (b = 0; b < l.batch; ++b) { - for (k = 0; k < c; ++k) { - for (i = 0; i < h; ++i) { - for (j = 0; j < w; ++j) { - int out_index = j + w*(i + h*(k + c*b)); - float avg = 0; - int counter = 0; - for (n = 0; n < l.size; ++n) { - for (m = 0; m < l.size; ++m) { - int cur_h = h_offset + i*l.stride_y + n; - int cur_w = w_offset + j*l.stride_x + m; - int index = cur_w + l.w*(cur_h + l.h*(k + b*l.c)); - int valid = (cur_h >= 0 && cur_h < l.h && - cur_w >= 0 && cur_w < l.w); - if (valid) { - counter++; - avg += state.input[index]; - } - - } - } - l.output[out_index] = avg / counter; - } - } - } - } -} - -void backward_local_avgpool_layer(const maxpool_layer l, network_state state) -{ - - int b, i, j, k, m, n; - int w_offset = -l.pad / 2; - int h_offset = -l.pad / 2; - - int h = l.out_h; - int w = l.out_w; - int c = l.c; - - for (b = 0; b < l.batch; ++b) { - for (k = 0; k < c; ++k) { - for (i = 0; i < h; ++i) { - for (j = 0; j < w; ++j) { - int out_index = j + w*(i + h*(k + 
c*b)); - for (n = 0; n < l.size; ++n) { - for (m = 0; m < l.size; ++m) { - int cur_h = h_offset + i*l.stride_y + n; - int cur_w = w_offset + j*l.stride_x + m; - int index = cur_w + l.w*(cur_h + l.h*(k + b*l.c)); - int valid = (cur_h >= 0 && cur_h < l.h && - cur_w >= 0 && cur_w < l.w); - - if (valid) state.delta[index] += l.delta[out_index] / (l.size*l.size); - } - } - - } - } - } - } - -} \ No newline at end of file diff --git a/src/Detector/darknet/src/maxpool_layer.h b/src/Detector/darknet/src/maxpool_layer.h deleted file mode 100644 index 3c0061178..000000000 --- a/src/Detector/darknet/src/maxpool_layer.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef MAXPOOL_LAYER_H -#define MAXPOOL_LAYER_H - -#include "image.h" -#include "dark_cuda.h" -#include "layer.h" -#include "network.h" - -typedef layer maxpool_layer; - -#ifdef __cplusplus -extern "C" { -#endif -image get_maxpool_image(maxpool_layer l); -maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride_x, int stride_y, int padding, int maxpool_depth, int out_channels, int antialiasing, int avgpool, int train); -void resize_maxpool_layer(maxpool_layer *l, int w, int h); -void forward_maxpool_layer(const maxpool_layer l, network_state state); -void backward_maxpool_layer(const maxpool_layer l, network_state state); - -void forward_local_avgpool_layer(const maxpool_layer l, network_state state); -void backward_local_avgpool_layer(const maxpool_layer l, network_state state); - -#ifdef GPU -void forward_maxpool_layer_gpu(maxpool_layer l, network_state state); -void backward_maxpool_layer_gpu(maxpool_layer l, network_state state); -void cudnn_maxpool_setup(maxpool_layer *l); - -void forward_local_avgpool_layer_gpu(maxpool_layer layer, network_state state); -void backward_local_avgpool_layer_gpu(maxpool_layer layer, network_state state); -#endif // GPU - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/Detector/darknet/src/maxpool_layer_kernels.cu 
b/src/Detector/darknet/src/maxpool_layer_kernels.cu deleted file mode 100644 index ab39d6b57..000000000 --- a/src/Detector/darknet/src/maxpool_layer_kernels.cu +++ /dev/null @@ -1,387 +0,0 @@ -#include -#include -#include - -#include "maxpool_layer.h" -#include "convolutional_layer.h" -#include "blas.h" -#include "dark_cuda.h" - -__global__ void forward_maxpool_depth_layer_kernel(int n, int w, int h, int c, int out_c, int batch, float *input, float *output, int *indexes) -{ - int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (id >= n) return; - - int j = id % w; - id = id / w; - int i = id % h; - id = id / h; - //int g = id % out_c; - //id = id / out_c; - int b = id % batch; - - int k; - for (int g = 0; g < out_c; ++g) - { - int out_index = j + w*(i + h*(g + out_c*b)); - float max = -FLT_MAX; - int max_i = -1; - - for (k = g; k < c; k += out_c) - { - int in_index = j + w*(i + h*(k + c*b)); - float val = input[in_index]; - - max_i = (val > max) ? in_index : max_i; - max = (val > max) ? 
val : max; - } - output[out_index] = max; - if (indexes) indexes[out_index] = max_i; - } -} - - -__global__ void backward_maxpool_depth_layer_kernel(int n, int w, int h, int c, int batch, float *delta, float *prev_delta, int *indexes) -{ - int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (id >= n) return; - - int index = indexes[id]; - prev_delta[index] += delta[id]; -} - - -__global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride_x, int stride_y, int size, int pad, float *input, float *output, int *indexes) -{ - int h = (in_h + pad - size) / stride_y + 1; - int w = (in_w + pad - size) / stride_x + 1; - int c = in_c; - - int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if(id >= n) return; - - int j = id % w; - id /= w; - int i = id % h; - id /= h; - int k = id % c; - id /= c; - int b = id; - - int w_offset = -pad / 2; - int h_offset = -pad / 2; - - int out_index = j + w*(i + h*(k + c*b)); - float max = -INFINITY; - int max_i = -1; - int l, m; - for(l = 0; l < size; ++l){ - for(m = 0; m < size; ++m){ - int cur_h = h_offset + i*stride_y + l; - int cur_w = w_offset + j*stride_x + m; - int index = cur_w + in_w*(cur_h + in_h*(k + b*in_c)); - int valid = (cur_h >= 0 && cur_h < in_h && - cur_w >= 0 && cur_w < in_w); - float val = (valid != 0) ? input[index] : -INFINITY; - max_i = (val > max) ? index : max_i; - max = (val > max) ? 
val : max; - } - } - output[out_index] = max; - if (indexes) indexes[out_index] = max_i; -} - -__global__ void forward_zero_nonmax_kernel(int n, float *input, float *output) -{ - - int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (id >= n) return; - - if (input[id] != output[id]) output[id] = 0; -} - -__global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride_x, int stride_y, int size, int pad, float *delta, float *prev_delta, int *indexes) -{ - int h = (in_h + pad - size) / stride_y + 1; - int w = (in_w + pad - size) / stride_x + 1; - int c = in_c; - int area_x = (size - 1) / stride_x; - int area_y = (size - 1) / stride_y; - - int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if(id >= n) return; - - int index = id; - int j = id % in_w; - id /= in_w; - int i = id % in_h; - id /= in_h; - int k = id % in_c; - id /= in_c; - int b = id; - - int w_offset = -pad / 2; - int h_offset = -pad / 2; - - float d = 0; - int l, m; - for(l = -area_y; l < area_y+1; ++l){ - for(m = -area_x; m < area_x+1; ++m){ - int out_w = (j-w_offset)/stride_x + m; - int out_h = (i-h_offset)/stride_y + l; - int out_index = out_w + w*(out_h + h*(k + c*b)); - int valid = (out_w >= 0 && out_w < w && - out_h >= 0 && out_h < h); - d += (valid && indexes[out_index] == index) ? 
delta[out_index] : 0; - } - } - prev_delta[index] += d; -} - -__global__ void backward_zero_nonmax_kernel(int n, int *indexes, float *prev_delta) -{ - - int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (id >= n) return; - - if (indexes[id] != id) prev_delta[id] = 0; -} -extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network_state state) -{ - if (layer.maxpool_depth) { - int h = layer.out_h; - int w = layer.out_w; - int c = 1;// layer.out_c; - - size_t n = h*w*c*layer.batch; - - forward_maxpool_depth_layer_kernel << > >( - n, layer.w, layer.h, layer.c, layer.out_c, layer.batch, state.input, layer.output_gpu, layer.indexes_gpu); - CHECK_CUDA(cudaPeekAtLastError()); - - return; - } - -#ifdef CUDNN_DISABLED - if (!state.train && layer.stride == layer.size) { - // cudnnPoolingBackward - cudnnStatus_t maxpool_status; - - float alpha = 1, beta = 0; - maxpool_status = cudnnPoolingForward( - cudnn_handle(), - layer.poolingDesc, - &alpha, - layer.srcTensorDesc, - state.input, - &beta, - layer.dstTensorDesc, - layer.output_gpu); - - //maxpool_status = cudnnDestroyPoolingDescriptor(poolingDesc); - //cudnnDestroyTensorDescriptor(layer.srcTensorDesc); - //cudnnDestroyTensorDescriptor(layer.dstTensorDesc); - - } - else -#endif - { - int h = layer.out_h; - int w = layer.out_w; - int c = layer.out_c; - - size_t n = h*w*c*layer.batch; - - forward_maxpool_layer_kernel << > > (n, layer.h, layer.w, layer.c, layer.stride_x, layer.stride_y, layer.size, layer.pad, state.input, layer.output_gpu, layer.indexes_gpu); - CHECK_CUDA(cudaPeekAtLastError()); - - if (layer.maxpool_zero_nonmax) { - forward_zero_nonmax_kernel << > > (n, state.input, layer.output_gpu); - CHECK_CUDA(cudaPeekAtLastError()); - } - } - - if (layer.antialiasing) { - network_state s = { 0 }; - s.train = state.train; - s.workspace = state.workspace; - s.net = state.net; - if (!state.train) s.index = state.index; // don't use TC for training (especially without 
cuda_convert_f32_to_f16() ) - s.input = layer.output_gpu; - forward_convolutional_layer_gpu(*(layer.input_layer), s); - simple_copy_ongpu(layer.outputs*layer.batch, layer.output_gpu, layer.input_antialiasing_gpu); - simple_copy_ongpu(layer.input_layer->outputs*layer.input_layer->batch, layer.input_layer->output_gpu, layer.output_gpu); - } -} - -extern "C" void backward_maxpool_layer_gpu(maxpool_layer layer, network_state state) -{ - if (layer.antialiasing) { - network_state s = { 0 }; - s.train = state.train; - s.workspace = state.workspace; - s.net = state.net; - s.delta = layer.delta_gpu; // s.delta will be returned to l.delta_gpu - s.input = layer.input_antialiasing_gpu; - //if (!state.train) s.index = state.index; // don't use TC for training (especially without cuda_convert_f32_to_f16() ) - simple_copy_ongpu(layer.input_layer->outputs*layer.input_layer->batch, layer.delta_gpu, layer.input_layer->delta_gpu); - backward_convolutional_layer_gpu(*(layer.input_layer), s); - - //simple_copy_ongpu(layer.outputs*layer.batch, layer.input_antialiasing_gpu, layer.output_gpu); - } - - if (layer.maxpool_depth) { - int h = layer.out_h; - int w = layer.out_w; - int c = layer.out_c; - - size_t n = h * w * c * layer.batch; - - backward_maxpool_depth_layer_kernel << > >(n, layer.w, layer.h, layer.c, layer.batch, layer.delta_gpu, state.delta, layer.indexes_gpu); - CHECK_CUDA(cudaPeekAtLastError()); - return; - } - - size_t n = layer.h*layer.w*layer.c*layer.batch; - - backward_maxpool_layer_kernel<<>>(n, layer.h, layer.w, layer.c, layer.stride_x, layer.stride_y, layer.size, layer.pad, layer.delta_gpu, state.delta, layer.indexes_gpu); - CHECK_CUDA(cudaPeekAtLastError()); - - if (layer.maxpool_zero_nonmax) { - backward_zero_nonmax_kernel << > > (n, layer.indexes_gpu, state.delta); - CHECK_CUDA(cudaPeekAtLastError()); - } -} - - - - -__global__ void forward_local_avgpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride_x, int stride_y, int size, int pad, float *input, 
float *output) -{ - int h = (in_h + pad - size) / stride_y + 1; - int w = (in_w + pad - size) / stride_x + 1; - int c = in_c; - - int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (id >= n) return; - - int j = id % w; - id /= w; - int i = id % h; - id /= h; - int k = id % c; - id /= c; - int b = id; - - int w_offset = -pad / 2; - int h_offset = -pad / 2; - - int out_index = j + w*(i + h*(k + c*b)); - float avg = 0; - int counter = 0; - int l, m; - for (l = 0; l < size; ++l) { - for (m = 0; m < size; ++m) { - int cur_h = h_offset + i*stride_y + l; - int cur_w = w_offset + j*stride_x + m; - int index = cur_w + in_w*(cur_h + in_h*(k + b*in_c)); - int valid = (cur_h >= 0 && cur_h < in_h && - cur_w >= 0 && cur_w < in_w); - if (valid) { - counter++; - avg += input[index]; - } - } - } - output[out_index] = avg / counter; // as CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING -} - - -__global__ void backward_local_avgpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride_x, int stride_y, int size, int pad, float *delta, float *prev_delta) -{ - int h = (in_h + pad - size) / stride_y + 1; - int w = (in_w + pad - size) / stride_x + 1; - int c = in_c; - int area_x = (size - 1) / stride_x; - int area_y = (size - 1) / stride_y; - - int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if (id >= n) return; - - int index = id; - int j = id % in_w; - id /= in_w; - int i = id % in_h; - id /= in_h; - int k = id % in_c; - id /= in_c; - int b = id; - - int w_offset = -pad / 2; - int h_offset = -pad / 2; - - int counter = 0; - float d = 0; - int l, m; - for (l = -area_y; l < area_y + 1; ++l) { - for (m = -area_x; m < area_x + 1; ++m) { - int out_w = (j - w_offset) / stride_x + m; - int out_h = (i - h_offset) / stride_y + l; - int out_index = out_w + w*(out_h + h*(k + c*b)); - int valid = (out_w >= 0 && out_w < w && out_h >= 0 && out_h < h); - if (valid) { - counter++; - d += delta[out_index]; - } - } - } - if(counter > 0) 
prev_delta[index] += d / counter; -} - - - -extern "C" void forward_local_avgpool_layer_gpu(maxpool_layer layer, network_state state) -{ - -#ifdef CUDNN_DISABLED - if (!state.train && layer.stride == layer.size) { - // cudnnPoolingBackward - cudnnStatus_t maxpool_status; - - float alpha = 1, beta = 0; - maxpool_status = cudnnPoolingForward( - cudnn_handle(), - layer.poolingDesc, - &alpha, - layer.srcTensorDesc, - state.input, - &beta, - layer.dstTensorDesc, - layer.output_gpu); - - //maxpool_status = cudnnDestroyPoolingDescriptor(poolingDesc); - //cudnnDestroyTensorDescriptor(layer.srcTensorDesc); - //cudnnDestroyTensorDescriptor(layer.dstTensorDesc); - - } - else -#endif - { - int h = layer.out_h; - int w = layer.out_w; - int c = layer.out_c; - - size_t n = h*w*c*layer.batch; - - forward_local_avgpool_layer_kernel << > > (n, layer.h, layer.w, layer.c, layer.stride_x, layer.stride_y, layer.size, layer.pad, state.input, layer.output_gpu); - CHECK_CUDA(cudaPeekAtLastError()); - } -} - -extern "C" void backward_local_avgpool_layer_gpu(maxpool_layer layer, network_state state) -{ - size_t n = layer.h*layer.w*layer.c*layer.batch; - - backward_local_avgpool_layer_kernel << > >(n, layer.h, layer.w, layer.c, layer.stride_x, layer.stride_y, layer.size, layer.pad, layer.delta_gpu, state.delta); - CHECK_CUDA(cudaPeekAtLastError()); -} diff --git a/src/Detector/darknet/src/network.c b/src/Detector/darknet/src/network.c deleted file mode 100644 index d42a212b4..000000000 --- a/src/Detector/darknet/src/network.c +++ /dev/null @@ -1,1666 +0,0 @@ -#include "darknet.h" - -#include -#include -#include - -#include "network.h" -#include "image.h" -#include "data.h" -#include "utils.h" -#include "blas.h" - -#include "crop_layer.h" -#include "connected_layer.h" -#include "gru_layer.h" -#include "rnn_layer.h" -#include "crnn_layer.h" -#include "conv_lstm_layer.h" -#include "local_layer.h" -#include "convolutional_layer.h" -#include "activation_layer.h" -#include "detection_layer.h" 
-#include "region_layer.h" -#include "normalization_layer.h" -#include "batchnorm_layer.h" -#include "maxpool_layer.h" -#include "reorg_layer.h" -#include "reorg_old_layer.h" -#include "avgpool_layer.h" -#include "cost_layer.h" -#include "softmax_layer.h" -#include "dropout_layer.h" -#include "route_layer.h" -#include "shortcut_layer.h" -#include "scale_channels_layer.h" -#include "sam_layer.h" -#include "yolo_layer.h" -#include "gaussian_yolo_layer.h" -#include "upsample_layer.h" -#include "parser.h" - -load_args get_base_args(network *net) -{ - load_args args = { 0 }; - args.w = net->w; - args.h = net->h; - args.size = net->w; - - args.min = net->min_crop; - args.max = net->max_crop; - args.angle = net->angle; - args.aspect = net->aspect; - args.exposure = net->exposure; - args.center = net->center; - args.saturation = net->saturation; - args.hue = net->hue; - return args; -} - -int64_t get_current_iteration(network net) -{ - return *net.cur_iteration; -} - -int get_current_batch(network net) -{ - int batch_num = (*net.seen)/(net.batch*net.subdivisions); - return batch_num; -} - -/* -void reset_momentum(network net) -{ - if (net.momentum == 0) return; - net.learning_rate = 0; - net.momentum = 0; - net.decay = 0; - #ifdef GPU - //if(net.gpu_index >= 0) update_network_gpu(net); - #endif -} -*/ - -void reset_network_state(network *net, int b) -{ - int i; - for (i = 0; i < net->n; ++i) { -#ifdef GPU - layer l = net->layers[i]; - if (l.state_gpu) { - fill_ongpu(l.outputs, 0, l.state_gpu + l.outputs*b, 1); - } - if (l.h_gpu) { - fill_ongpu(l.outputs, 0, l.h_gpu + l.outputs*b, 1); - } -#endif - } -} - -void reset_rnn(network *net) -{ - reset_network_state(net, 0); -} - -float get_current_seq_subdivisions(network net) -{ - int sequence_subdivisions = net.init_sequential_subdivisions; - - if (net.num_steps > 0) - { - int batch_num = get_current_batch(net); - int i; - for (i = 0; i < net.num_steps; ++i) { - if (net.steps[i] > batch_num) break; - sequence_subdivisions *= 
net.seq_scales[i]; - } - } - if (sequence_subdivisions < 1) sequence_subdivisions = 1; - if (sequence_subdivisions > net.subdivisions) sequence_subdivisions = net.subdivisions; - return sequence_subdivisions; -} - -int get_sequence_value(network net) -{ - int sequence = 1; - if (net.sequential_subdivisions != 0) sequence = net.subdivisions / net.sequential_subdivisions; - if (sequence < 1) sequence = 1; - return sequence; -} - -float get_current_rate(network net) -{ - int batch_num = get_current_batch(net); - int i; - float rate; - if (batch_num < net.burn_in) return net.learning_rate * pow((float)batch_num / net.burn_in, net.power); - switch (net.policy) { - case CONSTANT: - return net.learning_rate; - case STEP: - return net.learning_rate * pow(net.scale, batch_num/net.step); - case STEPS: - rate = net.learning_rate; - for(i = 0; i < net.num_steps; ++i){ - if(net.steps[i] > batch_num) return rate; - rate *= net.scales[i]; - //if(net.steps[i] > batch_num - 1 && net.scales[i] > 1) reset_momentum(net); - } - return rate; - case EXP: - return net.learning_rate * pow(net.gamma, batch_num); - case POLY: - return net.learning_rate * pow(1 - (float)batch_num / net.max_batches, net.power); - //if (batch_num < net.burn_in) return net.learning_rate * pow((float)batch_num / net.burn_in, net.power); - //return net.learning_rate * pow(1 - (float)batch_num / net.max_batches, net.power); - case RANDOM: - return net.learning_rate * pow(rand_uniform(0,1), net.power); - case SIG: - return net.learning_rate * (1./(1.+exp(net.gamma*(batch_num - net.step)))); - case SGDR: - { - int last_iteration_start = 0; - int cycle_size = net.batches_per_cycle; - while ((last_iteration_start + cycle_size) < batch_num) - { - last_iteration_start += cycle_size; - cycle_size *= net.batches_cycle_mult; - } - rate = net.learning_rate_min + - 0.5*(net.learning_rate - net.learning_rate_min) - * (1. 
+ cos((float)(batch_num - last_iteration_start)*3.14159265 / cycle_size)); - - return rate; - } - default: - fprintf(stderr, "Policy is weird!\n"); - return net.learning_rate; - } -} - -char *get_layer_string(LAYER_TYPE a) -{ - switch(a){ - case CONVOLUTIONAL: - return "convolutional"; - case ACTIVE: - return "activation"; - case LOCAL: - return "local"; - case DECONVOLUTIONAL: - return "deconvolutional"; - case CONNECTED: - return "connected"; - case RNN: - return "rnn"; - case GRU: - return "gru"; - case LSTM: - return "lstm"; - case CRNN: - return "crnn"; - case MAXPOOL: - return "maxpool"; - case REORG: - return "reorg"; - case AVGPOOL: - return "avgpool"; - case SOFTMAX: - return "softmax"; - case DETECTION: - return "detection"; - case REGION: - return "region"; - case YOLO: - return "yolo"; - case GAUSSIAN_YOLO: - return "Gaussian_yolo"; - case DROPOUT: - return "dropout"; - case CROP: - return "crop"; - case COST: - return "cost"; - case ROUTE: - return "route"; - case SHORTCUT: - return "shortcut"; - case SCALE_CHANNELS: - return "scale_channels"; - case SAM: - return "sam"; - case NORMALIZATION: - return "normalization"; - case BATCHNORM: - return "batchnorm"; - default: - break; - } - return "none"; -} - -network make_network(int n) -{ - network net = {0}; - net.n = n; - net.layers = (layer*)xcalloc(net.n, sizeof(layer)); - net.seen = (uint64_t*)xcalloc(1, sizeof(uint64_t)); - net.badlabels_reject_threshold = (float*)xcalloc(1, sizeof(float)); - net.delta_rolling_max = (float*)xcalloc(1, sizeof(float)); - net.delta_rolling_avg = (float*)xcalloc(1, sizeof(float)); - net.delta_rolling_std = (float*)xcalloc(1, sizeof(float)); - net.cur_iteration = (int*)xcalloc(1, sizeof(int)); - net.total_bbox = (int*)xcalloc(1, sizeof(int)); - net.rewritten_bbox = (int*)xcalloc(1, sizeof(int)); - *net.rewritten_bbox = *net.total_bbox = 0; -#ifdef GPU - net.input_gpu = (float**)xcalloc(1, sizeof(float*)); - net.truth_gpu = (float**)xcalloc(1, sizeof(float*)); - - 
net.input16_gpu = (float**)xcalloc(1, sizeof(float*)); - net.output16_gpu = (float**)xcalloc(1, sizeof(float*)); - net.max_input16_size = (size_t*)xcalloc(1, sizeof(size_t)); - net.max_output16_size = (size_t*)xcalloc(1, sizeof(size_t)); -#endif - return net; -} - -void forward_network(network net, network_state state) -{ - state.workspace = net.workspace; - int i; - for(i = 0; i < net.n; ++i){ - state.index = i; - layer l = net.layers[i]; - if(l.delta && state.train){ - scal_cpu(l.outputs * l.batch, 0, l.delta, 1); - } - //double time = get_time_point(); - l.forward(l, state); - //printf("%d - Predicted in %lf milli-seconds.\n", i, ((double)get_time_point() - time) / 1000); - state.input = l.output; - - /* - float avg_val = 0; - int k; - for (k = 0; k < l.outputs; ++k) avg_val += l.output[k]; - printf(" i: %d - avg_val = %f \n", i, avg_val / l.outputs); - */ - } -} - -void update_network(network net) -{ - int i; - int update_batch = net.batch*net.subdivisions; - float rate = get_current_rate(net); - for(i = 0; i < net.n; ++i){ - layer l = net.layers[i]; - if(l.update){ - l.update(l, update_batch, rate, net.momentum, net.decay); - } - } -} - -float *get_network_output(network net) -{ -#ifdef GPU - if (gpu_index >= 0) return get_network_output_gpu(net); -#endif - int i; - for(i = net.n-1; i > 0; --i) if(net.layers[i].type != COST) break; - return net.layers[i].output; -} - -float get_network_cost(network net) -{ - int i; - float sum = 0; - int count = 0; - for(i = 0; i < net.n; ++i){ - if(net.layers[i].cost){ - sum += net.layers[i].cost[0]; - ++count; - } - } - return sum/count; -} - -int get_predicted_class_network(network net) -{ - float *out = get_network_output(net); - int k = get_network_output_size(net); - return max_index(out, k); -} - -void backward_network(network net, network_state state) -{ - int i; - float *original_input = state.input; - float *original_delta = state.delta; - state.workspace = net.workspace; - for(i = net.n-1; i >= 0; --i){ - 
state.index = i; - if(i == 0){ - state.input = original_input; - state.delta = original_delta; - }else{ - layer prev = net.layers[i-1]; - state.input = prev.output; - state.delta = prev.delta; - } - layer l = net.layers[i]; - if (l.stopbackward) break; - if (l.onlyforward) continue; - l.backward(l, state); - } -} - -float train_network_datum(network net, float *x, float *y) -{ -#ifdef GPU - if(gpu_index >= 0) return train_network_datum_gpu(net, x, y); -#endif - network_state state={0}; - *net.seen += net.batch; - state.index = 0; - state.net = net; - state.input = x; - state.delta = 0; - state.truth = y; - state.train = 1; - forward_network(net, state); - backward_network(net, state); - float error = get_network_cost(net); - //if(((*net.seen)/net.batch)%net.subdivisions == 0) update_network(net); - if(*(state.net.total_bbox) > 0) - fprintf(stderr, " total_bbox = %d, rewritten_bbox = %f %% \n", *(state.net.total_bbox), 100 * (float)*(state.net.rewritten_bbox) / *(state.net.total_bbox)); - return error; -} - -float train_network_sgd(network net, data d, int n) -{ - int batch = net.batch; - float* X = (float*)xcalloc(batch * d.X.cols, sizeof(float)); - float* y = (float*)xcalloc(batch * d.y.cols, sizeof(float)); - - int i; - float sum = 0; - for(i = 0; i < n; ++i){ - get_random_batch(d, batch, X, y); - net.current_subdivision = i; - float err = train_network_datum(net, X, y); - sum += err; - } - free(X); - free(y); - return (float)sum/(n*batch); -} - -float train_network(network net, data d) -{ - return train_network_waitkey(net, d, 0); -} - -float train_network_waitkey(network net, data d, int wait_key) -{ - assert(d.X.rows % net.batch == 0); - int batch = net.batch; - int n = d.X.rows / batch; - float* X = (float*)xcalloc(batch * d.X.cols, sizeof(float)); - float* y = (float*)xcalloc(batch * d.y.cols, sizeof(float)); - - int i; - float sum = 0; - for(i = 0; i < n; ++i){ - get_next_batch(d, batch, i*batch, X, y); - net.current_subdivision = i; - float err = 
train_network_datum(net, X, y); - sum += err; - if(wait_key) wait_key_cv(5); - } - (*net.cur_iteration) += 1; -#ifdef GPU - update_network_gpu(net); -#else // GPU - update_network(net); -#endif // GPU - - int ema_start_point = net.max_batches / 2; - - if (net.ema_alpha && (*net.cur_iteration) >= ema_start_point) - { - int ema_period = (net.max_batches - ema_start_point - 1000) * (1.0 - net.ema_alpha); - int ema_apply_point = net.max_batches - 1000; - - if (!is_ema_initialized(net)) - { - ema_update(net, 0); // init EMA - printf(" EMA initialization \n"); - } - - if ((*net.cur_iteration) == ema_apply_point) - { - ema_apply(net); // apply EMA (BN rolling mean/var recalculation is required) - printf(" ema_apply() \n"); - } - else - if ((*net.cur_iteration) < ema_apply_point && - (*net.cur_iteration) % ema_period == 0) - { - ema_update(net, net.ema_alpha); // update EMA - printf(" ema_update(), ema_alpha = %f \n", net.ema_alpha); - } - } - - - int reject_stop_point = net.max_batches*3/4; - - if ((*net.cur_iteration) < reject_stop_point && - net.weights_reject_freq && - (*net.cur_iteration) % net.weights_reject_freq == 0) - { - float sim_threshold = 0.4; - reject_similar_weights(net, sim_threshold); - } - - - free(X); - free(y); - return (float)sum/(n*batch); -} - - -float train_network_batch(network net, data d, int n) -{ - int i,j; - network_state state={0}; - state.index = 0; - state.net = net; - state.train = 1; - state.delta = 0; - float sum = 0; - int batch = 2; - for(i = 0; i < n; ++i){ - for(j = 0; j < batch; ++j){ - int index = random_gen()%d.X.rows; - state.input = d.X.vals[index]; - state.truth = d.y.vals[index]; - forward_network(net, state); - backward_network(net, state); - sum += get_network_cost(net); - } - update_network(net); - } - return (float)sum/(n*batch); -} - -int recalculate_workspace_size(network *net) -{ -#ifdef GPU - cuda_set_device(net->gpu_index); - if (gpu_index >= 0) cuda_free(net->workspace); -#endif - int i; - size_t workspace_size = 0; 
- for (i = 0; i < net->n; ++i) { - layer l = net->layers[i]; - //printf(" %d: layer = %d,", i, l.type); - if (l.type == CONVOLUTIONAL) { - l.workspace_size = get_convolutional_workspace_size(l); - } - else if (l.type == CONNECTED) { - l.workspace_size = get_connected_workspace_size(l); - } - if (l.workspace_size > workspace_size) workspace_size = l.workspace_size; - net->layers[i] = l; - } - -#ifdef GPU - if (gpu_index >= 0) { - printf("\n try to allocate additional workspace_size = %1.2f MB \n", (float)workspace_size / 1000000); - net->workspace = cuda_make_array(0, workspace_size / sizeof(float) + 1); - printf(" CUDA allocate done! \n"); - } - else { - free(net->workspace); - net->workspace = (float*)xcalloc(1, workspace_size); - } -#else - free(net->workspace); - net->workspace = (float*)xcalloc(1, workspace_size); -#endif - //fprintf(stderr, " Done!\n"); - return 0; -} - -void set_batch_network(network *net, int b) -{ - net->batch = b; - int i; - for(i = 0; i < net->n; ++i){ - net->layers[i].batch = b; - -#ifdef CUDNN - if(net->layers[i].type == CONVOLUTIONAL){ - cudnn_convolutional_setup(net->layers + i, cudnn_fastest, 0); - } - else if (net->layers[i].type == MAXPOOL) { - cudnn_maxpool_setup(net->layers + i); - } -#endif - - } - recalculate_workspace_size(net); // recalculate workspace size -} - -int resize_network(network *net, int w, int h) -{ -#ifdef GPU - cuda_set_device(net->gpu_index); - if(gpu_index >= 0){ - cuda_free(net->workspace); - if (net->input_gpu) { - cuda_free(*net->input_gpu); - *net->input_gpu = 0; - cuda_free(*net->truth_gpu); - *net->truth_gpu = 0; - } - - if (net->input_state_gpu) cuda_free(net->input_state_gpu); - if (net->input_pinned_cpu) { - if (net->input_pinned_cpu_flag) cudaFreeHost(net->input_pinned_cpu); - else free(net->input_pinned_cpu); - } - } -#endif - int i; - //if(w == net->w && h == net->h) return 0; - net->w = w; - net->h = h; - int inputs = 0; - size_t workspace_size = 0; - //fprintf(stderr, "Resizing to %d x %d...\n", 
w, h); - //fflush(stderr); - for (i = 0; i < net->n; ++i){ - layer l = net->layers[i]; - //printf(" (resize %d: layer = %d) , ", i, l.type); - if(l.type == CONVOLUTIONAL){ - resize_convolutional_layer(&l, w, h); - } - else if (l.type == CRNN) { - resize_crnn_layer(&l, w, h); - }else if (l.type == CONV_LSTM) { - resize_conv_lstm_layer(&l, w, h); - }else if(l.type == CROP){ - resize_crop_layer(&l, w, h); - }else if(l.type == MAXPOOL){ - resize_maxpool_layer(&l, w, h); - }else if (l.type == LOCAL_AVGPOOL) { - resize_maxpool_layer(&l, w, h); - }else if (l.type == BATCHNORM) { - resize_batchnorm_layer(&l, w, h); - }else if(l.type == REGION){ - resize_region_layer(&l, w, h); - }else if (l.type == YOLO) { - resize_yolo_layer(&l, w, h); - }else if (l.type == GAUSSIAN_YOLO) { - resize_gaussian_yolo_layer(&l, w, h); - }else if(l.type == ROUTE){ - resize_route_layer(&l, net); - }else if (l.type == SHORTCUT) { - resize_shortcut_layer(&l, w, h, net); - }else if (l.type == SCALE_CHANNELS) { - resize_scale_channels_layer(&l, net); - }else if (l.type == SAM) { - resize_sam_layer(&l, w, h); - }else if (l.type == DROPOUT) { - resize_dropout_layer(&l, inputs); - l.out_w = l.w = w; - l.out_h = l.h = h; - l.output = net->layers[i - 1].output; - l.delta = net->layers[i - 1].delta; -#ifdef GPU - l.output_gpu = net->layers[i-1].output_gpu; - l.delta_gpu = net->layers[i-1].delta_gpu; -#endif - }else if (l.type == UPSAMPLE) { - resize_upsample_layer(&l, w, h); - }else if(l.type == REORG){ - resize_reorg_layer(&l, w, h); - } else if (l.type == REORG_OLD) { - resize_reorg_old_layer(&l, w, h); - }else if(l.type == AVGPOOL){ - resize_avgpool_layer(&l, w, h); - }else if(l.type == NORMALIZATION){ - resize_normalization_layer(&l, w, h); - }else if(l.type == COST){ - resize_cost_layer(&l, inputs); - }else{ - fprintf(stderr, "Resizing type %d \n", (int)l.type); - error("Cannot resize this type of layer"); - } - if(l.workspace_size > workspace_size) workspace_size = l.workspace_size; - inputs = 
l.outputs; - net->layers[i] = l; - //if(l.type != DROPOUT) - { - w = l.out_w; - h = l.out_h; - } - //if(l.type == AVGPOOL) break; - } -#ifdef GPU - const int size = get_network_input_size(*net) * net->batch; - if(gpu_index >= 0){ - printf(" try to allocate additional workspace_size = %1.2f MB \n", (float)workspace_size / 1000000); - net->workspace = cuda_make_array(0, workspace_size/sizeof(float) + 1); - net->input_state_gpu = cuda_make_array(0, size); - if (cudaSuccess == cudaHostAlloc(&net->input_pinned_cpu, size * sizeof(float), cudaHostRegisterMapped)) - net->input_pinned_cpu_flag = 1; - else { - cudaGetLastError(); // reset CUDA-error - net->input_pinned_cpu = (float*)xcalloc(size, sizeof(float)); - net->input_pinned_cpu_flag = 0; - } - printf(" CUDA allocate done! \n"); - }else { - free(net->workspace); - net->workspace = (float*)xcalloc(1, workspace_size); - if(!net->input_pinned_cpu_flag) - net->input_pinned_cpu = (float*)xrealloc(net->input_pinned_cpu, size * sizeof(float)); - } -#else - free(net->workspace); - net->workspace = (float*)xcalloc(1, workspace_size); -#endif - //fprintf(stderr, " Done!\n"); - return 0; -} - -int get_network_output_size(network net) -{ - int i; - for(i = net.n-1; i > 0; --i) if(net.layers[i].type != COST) break; - return net.layers[i].outputs; -} - -int get_network_input_size(network net) -{ - return net.layers[0].inputs; -} - -detection_layer get_network_detection_layer(network net) -{ - int i; - for(i = 0; i < net.n; ++i){ - if(net.layers[i].type == DETECTION){ - return net.layers[i]; - } - } - fprintf(stderr, "Detection layer not found!!\n"); - detection_layer l = { (LAYER_TYPE)0 }; - return l; -} - -image get_network_image_layer(network net, int i) -{ - layer l = net.layers[i]; - if (l.out_w && l.out_h && l.out_c){ - return float_to_image(l.out_w, l.out_h, l.out_c, l.output); - } - image def = {0}; - return def; -} - -layer* get_network_layer(network* net, int i) -{ - return net->layers + i; -} - -image 
get_network_image(network net) -{ - int i; - for(i = net.n-1; i >= 0; --i){ - image m = get_network_image_layer(net, i); - if(m.h != 0) return m; - } - image def = {0}; - return def; -} - -void visualize_network(network net) -{ - image *prev = 0; - int i; - char buff[256]; - for(i = 0; i < net.n; ++i){ - sprintf(buff, "Layer %d", i); - layer l = net.layers[i]; - if(l.type == CONVOLUTIONAL){ - prev = visualize_convolutional_layer(l, buff, prev); - } - } -} - -void top_predictions(network net, int k, int *index) -{ - int size = get_network_output_size(net); - float *out = get_network_output(net); - top_k(out, size, k, index); -} - -// A version of network_predict that uses a pointer for the network -// struct to make the python binding work properly. -float *network_predict_ptr(network *net, float *input) -{ - return network_predict(*net, input); -} - -float *network_predict(network net, float *input) -{ -#ifdef GPU - if(gpu_index >= 0) return network_predict_gpu(net, input); -#endif - - network_state state = {0}; - state.net = net; - state.index = 0; - state.input = input; - state.truth = 0; - state.train = 0; - state.delta = 0; - forward_network(net, state); - float *out = get_network_output(net); - return out; -} - -int num_detections(network *net, float thresh) -{ - int i; - int s = 0; - for (i = 0; i < net->n; ++i) { - layer l = net->layers[i]; - if (l.type == YOLO) { - s += yolo_num_detections(l, thresh); - } - if (l.type == GAUSSIAN_YOLO) { - s += gaussian_yolo_num_detections(l, thresh); - } - if (l.type == DETECTION || l.type == REGION) { - s += l.w*l.h*l.n; - } - } - return s; -} - -int num_detections_batch(network *net, float thresh, int batch) -{ - int i; - int s = 0; - for (i = 0; i < net->n; ++i) { - layer l = net->layers[i]; - if (l.type == YOLO) { - s += yolo_num_detections_batch(l, thresh, batch); - } - if (l.type == DETECTION || l.type == REGION) { - s += l.w*l.h*l.n; - } - } - return s; -} - -detection *make_network_boxes(network *net, float thresh, 
int *num) -{ - int i; - layer l = net->layers[net->n - 1]; - for (i = 0; i < net->n; ++i) { - layer l_tmp = net->layers[i]; - if (l_tmp.type == YOLO || l_tmp.type == GAUSSIAN_YOLO || l_tmp.type == DETECTION || l_tmp.type == REGION) { - l = l_tmp; - break; - } - } - - int nboxes = num_detections(net, thresh); - if (num) *num = nboxes; - detection* dets = (detection*)xcalloc(nboxes, sizeof(detection)); - for (i = 0; i < nboxes; ++i) { - dets[i].prob = (float*)xcalloc(l.classes, sizeof(float)); - // tx,ty,tw,th uncertainty - if(l.type == GAUSSIAN_YOLO) dets[i].uc = (float*)xcalloc(4, sizeof(float)); // Gaussian_YOLOv3 - else dets[i].uc = NULL; - - if (l.coords > 4) dets[i].mask = (float*)xcalloc(l.coords - 4, sizeof(float)); - else dets[i].mask = NULL; - - if(l.embedding_output) dets[i].embeddings = (float*)xcalloc(l.embedding_size, sizeof(float)); - else dets[i].embeddings = NULL; - dets[i].embedding_size = l.embedding_size; - } - return dets; -} - -detection *make_network_boxes_batch(network *net, float thresh, int *num, int batch) -{ - int i; - layer l = net->layers[net->n - 1]; - for (i = 0; i < net->n; ++i) { - layer l_tmp = net->layers[i]; - if (l_tmp.type == YOLO || l_tmp.type == GAUSSIAN_YOLO || l_tmp.type == DETECTION || l_tmp.type == REGION) { - l = l_tmp; - break; - } - } - - int nboxes = num_detections_batch(net, thresh, batch); - assert(num != NULL); - *num = nboxes; - detection* dets = (detection*)calloc(nboxes, sizeof(detection)); - for (i = 0; i < nboxes; ++i) { - dets[i].prob = (float*)calloc(l.classes, sizeof(float)); - // tx,ty,tw,th uncertainty - if (l.type == GAUSSIAN_YOLO) dets[i].uc = (float*)xcalloc(4, sizeof(float)); // Gaussian_YOLOv3 - else dets[i].uc = NULL; - - if (l.coords > 4) dets[i].mask = (float*)xcalloc(l.coords - 4, sizeof(float)); - else dets[i].mask = NULL; - - if (l.embedding_output) dets[i].embeddings = (float*)xcalloc(l.embedding_size, sizeof(float)); - else dets[i].embeddings = NULL; - dets[i].embedding_size = 
l.embedding_size; - } - return dets; -} - -void custom_get_region_detections(layer l, int w, int h, int net_w, int net_h, float thresh, int *map, float hier, int relative, detection *dets, int letter) -{ - box* boxes = (box*)xcalloc(l.w * l.h * l.n, sizeof(box)); - float** probs = (float**)xcalloc(l.w * l.h * l.n, sizeof(float*)); - int i, j; - for (j = 0; j < l.w*l.h*l.n; ++j) probs[j] = (float*)xcalloc(l.classes, sizeof(float)); - get_region_boxes(l, 1, 1, thresh, probs, boxes, 0, map); - for (j = 0; j < l.w*l.h*l.n; ++j) { - dets[j].classes = l.classes; - dets[j].bbox = boxes[j]; - dets[j].objectness = 1; - for (i = 0; i < l.classes; ++i) { - dets[j].prob[i] = probs[j][i]; - } - } - - free(boxes); - free_ptrs((void **)probs, l.w*l.h*l.n); - - //correct_region_boxes(dets, l.w*l.h*l.n, w, h, net_w, net_h, relative); - correct_yolo_boxes(dets, l.w*l.h*l.n, w, h, net_w, net_h, relative, letter); -} - -void fill_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, detection *dets, int letter) -{ - int prev_classes = -1; - int j; - for (j = 0; j < net->n; ++j) { - layer l = net->layers[j]; - if (l.type == YOLO) { - int count = get_yolo_detections(l, w, h, net->w, net->h, thresh, map, relative, dets, letter); - dets += count; - if (prev_classes < 0) prev_classes = l.classes; - else if (prev_classes != l.classes) { - printf(" Error: Different [yolo] layers have different number of classes = %d and %d - check your cfg-file! 
\n", - prev_classes, l.classes); - } - } - if (l.type == GAUSSIAN_YOLO) { - int count = get_gaussian_yolo_detections(l, w, h, net->w, net->h, thresh, map, relative, dets, letter); - dets += count; - } - if (l.type == REGION) { - custom_get_region_detections(l, w, h, net->w, net->h, thresh, map, hier, relative, dets, letter); - //get_region_detections(l, w, h, net->w, net->h, thresh, map, hier, relative, dets); - dets += l.w*l.h*l.n; - } - if (l.type == DETECTION) { - get_detection_detections(l, w, h, thresh, dets); - dets += l.w*l.h*l.n; - } - } -} - -void fill_network_boxes_batch(network *net, int w, int h, float thresh, float hier, int *map, int relative, detection *dets, int letter, int batch) -{ - int prev_classes = -1; - int j; - for (j = 0; j < net->n; ++j) { - layer l = net->layers[j]; - if (l.type == YOLO) { - int count = get_yolo_detections_batch(l, w, h, net->w, net->h, thresh, map, relative, dets, letter, batch); - dets += count; - if (prev_classes < 0) prev_classes = l.classes; - else if (prev_classes != l.classes) { - printf(" Error: Different [yolo] layers have different number of classes = %d and %d - check your cfg-file! 
\n", - prev_classes, l.classes); - } - } - if (l.type == REGION) { - custom_get_region_detections(l, w, h, net->w, net->h, thresh, map, hier, relative, dets, letter); - //get_region_detections(l, w, h, net->w, net->h, thresh, map, hier, relative, dets); - dets += l.w*l.h*l.n; - } - if (l.type == DETECTION) { - get_detection_detections(l, w, h, thresh, dets); - dets += l.w*l.h*l.n; - } - } -} - -detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, int *num, int letter) -{ - detection *dets = make_network_boxes(net, thresh, num); - fill_network_boxes(net, w, h, thresh, hier, map, relative, dets, letter); - return dets; -} - -void free_detections(detection *dets, int n) -{ - int i; - for (i = 0; i < n; ++i) { - free(dets[i].prob); - if (dets[i].uc) free(dets[i].uc); - if (dets[i].mask) free(dets[i].mask); - if (dets[i].embeddings) free(dets[i].embeddings); - } - free(dets); -} - -void free_batch_detections(det_num_pair *det_num_pairs, int n) -{ - int i; - for(i=0; i thresh && show) - { - if (class_id != -1) strcat(send_buf, ", \n"); - class_id = j; - char *buf = (char *)calloc(2048, sizeof(char)); - if (!buf) return 0; - //sprintf(buf, "{\"image_id\":%d, \"category_id\":%d, \"bbox\":[%f, %f, %f, %f], \"score\":%f}", - // image_id, j, dets[i].bbox.x, dets[i].bbox.y, dets[i].bbox.w, dets[i].bbox.h, dets[i].prob[j]); - - sprintf(buf, " {\"class_id\":%d, \"name\":\"%s\", \"relative_coordinates\":{\"center_x\":%f, \"center_y\":%f, \"width\":%f, \"height\":%f}, \"confidence\":%f}", - j, names[j], dets[i].bbox.x, dets[i].bbox.y, dets[i].bbox.w, dets[i].bbox.h, dets[i].prob[j]); - - int send_buf_len = strlen(send_buf); - int buf_len = strlen(buf); - int total_len = send_buf_len + buf_len + 100; - send_buf = (char *)realloc(send_buf, total_len * sizeof(char)); - if (!send_buf) { - if (buf) free(buf); - return 0;// exit(-1); - } - strcat(send_buf, buf); - free(buf); - } - } - } - strcat(send_buf, "\n ] \n}"); - return 
send_buf; -} - - -float *network_predict_image(network *net, image im) -{ - //image imr = letterbox_image(im, net->w, net->h); - float *p; - if(net->batch != 1) set_batch_network(net, 1); - if (im.w == net->w && im.h == net->h) { - // Input image is the same size as our net, predict on that image - p = network_predict(*net, im.data); - } - else { - // Need to resize image to the desired size for the net - image imr = resize_image(im, net->w, net->h); - p = network_predict(*net, imr.data); - free_image(imr); - } - return p; -} - -det_num_pair* network_predict_batch(network *net, image im, int batch_size, int w, int h, float thresh, float hier, int *map, int relative, int letter) -{ - network_predict(*net, im.data); - det_num_pair *pdets = (struct det_num_pair *)calloc(batch_size, sizeof(det_num_pair)); - int num; - int batch; - for(batch=0; batch < batch_size; batch++){ - detection *dets = make_network_boxes_batch(net, thresh, &num, batch); - fill_network_boxes_batch(net, w, h, thresh, hier, map, relative, dets, letter, batch); - pdets[batch].num = num; - pdets[batch].dets = dets; - } - return pdets; -} - -float *network_predict_image_letterbox(network *net, image im) -{ - //image imr = letterbox_image(im, net->w, net->h); - float *p; - if (net->batch != 1) set_batch_network(net, 1); - if (im.w == net->w && im.h == net->h) { - // Input image is the same size as our net, predict on that image - p = network_predict(*net, im.data); - } - else { - // Need to resize image to the desired size for the net - image imr = letterbox_image(im, net->w, net->h); - p = network_predict(*net, imr.data); - free_image(imr); - } - return p; -} - -int network_width(network *net) { return net->w; } -int network_height(network *net) { return net->h; } - -matrix network_predict_data_multi(network net, data test, int n) -{ - int i,j,b,m; - int k = get_network_output_size(net); - matrix pred = make_matrix(test.X.rows, k); - float* X = (float*)xcalloc(net.batch * test.X.rows, sizeof(float)); 
- for(i = 0; i < test.X.rows; i += net.batch){ - for(b = 0; b < net.batch; ++b){ - if(i+b == test.X.rows) break; - memcpy(X+b*test.X.cols, test.X.vals[i+b], test.X.cols*sizeof(float)); - } - for(m = 0; m < n; ++m){ - float *out = network_predict(net, X); - for(b = 0; b < net.batch; ++b){ - if(i+b == test.X.rows) break; - for(j = 0; j < k; ++j){ - pred.vals[i+b][j] += out[j+b*k]/n; - } - } - } - } - free(X); - return pred; -} - -matrix network_predict_data(network net, data test) -{ - int i,j,b; - int k = get_network_output_size(net); - matrix pred = make_matrix(test.X.rows, k); - float* X = (float*)xcalloc(net.batch * test.X.cols, sizeof(float)); - for(i = 0; i < test.X.rows; i += net.batch){ - for(b = 0; b < net.batch; ++b){ - if(i+b == test.X.rows) break; - memcpy(X+b*test.X.cols, test.X.vals[i+b], test.X.cols*sizeof(float)); - } - float *out = network_predict(net, X); - for(b = 0; b < net.batch; ++b){ - if(i+b == test.X.rows) break; - for(j = 0; j < k; ++j){ - pred.vals[i+b][j] = out[j+b*k]; - } - } - } - free(X); - return pred; -} - -void print_network(network net) -{ - int i,j; - for(i = 0; i < net.n; ++i){ - layer l = net.layers[i]; - float *output = l.output; - int n = l.outputs; - float mean = mean_array(output, n); - float vari = variance_array(output, n); - fprintf(stderr, "Layer %d - Mean: %f, Variance: %f\n",i,mean, vari); - if(n > 100) n = 100; - for(j = 0; j < n; ++j) fprintf(stderr, "%f, ", output[j]); - if(n == 100)fprintf(stderr,".....\n"); - fprintf(stderr, "\n"); - } -} - -void compare_networks(network n1, network n2, data test) -{ - matrix g1 = network_predict_data(n1, test); - matrix g2 = network_predict_data(n2, test); - int i; - int a,b,c,d; - a = b = c = d = 0; - for(i = 0; i < g1.rows; ++i){ - int truth = max_index(test.y.vals[i], test.y.cols); - int p1 = max_index(g1.vals[i], g1.cols); - int p2 = max_index(g2.vals[i], g2.cols); - if(p1 == truth){ - if(p2 == truth) ++d; - else ++c; - }else{ - if(p2 == truth) ++b; - else ++a; - } - } - 
printf("%5d %5d\n%5d %5d\n", a, b, c, d); - float num = pow((abs(b - c) - 1.), 2.); - float den = b + c; - printf("%f\n", num/den); -} - -float network_accuracy(network net, data d) -{ - matrix guess = network_predict_data(net, d); - float acc = matrix_topk_accuracy(d.y, guess,1); - free_matrix(guess); - return acc; -} - -float *network_accuracies(network net, data d, int n) -{ - static float acc[2]; - matrix guess = network_predict_data(net, d); - acc[0] = matrix_topk_accuracy(d.y, guess, 1); - acc[1] = matrix_topk_accuracy(d.y, guess, n); - free_matrix(guess); - return acc; -} - -float network_accuracy_multi(network net, data d, int n) -{ - matrix guess = network_predict_data_multi(net, d, n); - float acc = matrix_topk_accuracy(d.y, guess,1); - free_matrix(guess); - return acc; -} - -void free_network_ptr(network* net) -{ - free_network(*net); -} - -void free_network(network net) -{ - int i; - for (i = 0; i < net.n; ++i) { - free_layer(net.layers[i]); - } - free(net.layers); - - free(net.seq_scales); - free(net.scales); - free(net.steps); - free(net.seen); - free(net.badlabels_reject_threshold); - free(net.delta_rolling_max); - free(net.delta_rolling_avg); - free(net.delta_rolling_std); - free(net.cur_iteration); - free(net.total_bbox); - free(net.rewritten_bbox); - -#ifdef GPU - if (gpu_index >= 0) cuda_free(net.workspace); - else free(net.workspace); - free_pinned_memory(); - if (net.input_state_gpu) cuda_free(net.input_state_gpu); - if (net.input_pinned_cpu) { // CPU - if (net.input_pinned_cpu_flag) cudaFreeHost(net.input_pinned_cpu); - else free(net.input_pinned_cpu); - } - if (*net.input_gpu) cuda_free(*net.input_gpu); - if (*net.truth_gpu) cuda_free(*net.truth_gpu); - if (net.input_gpu) free(net.input_gpu); - if (net.truth_gpu) free(net.truth_gpu); - - if (*net.input16_gpu) cuda_free(*net.input16_gpu); - if (*net.output16_gpu) cuda_free(*net.output16_gpu); - if (net.input16_gpu) free(net.input16_gpu); - if (net.output16_gpu) free(net.output16_gpu); - if 
(net.max_input16_size) free(net.max_input16_size); - if (net.max_output16_size) free(net.max_output16_size); -#else - free(net.workspace); -#endif -} - -static float relu(float src) { - if (src > 0) return src; - return 0; -} - -static float lrelu(float src) { - const float eps = 0.001; - if (src > eps) return src; - return eps; -} - -void fuse_conv_batchnorm(network net) -{ - int j; - for (j = 0; j < net.n; ++j) { - layer *l = &net.layers[j]; - - if (l->type == CONVOLUTIONAL) { - //printf(" Merges Convolutional-%d and batch_norm \n", j); - - if (l->share_layer != NULL) { - l->batch_normalize = 0; - } - - if (l->batch_normalize) { - int f; - for (f = 0; f < l->n; ++f) - { - l->biases[f] = l->biases[f] - (double)l->scales[f] * l->rolling_mean[f] / (sqrt((double)l->rolling_variance[f] + .00001)); - - double precomputed = l->scales[f] / (sqrt((double)l->rolling_variance[f] + .00001)); - - const size_t filter_size = l->size*l->size*l->c / l->groups; - int i; - for (i = 0; i < filter_size; ++i) { - int w_index = f*filter_size + i; - - l->weights[w_index] *= precomputed; - } - } - - free_convolutional_batchnorm(l); - l->batch_normalize = 0; -#ifdef GPU - if (gpu_index >= 0) { - push_convolutional_layer(*l); - } -#endif - } - } - else if (l->type == SHORTCUT && l->weights && l->weights_normalization) - { - if (l->nweights > 0) { - //cuda_pull_array(l.weights_gpu, l.weights, l.nweights); - int i; - for (i = 0; i < l->nweights; ++i) printf(" w = %f,", l->weights[i]); - printf(" l->nweights = %d, j = %d \n", l->nweights, j); - } - - // nweights - l.n or l.n*l.c or (l.n*l.c*l.h*l.w) - const int layer_step = l->nweights / (l->n + 1); // 1 or l.c or (l.c * l.h * l.w) - - int chan, i; - for (chan = 0; chan < layer_step; ++chan) - { - float sum = 1, max_val = -FLT_MAX; - - if (l->weights_normalization == SOFTMAX_NORMALIZATION) { - for (i = 0; i < (l->n + 1); ++i) { - int w_index = chan + i * layer_step; - float w = l->weights[w_index]; - if (max_val < w) max_val = w; - } - } - - 
const float eps = 0.0001; - sum = eps; - - for (i = 0; i < (l->n + 1); ++i) { - int w_index = chan + i * layer_step; - float w = l->weights[w_index]; - if (l->weights_normalization == RELU_NORMALIZATION) sum += lrelu(w); - else if (l->weights_normalization == SOFTMAX_NORMALIZATION) sum += expf(w - max_val); - } - - for (i = 0; i < (l->n + 1); ++i) { - int w_index = chan + i * layer_step; - float w = l->weights[w_index]; - if (l->weights_normalization == RELU_NORMALIZATION) w = lrelu(w) / sum; - else if (l->weights_normalization == SOFTMAX_NORMALIZATION) w = expf(w - max_val) / sum; - l->weights[w_index] = w; - } - } - - l->weights_normalization = NO_NORMALIZATION; - -#ifdef GPU - if (gpu_index >= 0) { - push_shortcut_layer(*l); - } -#endif - } - else { - //printf(" Fusion skip layer type: %d \n", l->type); - } - } -} - -void forward_blank_layer(layer l, network_state state) {} - -void calculate_binary_weights(network net) -{ - int j; - for (j = 0; j < net.n; ++j) { - layer *l = &net.layers[j]; - - if (l->type == CONVOLUTIONAL) { - //printf(" Merges Convolutional-%d and batch_norm \n", j); - - if (l->xnor) { - //printf("\n %d \n", j); - //l->lda_align = 256; // 256bit for AVX2 // set in make_convolutional_layer() - //if (l->size*l->size*l->c >= 2048) l->lda_align = 512; - - binary_align_weights(l); - - if (net.layers[j].use_bin_output) { - l->activation = LINEAR; - } - -#ifdef GPU - // fuse conv_xnor + shortcut -> conv_xnor - if ((j + 1) < net.n && net.layers[j].type == CONVOLUTIONAL) { - layer *sc = &net.layers[j + 1]; - if (sc->type == SHORTCUT && sc->w == sc->out_w && sc->h == sc->out_h && sc->c == sc->out_c) - { - l->bin_conv_shortcut_in_gpu = net.layers[net.layers[j + 1].index].output_gpu; - l->bin_conv_shortcut_out_gpu = net.layers[j + 1].output_gpu; - - net.layers[j + 1].type = BLANK; - net.layers[j + 1].forward_gpu = forward_blank_layer; - } - } -#endif // GPU - } - } - } - //printf("\n calculate_binary_weights Done! 
\n"); - -} - -void copy_cudnn_descriptors(layer src, layer *dst) -{ -#ifdef CUDNN - dst->normTensorDesc = src.normTensorDesc; - dst->normDstTensorDesc = src.normDstTensorDesc; - dst->normDstTensorDescF16 = src.normDstTensorDescF16; - - dst->srcTensorDesc = src.srcTensorDesc; - dst->dstTensorDesc = src.dstTensorDesc; - - dst->srcTensorDesc16 = src.srcTensorDesc16; - dst->dstTensorDesc16 = src.dstTensorDesc16; -#endif // CUDNN -} - -void copy_weights_net(network net_train, network *net_map) -{ - int k; - for (k = 0; k < net_train.n; ++k) { - layer *l = &(net_train.layers[k]); - layer tmp_layer; - copy_cudnn_descriptors(net_map->layers[k], &tmp_layer); - net_map->layers[k] = net_train.layers[k]; - copy_cudnn_descriptors(tmp_layer, &net_map->layers[k]); - - if (l->type == CRNN) { - layer tmp_input_layer, tmp_self_layer, tmp_output_layer; - copy_cudnn_descriptors(*net_map->layers[k].input_layer, &tmp_input_layer); - copy_cudnn_descriptors(*net_map->layers[k].self_layer, &tmp_self_layer); - copy_cudnn_descriptors(*net_map->layers[k].output_layer, &tmp_output_layer); - net_map->layers[k].input_layer = net_train.layers[k].input_layer; - net_map->layers[k].self_layer = net_train.layers[k].self_layer; - net_map->layers[k].output_layer = net_train.layers[k].output_layer; - //net_map->layers[k].output_gpu = net_map->layers[k].output_layer->output_gpu; // already copied out of if() - - copy_cudnn_descriptors(tmp_input_layer, net_map->layers[k].input_layer); - copy_cudnn_descriptors(tmp_self_layer, net_map->layers[k].self_layer); - copy_cudnn_descriptors(tmp_output_layer, net_map->layers[k].output_layer); - } - else if(l->input_layer) // for AntiAliasing - { - layer tmp_input_layer; - copy_cudnn_descriptors(*net_map->layers[k].input_layer, &tmp_input_layer); - net_map->layers[k].input_layer = net_train.layers[k].input_layer; - copy_cudnn_descriptors(tmp_input_layer, net_map->layers[k].input_layer); - } - net_map->layers[k].batch = 1; - net_map->layers[k].steps = 1; - } -} - - 
-// combine Training and Validation networks -network combine_train_valid_networks(network net_train, network net_map) -{ - network net_combined = make_network(net_train.n); - layer *old_layers = net_combined.layers; - net_combined = net_train; - net_combined.layers = old_layers; - net_combined.batch = 1; - - int k; - for (k = 0; k < net_train.n; ++k) { - layer *l = &(net_train.layers[k]); - net_combined.layers[k] = net_train.layers[k]; - net_combined.layers[k].batch = 1; - - if (l->type == CONVOLUTIONAL) { -#ifdef CUDNN - net_combined.layers[k].normTensorDesc = net_map.layers[k].normTensorDesc; - net_combined.layers[k].normDstTensorDesc = net_map.layers[k].normDstTensorDesc; - net_combined.layers[k].normDstTensorDescF16 = net_map.layers[k].normDstTensorDescF16; - - net_combined.layers[k].srcTensorDesc = net_map.layers[k].srcTensorDesc; - net_combined.layers[k].dstTensorDesc = net_map.layers[k].dstTensorDesc; - - net_combined.layers[k].srcTensorDesc16 = net_map.layers[k].srcTensorDesc16; - net_combined.layers[k].dstTensorDesc16 = net_map.layers[k].dstTensorDesc16; -#endif // CUDNN - } - } - return net_combined; -} - -void free_network_recurrent_state(network net) -{ - int k; - for (k = 0; k < net.n; ++k) { - if (net.layers[k].type == CONV_LSTM) free_state_conv_lstm(net.layers[k]); - if (net.layers[k].type == CRNN) free_state_crnn(net.layers[k]); - } -} - -void randomize_network_recurrent_state(network net) -{ - int k; - for (k = 0; k < net.n; ++k) { - if (net.layers[k].type == CONV_LSTM) randomize_state_conv_lstm(net.layers[k]); - if (net.layers[k].type == CRNN) free_state_crnn(net.layers[k]); - } -} - - -void remember_network_recurrent_state(network net) -{ - int k; - for (k = 0; k < net.n; ++k) { - if (net.layers[k].type == CONV_LSTM) remember_state_conv_lstm(net.layers[k]); - //if (net.layers[k].type == CRNN) free_state_crnn(net.layers[k]); - } -} - -void restore_network_recurrent_state(network net) -{ - int k; - for (k = 0; k < net.n; ++k) { - if 
(net.layers[k].type == CONV_LSTM) restore_state_conv_lstm(net.layers[k]); - if (net.layers[k].type == CRNN) free_state_crnn(net.layers[k]); - } -} - - -int is_ema_initialized(network net) -{ - int i; - for (i = 0; i < net.n; ++i) { - layer l = net.layers[i]; - if (l.type == CONVOLUTIONAL) { - int k; - if (l.weights_ema) { - for (k = 0; k < l.nweights; ++k) { - if (l.weights_ema[k] != 0) return 1; - } - } - } - } - - return 0; -} - -void ema_update(network net, float ema_alpha) -{ - int i; - for (i = 0; i < net.n; ++i) { - layer l = net.layers[i]; - if (l.type == CONVOLUTIONAL) { -#ifdef GPU - if (gpu_index >= 0) { - pull_convolutional_layer(l); - } -#endif - int k; - if (l.weights_ema) { - for (k = 0; k < l.nweights; ++k) { - l.weights_ema[k] = ema_alpha * l.weights_ema[k] + (1 - ema_alpha) * l.weights[k]; - } - } - - for (k = 0; k < l.n; ++k) { - if (l.biases_ema) l.biases_ema[k] = ema_alpha * l.biases_ema[k] + (1 - ema_alpha) * l.biases[k]; - if (l.scales_ema) l.scales_ema[k] = ema_alpha * l.scales_ema[k] + (1 - ema_alpha) * l.scales[k]; - } - } - } -} - - -void ema_apply(network net) -{ - int i; - for (i = 0; i < net.n; ++i) { - layer l = net.layers[i]; - if (l.type == CONVOLUTIONAL) { - int k; - if (l.weights_ema) { - for (k = 0; k < l.nweights; ++k) { - l.weights[k] = l.weights_ema[k]; - } - } - - for (k = 0; k < l.n; ++k) { - if (l.biases_ema) l.biases[k] = l.biases_ema[k]; - if (l.scales_ema) l.scales[k] = l.scales_ema[k]; - } - -#ifdef GPU - if (gpu_index >= 0) { - push_convolutional_layer(l); - } -#endif - } - } -} - - - -void reject_similar_weights(network net, float sim_threshold) -{ - int i; - for (i = 0; i < net.n; ++i) { - layer l = net.layers[i]; - if (i == 0) continue; - if (net.n > i + 1) if (net.layers[i + 1].type == YOLO) continue; - if (net.n > i + 2) if (net.layers[i + 2].type == YOLO) continue; - if (net.n > i + 3) if (net.layers[i + 3].type == YOLO) continue; - - if (l.type == CONVOLUTIONAL && l.activation != LINEAR) { -#ifdef GPU - if 
(gpu_index >= 0) { - pull_convolutional_layer(l); - } -#endif - int k, j; - float max_sim = -1000; - int max_sim_index = 0; - int max_sim_index2 = 0; - int filter_size = l.size*l.size*l.c; - for (k = 0; k < l.n; ++k) - { - for (j = k+1; j < l.n; ++j) - { - int w1 = k; - int w2 = j; - - float sim = cosine_similarity(&l.weights[filter_size*w1], &l.weights[filter_size*w2], filter_size); - if (sim > max_sim) { - max_sim = sim; - max_sim_index = w1; - max_sim_index2 = w2; - } - } - } - - printf(" reject_similar_weights: i = %d, l.n = %d, w1 = %d, w2 = %d, sim = %f, thresh = %f \n", - i, l.n, max_sim_index, max_sim_index2, max_sim, sim_threshold); - - if (max_sim > sim_threshold) { - printf(" rejecting... \n"); - float scale = sqrt(2. / (l.size*l.size*l.c / l.groups)); - - for (k = 0; k < filter_size; ++k) { - l.weights[max_sim_index*filter_size + k] = scale*rand_uniform(-1, 1); - } - if (l.biases) l.biases[max_sim_index] = 0.0f; - if (l.scales) l.scales[max_sim_index] = 1.0f; - } - -#ifdef GPU - if (gpu_index >= 0) { - push_convolutional_layer(l); - } -#endif - } - } -} diff --git a/src/Detector/darknet/src/network.h b/src/Detector/darknet/src/network.h deleted file mode 100644 index 7661c8ef8..000000000 --- a/src/Detector/darknet/src/network.h +++ /dev/null @@ -1,184 +0,0 @@ -// Oh boy, why am I about to do this.... 
-#ifndef NETWORK_H -#define NETWORK_H -#include "darknet.h" - -#include -#include "layer.h" - - -#include "image.h" -#include "data.h" -#include "tree.h" - -#ifdef __cplusplus -extern "C" { -#endif -/* -typedef enum { - CONSTANT, STEP, EXP, POLY, STEPS, SIG, RANDOM -} learning_rate_policy; - -typedef struct network{ - float *workspace; - int n; - int batch; - uint64_t *seen; - float epoch; - int subdivisions; - float momentum; - float decay; - layer *layers; - int outputs; - float *output; - learning_rate_policy policy; - - float learning_rate; - float gamma; - float scale; - float power; - int time_steps; - int step; - int max_batches; - float *scales; - int *steps; - int num_steps; - int burn_in; - int cudnn_half; - - int adam; - float B1; - float B2; - float eps; - - int inputs; - int h, w, c; - int max_crop; - int min_crop; - int flip; // horizontal flip 50% probability augmentaiont for classifier training (default = 1) - float angle; - float aspect; - float exposure; - float saturation; - float hue; - int small_object; - - int gpu_index; - tree *hierarchy; - - #ifdef GPU - float *input_state_gpu; - - float **input_gpu; - float **truth_gpu; - float **input16_gpu; - float **output16_gpu; - size_t *max_input16_size; - size_t *max_output16_size; - int wait_stream; - #endif -} network; - - -typedef struct network_state { - float *truth; - float *input; - float *delta; - float *workspace; - int train; - int index; - network net; -} network_state; -*/ - -#ifdef GPU -float train_networks(network *nets, int n, data d, int interval); -void sync_nets(network *nets, int n, int interval); -float train_network_datum_gpu(network net, float *x, float *y); -float *network_predict_gpu(network net, float *input); -float * get_network_output_gpu_layer(network net, int i); -float * get_network_delta_gpu_layer(network net, int i); -float *get_network_output_gpu(network net); -void forward_network_gpu(network net, network_state state); -void backward_network_gpu(network net, 
network_state state); -void update_network_gpu(network net); -void forward_backward_network_gpu(network net, float *x, float *y); -#endif - -float get_current_seq_subdivisions(network net); -int get_sequence_value(network net); -float get_current_rate(network net); -int get_current_batch(network net); -int64_t get_current_iteration(network net); -//void free_network(network net); // darknet.h -void compare_networks(network n1, network n2, data d); -char *get_layer_string(LAYER_TYPE a); - -network make_network(int n); -void forward_network(network net, network_state state); -void backward_network(network net, network_state state); -void update_network(network net); - -float train_network(network net, data d); -float train_network_waitkey(network net, data d, int wait_key); -float train_network_batch(network net, data d, int n); -float train_network_sgd(network net, data d, int n); -float train_network_datum(network net, float *x, float *y); - -matrix network_predict_data(network net, data test); -//LIB_API float *network_predict(network net, float *input); -//LIB_API float *network_predict_ptr(network *net, float *input); -float network_accuracy(network net, data d); -float *network_accuracies(network net, data d, int n); -float network_accuracy_multi(network net, data d, int n); -void top_predictions(network net, int n, int *index); -float *get_network_output(network net); -float *get_network_output_layer(network net, int i); -float *get_network_delta_layer(network net, int i); -float *get_network_delta(network net); -int get_network_output_size_layer(network net, int i); -int get_network_output_size(network net); -image get_network_image(network net); -image get_network_image_layer(network net, int i); -int get_predicted_class_network(network net); -void print_network(network net); -void visualize_network(network net); -int resize_network(network *net, int w, int h); -void set_batch_network(network *net, int b); -int get_network_input_size(network net); -float 
get_network_cost(network net); -//LIB_API layer* get_network_layer(network* net, int i); -//LIB_API detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, int *num, int letter); -//LIB_API detection *make_network_boxes(network *net, float thresh, int *num); -//LIB_API void free_detections(detection *dets, int n); -//LIB_API void reset_rnn(network *net); -//LIB_API network *load_network_custom(char *cfg, char *weights, int clear, int batch); -//LIB_API network *load_network(char *cfg, char *weights, int clear); -//LIB_API float *network_predict_image(network *net, image im); -//LIB_API float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, float thresh_calc_avg_iou, const float iou_thresh, int map_points, int letter_box, network *existing_net); -//LIB_API void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int dont_show, int calc_map, int mjpeg_port); -//LIB_API int network_width(network *net); -//LIB_API int network_height(network *net); -//LIB_API void optimize_picture(network *net, image orig, int max_layer, float scale, float rate, float thresh, int norm); - -int get_network_nuisance(network net); -int get_network_background(network net); -//LIB_API void fuse_conv_batchnorm(network net); -//LIB_API void calculate_binary_weights(network net); -network combine_train_valid_networks(network net_train, network net_map); -void copy_weights_net(network net_train, network *net_map); -void free_network_recurrent_state(network net); -void randomize_network_recurrent_state(network net); -void remember_network_recurrent_state(network net); -void restore_network_recurrent_state(network net); -int is_ema_initialized(network net); -void ema_update(network net, float ema_alpha); -void ema_apply(network net); -void reject_similar_weights(network net, float sim_threshold); - - -#ifdef __cplusplus -} -#endif - -#endif diff --git 
a/src/Detector/darknet/src/network_kernels.cu b/src/Detector/darknet/src/network_kernels.cu deleted file mode 100644 index e6e24d2c2..000000000 --- a/src/Detector/darknet/src/network_kernels.cu +++ /dev/null @@ -1,697 +0,0 @@ -#include "dark_cuda.h" - -#include -#include -#include - -#include "network.h" -#include "image.h" -#include "data.h" -#include "utils.h" -#include "parser.h" - -#include "crop_layer.h" -#include "connected_layer.h" -#include "rnn_layer.h" -#include "gru_layer.h" -#include "crnn_layer.h" -#include "detection_layer.h" -#include "region_layer.h" -#include "convolutional_layer.h" -#include "activation_layer.h" -#include "maxpool_layer.h" -#include "reorg_layer.h" -#include "avgpool_layer.h" -#include "normalization_layer.h" -#include "batchnorm_layer.h" -#include "cost_layer.h" -#include "local_layer.h" -#include "softmax_layer.h" -#include "dropout_layer.h" -#include "route_layer.h" -#include "shortcut_layer.h" -#include "blas.h" - -//#ifdef OPENCV -//#include -//#endif - -#include "http_stream.h" - -float * get_network_output_gpu_layer(network net, int i); -float * get_network_delta_gpu_layer(network net, int i); -float * get_network_output_gpu(network net); - -typedef struct time_benchmark_layers { - float time; - int layer_id, layer_type; -} time_benchmark_layers; - -int time_comparator(const void *pa, const void *pb) -{ - time_benchmark_layers a = *(time_benchmark_layers *)pa; - time_benchmark_layers b = *(time_benchmark_layers *)pb; - float diff = a.time - b.time; - if (diff < 0) return 1; - else if (diff > 0) return -1; - return 0; -} - -void forward_network_gpu(network net, network_state state) -{ - static time_benchmark_layers *avg_time_per_layer = NULL; - static time_benchmark_layers *sorted_avg_time_per_layer = NULL; - double start_time, end_time; - if (net.benchmark_layers) { - if (!avg_time_per_layer) { - avg_time_per_layer = (time_benchmark_layers *)calloc(net.n, sizeof(time_benchmark_layers)); - sorted_avg_time_per_layer = 
(time_benchmark_layers *)calloc(net.n, sizeof(time_benchmark_layers)); - } - cudaDeviceSynchronize(); - } - - //printf("\n"); - state.workspace = net.workspace; - int i; - for(i = 0; i < net.n; ++i){ - state.index = i; - layer l = net.layers[i]; - if(l.delta_gpu && state.train){ - fill_ongpu(l.outputs * l.batch, 0, l.delta_gpu, 1); - } - - if (net.benchmark_layers) { - start_time = get_time_point(); - } - - l.forward_gpu(l, state); - - if (net.benchmark_layers) { - CHECK_CUDA(cudaDeviceSynchronize()); - end_time = get_time_point(); - const double took_time = (end_time - start_time) / 1000; - const double alpha = 0.9; - if (avg_time_per_layer[i].time == 0) { - avg_time_per_layer[i].layer_id = i; - avg_time_per_layer[i].layer_type = l.type; - avg_time_per_layer[i].time = took_time; - } - else avg_time_per_layer[i].time = avg_time_per_layer[i].time * alpha + took_time * (1 - alpha); - - sorted_avg_time_per_layer[i] = avg_time_per_layer[i]; - printf("\n fw-layer %d - type: %d - %lf ms - avg_time %lf ms \n", i, l.type, took_time, avg_time_per_layer[i].time); - } - - if(net.wait_stream) - cudaStreamSynchronize(get_cuda_stream()); - state.input = l.output_gpu; - //cudaDeviceSynchronize(); - - /* - cuda_pull_array(l.output_gpu, l.output, l.outputs); - cudaStreamSynchronize(get_cuda_stream()); - float avg_val = 0; - int k; - for (k = 0; k < l.outputs; ++k) avg_val += l.output[k]; - printf(" i: %d - avg_val = %f \n", i, avg_val / l.outputs); - */ - -/* - cuda_pull_array(l.output_gpu, l.output, l.batch*l.outputs); - if (l.out_w >= 0 && l.out_h >= 1 && l.c >= 3) { - int j; - for (j = 0; j < l.out_c; ++j) { - image img = make_image(l.out_w, l.out_h, 3); - memcpy(img.data, l.output + l.out_w*l.out_h*j, l.out_w*l.out_h * 1 * sizeof(float)); - memcpy(img.data + l.out_w*l.out_h * 1, l.output + l.out_w*l.out_h*j, l.out_w*l.out_h * 1 * sizeof(float)); - memcpy(img.data + l.out_w*l.out_h * 2, l.output + l.out_w*l.out_h*j, l.out_w*l.out_h * 1 * sizeof(float)); - char buff[256]; - 
sprintf(buff, "layer-%d slice-%d", i, j); - show_image(img, buff); - save_image(img, buff); - } - cvWaitKey(0); // wait press-key in console - cvDestroyAllWindows(); - } -*/ - } - - if (net.benchmark_layers) { - printf("\n\nSorted by time (forward):\n"); - qsort(sorted_avg_time_per_layer, net.n, sizeof(time_benchmark_layers), time_comparator); - for (i = 0; i < net.n; ++i) { - //printf("layer %d - type: %d - avg_time %lf ms \n", avg_time_per_layer[i].layer_id, avg_time_per_layer[i].layer_type, avg_time_per_layer[i].time); - printf("%d - fw-sort-layer %d - type: %d - avg_time %lf ms \n", i, sorted_avg_time_per_layer[i].layer_id, sorted_avg_time_per_layer[i].layer_type, sorted_avg_time_per_layer[i].time); - } - } - - //cudaStreamSynchronize(get_cuda_stream()); // sync CUDA-functions - //cudaDeviceSynchronize(); -} - -void backward_network_gpu(network net, network_state state) -{ - static time_benchmark_layers *avg_time_per_layer = NULL; - static time_benchmark_layers *sorted_avg_time_per_layer = NULL; - double start_time, end_time; - if (net.benchmark_layers) { - if (!avg_time_per_layer) { - avg_time_per_layer = (time_benchmark_layers *)calloc(net.n, sizeof(time_benchmark_layers)); - sorted_avg_time_per_layer = (time_benchmark_layers *)calloc(net.n, sizeof(time_benchmark_layers)); - } - cudaDeviceSynchronize(); - } - - state.workspace = net.workspace; - int i; - float * original_input = state.input; - float * original_delta = state.delta; - for(i = net.n-1; i >= 0; --i){ - state.index = i; - layer l = net.layers[i]; - if (l.stopbackward == 1) break; - if (l.stopbackward > get_current_iteration(net)) break; - if(i == 0){ - state.input = original_input; - state.delta = original_delta; - }else{ - layer prev = net.layers[i-1]; - state.input = prev.output_gpu; - state.delta = prev.delta_gpu; - if (net.optimized_memory && !prev.keep_delta_gpu) { - state.delta = net.state_delta_gpu; - } - } - if (l.onlyforward) continue; - - if (net.benchmark_layers) { - start_time = 
get_time_point(); - } - - l.backward_gpu(l, state); - - if (net.benchmark_layers) { - CHECK_CUDA(cudaDeviceSynchronize()); - end_time = get_time_point(); - const double took_time = (end_time - start_time) / 1000; - const double alpha = 0.9; - if (avg_time_per_layer[i].time == 0) { - avg_time_per_layer[i].layer_id = i; - avg_time_per_layer[i].layer_type = l.type; - avg_time_per_layer[i].time = took_time; - } - else avg_time_per_layer[i].time = avg_time_per_layer[i].time * alpha + took_time * (1 - alpha); - - sorted_avg_time_per_layer[i] = avg_time_per_layer[i]; - printf("\n bw-layer %d - type: %d - %lf ms - avg_time %lf ms \n", i, l.type, took_time, avg_time_per_layer[i].time); - } - - if (i != 0) { - layer prev = net.layers[i - 1]; - if (net.optimized_memory && state.delta && !prev.keep_delta_gpu) { - if (prev.delta_gpu != state.delta) simple_copy_ongpu(prev.outputs*prev.batch, state.delta, prev.delta_gpu); - fill_ongpu(prev.outputs*prev.batch, 0, net.state_delta_gpu, 1); - } - } - - /* - if(i != 0) - { - layer l = net.layers[i - 1]; - int state_delta_nan_inf = is_nan_or_inf(state.delta, l.outputs * l.batch); - int state_input_nan_inf = is_nan_or_inf(state.input, l.outputs * l.batch); - printf("\n i - %d is_nan_or_inf(s.delta) = %d \n", i, state_delta_nan_inf); - printf(" i - %d is_nan_or_inf(s.input) = %d \n", i, state_input_nan_inf); - if (state_delta_nan_inf || state_input_nan_inf) { printf(" found "); getchar(); } - } - */ - } - - if (net.adversarial && net.attention) - { - int img_size = net.w * net.h * net.c; - float *original_input_cpu = (float *)xcalloc(img_size, sizeof(float)); - float *original_delta_cpu = (float *)xcalloc(img_size, sizeof(float)); - cuda_pull_array(original_input, original_input_cpu, img_size); - cuda_pull_array(original_delta, original_delta_cpu, img_size); - - image attention_img = make_attention_image(img_size, original_delta_cpu, original_input_cpu, net.w, net.h, net.c); - show_image(attention_img, "attention_img"); - 
resize_window_cv("attention_img", 500, 500); - - free_image(attention_img); - - free(original_input_cpu); - free(original_delta_cpu); - } - if (net.adversarial) { - int x_size = get_network_input_size(net)*net.batch; - printf(" x_size = %d, original_delta = %p, original_input = %p, net.learning_rate = %f \n", - x_size, original_delta, original_input, net.learning_rate); - axpy_ongpu(x_size, net.learning_rate, original_delta, 1, original_input, 1); - constrain_min_max_ongpu(x_size, 0, 1, original_input, 1); - } - - if (net.benchmark_layers) { - printf("\n\nSorted by time (backward):\n"); - qsort(sorted_avg_time_per_layer, net.n, sizeof(time_benchmark_layers), time_comparator); - for (i = 0; i < net.n; ++i) { - //printf("layer %d - type: %d - avg_time %lf ms \n", avg_time_per_layer[i].layer_id, avg_time_per_layer[i].layer_type, avg_time_per_layer[i].time); - printf("%d - bw-sort-layer %d - type: %d - avg_time %lf ms \n", i, sorted_avg_time_per_layer[i].layer_id, sorted_avg_time_per_layer[i].layer_type, sorted_avg_time_per_layer[i].time); - } - } -} - -void update_network_gpu(network net) -{ - cuda_set_device(net.gpu_index); - const int iteration_num = (*net.seen) / (net.batch * net.subdivisions); - int i; - int update_batch = net.batch*net.subdivisions * get_sequence_value(net); - float rate = get_current_rate(net); - for(i = 0; i < net.n; ++i){ - layer l = net.layers[i]; - l.t = get_current_batch(net); - if (iteration_num > (net.max_batches * 1 / 2)) l.deform = 0; - if (l.burnin_update && (l.burnin_update*net.burn_in > iteration_num)) continue; - if (l.train_only_bn) continue; - - if(l.update_gpu && l.dont_update < iteration_num){ - l.update_gpu(l, update_batch, rate, net.momentum, net.decay, net.loss_scale); - } - } -} - -void forward_backward_network_gpu(network net, float *x, float *y) -{ - network_state state; - state.index = 0; - state.net = net; - int x_size = get_network_input_size(net)*net.batch; - int y_size = get_network_output_size(net)*net.batch; - 
if(net.layers[net.n-1].truths) y_size = net.layers[net.n-1].truths*net.batch; - if(!*net.input_gpu){ - *net.input_gpu = cuda_make_array(x, x_size); - *net.truth_gpu = cuda_make_array(y, y_size); - }else{ - cuda_push_array(*net.input_gpu, x, x_size); - cuda_push_array(*net.truth_gpu, y, y_size); - } - state.input = *net.input_gpu; - state.delta = 0; - if (net.adversarial) { - state.delta = cuda_make_array(NULL, x_size); - } - state.truth = *net.truth_gpu; - state.train = 1; -#if defined(CUDNN_HALF) && defined(CUDNN) - int i; - for (i = 0; i < net.n; ++i) { - layer l = net.layers[i]; - if (net.cudnn_half){ - if (l.type == CONVOLUTIONAL && l.weights_gpu && l.weights_gpu16) { - assert((l.nweights) > 0); - cuda_convert_f32_to_f16(l.weights_gpu, l.nweights, l.weights_gpu16); - } - else if (l.type == CRNN && l.input_layer->weights_gpu && l.input_layer->weights_gpu16) { - assert((l.input_layer->c*l.input_layer->n*l.input_layer->size*l.input_layer->size) > 0); - cuda_convert_f32_to_f16(l.input_layer->weights_gpu, l.input_layer->nweights, l.input_layer->weights_gpu16); - cuda_convert_f32_to_f16(l.self_layer->weights_gpu, l.self_layer->nweights, l.self_layer->weights_gpu16); - cuda_convert_f32_to_f16(l.output_layer->weights_gpu, l.output_layer->nweights, l.output_layer->weights_gpu16); - } - else if (l.type == CONV_LSTM && l.wf->weights_gpu && l.wf->weights_gpu16) { - assert((l.wf->c * l.wf->n * l.wf->size * l.wf->size) > 0); - if (l.peephole) { - cuda_convert_f32_to_f16(l.vf->weights_gpu, l.vf->nweights, l.vf->weights_gpu16); - cuda_convert_f32_to_f16(l.vi->weights_gpu, l.vi->nweights, l.vi->weights_gpu16); - cuda_convert_f32_to_f16(l.vo->weights_gpu, l.vo->nweights, l.vo->weights_gpu16); - } - cuda_convert_f32_to_f16(l.wf->weights_gpu, l.wf->nweights, l.wf->weights_gpu16); - if (!l.bottleneck) { - cuda_convert_f32_to_f16(l.wi->weights_gpu, l.wi->nweights, l.wi->weights_gpu16); - cuda_convert_f32_to_f16(l.wg->weights_gpu, l.wg->nweights, l.wg->weights_gpu16); - 
cuda_convert_f32_to_f16(l.wo->weights_gpu, l.wo->nweights, l.wo->weights_gpu16); - } - cuda_convert_f32_to_f16(l.uf->weights_gpu, l.uf->nweights, l.uf->weights_gpu16); - cuda_convert_f32_to_f16(l.ui->weights_gpu, l.ui->nweights, l.ui->weights_gpu16); - cuda_convert_f32_to_f16(l.ug->weights_gpu, l.ug->nweights, l.ug->weights_gpu16); - cuda_convert_f32_to_f16(l.uo->weights_gpu, l.uo->nweights, l.uo->weights_gpu16); - } - } - } -#endif - forward_network_gpu(net, state); - //cudaStreamSynchronize(get_cuda_stream()); - backward_network_gpu(net, state); - - if (net.adversarial) { - cuda_free(state.delta); - cuda_pull_array(*net.input_gpu, x, x_size); - } - if(*(state.net.total_bbox) > 0) - fprintf(stderr, " total_bbox = %d, rewritten_bbox = %f %% \n", *(state.net.total_bbox), 100 * (float)*(state.net.rewritten_bbox) / *(state.net.total_bbox)); -} - -float train_network_datum_gpu(network net, float *x, float *y) -{ - *net.seen += net.batch; - if (net.adversarial_lr && rand_int(0, 1) == 1 && get_current_iteration(net) > net.burn_in) { - net.adversarial = 1; - float lr_old = net.learning_rate; - float scale = (get_current_iteration(net) / ((float)net.max_batches)); - //scale = sin(scale * M_PI); - net.learning_rate = net.adversarial_lr * scale; - layer l = net.layers[net.n - 1]; - int y_size = get_network_output_size(net)*net.batch; - if (net.layers[net.n - 1].truths) y_size = net.layers[net.n - 1].truths*net.batch; - float *truth_cpu = (float *)xcalloc(y_size, sizeof(float)); - - const int img_size = net.w*net.h*net.c; - float *old_input = (float *)xcalloc(img_size*net.batch, sizeof(float)); - memcpy(old_input, x, img_size*net.batch * sizeof(float)); - - printf("\n adversarial training, adversarial_lr = %f \n", net.adversarial_lr * scale); - - forward_backward_network_gpu(net, x, truth_cpu); - - int b; - for (b = 0; b < net.batch; ++b) { - if (b % 2 == 1 && net.contrastive) { - //printf(" b = %d old img, ", b); - memcpy(x + img_size*b, old_input + img_size*b, img_size * 
sizeof(float)); - } - } - - image im; - im.w = net.w; - im.h = net.h; - im.c = net.c; - im.data = x; - show_image(im, "adversarial data augmentation"); - resize_window_cv("adversarial data augmentation", 500, 500); - wait_key_cv(1); - - free(old_input); - free(truth_cpu); - net.learning_rate = lr_old; - net.adversarial = 0; - } - forward_backward_network_gpu(net, x, y); - float error = get_network_cost(net); - //if (((*net.seen) / net.batch) % net.subdivisions == 0) update_network_gpu(net); - const int sequence = get_sequence_value(net); - //if (((*net.seen) / net.batch) % (net.subdivisions*sequence) == 0) update_network_gpu(net); - - return error; -} - -typedef struct { - network net; - data d; - float *err; -} train_args; - -void *train_thread(void *ptr) -{ - train_args args = *(train_args*)ptr; - free(ptr); - cuda_set_device(args.net.gpu_index); - *args.err = train_network(args.net, args.d); - return 0; -} - -pthread_t train_network_in_thread(network net, data d, float *err) -{ - pthread_t thread; - train_args *ptr = (train_args *)calloc(1, sizeof(train_args)); - ptr->net = net; - ptr->d = d; - ptr->err = err; - if(pthread_create(&thread, 0, train_thread, ptr)) error("Thread creation failed"); - return thread; -} - -void pull_updates(layer l) -{ - if(l.type == CONVOLUTIONAL){ - cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.n); - cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.nweights); - if(l.scale_updates) cuda_pull_array(l.scale_updates_gpu, l.scale_updates, l.n); - } else if(l.type == CONNECTED){ - cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.outputs); - cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.outputs*l.inputs); - } -} - -void push_updates(layer l) -{ - if(l.type == CONVOLUTIONAL){ - cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.n); - cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.nweights); - if(l.scale_updates) cuda_push_array(l.scale_updates_gpu, l.scale_updates, l.n); - } else if(l.type == 
CONNECTED){ - cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.outputs); - cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.outputs*l.inputs); - } -} - -void update_layer(layer l, network net) -{ - int update_batch = net.batch*net.subdivisions; - float rate = get_current_rate(net); - l.t = get_current_batch(net); - if(l.update_gpu){ - l.update_gpu(l, update_batch, rate, net.momentum, net.decay, net.loss_scale); - } -} - -void merge_weights(layer l, layer base) -{ - if (l.type == CONVOLUTIONAL) { - axpy_cpu(l.n, 1, l.biases, 1, base.biases, 1); - axpy_cpu(l.nweights, 1, l.weights, 1, base.weights, 1); - if (l.scales) { - axpy_cpu(l.n, 1, l.scales, 1, base.scales, 1); - } - } else if(l.type == CONNECTED) { - axpy_cpu(l.outputs, 1, l.biases, 1, base.biases, 1); - axpy_cpu(l.outputs*l.inputs, 1, l.weights, 1, base.weights, 1); - } -} - -void scale_weights(layer l, float s) -{ - if (l.type == CONVOLUTIONAL) { - scal_cpu(l.n, s, l.biases, 1); - scal_cpu(l.nweights, s, l.weights, 1); - if (l.scales) { - scal_cpu(l.n, s, l.scales, 1); - } - } else if(l.type == CONNECTED) { - scal_cpu(l.outputs, s, l.biases, 1); - scal_cpu(l.outputs*l.inputs, s, l.weights, 1); - } -} - - -void pull_weights(layer l) -{ - if(l.type == CONVOLUTIONAL){ - cuda_pull_array(l.biases_gpu, l.biases, l.n); - cuda_pull_array(l.weights_gpu, l.weights, l.nweights); - if(l.scales) cuda_pull_array(l.scales_gpu, l.scales, l.n); - } else if(l.type == CONNECTED){ - cuda_pull_array(l.biases_gpu, l.biases, l.outputs); - cuda_pull_array(l.weights_gpu, l.weights, l.outputs*l.inputs); - } -} - -void push_weights(layer l) -{ - if(l.type == CONVOLUTIONAL){ - cuda_push_array(l.biases_gpu, l.biases, l.n); - cuda_push_array(l.weights_gpu, l.weights, l.nweights); - if(l.scales) cuda_push_array(l.scales_gpu, l.scales, l.n); - } else if(l.type == CONNECTED){ - cuda_push_array(l.biases_gpu, l.biases, l.outputs); - cuda_push_array(l.weights_gpu, l.weights, l.outputs*l.inputs); - } -} - -void 
distribute_weights(layer l, layer base) -{ - if(l.type == CONVOLUTIONAL){ - cuda_push_array(l.biases_gpu, base.biases, l.n); - cuda_push_array(l.weights_gpu, base.weights, l.nweights); - if(base.scales) cuda_push_array(l.scales_gpu, base.scales, l.n); - } else if(l.type == CONNECTED){ - cuda_push_array(l.biases_gpu, base.biases, l.outputs); - cuda_push_array(l.weights_gpu, base.weights, l.outputs*l.inputs); - } -} - - -void merge_updates(layer l, layer base) -{ - if (l.type == CONVOLUTIONAL) { - axpy_cpu(l.n, 1, l.bias_updates, 1, base.bias_updates, 1); - axpy_cpu(l.nweights, 1, l.weight_updates, 1, base.weight_updates, 1); - if (l.scale_updates) { - axpy_cpu(l.n, 1, l.scale_updates, 1, base.scale_updates, 1); - } - } else if(l.type == CONNECTED) { - axpy_cpu(l.outputs, 1, l.bias_updates, 1, base.bias_updates, 1); - axpy_cpu(l.outputs*l.inputs, 1, l.weight_updates, 1, base.weight_updates, 1); - } -} - -void distribute_updates(layer l, layer base) -{ - if(l.type == CONVOLUTIONAL){ - cuda_push_array(l.bias_updates_gpu, base.bias_updates, l.n); - cuda_push_array(l.weight_updates_gpu, base.weight_updates, l.nweights); - if(base.scale_updates) cuda_push_array(l.scale_updates_gpu, base.scale_updates, l.n); - } else if(l.type == CONNECTED){ - cuda_push_array(l.bias_updates_gpu, base.bias_updates, l.outputs); - cuda_push_array(l.weight_updates_gpu, base.weight_updates, l.outputs*l.inputs); - } -} - -void sync_layer(network *nets, int n, int j) -{ - //printf("Syncing layer %d\n", j); - int i; - network net = nets[0]; - layer base = net.layers[j]; - cuda_set_device(net.gpu_index); - pull_weights(base); - for (i = 1; i < n; ++i) { - cuda_set_device(nets[i].gpu_index); - layer l = nets[i].layers[j]; - pull_weights(l); - merge_weights(l, base); - } - scale_weights(base, 1./n); - for (i = 0; i < n; ++i) { - cuda_set_device(nets[i].gpu_index); - layer l = nets[i].layers[j]; - distribute_weights(l, base); - } - //printf("Done syncing layer %d\n", j); -} - -typedef struct{ - 
network *nets; - int n; - int j; -} sync_args; - -void *sync_layer_thread(void *ptr) -{ - sync_args args = *(sync_args*)ptr; - sync_layer(args.nets, args.n, args.j); - free(ptr); - return 0; -} - -pthread_t sync_layer_in_thread(network *nets, int n, int j) -{ - pthread_t thread; - sync_args *ptr = (sync_args *)calloc(1, sizeof(sync_args)); - ptr->nets = nets; - ptr->n = n; - ptr->j = j; - if(pthread_create(&thread, 0, sync_layer_thread, ptr)) error("Thread creation failed"); - return thread; -} - -void sync_nets(network *nets, int n, int interval) -{ - int j; - int layers = nets[0].n; - pthread_t *threads = (pthread_t *) calloc(layers, sizeof(pthread_t)); - - *nets[0].seen += interval * (n-1) * nets[0].batch * nets[0].subdivisions; - for (j = 0; j < n; ++j){ - *nets[j].seen = *nets[0].seen; - } - for (j = 0; j < layers; ++j) { - threads[j] = sync_layer_in_thread(nets, n, j); - } - for (j = 0; j < layers; ++j) { - pthread_join(threads[j], 0); - } - free(threads); -} - -float train_networks(network *nets, int n, data d, int interval) -{ - int i; -#ifdef _DEBUG - int batch = nets[0].batch; - int subdivisions = nets[0].subdivisions; - assert(batch * subdivisions * n == d.X.rows); -#endif - pthread_t *threads = (pthread_t *) calloc(n, sizeof(pthread_t)); - float *errors = (float *) calloc(n, sizeof(float)); - - float sum = 0; - for(i = 0; i < n; ++i){ - data p = get_data_part(d, i, n); - threads[i] = train_network_in_thread(nets[i], p, errors + i); - } - for(i = 0; i < n; ++i){ - pthread_join(threads[i], 0); - //printf("%f\n", errors[i]); - sum += errors[i]; - } - //cudaDeviceSynchronize(); - *nets[0].cur_iteration += (n - 1); - *nets[0].seen = nets[0].batch * nets[0].subdivisions * get_current_iteration(nets[0]); // remove this line, when you will save to weights-file both: seen & cur_iteration - if (get_current_iteration(nets[0]) % interval == 0) - { - printf("Syncing... 
"); - fflush(stdout); - sync_nets(nets, n, interval); - printf("Done!\n"); - } - //cudaDeviceSynchronize(); - free(threads); - free(errors); - return (float)sum/(n); -} - -float *get_network_output_layer_gpu(network net, int i) -{ - layer l = net.layers[i]; - if(l.type != REGION) cuda_pull_array(l.output_gpu, l.output, l.outputs*l.batch); - return l.output; -} - -float *get_network_output_gpu(network net) -{ - int i; - for(i = net.n-1; i > 0; --i) if(net.layers[i].type != COST) break; - return get_network_output_layer_gpu(net, i); -} - -float *network_predict_gpu(network net, float *input) -{ - if (net.gpu_index != cuda_get_device()) - cuda_set_device(net.gpu_index); - int size = get_network_input_size(net) * net.batch; - network_state state; - state.index = 0; - state.net = net; - //state.input = cuda_make_array(input, size); // memory will be allocated in the parse_network_cfg_custom() - state.input = net.input_state_gpu; - memcpy(net.input_pinned_cpu, input, size * sizeof(float)); - cuda_push_array(state.input, net.input_pinned_cpu, size); - state.truth = 0; - state.train = 0; - state.delta = 0; - forward_network_gpu(net, state); - float *out = get_network_output_gpu(net); - //cuda_free(state.input); // will be freed in the free_network() - return out; -} diff --git a/src/Detector/darknet/src/nightmare.c b/src/Detector/darknet/src/nightmare.c deleted file mode 100644 index 5c1ca04ae..000000000 --- a/src/Detector/darknet/src/nightmare.c +++ /dev/null @@ -1,303 +0,0 @@ - -#include "network.h" -#include "parser.h" -#include "blas.h" -#include "utils.h" - -// ./darknet nightmare cfg/extractor.recon.cfg ~/trained/yolo-coco.conv frame6.png -reconstruct -iters 500 -i 3 -lambda .1 -rate .01 -smooth 2 - -float abs_mean(float *x, int n) -{ - int i; - float sum = 0; - for (i = 0; i < n; ++i){ - sum += fabs(x[i]); - } - return sum/n; -} - -void calculate_loss(float *output, float *delta, int n, float thresh) -{ - int i; - float mean = mean_array(output, n); - float var = 
variance_array(output, n); - for(i = 0; i < n; ++i){ - if(delta[i] > mean + thresh*sqrt(var)) delta[i] = output[i]; - else delta[i] = 0; - } -} - -void optimize_picture(network *net, image orig, int max_layer, float scale, float rate, float thresh, int norm) -{ - //scale_image(orig, 2); - //translate_image(orig, -1); - net->n = max_layer + 1; - - int dx = rand()%16 - 8; - int dy = rand()%16 - 8; - int flip = rand()%2; - - image crop = crop_image(orig, dx, dy, orig.w, orig.h); - image im = resize_image(crop, (int)(orig.w * scale), (int)(orig.h * scale)); - if(flip) flip_image(im); - - resize_network(net, im.w, im.h); - layer last = net->layers[net->n-1]; - //net->layers[net->n - 1].activation = LINEAR; - - image delta = make_image(im.w, im.h, im.c); - - network_state state = {0}; - -#ifdef GPU - state.input = cuda_make_array(im.data, im.w*im.h*im.c); - state.delta = cuda_make_array(im.data, im.w*im.h*im.c); - - forward_network_gpu(*net, state); - copy_ongpu(last.outputs, last.output_gpu, 1, last.delta_gpu, 1); - - cuda_pull_array(last.delta_gpu, last.delta, last.outputs); - calculate_loss(last.delta, last.delta, last.outputs, thresh); - cuda_push_array(last.delta_gpu, last.delta, last.outputs); - - backward_network_gpu(*net, state); - - cuda_pull_array(state.delta, delta.data, im.w*im.h*im.c); - cuda_free(state.input); - cuda_free(state.delta); -#else - state.input = im.data; - state.delta = delta.data; - forward_network(*net, state); - copy_cpu(last.outputs, last.output, 1, last.delta, 1); - calculate_loss(last.output, last.delta, last.outputs, thresh); - backward_network(*net, state); -#endif - - if(flip) flip_image(delta); - //normalize_array(delta.data, delta.w*delta.h*delta.c); - image resized = resize_image(delta, orig.w, orig.h); - image out = crop_image(resized, -dx, -dy, orig.w, orig.h); - - /* - image g = grayscale_image(out); - free_image(out); - out = g; - */ - - //rate = rate / abs_mean(out.data, out.w*out.h*out.c); - - if(norm) 
normalize_array(out.data, out.w*out.h*out.c); - axpy_cpu(orig.w*orig.h*orig.c, rate, out.data, 1, orig.data, 1); - - /* - normalize_array(orig.data, orig.w*orig.h*orig.c); - scale_image(orig, sqrt(var)); - translate_image(orig, mean); - */ - - //translate_image(orig, 1); - //scale_image(orig, .5); - //normalize_image(orig); - - constrain_image(orig); - - free_image(crop); - free_image(im); - free_image(delta); - free_image(resized); - free_image(out); - -} - -void smooth(image recon, image update, float lambda, int num) -{ - int i, j, k; - int ii, jj; - for(k = 0; k < recon.c; ++k){ - for(j = 0; j < recon.h; ++j){ - for(i = 0; i < recon.w; ++i){ - int out_index = i + recon.w*(j + recon.h*k); - for(jj = j-num; jj <= j + num && jj < recon.h; ++jj){ - if (jj < 0) continue; - for(ii = i-num; ii <= i + num && ii < recon.w; ++ii){ - if (ii < 0) continue; - int in_index = ii + recon.w*(jj + recon.h*k); - update.data[out_index] += lambda * (recon.data[in_index] - recon.data[out_index]); - } - } - } - } - } -} - -void reconstruct_picture(network net, float *features, image recon, image update, float rate, float momentum, float lambda, int smooth_size, int iters) -{ - int iter = 0; - for (iter = 0; iter < iters; ++iter) { - image delta = make_image(recon.w, recon.h, recon.c); - - network_state state = {0}; -#ifdef GPU - state.input = cuda_make_array(recon.data, recon.w*recon.h*recon.c); - state.delta = cuda_make_array(delta.data, delta.w*delta.h*delta.c); - state.truth = cuda_make_array(features, get_network_output_size(net)); - - forward_network_gpu(net, state); - backward_network_gpu(net, state); - - cuda_pull_array(state.delta, delta.data, delta.w*delta.h*delta.c); - - cuda_free(state.input); - cuda_free(state.delta); - cuda_free(state.truth); -#else - state.input = recon.data; - state.delta = delta.data; - state.truth = features; - - forward_network(net, state); - backward_network(net, state); -#endif - - axpy_cpu(recon.w*recon.h*recon.c, 1, delta.data, 1, update.data, 
1); - smooth(recon, update, lambda, smooth_size); - - axpy_cpu(recon.w*recon.h*recon.c, rate, update.data, 1, recon.data, 1); - scal_cpu(recon.w*recon.h*recon.c, momentum, update.data, 1); - - //float mag = mag_array(recon.data, recon.w*recon.h*recon.c); - //scal_cpu(recon.w*recon.h*recon.c, 600/mag, recon.data, 1); - - constrain_image(recon); - free_image(delta); - } -} - - -void run_nightmare(int argc, char **argv) -{ - srand(time(0)); - if(argc < 4){ - fprintf(stderr, "usage: %s %s [cfg] [weights] [image] [layer] [options! (optional)]\n", argv[0], argv[1]); - return; - } - - char *cfg = argv[2]; - char *weights = argv[3]; - char *input = argv[4]; - int max_layer = atoi(argv[5]); - - int range = find_int_arg(argc, argv, "-range", 1); - int norm = find_int_arg(argc, argv, "-norm", 1); - int rounds = find_int_arg(argc, argv, "-rounds", 1); - int iters = find_int_arg(argc, argv, "-iters", 10); - int octaves = find_int_arg(argc, argv, "-octaves", 4); - float zoom = find_float_arg(argc, argv, "-zoom", 1.); - float rate = find_float_arg(argc, argv, "-rate", .04); - float thresh = find_float_arg(argc, argv, "-thresh", 1.); - float rotate = find_float_arg(argc, argv, "-rotate", 0); - float momentum = find_float_arg(argc, argv, "-momentum", .9); - float lambda = find_float_arg(argc, argv, "-lambda", .01); - char *prefix = find_char_arg(argc, argv, "-prefix", 0); - int reconstruct = find_arg(argc, argv, "-reconstruct"); - int smooth_size = find_int_arg(argc, argv, "-smooth", 1); - - network net = parse_network_cfg(cfg); - load_weights(&net, weights); - char *cfgbase = basecfg(cfg); - char *imbase = basecfg(input); - - set_batch_network(&net, 1); - image im = load_image_color(input, 0, 0); - if(0){ - float scale = 1; - if(im.w > 512 || im.h > 512){ - if(im.w > im.h) scale = 512.0/im.w; - else scale = 512.0/im.h; - } - image resized = resize_image(im, scale*im.w, scale*im.h); - free_image(im); - im = resized; - } - - float *features = 0; - image update; - if (reconstruct){ - 
resize_network(&net, im.w, im.h); - - int zz = 0; - network_predict(net, im.data); - image out_im = get_network_image(net); - image crop = crop_image(out_im, zz, zz, out_im.w-2*zz, out_im.h-2*zz); - //flip_image(crop); - image f_im = resize_image(crop, out_im.w, out_im.h); - free_image(crop); - printf("%d features\n", out_im.w*out_im.h*out_im.c); - - - im = resize_image(im, im.w, im.h); - f_im = resize_image(f_im, f_im.w, f_im.h); - features = f_im.data; - - int i; - for(i = 0; i < 14*14*512; ++i){ - features[i] += rand_uniform(-.19, .19); - } - - free_image(im); - im = make_random_image(im.w, im.h, im.c); - update = make_image(im.w, im.h, im.c); - - } - - int e; - int n; - for(e = 0; e < rounds; ++e){ - fprintf(stderr, "Iteration: "); - fflush(stderr); - for(n = 0; n < iters; ++n){ - fprintf(stderr, "%d, ", n); - fflush(stderr); - if(reconstruct){ - reconstruct_picture(net, features, im, update, rate, momentum, lambda, smooth_size, 1); - //if ((n+1)%30 == 0) rate *= .5; - show_image(im, "reconstruction"); -#ifdef OPENCV - wait_key_cv(10); -#endif - }else{ - int layer = max_layer + rand()%range - range/2; - int octave = rand()%octaves; - optimize_picture(&net, im, layer, 1/pow(1.33333333, octave), rate, thresh, norm); - } - } - fprintf(stderr, "done\n"); - if(0){ - image g = grayscale_image(im); - free_image(im); - im = g; - } - char buff[256]; - if (prefix){ - sprintf(buff, "%s/%s_%s_%d_%06d",prefix, imbase, cfgbase, max_layer, e); - }else{ - sprintf(buff, "%s_%s_%d_%06d",imbase, cfgbase, max_layer, e); - } - printf("%d %s\n", e, buff); - save_image(im, buff); - //show_image(im, buff); - //wait_key_cv(0); - - if(rotate){ - image rot = rotate_image(im, rotate); - free_image(im); - im = rot; - } - image crop = crop_image(im, im.w * (1. 
- zoom)/2., im.h * (1.-zoom)/2., im.w*zoom, im.h*zoom); - image resized = resize_image(crop, im.w, im.h); - free_image(im); - free_image(crop); - im = resized; - } -} diff --git a/src/Detector/darknet/src/normalization_layer.c b/src/Detector/darknet/src/normalization_layer.c deleted file mode 100644 index d6af6212b..000000000 --- a/src/Detector/darknet/src/normalization_layer.c +++ /dev/null @@ -1,151 +0,0 @@ -#include "normalization_layer.h" -#include "blas.h" -#include "utils.h" -#include - -layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa) -{ - fprintf(stderr, "Local Response Normalization Layer: %d x %d x %d image, %d size\n", w,h,c,size); - layer layer = { (LAYER_TYPE)0 }; - layer.type = NORMALIZATION; - layer.batch = batch; - layer.h = layer.out_h = h; - layer.w = layer.out_w = w; - layer.c = layer.out_c = c; - layer.kappa = kappa; - layer.size = size; - layer.alpha = alpha; - layer.beta = beta; - layer.output = (float*)xcalloc(h * w * c * batch, sizeof(float)); - layer.delta = (float*)xcalloc(h * w * c * batch, sizeof(float)); - layer.squared = (float*)xcalloc(h * w * c * batch, sizeof(float)); - layer.norms = (float*)xcalloc(h * w * c * batch, sizeof(float)); - layer.inputs = w*h*c; - layer.outputs = layer.inputs; - - layer.forward = forward_normalization_layer; - layer.backward = backward_normalization_layer; - #ifdef GPU - layer.forward_gpu = forward_normalization_layer_gpu; - layer.backward_gpu = backward_normalization_layer_gpu; - - layer.output_gpu = cuda_make_array(layer.output, h * w * c * batch); - layer.delta_gpu = cuda_make_array(layer.delta, h * w * c * batch); - layer.squared_gpu = cuda_make_array(layer.squared, h * w * c * batch); - layer.norms_gpu = cuda_make_array(layer.norms, h * w * c * batch); - #endif - return layer; -} - -void resize_normalization_layer(layer *layer, int w, int h) -{ - int c = layer->c; - int batch = layer->batch; - layer->h = h; - layer->w = w; - layer->out_h 
= h; - layer->out_w = w; - layer->inputs = w*h*c; - layer->outputs = layer->inputs; - layer->output = (float*)xrealloc(layer->output, h * w * c * batch * sizeof(float)); - layer->delta = (float*)xrealloc(layer->delta, h * w * c * batch * sizeof(float)); - layer->squared = (float*)xrealloc(layer->squared, h * w * c * batch * sizeof(float)); - layer->norms = (float*)xrealloc(layer->norms, h * w * c * batch * sizeof(float)); -#ifdef GPU - cuda_free(layer->output_gpu); - cuda_free(layer->delta_gpu); - cuda_free(layer->squared_gpu); - cuda_free(layer->norms_gpu); - layer->output_gpu = cuda_make_array(layer->output, h * w * c * batch); - layer->delta_gpu = cuda_make_array(layer->delta, h * w * c * batch); - layer->squared_gpu = cuda_make_array(layer->squared, h * w * c * batch); - layer->norms_gpu = cuda_make_array(layer->norms, h * w * c * batch); -#endif -} - -void forward_normalization_layer(const layer layer, network_state state) -{ - int k,b; - int w = layer.w; - int h = layer.h; - int c = layer.c; - scal_cpu(w*h*c*layer.batch, 0, layer.squared, 1); - - for(b = 0; b < layer.batch; ++b){ - float *squared = layer.squared + w*h*c*b; - float *norms = layer.norms + w*h*c*b; - float *input = state.input + w*h*c*b; - pow_cpu(w*h*c, 2, input, 1, squared, 1); - - const_cpu(w*h, layer.kappa, norms, 1); - for(k = 0; k < layer.size/2; ++k){ - axpy_cpu(w*h, layer.alpha, squared + w*h*k, 1, norms, 1); - } - - for(k = 1; k < layer.c; ++k){ - copy_cpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1); - int prev = k - ((layer.size-1)/2) - 1; - int next = k + (layer.size/2); - if(prev >= 0) axpy_cpu(w*h, -layer.alpha, squared + w*h*prev, 1, norms + w*h*k, 1); - if(next < layer.c) axpy_cpu(w*h, layer.alpha, squared + w*h*next, 1, norms + w*h*k, 1); - } - } - pow_cpu(w*h*c*layer.batch, -layer.beta, layer.norms, 1, layer.output, 1); - mul_cpu(w*h*c*layer.batch, state.input, 1, layer.output, 1); -} - -void backward_normalization_layer(const layer layer, network_state state) -{ - // TODO This 
is approximate ;-) - // Also this should add in to delta instead of overwritting. - - int w = layer.w; - int h = layer.h; - int c = layer.c; - pow_cpu(w*h*c*layer.batch, -layer.beta, layer.norms, 1, state.delta, 1); - mul_cpu(w*h*c*layer.batch, layer.delta, 1, state.delta, 1); -} - -#ifdef GPU -void forward_normalization_layer_gpu(const layer layer, network_state state) -{ - int k,b; - int w = layer.w; - int h = layer.h; - int c = layer.c; - scal_ongpu(w*h*c*layer.batch, 0, layer.squared_gpu, 1); - - for(b = 0; b < layer.batch; ++b){ - float *squared = layer.squared_gpu + w*h*c*b; - float *norms = layer.norms_gpu + w*h*c*b; - float *input = state.input + w*h*c*b; - pow_ongpu(w*h*c, 2, input, 1, squared, 1); - - const_ongpu(w*h, layer.kappa, norms, 1); - for(k = 0; k < layer.size/2; ++k){ - axpy_ongpu(w*h, layer.alpha, squared + w*h*k, 1, norms, 1); - } - - for(k = 1; k < layer.c; ++k){ - copy_ongpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1); - int prev = k - ((layer.size-1)/2) - 1; - int next = k + (layer.size/2); - if(prev >= 0) axpy_ongpu(w*h, -layer.alpha, squared + w*h*prev, 1, norms + w*h*k, 1); - if(next < layer.c) axpy_ongpu(w*h, layer.alpha, squared + w*h*next, 1, norms + w*h*k, 1); - } - } - pow_ongpu(w*h*c*layer.batch, -layer.beta, layer.norms_gpu, 1, layer.output_gpu, 1); - mul_ongpu(w*h*c*layer.batch, state.input, 1, layer.output_gpu, 1); -} - -void backward_normalization_layer_gpu(const layer layer, network_state state) -{ - // TODO This is approximate ;-) - - int w = layer.w; - int h = layer.h; - int c = layer.c; - pow_ongpu(w*h*c*layer.batch, -layer.beta, layer.norms_gpu, 1, state.delta, 1); - mul_ongpu(w*h*c*layer.batch, layer.delta_gpu, 1, state.delta, 1); -} -#endif diff --git a/src/Detector/darknet/src/normalization_layer.h b/src/Detector/darknet/src/normalization_layer.h deleted file mode 100644 index 2ac9b0f69..000000000 --- a/src/Detector/darknet/src/normalization_layer.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef NORMALIZATION_LAYER_H -#define 
NORMALIZATION_LAYER_H - -#include "image.h" -#include "layer.h" -#include "network.h" - -#ifdef __cplusplus -extern "C" { -#endif -layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa); -void resize_normalization_layer(layer *layer, int w, int h); -void forward_normalization_layer(const layer layer, network_state state); -void backward_normalization_layer(const layer layer, network_state state); -void visualize_normalization_layer(layer layer, char *window); - -#ifdef GPU -void forward_normalization_layer_gpu(const layer layer, network_state state); -void backward_normalization_layer_gpu(const layer layer, network_state state); -#endif - -#ifdef __cplusplus -} -#endif -#endif diff --git a/src/Detector/darknet/src/option_list.c b/src/Detector/darknet/src/option_list.c deleted file mode 100644 index 306f0e376..000000000 --- a/src/Detector/darknet/src/option_list.c +++ /dev/null @@ -1,152 +0,0 @@ -#include -#include -#include -#include "option_list.h" -#include "utils.h" -#include "data.h" - -list *read_data_cfg(char *filename) -{ - FILE *file = fopen(filename, "r"); - if(file == 0) file_error(filename); - char *line; - int nu = 0; - list *options = make_list(); - while((line=fgetl(file)) != 0){ - ++nu; - strip(line); - switch(line[0]){ - case '\0': - case '#': - case ';': - free(line); - break; - default: - if(!read_option(line, options)){ - fprintf(stderr, "Config file error line %d, could parse: %s\n", nu, line); - free(line); - } - break; - } - } - fclose(file); - return options; -} - -metadata get_metadata(char *file) -{ - metadata m = { 0 }; - list *options = read_data_cfg(file); - - char *name_list = option_find_str(options, "names", 0); - if (!name_list) name_list = option_find_str(options, "labels", 0); - if (!name_list) { - fprintf(stderr, "No names or labels found\n"); - } - else { - m.names = get_labels(name_list); - } - m.classes = option_find_int(options, "classes", 2); - free_list(options); - 
if(name_list) { - printf("Loaded - names_list: %s, classes = %d \n", name_list, m.classes); - } - return m; -} - -int read_option(char *s, list *options) -{ - size_t i; - size_t len = strlen(s); - char *val = 0; - for(i = 0; i < len; ++i){ - if(s[i] == '='){ - s[i] = '\0'; - val = s+i+1; - break; - } - } - if(i == len-1) return 0; - char *key = s; - option_insert(options, key, val); - return 1; -} - -void option_insert(list *l, char *key, char *val) -{ - kvp* p = (kvp*)xmalloc(sizeof(kvp)); - p->key = key; - p->val = val; - p->used = 0; - list_insert(l, p); -} - -void option_unused(list *l) -{ - node *n = l->front; - while(n){ - kvp *p = (kvp *)n->val; - if(!p->used){ - fprintf(stderr, "Unused field: '%s = %s'\n", p->key, p->val); - } - n = n->next; - } -} - -char *option_find(list *l, char *key) -{ - node *n = l->front; - while(n){ - kvp *p = (kvp *)n->val; - if(strcmp(p->key, key) == 0){ - p->used = 1; - return p->val; - } - n = n->next; - } - return 0; -} -char *option_find_str(list *l, char *key, char *def) -{ - char *v = option_find(l, key); - if(v) return v; - if(def) fprintf(stderr, "%s: Using default '%s'\n", key, def); - return def; -} - -char *option_find_str_quiet(list *l, char *key, char *def) -{ - char *v = option_find(l, key); - if (v) return v; - return def; -} - -int option_find_int(list *l, char *key, int def) -{ - char *v = option_find(l, key); - if(v) return atoi(v); - fprintf(stderr, "%s: Using default '%d'\n", key, def); - return def; -} - -int option_find_int_quiet(list *l, char *key, int def) -{ - char *v = option_find(l, key); - if(v) return atoi(v); - return def; -} - -float option_find_float_quiet(list *l, char *key, float def) -{ - char *v = option_find(l, key); - if(v) return atof(v); - return def; -} - -float option_find_float(list *l, char *key, float def) -{ - char *v = option_find(l, key); - if(v) return atof(v); - fprintf(stderr, "%s: Using default '%lf'\n", key, def); - return def; -} diff --git 
a/src/Detector/darknet/src/option_list.h b/src/Detector/darknet/src/option_list.h deleted file mode 100644 index 726b559aa..000000000 --- a/src/Detector/darknet/src/option_list.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifndef OPTION_LIST_H -#define OPTION_LIST_H -#include "darknet.h" -#include "list.h" - -typedef struct{ - char *key; - char *val; - int used; -} kvp; - -#ifdef __cplusplus -extern "C" { -#endif - -list *read_data_cfg(char *filename); -int read_option(char *s, list *options); -void option_insert(list *l, char *key, char *val); -char *option_find(list *l, char *key); -char *option_find_str(list *l, char *key, char *def); -char *option_find_str_quiet(list *l, char *key, char *def); -int option_find_int(list *l, char *key, int def); -int option_find_int_quiet(list *l, char *key, int def); -float option_find_float(list *l, char *key, float def); -float option_find_float_quiet(list *l, char *key, float def); -void option_unused(list *l); - -//typedef struct { -// int classes; -// char **names; -//} metadata; - -//LIB_API metadata get_metadata(char *file); - -#ifdef __cplusplus -} -#endif -#endif diff --git a/src/Detector/darknet/src/parser.c b/src/Detector/darknet/src/parser.c deleted file mode 100644 index be566f223..000000000 --- a/src/Detector/darknet/src/parser.c +++ /dev/null @@ -1,2275 +0,0 @@ -#include -#include -#include -#include - -#include "activation_layer.h" -#include "activations.h" -#include "assert.h" -#include "avgpool_layer.h" -#include "batchnorm_layer.h" -#include "blas.h" -#include "connected_layer.h" -#include "convolutional_layer.h" -#include "cost_layer.h" -#include "crnn_layer.h" -#include "crop_layer.h" -#include "detection_layer.h" -#include "dropout_layer.h" -#include "gru_layer.h" -#include "list.h" -#include "local_layer.h" -#include "lstm_layer.h" -#include "conv_lstm_layer.h" -#include "maxpool_layer.h" -#include "normalization_layer.h" -#include "option_list.h" -#include "parser.h" -#include "region_layer.h" -#include 
"reorg_layer.h" -#include "reorg_old_layer.h" -#include "rnn_layer.h" -#include "route_layer.h" -#include "shortcut_layer.h" -#include "scale_channels_layer.h" -#include "sam_layer.h" -#include "softmax_layer.h" -#include "utils.h" -#include "upsample_layer.h" -#include "version.h" -#include "yolo_layer.h" -#include "gaussian_yolo_layer.h" - -typedef struct{ - char *type; - list *options; -}section; - -list *read_cfg(char *filename); - -LAYER_TYPE string_to_layer_type(char * type) -{ - - if (strcmp(type, "[shortcut]")==0) return SHORTCUT; - if (strcmp(type, "[scale_channels]") == 0) return SCALE_CHANNELS; - if (strcmp(type, "[sam]") == 0) return SAM; - if (strcmp(type, "[crop]")==0) return CROP; - if (strcmp(type, "[cost]")==0) return COST; - if (strcmp(type, "[detection]")==0) return DETECTION; - if (strcmp(type, "[region]")==0) return REGION; - if (strcmp(type, "[yolo]") == 0) return YOLO; - if (strcmp(type, "[Gaussian_yolo]") == 0) return GAUSSIAN_YOLO; - if (strcmp(type, "[local]")==0) return LOCAL; - if (strcmp(type, "[conv]")==0 - || strcmp(type, "[convolutional]")==0) return CONVOLUTIONAL; - if (strcmp(type, "[activation]")==0) return ACTIVE; - if (strcmp(type, "[net]")==0 - || strcmp(type, "[network]")==0) return NETWORK; - if (strcmp(type, "[crnn]")==0) return CRNN; - if (strcmp(type, "[gru]")==0) return GRU; - if (strcmp(type, "[lstm]")==0) return LSTM; - if (strcmp(type, "[conv_lstm]") == 0) return CONV_LSTM; - if (strcmp(type, "[history]") == 0) return HISTORY; - if (strcmp(type, "[rnn]")==0) return RNN; - if (strcmp(type, "[conn]")==0 - || strcmp(type, "[connected]")==0) return CONNECTED; - if (strcmp(type, "[max]")==0 - || strcmp(type, "[maxpool]")==0) return MAXPOOL; - if (strcmp(type, "[local_avg]") == 0 - || strcmp(type, "[local_avgpool]") == 0) return LOCAL_AVGPOOL; - if (strcmp(type, "[reorg3d]")==0) return REORG; - if (strcmp(type, "[reorg]") == 0) return REORG_OLD; - if (strcmp(type, "[avg]")==0 - || strcmp(type, "[avgpool]")==0) return 
AVGPOOL; - if (strcmp(type, "[dropout]")==0) return DROPOUT; - if (strcmp(type, "[lrn]")==0 - || strcmp(type, "[normalization]")==0) return NORMALIZATION; - if (strcmp(type, "[batchnorm]")==0) return BATCHNORM; - if (strcmp(type, "[soft]")==0 - || strcmp(type, "[softmax]")==0) return SOFTMAX; - if (strcmp(type, "[contrastive]") == 0) return CONTRASTIVE; - if (strcmp(type, "[route]")==0) return ROUTE; - if (strcmp(type, "[upsample]") == 0) return UPSAMPLE; - if (strcmp(type, "[empty]") == 0) return EMPTY; - return BLANK; -} - -void free_section(section *s) -{ - free(s->type); - node *n = s->options->front; - while(n){ - kvp *pair = (kvp *)n->val; - free(pair->key); - free(pair); - node *next = n->next; - free(n); - n = next; - } - free(s->options); - free(s); -} - -void parse_data(char *data, float *a, int n) -{ - int i; - if(!data) return; - char *curr = data; - char *next = data; - int done = 0; - for(i = 0; i < n && !done; ++i){ - while(*++next !='\0' && *next != ','); - if(*next == '\0') done = 1; - *next = '\0'; - sscanf(curr, "%g", &a[i]); - curr = next+1; - } -} - -typedef struct size_params{ - int batch; - int inputs; - int h; - int w; - int c; - int index; - int time_steps; - int train; - network net; -} size_params; - -local_layer parse_local(list *options, size_params params) -{ - int n = option_find_int(options, "filters",1); - int size = option_find_int(options, "size",1); - int stride = option_find_int(options, "stride",1); - int pad = option_find_int(options, "pad",0); - char *activation_s = option_find_str(options, "activation", "logistic"); - ACTIVATION activation = get_activation(activation_s); - - int batch,h,w,c; - h = params.h; - w = params.w; - c = params.c; - batch=params.batch; - if(!(h && w && c)) error("Layer before local layer must output image."); - - local_layer layer = make_local_layer(batch,h,w,c,n,size,stride,pad,activation); - - return layer; -} - -convolutional_layer parse_convolutional(list *options, size_params params) -{ - int n 
= option_find_int(options, "filters",1); - int groups = option_find_int_quiet(options, "groups", 1); - int size = option_find_int(options, "size",1); - int stride = -1; - //int stride = option_find_int(options, "stride",1); - int stride_x = option_find_int_quiet(options, "stride_x", -1); - int stride_y = option_find_int_quiet(options, "stride_y", -1); - if (stride_x < 1 || stride_y < 1) { - stride = option_find_int(options, "stride", 1); - if (stride_x < 1) stride_x = stride; - if (stride_y < 1) stride_y = stride; - } - else { - stride = option_find_int_quiet(options, "stride", 1); - } - int dilation = option_find_int_quiet(options, "dilation", 1); - int antialiasing = option_find_int_quiet(options, "antialiasing", 0); - if (size == 1) dilation = 1; - int pad = option_find_int_quiet(options, "pad",0); - int padding = option_find_int_quiet(options, "padding",0); - if(pad) padding = size/2; - - char *activation_s = option_find_str(options, "activation", "logistic"); - ACTIVATION activation = get_activation(activation_s); - - int assisted_excitation = option_find_float_quiet(options, "assisted_excitation", 0); - - int share_index = option_find_int_quiet(options, "share_index", -1000000000); - convolutional_layer *share_layer = NULL; - if(share_index >= 0) share_layer = ¶ms.net.layers[share_index]; - else if(share_index != -1000000000) share_layer = ¶ms.net.layers[params.index + share_index]; - - int batch,h,w,c; - h = params.h; - w = params.w; - c = params.c; - batch=params.batch; - if(!(h && w && c)) error("Layer before convolutional layer must output image."); - int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); - int cbn = option_find_int_quiet(options, "cbn", 0); - if (cbn) batch_normalize = 2; - int binary = option_find_int_quiet(options, "binary", 0); - int xnor = option_find_int_quiet(options, "xnor", 0); - int use_bin_output = option_find_int_quiet(options, "bin_output", 0); - int sway = option_find_int_quiet(options, "sway", 0); - int 
rotate = option_find_int_quiet(options, "rotate", 0); - int stretch = option_find_int_quiet(options, "stretch", 0); - int stretch_sway = option_find_int_quiet(options, "stretch_sway", 0); - if ((sway + rotate + stretch + stretch_sway) > 1) { - printf(" Error: should be used only 1 param: sway=1, rotate=1 or stretch=1 in the [convolutional] layer \n"); - exit(0); - } - int deform = sway || rotate || stretch || stretch_sway; - if (deform && size == 1) { - printf(" Error: params (sway=1, rotate=1 or stretch=1) should be used only with size >=3 in the [convolutional] layer \n"); - exit(0); - } - - convolutional_layer layer = make_convolutional_layer(batch,1,h,w,c,n,groups,size,stride_x,stride_y,dilation,padding,activation, batch_normalize, binary, xnor, params.net.adam, use_bin_output, params.index, antialiasing, share_layer, assisted_excitation, deform, params.train); - layer.flipped = option_find_int_quiet(options, "flipped", 0); - layer.dot = option_find_float_quiet(options, "dot", 0); - layer.sway = sway; - layer.rotate = rotate; - layer.stretch = stretch; - layer.stretch_sway = stretch_sway; - layer.angle = option_find_float_quiet(options, "angle", 15); - layer.grad_centr = option_find_int_quiet(options, "grad_centr", 0); - layer.reverse = option_find_float_quiet(options, "reverse", 0); - layer.coordconv = option_find_int_quiet(options, "coordconv", 0); - - if(params.net.adam){ - layer.B1 = params.net.B1; - layer.B2 = params.net.B2; - layer.eps = params.net.eps; - } - - return layer; -} - -layer parse_crnn(list *options, size_params params) -{ - int size = option_find_int_quiet(options, "size", 3); - int stride = option_find_int_quiet(options, "stride", 1); - int dilation = option_find_int_quiet(options, "dilation", 1); - int pad = option_find_int_quiet(options, "pad", 0); - int padding = option_find_int_quiet(options, "padding", 0); - if (pad) padding = size / 2; - - int output_filters = option_find_int(options, "output",1); - int hidden_filters = 
option_find_int(options, "hidden",1); - int groups = option_find_int_quiet(options, "groups", 1); - char *activation_s = option_find_str(options, "activation", "logistic"); - ACTIVATION activation = get_activation(activation_s); - int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); - int xnor = option_find_int_quiet(options, "xnor", 0); - - layer l = make_crnn_layer(params.batch, params.h, params.w, params.c, hidden_filters, output_filters, groups, params.time_steps, size, stride, dilation, padding, activation, batch_normalize, xnor, params.train); - - l.shortcut = option_find_int_quiet(options, "shortcut", 0); - - return l; -} - -layer parse_rnn(list *options, size_params params) -{ - int output = option_find_int(options, "output",1); - int hidden = option_find_int(options, "hidden",1); - char *activation_s = option_find_str(options, "activation", "logistic"); - ACTIVATION activation = get_activation(activation_s); - int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); - int logistic = option_find_int_quiet(options, "logistic", 0); - - layer l = make_rnn_layer(params.batch, params.inputs, hidden, output, params.time_steps, activation, batch_normalize, logistic); - - l.shortcut = option_find_int_quiet(options, "shortcut", 0); - - return l; -} - -layer parse_gru(list *options, size_params params) -{ - int output = option_find_int(options, "output",1); - int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); - - layer l = make_gru_layer(params.batch, params.inputs, output, params.time_steps, batch_normalize); - - return l; -} - -layer parse_lstm(list *options, size_params params) -{ - int output = option_find_int(options, "output",1); - int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); - - layer l = make_lstm_layer(params.batch, params.inputs, output, params.time_steps, batch_normalize); - - return l; -} - -layer parse_conv_lstm(list *options, size_params params) -{ - // a 
ConvLSTM with a larger transitional kernel should be able to capture faster motions - int size = option_find_int_quiet(options, "size", 3); - int stride = option_find_int_quiet(options, "stride", 1); - int dilation = option_find_int_quiet(options, "dilation", 1); - int pad = option_find_int_quiet(options, "pad", 0); - int padding = option_find_int_quiet(options, "padding", 0); - if (pad) padding = size / 2; - - int output_filters = option_find_int(options, "output", 1); - int groups = option_find_int_quiet(options, "groups", 1); - char *activation_s = option_find_str(options, "activation", "linear"); - ACTIVATION activation = get_activation(activation_s); - int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); - int xnor = option_find_int_quiet(options, "xnor", 0); - int peephole = option_find_int_quiet(options, "peephole", 0); - int bottleneck = option_find_int_quiet(options, "bottleneck", 0); - - layer l = make_conv_lstm_layer(params.batch, params.h, params.w, params.c, output_filters, groups, params.time_steps, size, stride, dilation, padding, activation, batch_normalize, peephole, xnor, bottleneck, params.train); - - l.state_constrain = option_find_int_quiet(options, "state_constrain", params.time_steps * 32); - l.shortcut = option_find_int_quiet(options, "shortcut", 0); - - char *lstm_activation_s = option_find_str(options, "lstm_activation", "tanh"); - l.lstm_activation = get_activation(lstm_activation_s); - l.time_normalizer = option_find_float_quiet(options, "time_normalizer", 1.0); - - return l; -} - -layer parse_history(list *options, size_params params) -{ - int history_size = option_find_int(options, "history_size", 4); - layer l = make_history_layer(params.batch, params.h, params.w, params.c, history_size, params.time_steps, params.train); - return l; -} - -connected_layer parse_connected(list *options, size_params params) -{ - int output = option_find_int(options, "output",1); - char *activation_s = option_find_str(options, 
"activation", "logistic"); - ACTIVATION activation = get_activation(activation_s); - int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0); - - connected_layer layer = make_connected_layer(params.batch, 1, params.inputs, output, activation, batch_normalize); - - return layer; -} - -softmax_layer parse_softmax(list *options, size_params params) -{ - int groups = option_find_int_quiet(options, "groups", 1); - softmax_layer layer = make_softmax_layer(params.batch, params.inputs, groups); - layer.temperature = option_find_float_quiet(options, "temperature", 1); - char *tree_file = option_find_str(options, "tree", 0); - if (tree_file) layer.softmax_tree = read_tree(tree_file); - layer.w = params.w; - layer.h = params.h; - layer.c = params.c; - layer.spatial = option_find_float_quiet(options, "spatial", 0); - layer.noloss = option_find_int_quiet(options, "noloss", 0); - return layer; -} - -contrastive_layer parse_contrastive(list *options, size_params params) -{ - int classes = option_find_int(options, "classes", 1000); - layer *yolo_layer = NULL; - int yolo_layer_id = option_find_int_quiet(options, "yolo_layer", 0); - if (yolo_layer_id < 0) yolo_layer_id = params.index + yolo_layer_id; - if(yolo_layer_id != 0) yolo_layer = params.net.layers + yolo_layer_id; - if (yolo_layer->type != YOLO) { - printf(" Error: [contrastive] layer should point to the [yolo] layer instead of %d layer! 
\n", yolo_layer_id); - getchar(); - exit(0); - } - - contrastive_layer layer = make_contrastive_layer(params.batch, params.w, params.h, params.c, classes, params.inputs, yolo_layer); - layer.temperature = option_find_float_quiet(options, "temperature", 1); - layer.steps = params.time_steps; - layer.cls_normalizer = option_find_float_quiet(options, "cls_normalizer", 1); - layer.max_delta = option_find_float_quiet(options, "max_delta", FLT_MAX); // set 10 - layer.contrastive_neg_max = option_find_int_quiet(options, "contrastive_neg_max", 3); - return layer; -} - -int *parse_yolo_mask(char *a, int *num) -{ - int *mask = 0; - if (a) { - int len = strlen(a); - int n = 1; - int i; - for (i = 0; i < len; ++i) { - if (a[i] == '#') break; - if (a[i] == ',') ++n; - } - mask = (int*)xcalloc(n, sizeof(int)); - for (i = 0; i < n; ++i) { - int val = atoi(a); - mask[i] = val; - a = strchr(a, ',') + 1; - } - *num = n; - } - return mask; -} - -float *get_classes_multipliers(char *cpc, const int classes, const float max_delta) -{ - float *classes_multipliers = NULL; - if (cpc) { - int classes_counters = classes; - int *counters_per_class = parse_yolo_mask(cpc, &classes_counters); - if (classes_counters != classes) { - printf(" number of values in counters_per_class = %d doesn't match with classes = %d \n", classes_counters, classes); - exit(0); - } - float max_counter = 0; - int i; - for (i = 0; i < classes_counters; ++i) { - if (counters_per_class[i] < 1) counters_per_class[i] = 1; - if (max_counter < counters_per_class[i]) max_counter = counters_per_class[i]; - } - classes_multipliers = (float *)calloc(classes_counters, sizeof(float)); - for (i = 0; i < classes_counters; ++i) { - classes_multipliers[i] = max_counter / counters_per_class[i]; - if(classes_multipliers[i] > max_delta) classes_multipliers[i] = max_delta; - } - free(counters_per_class); - printf(" classes_multipliers: "); - for (i = 0; i < classes_counters; ++i) printf("%.1f, ", classes_multipliers[i]); - printf("\n"); 
- } - return classes_multipliers; -} - -layer parse_yolo(list *options, size_params params) -{ - int classes = option_find_int(options, "classes", 20); - int total = option_find_int(options, "num", 1); - int num = total; - char *a = option_find_str(options, "mask", 0); - int *mask = parse_yolo_mask(a, &num); - int max_boxes = option_find_int_quiet(options, "max", 200); - layer l = make_yolo_layer(params.batch, params.w, params.h, num, total, mask, classes, max_boxes); - if (l.outputs != params.inputs) { - printf("Error: l.outputs == params.inputs \n"); - printf("filters= in the [convolutional]-layer doesn't correspond to classes= or mask= in [yolo]-layer \n"); - exit(EXIT_FAILURE); - } - //assert(l.outputs == params.inputs); - - l.show_details = option_find_int_quiet(options, "show_details", 0); - l.max_delta = option_find_float_quiet(options, "max_delta", FLT_MAX); // set 10 - char *cpc = option_find_str(options, "counters_per_class", 0); - l.classes_multipliers = get_classes_multipliers(cpc, classes, l.max_delta); - - l.label_smooth_eps = option_find_float_quiet(options, "label_smooth_eps", 0.0f); - l.scale_x_y = option_find_float_quiet(options, "scale_x_y", 1); - l.objectness_smooth = option_find_int_quiet(options, "objectness_smooth", 0); - l.new_coords = option_find_int_quiet(options, "new_coords", 0); - l.iou_normalizer = option_find_float_quiet(options, "iou_normalizer", 0.75); - l.obj_normalizer = option_find_float_quiet(options, "obj_normalizer", 1); - l.cls_normalizer = option_find_float_quiet(options, "cls_normalizer", 1); - l.delta_normalizer = option_find_float_quiet(options, "delta_normalizer", 1); - char *iou_loss = option_find_str_quiet(options, "iou_loss", "mse"); // "iou"); - - if (strcmp(iou_loss, "mse") == 0) l.iou_loss = MSE; - else if (strcmp(iou_loss, "giou") == 0) l.iou_loss = GIOU; - else if (strcmp(iou_loss, "diou") == 0) l.iou_loss = DIOU; - else if (strcmp(iou_loss, "ciou") == 0) l.iou_loss = CIOU; - else l.iou_loss = IOU; - 
fprintf(stderr, "[yolo] params: iou loss: %s (%d), iou_norm: %2.2f, obj_norm: %2.2f, cls_norm: %2.2f, delta_norm: %2.2f, scale_x_y: %2.2f\n", - iou_loss, l.iou_loss, l.iou_normalizer, l.obj_normalizer, l.cls_normalizer, l.delta_normalizer, l.scale_x_y); - - char *iou_thresh_kind_str = option_find_str_quiet(options, "iou_thresh_kind", "iou"); - if (strcmp(iou_thresh_kind_str, "iou") == 0) l.iou_thresh_kind = IOU; - else if (strcmp(iou_thresh_kind_str, "giou") == 0) l.iou_thresh_kind = GIOU; - else if (strcmp(iou_thresh_kind_str, "diou") == 0) l.iou_thresh_kind = DIOU; - else if (strcmp(iou_thresh_kind_str, "ciou") == 0) l.iou_thresh_kind = CIOU; - else { - fprintf(stderr, " Wrong iou_thresh_kind = %s \n", iou_thresh_kind_str); - l.iou_thresh_kind = IOU; - } - - l.beta_nms = option_find_float_quiet(options, "beta_nms", 0.6); - char *nms_kind = option_find_str_quiet(options, "nms_kind", "default"); - if (strcmp(nms_kind, "default") == 0) l.nms_kind = DEFAULT_NMS; - else { - if (strcmp(nms_kind, "greedynms") == 0) l.nms_kind = GREEDY_NMS; - else if (strcmp(nms_kind, "diounms") == 0) l.nms_kind = DIOU_NMS; - else l.nms_kind = DEFAULT_NMS; - printf("nms_kind: %s (%d), beta = %f \n", nms_kind, l.nms_kind, l.beta_nms); - } - - l.jitter = option_find_float(options, "jitter", .2); - l.resize = option_find_float_quiet(options, "resize", 1.0); - l.focal_loss = option_find_int_quiet(options, "focal_loss", 0); - - l.ignore_thresh = option_find_float(options, "ignore_thresh", .5); - l.truth_thresh = option_find_float(options, "truth_thresh", 1); - l.iou_thresh = option_find_float_quiet(options, "iou_thresh", 1); // recommended to use iou_thresh=0.213 in [yolo] - l.random = option_find_float_quiet(options, "random", 0); - - l.track_history_size = option_find_int_quiet(options, "track_history_size", 5); - l.sim_thresh = option_find_int_quiet(options, "sim_thresh", 0.8); - l.dets_for_track = option_find_int_quiet(options, "dets_for_track", 1); - l.dets_for_show = 
option_find_int_quiet(options, "dets_for_show", 1); - l.track_ciou_norm = option_find_float_quiet(options, "track_ciou_norm", 0.01); - int embedding_layer_id = option_find_int_quiet(options, "embedding_layer", 999999); - if (embedding_layer_id < 0) embedding_layer_id = params.index + embedding_layer_id; - if (embedding_layer_id != 999999) { - printf(" embedding_layer_id = %d, ", embedding_layer_id); - layer le = params.net.layers[embedding_layer_id]; - l.embedding_layer_id = embedding_layer_id; - l.embedding_output = (float*)xcalloc(le.batch * le.outputs, sizeof(float)); - l.embedding_size = le.n / l.n; - printf(" embedding_size = %d \n", l.embedding_size); - if (le.n % l.n != 0) { - printf(" Warning: filters=%d number in embedding_layer=%d isn't divisable by number of anchors %d \n", le.n, embedding_layer_id, l.n); - getchar(); - } - } - - char *map_file = option_find_str(options, "map", 0); - if (map_file) l.map = read_map(map_file); - - a = option_find_str(options, "anchors", 0); - if (a) { - int len = strlen(a); - int n = 1; - int i; - for (i = 0; i < len; ++i) { - if (a[i] == '#') break; - if (a[i] == ',') ++n; - } - for (i = 0; i < n && i < total*2; ++i) { - float bias = atof(a); - l.biases[i] = bias; - a = strchr(a, ',') + 1; - } - } - return l; -} - - -int *parse_gaussian_yolo_mask(char *a, int *num) // Gaussian_YOLOv3 -{ - int *mask = 0; - if (a) { - int len = strlen(a); - int n = 1; - int i; - for (i = 0; i < len; ++i) { - if (a[i] == '#') break; - if (a[i] == ',') ++n; - } - mask = (int *)calloc(n, sizeof(int)); - for (i = 0; i < n; ++i) { - int val = atoi(a); - mask[i] = val; - a = strchr(a, ',') + 1; - } - *num = n; - } - return mask; -} - - -layer parse_gaussian_yolo(list *options, size_params params) // Gaussian_YOLOv3 -{ - int classes = option_find_int(options, "classes", 20); - int max_boxes = option_find_int_quiet(options, "max", 200); - int total = option_find_int(options, "num", 1); - int num = total; - - char *a = option_find_str(options, 
"mask", 0); - int *mask = parse_gaussian_yolo_mask(a, &num); - layer l = make_gaussian_yolo_layer(params.batch, params.w, params.h, num, total, mask, classes, max_boxes); - if (l.outputs != params.inputs) { - printf("Error: l.outputs == params.inputs \n"); - printf("filters= in the [convolutional]-layer doesn't correspond to classes= or mask= in [Gaussian_yolo]-layer \n"); - exit(EXIT_FAILURE); - } - //assert(l.outputs == params.inputs); - l.max_delta = option_find_float_quiet(options, "max_delta", FLT_MAX); // set 10 - char *cpc = option_find_str(options, "counters_per_class", 0); - l.classes_multipliers = get_classes_multipliers(cpc, classes, l.max_delta); - - l.label_smooth_eps = option_find_float_quiet(options, "label_smooth_eps", 0.0f); - l.scale_x_y = option_find_float_quiet(options, "scale_x_y", 1); - l.objectness_smooth = option_find_int_quiet(options, "objectness_smooth", 0); - l.uc_normalizer = option_find_float_quiet(options, "uc_normalizer", 1.0); - l.iou_normalizer = option_find_float_quiet(options, "iou_normalizer", 0.75); - l.obj_normalizer = option_find_float_quiet(options, "obj_normalizer", 1.0); - l.cls_normalizer = option_find_float_quiet(options, "cls_normalizer", 1); - l.delta_normalizer = option_find_float_quiet(options, "delta_normalizer", 1); - char *iou_loss = option_find_str_quiet(options, "iou_loss", "mse"); // "iou"); - - if (strcmp(iou_loss, "mse") == 0) l.iou_loss = MSE; - else if (strcmp(iou_loss, "giou") == 0) l.iou_loss = GIOU; - else if (strcmp(iou_loss, "diou") == 0) l.iou_loss = DIOU; - else if (strcmp(iou_loss, "ciou") == 0) l.iou_loss = CIOU; - else l.iou_loss = IOU; - - char *iou_thresh_kind_str = option_find_str_quiet(options, "iou_thresh_kind", "iou"); - if (strcmp(iou_thresh_kind_str, "iou") == 0) l.iou_thresh_kind = IOU; - else if (strcmp(iou_thresh_kind_str, "giou") == 0) l.iou_thresh_kind = GIOU; - else if (strcmp(iou_thresh_kind_str, "diou") == 0) l.iou_thresh_kind = DIOU; - else if (strcmp(iou_thresh_kind_str, "ciou") 
== 0) l.iou_thresh_kind = CIOU; - else { - fprintf(stderr, " Wrong iou_thresh_kind = %s \n", iou_thresh_kind_str); - l.iou_thresh_kind = IOU; - } - - l.beta_nms = option_find_float_quiet(options, "beta_nms", 0.6); - char *nms_kind = option_find_str_quiet(options, "nms_kind", "default"); - if (strcmp(nms_kind, "default") == 0) l.nms_kind = DEFAULT_NMS; - else { - if (strcmp(nms_kind, "greedynms") == 0) l.nms_kind = GREEDY_NMS; - else if (strcmp(nms_kind, "diounms") == 0) l.nms_kind = DIOU_NMS; - else if (strcmp(nms_kind, "cornersnms") == 0) l.nms_kind = CORNERS_NMS; - else l.nms_kind = DEFAULT_NMS; - printf("nms_kind: %s (%d), beta = %f \n", nms_kind, l.nms_kind, l.beta_nms); - } - - char *yolo_point = option_find_str_quiet(options, "yolo_point", "center"); - if (strcmp(yolo_point, "left_top") == 0) l.yolo_point = YOLO_LEFT_TOP; - else if (strcmp(yolo_point, "right_bottom") == 0) l.yolo_point = YOLO_RIGHT_BOTTOM; - else l.yolo_point = YOLO_CENTER; - - fprintf(stderr, "[Gaussian_yolo] iou loss: %s (%d), iou_norm: %2.2f, obj_norm: %2.2f, cls_norm: %2.2f, delta_norm: %2.2f, scale: %2.2f, point: %d\n", - iou_loss, l.iou_loss, l.iou_normalizer, l.obj_normalizer, l.cls_normalizer, l.delta_normalizer, l.scale_x_y, l.yolo_point); - - l.jitter = option_find_float(options, "jitter", .2); - l.resize = option_find_float_quiet(options, "resize", 1.0); - - l.ignore_thresh = option_find_float(options, "ignore_thresh", .5); - l.truth_thresh = option_find_float(options, "truth_thresh", 1); - l.iou_thresh = option_find_float_quiet(options, "iou_thresh", 1); // recommended to use iou_thresh=0.213 in [yolo] - l.random = option_find_float_quiet(options, "random", 0); - - char *map_file = option_find_str(options, "map", 0); - if (map_file) l.map = read_map(map_file); - - a = option_find_str(options, "anchors", 0); - if (a) { - int len = strlen(a); - int n = 1; - int i; - for (i = 0; i < len; ++i) { - if (a[i] == ',') ++n; - } - for (i = 0; i < n; ++i) { - float bias = atof(a); - 
// Tail of the preceding anchors-parsing loop (the function itself begins above
// this chunk): copy each comma-separated anchor value into l.biases.
            l.biases[i] = bias;
            a = strchr(a, ',') + 1;   // advance past the next comma (NULL+1 after the last value, but the loop bound prevents a dereference)
        }
    }
    return l;
}

// Build a [region] layer (YOLOv2-style detection head) from its cfg options.
// Verifies that the previous layer's output count matches what the region
// layer expects, then copies all loss scales / flags and optional anchors.
layer parse_region(list *options, size_params params)
{
    int coords = option_find_int(options, "coords", 4);
    int classes = option_find_int(options, "classes", 20);
    int num = option_find_int(options, "num", 1);
    int max_boxes = option_find_int_quiet(options, "max", 200);

    layer l = make_region_layer(params.batch, params.w, params.h, num, classes, coords, max_boxes);
    // The preceding [convolutional] layer must produce exactly the number of
    // outputs this region layer consumes; otherwise the cfg is inconsistent.
    if (l.outputs != params.inputs) {
        printf("Error: l.outputs == params.inputs \n");
        printf("filters= in the [convolutional]-layer doesn't correspond to classes= or num= in [region]-layer \n");
        exit(EXIT_FAILURE);
    }
    //assert(l.outputs == params.inputs);

    l.log = option_find_int_quiet(options, "log", 0);
    l.sqrt = option_find_int_quiet(options, "sqrt", 0);

    l.softmax = option_find_int(options, "softmax", 0);
    l.focal_loss = option_find_int_quiet(options, "focal_loss", 0);
    //l.max_boxes = option_find_int_quiet(options, "max",30);
    l.jitter = option_find_float(options, "jitter", .2);
    l.resize = option_find_float_quiet(options, "resize", 1.0);
    l.rescore = option_find_int_quiet(options, "rescore",0);

    l.thresh = option_find_float(options, "thresh", .5);
    l.classfix = option_find_int_quiet(options, "classfix", 0);
    l.absolute = option_find_int_quiet(options, "absolute", 0);
    l.random = option_find_float_quiet(options, "random", 0);

    // Relative weights of the individual loss terms.
    l.coord_scale = option_find_float(options, "coord_scale", 1);
    l.object_scale = option_find_float(options, "object_scale", 1);
    l.noobject_scale = option_find_float(options, "noobject_scale", 1);
    l.mask_scale = option_find_float(options, "mask_scale", 1);
    l.class_scale = option_find_float(options, "class_scale", 1);
    l.bias_match = option_find_int_quiet(options, "bias_match",0);

    // Optional hierarchical-softmax tree and class-id remapping files.
    char *tree_file = option_find_str(options, "tree", 0);
    if (tree_file) l.softmax_tree = read_tree(tree_file);
    char *map_file = option_find_str(options, "map", 0);
    if (map_file) l.map = read_map(map_file);

    // anchors = comma-separated floats; at most num*2 values (w,h pairs) are used.
    char *a = option_find_str(options, "anchors", 0);
    if(a){
        int len = strlen(a);
        int n = 1;
        int i;
        for(i = 0; i < len; ++i){
            if (a[i] == ',') ++n;
        }
        for(i = 0; i < n && i < num*2; ++i){
            float bias = atof(a);
            l.biases[i] = bias;
            a = strchr(a, ',')+1;  // NULL+1 after the final value; never dereferenced because the loop ends
        }
    }
    return l;
}

// Build a [detection] layer (YOLOv1-style head) from its cfg options.
detection_layer parse_detection(list *options, size_params params)
{
    int coords = option_find_int(options, "coords", 1);
    int classes = option_find_int(options, "classes", 1);
    int rescore = option_find_int(options, "rescore", 0);
    int num = option_find_int(options, "num", 1);
    int side = option_find_int(options, "side", 7);
    detection_layer layer = make_detection_layer(params.batch, params.inputs, num, side, classes, coords, rescore);

    layer.softmax = option_find_int(options, "softmax", 0);
    layer.sqrt = option_find_int(options, "sqrt", 0);

    layer.max_boxes = option_find_int_quiet(options, "max",200);
    layer.coord_scale = option_find_float(options, "coord_scale", 1);
    layer.forced = option_find_int(options, "forced", 0);
    layer.object_scale = option_find_float(options, "object_scale", 1);
    layer.noobject_scale = option_find_float(options, "noobject_scale", 1);
    layer.class_scale = option_find_float(options, "class_scale", 1);
    layer.jitter = option_find_float(options, "jitter", .2);
    layer.resize = option_find_float_quiet(options, "resize", 1.0);
    layer.random = option_find_float_quiet(options, "random", 0);
    layer.reorg = option_find_int_quiet(options, "reorg", 0);
    return layer;
}

// Build a [cost] layer; "type" selects the loss (default "sse").
cost_layer parse_cost(list *options, size_params params)
{
    char *type_s = option_find_str(options, "type", "sse");
    COST_TYPE type = get_cost_type(type_s);
    float scale = option_find_float_quiet(options, "scale",1);
    cost_layer layer = make_cost_layer(params.batch, params.inputs, type, scale);
    layer.ratio = option_find_float_quiet(options, "ratio",0);
    return layer;
}

// Build a [crop] layer from its cfg options (body continues below).
crop_layer parse_crop(list *options, size_params params)
{
    int crop_height = option_find_int(options, "crop_height",1);
    int crop_width = option_find_int(options, "crop_width",1);
    int flip = option_find_int(options, "flip",0);
    float angle = option_find_float(options, "angle",0);
    float saturation = option_find_float(options, "saturation",1);
    float exposure = option_find_float(options, "exposure",1);

    int batch,h,w,c;
    h = params.h;
    w = params.w;
    c = params.c;
    batch=params.batch;
    // Crop needs a spatial input: all of h, w, c must be non-zero.
    if(!(h && w && c)) error("Layer before crop layer must output image.");

    int noadjust = option_find_int_quiet(options, "noadjust",0);

    crop_layer l = make_crop_layer(batch,h,w,c,crop_height,crop_width,flip, angle, saturation, exposure);
    l.shift = option_find_float(options, "shift", 0);
    l.noadjust = noadjust;
    return l;
}

// Build a [reorg] layer: rearranges a spatial block into channels by `stride`
// (reverse=1 presumably inverts the mapping — confirm in make_reorg_layer).
layer parse_reorg(list *options, size_params params)
{
    int stride = option_find_int(options, "stride",1);
    int reverse = option_find_int_quiet(options, "reverse",0);

    int batch,h,w,c;
    h = params.h;
    w = params.w;
    c = params.c;
    batch=params.batch;
    if(!(h && w && c)) error("Layer before reorg layer must output image.");

    layer layer = make_reorg_layer(batch,w,h,c,stride,reverse);
    return layer;
}

// Build the legacy [reorg_old] variant; kept for backward cfg compatibility.
layer parse_reorg_old(list *options, size_params params)
{
    printf("\n reorg_old \n");
    int stride = option_find_int(options, "stride", 1);
    int reverse = option_find_int_quiet(options, "reverse", 0);

    int batch, h, w, c;
    h = params.h;
    w = params.w;
    c = params.c;
    batch = params.batch;
    if (!(h && w && c)) error("Layer before reorg layer must output image.");

    layer layer = make_reorg_old_layer(batch, w, h, c, stride, reverse);
    return layer;
}

// Build a [local_avgpool] layer: implemented as a maxpool layer with the
// avgpool flag forced on; depth-pooling / antialiasing are disabled here.
maxpool_layer parse_local_avgpool(list *options, size_params params)
{
    int stride = option_find_int(options, "stride", 1);
    int stride_x = option_find_int_quiet(options, "stride_x", stride);
    int stride_y = option_find_int_quiet(options, "stride_y", stride);
    int size = option_find_int(options, "size", stride);
    int padding = option_find_int_quiet(options, "padding", size - 1);
    int maxpool_depth = 0;
    int out_channels = 1;
    int antialiasing = 0;
    const int avgpool = 1;  // the only difference from parse_maxpool's call

    int batch, h, w, c;
    h = params.h;
    w = params.w;
    c = params.c;
    batch = params.batch;
    if (!(h && w && c)) error("Layer before [local_avgpool] layer must output image.");

    maxpool_layer layer = make_maxpool_layer(batch, h, w, c, size, stride_x, stride_y, padding, maxpool_depth, out_channels, antialiasing, avgpool, params.train);
    return layer;
}

// Build a [maxpool] layer; size defaults to stride, padding to size-1.
maxpool_layer parse_maxpool(list *options, size_params params)
{
    int stride = option_find_int(options, "stride",1);
    int stride_x = option_find_int_quiet(options, "stride_x", stride);
    int stride_y = option_find_int_quiet(options, "stride_y", stride);
    int size = option_find_int(options, "size",stride);
    int padding = option_find_int_quiet(options, "padding", size-1);
    int maxpool_depth = option_find_int_quiet(options, "maxpool_depth", 0);
    int out_channels = option_find_int_quiet(options, "out_channels", 1);
    int antialiasing = option_find_int_quiet(options, "antialiasing", 0);
    const int avgpool = 0;

    int batch,h,w,c;
    h = params.h;
    w = params.w;
    c = params.c;
    batch=params.batch;
    if(!(h && w && c)) error("Layer before [maxpool] layer must output image.");

    maxpool_layer layer = make_maxpool_layer(batch, h, w, c, size, stride_x, stride_y, padding, maxpool_depth, out_channels, antialiasing, avgpool, params.train);
    layer.maxpool_zero_nonmax = option_find_int_quiet(options, "maxpool_zero_nonmax", 0);
    return layer;
}

// Build a global [avgpool] layer; takes no options beyond the input shape.
avgpool_layer parse_avgpool(list *options, size_params params)
{
    int batch,w,h,c;
    w = params.w;
    h = params.h;
    c = params.c;
    batch=params.batch;
    if(!(h && w && c)) error("Layer before avgpool layer must output image.");

    avgpool_layer layer = make_avgpool_layer(batch,w,h,c);
    return layer;
}

// Build a [dropout] layer; also supports DropBlock via the dropblock* options
// (body continues below).
dropout_layer parse_dropout(list *options, size_params params)
{
    float
probability = option_find_float(options, "probability", .2);
    int dropblock = option_find_int_quiet(options, "dropblock", 0);
    float dropblock_size_rel = option_find_float_quiet(options, "dropblock_size_rel", 0);
    // NOTE(review): read as float, stored as int — fractional values truncate.
    int dropblock_size_abs = option_find_float_quiet(options, "dropblock_size_abs", 0);
    // Clamp an absolute block size that exceeds the layer's spatial extent.
    if (dropblock_size_abs > params.w || dropblock_size_abs > params.h) {
        printf(" [dropout] - dropblock_size_abs = %d that is bigger than layer size %d x %d \n", dropblock_size_abs, params.w, params.h);
        dropblock_size_abs = min_val_cmp(params.w, params.h);
    }
    // DropBlock enabled but no size given: fall back to a fixed 7-pixel block.
    if (dropblock && !dropblock_size_rel && !dropblock_size_abs) {
        printf(" [dropout] - None of the parameters (dropblock_size_rel or dropblock_size_abs) are set, will be used: dropblock_size_abs = 7 \n");
        dropblock_size_abs = 7;
    }
    // Both sizes given: the absolute size wins, the relative one is discarded.
    if (dropblock_size_rel && dropblock_size_abs) {
        printf(" [dropout] - Both parameters are set, only the parameter will be used: dropblock_size_abs = %d \n", dropblock_size_abs);
        dropblock_size_rel = 0;
    }
    dropout_layer layer = make_dropout_layer(params.batch, params.inputs, probability, dropblock, dropblock_size_rel, dropblock_size_abs, params.w, params.h, params.c);
    // Dropout is shape-preserving: propagate the input dimensions unchanged.
    layer.out_w = params.w;
    layer.out_h = params.h;
    layer.out_c = params.c;
    return layer;
}

// Build a local-response [normalization] layer (LRN-style parameters).
layer parse_normalization(list *options, size_params params)
{
    float alpha = option_find_float(options, "alpha", .0001);
    float beta = option_find_float(options, "beta" , .75);
    float kappa = option_find_float(options, "kappa", 1);
    int size = option_find_int(options, "size", 5);
    layer l = make_normalization_layer(params.batch, params.w, params.h, params.c, size, alpha, beta, kappa);
    return l;
}

// Build a stand-alone [batchnorm] layer; no cfg options beyond the shape.
layer parse_batchnorm(list *options, size_params params)
{
    layer l = make_batchnorm_layer(params.batch, params.w, params.h, params.c, params.train);
    return l;
}

// Build a [shortcut] (residual-add) layer: sums the current input with one or
// more earlier layers listed in "from" (body continues below).
layer parse_shortcut(list *options, size_params params, network net)
{
    char *activation_s = option_find_str(options, "activation",
"linear"); - ACTIVATION activation = get_activation(activation_s); - - char *weights_type_str = option_find_str_quiet(options, "weights_type", "none"); - WEIGHTS_TYPE_T weights_type = NO_WEIGHTS; - if(strcmp(weights_type_str, "per_feature") == 0 || strcmp(weights_type_str, "per_layer") == 0) weights_type = PER_FEATURE; - else if (strcmp(weights_type_str, "per_channel") == 0) weights_type = PER_CHANNEL; - else if (strcmp(weights_type_str, "none") != 0) { - printf("Error: Incorrect weights_type = %s \n Use one of: none, per_feature, per_channel \n", weights_type_str); - getchar(); - exit(0); - } - - char *weights_normalization_str = option_find_str_quiet(options, "weights_normalization", "none"); - WEIGHTS_NORMALIZATION_T weights_normalization = NO_NORMALIZATION; - if (strcmp(weights_normalization_str, "relu") == 0 || strcmp(weights_normalization_str, "avg_relu") == 0) weights_normalization = RELU_NORMALIZATION; - else if (strcmp(weights_normalization_str, "softmax") == 0) weights_normalization = SOFTMAX_NORMALIZATION; - else if (strcmp(weights_type_str, "none") != 0) { - printf("Error: Incorrect weights_normalization = %s \n Use one of: none, relu, softmax \n", weights_normalization_str); - getchar(); - exit(0); - } - - char *l = option_find(options, "from"); - int len = strlen(l); - if (!l) error("Route Layer must specify input layers: from = ..."); - int n = 1; - int i; - for (i = 0; i < len; ++i) { - if (l[i] == ',') ++n; - } - - int* layers = (int*)calloc(n, sizeof(int)); - int* sizes = (int*)calloc(n, sizeof(int)); - float **layers_output = (float **)calloc(n, sizeof(float *)); - float **layers_delta = (float **)calloc(n, sizeof(float *)); - float **layers_output_gpu = (float **)calloc(n, sizeof(float *)); - float **layers_delta_gpu = (float **)calloc(n, sizeof(float *)); - - for (i = 0; i < n; ++i) { - int index = atoi(l); - l = strchr(l, ',') + 1; - if (index < 0) index = params.index + index; - layers[i] = index; - sizes[i] = 
params.net.layers[index].outputs; - layers_output[i] = params.net.layers[index].output; - layers_delta[i] = params.net.layers[index].delta; - } - -#ifdef GPU - for (i = 0; i < n; ++i) { - layers_output_gpu[i] = params.net.layers[layers[i]].output_gpu; - layers_delta_gpu[i] = params.net.layers[layers[i]].delta_gpu; - } -#endif// GPU - - layer s = make_shortcut_layer(params.batch, n, layers, sizes, params.w, params.h, params.c, layers_output, layers_delta, - layers_output_gpu, layers_delta_gpu, weights_type, weights_normalization, activation, params.train); - - free(layers_output_gpu); - free(layers_delta_gpu); - - for (i = 0; i < n; ++i) { - int index = layers[i]; - assert(params.w == net.layers[index].out_w && params.h == net.layers[index].out_h); - - if (params.w != net.layers[index].out_w || params.h != net.layers[index].out_h || params.c != net.layers[index].out_c) - fprintf(stderr, " (%4d x%4d x%4d) + (%4d x%4d x%4d) \n", - params.w, params.h, params.c, net.layers[index].out_w, net.layers[index].out_h, params.net.layers[index].out_c); - } - - return s; -} - - -layer parse_scale_channels(list *options, size_params params, network net) -{ - char *l = option_find(options, "from"); - int index = atoi(l); - if (index < 0) index = params.index + index; - int scale_wh = option_find_int_quiet(options, "scale_wh", 0); - - int batch = params.batch; - layer from = net.layers[index]; - - layer s = make_scale_channels_layer(batch, index, params.w, params.h, params.c, from.out_w, from.out_h, from.out_c, scale_wh); - - char *activation_s = option_find_str_quiet(options, "activation", "linear"); - ACTIVATION activation = get_activation(activation_s); - s.activation = activation; - if (activation == SWISH || activation == MISH) { - printf(" [scale_channels] layer doesn't support SWISH or MISH activations \n"); - } - return s; -} - -layer parse_sam(list *options, size_params params, network net) -{ - char *l = option_find(options, "from"); - int index = atoi(l); - if (index < 0) 
index = params.index + index;   // negative "from" index is relative to this layer

    int batch = params.batch;
    layer from = net.layers[index];

    layer s = make_sam_layer(batch, index, params.w, params.h, params.c, from.out_w, from.out_h, from.out_c);

    char *activation_s = option_find_str_quiet(options, "activation", "linear");
    ACTIVATION activation = get_activation(activation_s);
    s.activation = activation;
    if (activation == SWISH || activation == MISH) {
        printf(" [sam] layer doesn't support SWISH or MISH activations \n");
    }
    return s;
}


// Build a stand-alone [activation] layer; shape passes through unchanged.
layer parse_activation(list *options, size_params params)
{
    char *activation_s = option_find_str(options, "activation", "linear");
    ACTIVATION activation = get_activation(activation_s);

    layer l = make_activation_layer(params.batch, params.inputs, activation);

    l.out_h = params.h;
    l.out_w = params.w;
    l.out_c = params.c;
    l.h = params.h;
    l.w = params.w;
    l.c = params.c;

    return l;
}

// Build an [upsample] layer (nearest-neighbour scaling by `stride`).
// The `net` parameter is currently unused here.
layer parse_upsample(list *options, size_params params, network net)
{

    int stride = option_find_int(options, "stride", 2);
    layer l = make_upsample_layer(params.batch, params.w, params.h, params.c, stride);
    l.scale = option_find_float_quiet(options, "scale", 1);
    return l;
}

// Build a [route] layer: concatenates the outputs of the layers listed in
// "layers" along the channel axis (optionally splitting into groups).
route_layer parse_route(list *options, size_params params)
{
    char *l = option_find(options, "layers");
    if(!l) error("Route Layer must specify input layers");
    int len = strlen(l);
    // Count the comma-separated layer indices.
    int n = 1;
    int i;
    for(i = 0; i < len; ++i){
        if (l[i] == ',') ++n;
    }

    int* layers = (int*)xcalloc(n, sizeof(int));
    int* sizes = (int*)xcalloc(n, sizeof(int));
    for(i = 0; i < n; ++i){
        int index = atoi(l);
        l = strchr(l, ',')+1;   // advance past the comma (NULL+1 after the last entry; not dereferenced)
        if(index < 0) index = params.index + index;   // relative index
        layers[i] = index;
        sizes[i] = params.net.layers[index].outputs;
    }
    int batch = params.batch;

    int groups = option_find_int_quiet(options, "groups", 1);
    int group_id = option_find_int_quiet(options, "group_id", 0);

    route_layer layer = make_route_layer(batch, n, layers, sizes, groups, group_id);

    // Output shape: first input's w/h; channels are summed over all inputs,
    // provided every input has the same spatial size.
    convolutional_layer first = params.net.layers[layers[0]];
    layer.out_w = first.out_w;
    layer.out_h = first.out_h;
    layer.out_c = first.out_c;
    for(i = 1; i < n; ++i){
        int index = layers[i];
        convolutional_layer next = params.net.layers[index];
        if(next.out_w == first.out_w && next.out_h == first.out_h){
            layer.out_c += next.out_c;
        }else{
            // Mismatched spatial sizes: zero the output shape as an error marker.
            fprintf(stderr, " The width and height of the input layers are different. \n");
            layer.out_h = layer.out_w = layer.out_c = 0;
        }
    }
    layer.out_c = layer.out_c / layer.groups;   // each group carries an equal channel share

    layer.w = first.w;
    layer.h = first.h;
    layer.c = layer.out_c;

    // Console alignment for the layer table printed during parsing.
    if (n > 3) fprintf(stderr, " \t ");
    else if (n > 1) fprintf(stderr, " \t ");
    else fprintf(stderr, " \t\t ");

    fprintf(stderr, " ");
    if (layer.groups > 1) fprintf(stderr, "%d/%d", layer.group_id, layer.groups);
    else fprintf(stderr, " ");
    fprintf(stderr, " -> %4d x%4d x%4d \n", layer.out_w, layer.out_h, layer.out_c);

    return layer;
}

// Map the cfg "policy" string to the learning-rate-policy enum;
// unknown strings fall back to CONSTANT with a warning.
learning_rate_policy get_policy(char *s)
{
    if (strcmp(s, "random")==0) return RANDOM;
    if (strcmp(s, "poly")==0) return POLY;
    if (strcmp(s, "constant")==0) return CONSTANT;
    if (strcmp(s, "step")==0) return STEP;
    if (strcmp(s, "exp")==0) return EXP;
    if (strcmp(s, "sigmoid")==0) return SIG;
    if (strcmp(s, "steps")==0) return STEPS;
    if (strcmp(s, "sgdr")==0) return SGDR;
    fprintf(stderr, "Couldn't find policy %s, going with constant\n", s);
    return CONSTANT;
}

// Populate the global network settings from the [net] section of the cfg
// (body continues below).
void parse_net_options(list *options, network *net)
{
    net->max_batches = option_find_int(options, "max_batches", 0);
    net->batch = option_find_int(options, "batch",1);
    net->learning_rate = option_find_float(options, "learning_rate", .001);
    net->learning_rate_min = option_find_float_quiet(options, "learning_rate_min", .00001);
    net->batches_per_cycle = option_find_int_quiet(options, "sgdr_cycle", net->max_batches);
    net->batches_cycle_mult = option_find_int_quiet(options, "sgdr_mult", 2);
    net->momentum =
option_find_float(options, "momentum", .9);
    net->decay = option_find_float(options, "decay", .0001);
    int subdivs = option_find_int(options, "subdivisions",1);
    net->time_steps = option_find_int_quiet(options, "time_steps",1);
    net->track = option_find_int_quiet(options, "track", 0);
    net->augment_speed = option_find_int_quiet(options, "augment_speed", 2);
    net->init_sequential_subdivisions = net->sequential_subdivisions = option_find_int_quiet(options, "sequential_subdivisions", subdivs);
    if (net->sequential_subdivisions > subdivs) net->init_sequential_subdivisions = net->sequential_subdivisions = subdivs;
    net->try_fix_nan = option_find_int_quiet(options, "try_fix_nan", 0);
    // The cfg "batch" is divided into subdivisions; net->batch becomes the
    // per-step mini-batch (times time_steps for recurrent nets).
    net->batch /= subdivs;          // mini_batch
    const int mini_batch = net->batch;
    net->batch *= net->time_steps;  // mini_batch * time_steps
    net->subdivisions = subdivs;    // number of mini_batches

    net->weights_reject_freq = option_find_int_quiet(options, "weights_reject_freq", 0);
    net->equidistant_point = option_find_int_quiet(options, "equidistant_point", 0);
    net->badlabels_rejection_percentage = option_find_float_quiet(options, "badlabels_rejection_percentage", 0);
    net->num_sigmas_reject_badlabels = option_find_float_quiet(options, "num_sigmas_reject_badlabels", 0);
    net->ema_alpha = option_find_float_quiet(options, "ema_alpha", 0);
    // These are pointers (presumably allocated by make_network — confirm);
    // reset the shared counters/statistics they point at.
    *net->badlabels_reject_threshold = 0;
    *net->delta_rolling_max = 0;
    *net->delta_rolling_avg = 0;
    *net->delta_rolling_std = 0;
    *net->seen = 0;
    *net->cur_iteration = 0;
    net->loss_scale = option_find_float_quiet(options, "loss_scale", 1);
    net->dynamic_minibatch = option_find_int_quiet(options, "dynamic_minibatch", 0);
    net->optimized_memory = option_find_int_quiet(options, "optimized_memory", 0);
    net->workspace_size_limit = (size_t)1024*1024 * option_find_float_quiet(options, "workspace_size_limit_MB", 1024);  // 1024 MB by default

    // Adam optimizer hyper-parameters (only read when adam=1).
    net->adam = option_find_int_quiet(options, "adam", 0);
    if(net->adam){
        net->B1 = option_find_float(options, "B1", .9);
        net->B2 = option_find_float(options, "B2", .999);
        net->eps = option_find_float(options, "eps", .000001);
    }

    // Input geometry; "inputs" defaults to h*w*c when not given explicitly.
    net->h = option_find_int_quiet(options, "height",0);
    net->w = option_find_int_quiet(options, "width",0);
    net->c = option_find_int_quiet(options, "channels",0);
    net->inputs = option_find_int_quiet(options, "inputs", net->h * net->w * net->c);
    net->max_crop = option_find_int_quiet(options, "max_crop",net->w*2);
    net->min_crop = option_find_int_quiet(options, "min_crop",net->w);
    net->flip = option_find_int_quiet(options, "flip", 1);
    net->blur = option_find_int_quiet(options, "blur", 0);
    net->gaussian_noise = option_find_int_quiet(options, "gaussian_noise", 0);
    // mixup encodes the augmentation mode: 2=cutmix, 3=mosaic, 4=both.
    net->mixup = option_find_int_quiet(options, "mixup", 0);
    int cutmix = option_find_int_quiet(options, "cutmix", 0);
    int mosaic = option_find_int_quiet(options, "mosaic", 0);
    if (mosaic && cutmix) net->mixup = 4;
    else if (cutmix) net->mixup = 2;
    else if (mosaic) net->mixup = 3;
    net->letter_box = option_find_int_quiet(options, "letter_box", 0);
    net->mosaic_bound = option_find_int_quiet(options, "mosaic_bound", 0);
    net->contrastive = option_find_int_quiet(options, "contrastive", 0);
    net->contrastive_jit_flip = option_find_int_quiet(options, "contrastive_jit_flip", 0);
    net->contrastive_color = option_find_int_quiet(options, "contrastive_color", 0);
    net->unsupervised = option_find_int_quiet(options, "unsupervised", 0);
    // Contrastive loss needs at least 2 samples per mini-batch to form pairs.
    if (net->contrastive && mini_batch < 2) {
        printf(" Error: mini_batch size (batch/subdivisions) should be higher than 1 for Contrastive loss \n");
        exit(0);
    }
    net->label_smooth_eps = option_find_float_quiet(options, "label_smooth_eps", 0.0f);
    net->resize_step = option_find_float_quiet(options, "resize_step", 32);
    net->attention = option_find_int_quiet(options, "attention", 0);
    net->adversarial_lr = option_find_float_quiet(options, "adversarial_lr", 0);
    net->max_chart_loss = option_find_float_quiet(options, "max_chart_loss", 20.0);

    // Data-augmentation ranges.
    net->angle = option_find_float_quiet(options, "angle", 0);
    net->aspect = option_find_float_quiet(options, "aspect", 1);
    net->saturation = option_find_float_quiet(options, "saturation", 1);
    net->exposure = option_find_float_quiet(options, "exposure", 1);
    net->hue = option_find_float_quiet(options, "hue", 0);
    net->power = option_find_float_quiet(options, "power", 4);

    if(!net->inputs && !(net->h && net->w && net->c)) error("No input parameters supplied");

    char *policy_s = option_find_str(options, "policy", "constant");
    net->policy = get_policy(policy_s);
    net->burn_in = option_find_int_quiet(options, "burn_in", 0);
#ifdef GPU
    if (net->gpu_index >= 0) {
        char device_name[1024];
        int compute_capability = get_gpu_compute_capability(net->gpu_index, device_name);
#ifdef CUDNN_HALF
        // Tensor-core fp16 path requires compute capability >= 7.0 (Volta+).
        if (compute_capability >= 700) net->cudnn_half = 1;
        else net->cudnn_half = 0;
#endif// CUDNN_HALF
        fprintf(stderr, " %d : compute_capability = %d, cudnn_half = %d, GPU: %s \n", net->gpu_index, compute_capability, net->cudnn_half, device_name);
    }
    else fprintf(stderr, " GPU isn't used \n");
#endif// GPU
    // Per-policy learning-rate schedule options.
    if(net->policy == STEP){
        net->step = option_find_int(options, "step", 1);
        net->scale = option_find_float(options, "scale", 1);
    } else if (net->policy == STEPS || net->policy == SGDR){
        char *l = option_find(options, "steps");
        char *p = option_find(options, "scales");
        char *s = option_find(options, "seq_scales");
        if(net->policy == STEPS && (!l || !p)) error("STEPS policy must have steps and scales in cfg file");

        if (l) {
            // Count entries in "steps"; a '#' starts an inline comment.
            int len = strlen(l);
            int n = 1;
            int i;
            for (i = 0; i < len; ++i) {
                if (l[i] == '#') break;
                if (l[i] == ',') ++n;
            }
            int* steps = (int*)xcalloc(n, sizeof(int));
            float* scales = (float*)xcalloc(n, sizeof(float));
            float* seq_scales = (float*)xcalloc(n, sizeof(float));
            // Walk the three comma-separated lists in lock-step; missing
            // scales / seq_scales default to 1.0.
            for (i = 0; i < n; ++i) {
                float scale = 1.0;
                if (p) {
                    scale = atof(p);
                    p = strchr(p, ',') + 1;
                }
                float sequence_scale = 1.0;
                if (s) {
                    sequence_scale = atof(s);
                    s = strchr(s, ',') + 1;
                }
                int step = atoi(l);
                l = strchr(l, ',') + 1;
                steps[i] = step;
                scales[i] = scale;
                seq_scales[i] = sequence_scale;
            }
            net->scales = scales;
            net->steps = steps;
            net->seq_scales = seq_scales;
            net->num_steps = n;
        }
    } else if (net->policy == EXP){
        net->gamma = option_find_float(options, "gamma", 1);
    } else if (net->policy == SIG){
        net->gamma = option_find_float(options, "gamma", 1);
        net->step = option_find_int(options, "step", 1);
    } else if (net->policy == POLY || net->policy == RANDOM){
        //net->power = option_find_float(options, "power", 1);
    }

}

// True when a cfg section is the top-level network section.
int is_network(section *s)
{
    return (strcmp(s->type, "[net]")==0
            || strcmp(s->type, "[network]")==0);
}

// Propagate the train_only_bn flag backwards: once any layer sets it, every
// earlier layer (and the sub-layers of recurrent layers) inherits it.
void set_train_only_bn(network net)
{
    int train_only_bn = 0;
    int i;
    for (i = net.n - 1; i >= 0; --i) {
        if (net.layers[i].train_only_bn) train_only_bn = net.layers[i].train_only_bn;  // set l.train_only_bn for all previous layers
        if (train_only_bn) {
            net.layers[i].train_only_bn = train_only_bn;

            // Recurrent layers own internal sub-layers that must be flagged too.
            if (net.layers[i].type == CONV_LSTM) {
                net.layers[i].wf->train_only_bn = train_only_bn;
                net.layers[i].wi->train_only_bn = train_only_bn;
                net.layers[i].wg->train_only_bn = train_only_bn;
                net.layers[i].wo->train_only_bn = train_only_bn;
                net.layers[i].uf->train_only_bn = train_only_bn;
                net.layers[i].ui->train_only_bn = train_only_bn;
                net.layers[i].ug->train_only_bn = train_only_bn;
                net.layers[i].uo->train_only_bn = train_only_bn;
                if (net.layers[i].peephole) {
                    net.layers[i].vf->train_only_bn = train_only_bn;
                    net.layers[i].vi->train_only_bn = train_only_bn;
                    net.layers[i].vo->train_only_bn = train_only_bn;
                }
            }
            else if (net.layers[i].type == CRNN) {
                net.layers[i].input_layer->train_only_bn = train_only_bn;
                net.layers[i].self_layer->train_only_bn = train_only_bn;
                net.layers[i].output_layer->train_only_bn = train_only_bn;
            }
- } - } -} - -network parse_network_cfg(char *filename) -{ - return parse_network_cfg_custom(filename, 0, 0); -} - -network parse_network_cfg_custom(char *filename, int batch, int time_steps) -{ - list *sections = read_cfg(filename); - node *n = sections->front; - if(!n) error("Config file has no sections"); - network net = make_network(sections->size - 1); - net.gpu_index = gpu_index; - size_params params; - - if (batch > 0) params.train = 0; // allocates memory for Detection only - else params.train = 1; // allocates memory for Detection & Training - - section *s = (section *)n->val; - list *options = s->options; - if(!is_network(s)) error("First section must be [net] or [network]"); - parse_net_options(options, &net); - -#ifdef GPU - printf("net.optimized_memory = %d \n", net.optimized_memory); - if (net.optimized_memory >= 2 && params.train) { - pre_allocate_pinned_memory((size_t)1024 * 1024 * 1024 * 8); // pre-allocate 8 GB CPU-RAM for pinned memory - } -#endif // GPU - - params.h = net.h; - params.w = net.w; - params.c = net.c; - params.inputs = net.inputs; - if (batch > 0) net.batch = batch; - if (time_steps > 0) net.time_steps = time_steps; - if (net.batch < 1) net.batch = 1; - if (net.time_steps < 1) net.time_steps = 1; - if (net.batch < net.time_steps) net.batch = net.time_steps; - params.batch = net.batch; - params.time_steps = net.time_steps; - params.net = net; - printf("mini_batch = %d, batch = %d, time_steps = %d, train = %d \n", net.batch, net.batch * net.subdivisions, net.time_steps, params.train); - - int avg_outputs = 0; - int avg_counter = 0; - float bflops = 0; - size_t workspace_size = 0; - size_t max_inputs = 0; - size_t max_outputs = 0; - int receptive_w = 1, receptive_h = 1; - int receptive_w_scale = 1, receptive_h_scale = 1; - const int show_receptive_field = option_find_float_quiet(options, "show_receptive_field", 0); - - n = n->next; - int count = 0; - free_section(s); - fprintf(stderr, " layer filters size/strd(dil) input output\n"); - 
while(n){ - params.index = count; - fprintf(stderr, "%4d ", count); - s = (section *)n->val; - options = s->options; - layer l = { (LAYER_TYPE)0 }; - LAYER_TYPE lt = string_to_layer_type(s->type); - if(lt == CONVOLUTIONAL){ - l = parse_convolutional(options, params); - }else if(lt == LOCAL){ - l = parse_local(options, params); - }else if(lt == ACTIVE){ - l = parse_activation(options, params); - }else if(lt == RNN){ - l = parse_rnn(options, params); - }else if(lt == GRU){ - l = parse_gru(options, params); - }else if(lt == LSTM){ - l = parse_lstm(options, params); - }else if (lt == CONV_LSTM) { - l = parse_conv_lstm(options, params); - }else if (lt == HISTORY) { - l = parse_history(options, params); - }else if(lt == CRNN){ - l = parse_crnn(options, params); - }else if(lt == CONNECTED){ - l = parse_connected(options, params); - }else if(lt == CROP){ - l = parse_crop(options, params); - }else if(lt == COST){ - l = parse_cost(options, params); - l.keep_delta_gpu = 1; - }else if(lt == REGION){ - l = parse_region(options, params); - l.keep_delta_gpu = 1; - }else if (lt == YOLO) { - l = parse_yolo(options, params); - l.keep_delta_gpu = 1; - }else if (lt == GAUSSIAN_YOLO) { - l = parse_gaussian_yolo(options, params); - l.keep_delta_gpu = 1; - }else if(lt == DETECTION){ - l = parse_detection(options, params); - }else if(lt == SOFTMAX){ - l = parse_softmax(options, params); - net.hierarchy = l.softmax_tree; - l.keep_delta_gpu = 1; - }else if (lt == CONTRASTIVE) { - l = parse_contrastive(options, params); - l.keep_delta_gpu = 1; - }else if(lt == NORMALIZATION){ - l = parse_normalization(options, params); - }else if(lt == BATCHNORM){ - l = parse_batchnorm(options, params); - }else if(lt == MAXPOOL){ - l = parse_maxpool(options, params); - }else if (lt == LOCAL_AVGPOOL) { - l = parse_local_avgpool(options, params); - }else if(lt == REORG){ - l = parse_reorg(options, params); } - else if (lt == REORG_OLD) { - l = parse_reorg_old(options, params); - }else if(lt == AVGPOOL){ - l = 
parse_avgpool(options, params); - }else if(lt == ROUTE){ - l = parse_route(options, params); - int k; - for (k = 0; k < l.n; ++k) { - net.layers[l.input_layers[k]].use_bin_output = 0; - net.layers[l.input_layers[k]].keep_delta_gpu = 1; - } - }else if (lt == UPSAMPLE) { - l = parse_upsample(options, params, net); - }else if(lt == SHORTCUT){ - l = parse_shortcut(options, params, net); - net.layers[count - 1].use_bin_output = 0; - net.layers[l.index].use_bin_output = 0; - net.layers[l.index].keep_delta_gpu = 1; - }else if (lt == SCALE_CHANNELS) { - l = parse_scale_channels(options, params, net); - net.layers[count - 1].use_bin_output = 0; - net.layers[l.index].use_bin_output = 0; - net.layers[l.index].keep_delta_gpu = 1; - } - else if (lt == SAM) { - l = parse_sam(options, params, net); - net.layers[count - 1].use_bin_output = 0; - net.layers[l.index].use_bin_output = 0; - net.layers[l.index].keep_delta_gpu = 1; - }else if(lt == DROPOUT){ - l = parse_dropout(options, params); - l.output = net.layers[count-1].output; - l.delta = net.layers[count-1].delta; -#ifdef GPU - l.output_gpu = net.layers[count-1].output_gpu; - l.delta_gpu = net.layers[count-1].delta_gpu; - l.keep_delta_gpu = 1; -#endif - } - else if (lt == EMPTY) { - layer empty_layer = {(LAYER_TYPE)0}; - empty_layer.out_w = params.w; - empty_layer.out_h = params.h; - empty_layer.out_c = params.c; - l = empty_layer; - l.output = net.layers[count - 1].output; - l.delta = net.layers[count - 1].delta; -#ifdef GPU - l.output_gpu = net.layers[count - 1].output_gpu; - l.delta_gpu = net.layers[count - 1].delta_gpu; -#endif - }else{ - fprintf(stderr, "Type not recognized: %s\n", s->type); - } - - // calculate receptive field - if(show_receptive_field) - { - int dilation = max_val_cmp(1, l.dilation); - int stride = max_val_cmp(1, l.stride); - int size = max_val_cmp(1, l.size); - - if (l.type == UPSAMPLE || (l.type == REORG)) - { - - l.receptive_w = receptive_w; - l.receptive_h = receptive_h; - l.receptive_w_scale = 
receptive_w_scale = receptive_w_scale / stride; - l.receptive_h_scale = receptive_h_scale = receptive_h_scale / stride; - - } - else { - if (l.type == ROUTE) { - receptive_w = receptive_h = receptive_w_scale = receptive_h_scale = 0; - int k; - for (k = 0; k < l.n; ++k) { - layer route_l = net.layers[l.input_layers[k]]; - receptive_w = max_val_cmp(receptive_w, route_l.receptive_w); - receptive_h = max_val_cmp(receptive_h, route_l.receptive_h); - receptive_w_scale = max_val_cmp(receptive_w_scale, route_l.receptive_w_scale); - receptive_h_scale = max_val_cmp(receptive_h_scale, route_l.receptive_h_scale); - } - } - else - { - int increase_receptive = size + (dilation - 1) * 2 - 1;// stride; - increase_receptive = max_val_cmp(0, increase_receptive); - - receptive_w += increase_receptive * receptive_w_scale; - receptive_h += increase_receptive * receptive_h_scale; - receptive_w_scale *= stride; - receptive_h_scale *= stride; - } - - l.receptive_w = receptive_w; - l.receptive_h = receptive_h; - l.receptive_w_scale = receptive_w_scale; - l.receptive_h_scale = receptive_h_scale; - } - //printf(" size = %d, dilation = %d, stride = %d, receptive_w = %d, receptive_w_scale = %d - ", size, dilation, stride, receptive_w, receptive_w_scale); - - int cur_receptive_w = receptive_w; - int cur_receptive_h = receptive_h; - - fprintf(stderr, "%4d - receptive field: %d x %d \n", count, cur_receptive_w, cur_receptive_h); - } - -#ifdef GPU - // futher GPU-memory optimization: net.optimized_memory == 2 - l.optimized_memory = net.optimized_memory; - if (net.optimized_memory >= 2 && params.train && l.type != DROPOUT) - { - if (l.output_gpu) { - cuda_free(l.output_gpu); - //l.output_gpu = cuda_make_array_pinned(l.output, l.batch*l.outputs); // l.steps - l.output_gpu = cuda_make_array_pinned_preallocated(NULL, l.batch*l.outputs); // l.steps - } - if (l.activation_input_gpu) { - cuda_free(l.activation_input_gpu); - l.activation_input_gpu = cuda_make_array_pinned_preallocated(NULL, 
l.batch*l.outputs); // l.steps - } - - if (l.x_gpu) { - cuda_free(l.x_gpu); - l.x_gpu = cuda_make_array_pinned_preallocated(NULL, l.batch*l.outputs); // l.steps - } - - // maximum optimization - if (net.optimized_memory >= 3 && l.type != DROPOUT) { - if (l.delta_gpu) { - cuda_free(l.delta_gpu); - //l.delta_gpu = cuda_make_array_pinned_preallocated(NULL, l.batch*l.outputs); // l.steps - //printf("\n\n PINNED DELTA GPU = %d \n", l.batch*l.outputs); - } - } - - if (l.type == CONVOLUTIONAL) { - set_specified_workspace_limit(&l, net.workspace_size_limit); // workspace size limit 1 GB - } - } -#endif // GPU - - l.clip = option_find_float_quiet(options, "clip", 0); - l.dynamic_minibatch = net.dynamic_minibatch; - l.onlyforward = option_find_int_quiet(options, "onlyforward", 0); - l.dont_update = option_find_int_quiet(options, "dont_update", 0); - l.burnin_update = option_find_int_quiet(options, "burnin_update", 0); - l.stopbackward = option_find_int_quiet(options, "stopbackward", 0); - l.train_only_bn = option_find_int_quiet(options, "train_only_bn", 0); - l.dontload = option_find_int_quiet(options, "dontload", 0); - l.dontloadscales = option_find_int_quiet(options, "dontloadscales", 0); - l.learning_rate_scale = option_find_float_quiet(options, "learning_rate", 1); - option_unused(options); - net.layers[count] = l; - if (l.workspace_size > workspace_size) workspace_size = l.workspace_size; - if (l.inputs > max_inputs) max_inputs = l.inputs; - if (l.outputs > max_outputs) max_outputs = l.outputs; - free_section(s); - n = n->next; - ++count; - if(n){ - if (l.antialiasing) { - params.h = l.input_layer->out_h; - params.w = l.input_layer->out_w; - params.c = l.input_layer->out_c; - params.inputs = l.input_layer->outputs; - } - else { - params.h = l.out_h; - params.w = l.out_w; - params.c = l.out_c; - params.inputs = l.outputs; - } - } - if (l.bflops > 0) bflops += l.bflops; - - if (l.w > 1 && l.h > 1) { - avg_outputs += l.outputs; - avg_counter++; - } - } - 
free_list(sections); - -#ifdef GPU - if (net.optimized_memory && params.train) - { - int k; - for (k = 0; k < net.n; ++k) { - layer l = net.layers[k]; - // delta GPU-memory optimization: net.optimized_memory == 1 - if (!l.keep_delta_gpu) { - const size_t delta_size = l.outputs*l.batch; // l.steps - if (net.max_delta_gpu_size < delta_size) { - net.max_delta_gpu_size = delta_size; - if (net.global_delta_gpu) cuda_free(net.global_delta_gpu); - if (net.state_delta_gpu) cuda_free(net.state_delta_gpu); - assert(net.max_delta_gpu_size > 0); - net.global_delta_gpu = (float *)cuda_make_array(NULL, net.max_delta_gpu_size); - net.state_delta_gpu = (float *)cuda_make_array(NULL, net.max_delta_gpu_size); - } - if (l.delta_gpu) { - if (net.optimized_memory >= 3) {} - else cuda_free(l.delta_gpu); - } - l.delta_gpu = net.global_delta_gpu; - } - - // maximum optimization - if (net.optimized_memory >= 3 && l.type != DROPOUT) { - if (l.delta_gpu && l.keep_delta_gpu) { - //cuda_free(l.delta_gpu); // already called above - l.delta_gpu = cuda_make_array_pinned_preallocated(NULL, l.batch*l.outputs); // l.steps - //printf("\n\n PINNED DELTA GPU = %d \n", l.batch*l.outputs); - } - } - - net.layers[k] = l; - } - } -#endif - - set_train_only_bn(net); // set l.train_only_bn for all required layers - - net.outputs = get_network_output_size(net); - net.output = get_network_output(net); - avg_outputs = avg_outputs / avg_counter; - fprintf(stderr, "Total BFLOPS %5.3f \n", bflops); - fprintf(stderr, "avg_outputs = %d \n", avg_outputs); -#ifdef GPU - get_cuda_stream(); - get_cuda_memcpy_stream(); - if (gpu_index >= 0) - { - int size = get_network_input_size(net) * net.batch; - net.input_state_gpu = cuda_make_array(0, size); - if (cudaSuccess == cudaHostAlloc(&net.input_pinned_cpu, size * sizeof(float), cudaHostRegisterMapped)) net.input_pinned_cpu_flag = 1; - else { - cudaGetLastError(); // reset CUDA-error - net.input_pinned_cpu = (float*)xcalloc(size, sizeof(float)); - } - - // pre-allocate 
memory for inference on Tensor Cores (fp16) - *net.max_input16_size = 0; - *net.max_output16_size = 0; - if (net.cudnn_half) { - *net.max_input16_size = max_inputs; - CHECK_CUDA(cudaMalloc((void **)net.input16_gpu, *net.max_input16_size * sizeof(short))); //sizeof(half) - *net.max_output16_size = max_outputs; - CHECK_CUDA(cudaMalloc((void **)net.output16_gpu, *net.max_output16_size * sizeof(short))); //sizeof(half) - } - if (workspace_size) { - fprintf(stderr, " Allocate additional workspace_size = %1.2f MB \n", (float)workspace_size/1000000); - net.workspace = cuda_make_array(0, workspace_size / sizeof(float) + 1); - } - else { - net.workspace = (float*)xcalloc(1, workspace_size); - } - } -#else - if (workspace_size) { - net.workspace = (float*)xcalloc(1, workspace_size); - } -#endif - - LAYER_TYPE lt = net.layers[net.n - 1].type; - if ((net.w % 32 != 0 || net.h % 32 != 0) && (lt == YOLO || lt == REGION || lt == DETECTION)) { - printf("\n Warning: width=%d and height=%d in cfg-file must be divisible by 32 for default networks Yolo v1/v2/v3!!! 
\n\n", - net.w, net.h); - } - return net; -} - - - -list *read_cfg(char *filename) -{ - FILE *file = fopen(filename, "r"); - if(file == 0) file_error(filename); - char *line; - int nu = 0; - list *sections = make_list(); - section *current = 0; - while((line=fgetl(file)) != 0){ - ++ nu; - strip(line); - switch(line[0]){ - case '[': - current = (section*)xmalloc(sizeof(section)); - list_insert(sections, current); - current->options = make_list(); - current->type = line; - break; - case '\0': - case '#': - case ';': - free(line); - break; - default: - if(!read_option(line, current->options)){ - fprintf(stderr, "Config file error line %d, could parse: %s\n", nu, line); - free(line); - } - break; - } - } - fclose(file); - return sections; -} - -void save_convolutional_weights_binary(layer l, FILE *fp) -{ -#ifdef GPU - if(gpu_index >= 0){ - pull_convolutional_layer(l); - } -#endif - int size = (l.c/l.groups)*l.size*l.size; - binarize_weights(l.weights, l.n, size, l.binary_weights); - int i, j, k; - fwrite(l.biases, sizeof(float), l.n, fp); - if (l.batch_normalize){ - fwrite(l.scales, sizeof(float), l.n, fp); - fwrite(l.rolling_mean, sizeof(float), l.n, fp); - fwrite(l.rolling_variance, sizeof(float), l.n, fp); - } - for(i = 0; i < l.n; ++i){ - float mean = l.binary_weights[i*size]; - if(mean < 0) mean = -mean; - fwrite(&mean, sizeof(float), 1, fp); - for(j = 0; j < size/8; ++j){ - int index = i*size + j*8; - unsigned char c = 0; - for(k = 0; k < 8; ++k){ - if (j*8 + k >= size) break; - if (l.binary_weights[index + k] > 0) c = (c | 1<= 0) { - pull_shortcut_layer(l); - printf("\n pull_shortcut_layer \n"); - } -#endif - int i; - //if(l.weight_updates) for (i = 0; i < l.nweights; ++i) printf(" %f, ", l.weight_updates[i]); - //printf(" l.nweights = %d - update \n", l.nweights); - for (i = 0; i < l.nweights; ++i) printf(" %f, ", l.weights[i]); - printf(" l.nweights = %d \n\n", l.nweights); - - int num = l.nweights; - fwrite(l.weights, sizeof(float), num, fp); -} - -void 
save_convolutional_weights(layer l, FILE *fp) -{ - if(l.binary){ - //save_convolutional_weights_binary(l, fp); - //return; - } -#ifdef GPU - if(gpu_index >= 0){ - pull_convolutional_layer(l); - } -#endif - int num = l.nweights; - fwrite(l.biases, sizeof(float), l.n, fp); - if (l.batch_normalize){ - fwrite(l.scales, sizeof(float), l.n, fp); - fwrite(l.rolling_mean, sizeof(float), l.n, fp); - fwrite(l.rolling_variance, sizeof(float), l.n, fp); - } - fwrite(l.weights, sizeof(float), num, fp); - //if(l.adam){ - // fwrite(l.m, sizeof(float), num, fp); - // fwrite(l.v, sizeof(float), num, fp); - //} -} - -void save_convolutional_weights_ema(layer l, FILE *fp) -{ - if (l.binary) { - //save_convolutional_weights_binary(l, fp); - //return; - } -#ifdef GPU - if (gpu_index >= 0) { - pull_convolutional_layer(l); - } -#endif - int num = l.nweights; - fwrite(l.biases_ema, sizeof(float), l.n, fp); - if (l.batch_normalize) { - fwrite(l.scales_ema, sizeof(float), l.n, fp); - fwrite(l.rolling_mean, sizeof(float), l.n, fp); - fwrite(l.rolling_variance, sizeof(float), l.n, fp); - } - fwrite(l.weights_ema, sizeof(float), num, fp); - //if(l.adam){ - // fwrite(l.m, sizeof(float), num, fp); - // fwrite(l.v, sizeof(float), num, fp); - //} -} - -void save_batchnorm_weights(layer l, FILE *fp) -{ -#ifdef GPU - if(gpu_index >= 0){ - pull_batchnorm_layer(l); - } -#endif - fwrite(l.biases, sizeof(float), l.c, fp); - fwrite(l.scales, sizeof(float), l.c, fp); - fwrite(l.rolling_mean, sizeof(float), l.c, fp); - fwrite(l.rolling_variance, sizeof(float), l.c, fp); -} - -void save_connected_weights(layer l, FILE *fp) -{ -#ifdef GPU - if(gpu_index >= 0){ - pull_connected_layer(l); - } -#endif - fwrite(l.biases, sizeof(float), l.outputs, fp); - fwrite(l.weights, sizeof(float), l.outputs*l.inputs, fp); - if (l.batch_normalize){ - fwrite(l.scales, sizeof(float), l.outputs, fp); - fwrite(l.rolling_mean, sizeof(float), l.outputs, fp); - fwrite(l.rolling_variance, sizeof(float), l.outputs, fp); - } -} - 
-void save_weights_upto(network net, char *filename, int cutoff, int save_ema) -{ -#ifdef GPU - if(net.gpu_index >= 0){ - cuda_set_device(net.gpu_index); - } -#endif - fprintf(stderr, "Saving weights to %s\n", filename); - FILE *fp = fopen(filename, "wb"); - if(!fp) file_error(filename); - - int major = MAJOR_VERSION; - int minor = MINOR_VERSION; - int revision = PATCH_VERSION; - fwrite(&major, sizeof(int), 1, fp); - fwrite(&minor, sizeof(int), 1, fp); - fwrite(&revision, sizeof(int), 1, fp); - (*net.seen) = get_current_iteration(net) * net.batch * net.subdivisions; // remove this line, when you will save to weights-file both: seen & cur_iteration - fwrite(net.seen, sizeof(uint64_t), 1, fp); - - int i; - for(i = 0; i < net.n && i < cutoff; ++i){ - layer l = net.layers[i]; - if (l.type == CONVOLUTIONAL && l.share_layer == NULL) { - if (save_ema) { - save_convolutional_weights_ema(l, fp); - } - else { - save_convolutional_weights(l, fp); - } - } if (l.type == SHORTCUT && l.nweights > 0) { - save_shortcut_weights(l, fp); - } if(l.type == CONNECTED){ - save_connected_weights(l, fp); - } if(l.type == BATCHNORM){ - save_batchnorm_weights(l, fp); - } if(l.type == RNN){ - save_connected_weights(*(l.input_layer), fp); - save_connected_weights(*(l.self_layer), fp); - save_connected_weights(*(l.output_layer), fp); - } if(l.type == GRU){ - save_connected_weights(*(l.input_z_layer), fp); - save_connected_weights(*(l.input_r_layer), fp); - save_connected_weights(*(l.input_h_layer), fp); - save_connected_weights(*(l.state_z_layer), fp); - save_connected_weights(*(l.state_r_layer), fp); - save_connected_weights(*(l.state_h_layer), fp); - } if(l.type == LSTM){ - save_connected_weights(*(l.wf), fp); - save_connected_weights(*(l.wi), fp); - save_connected_weights(*(l.wg), fp); - save_connected_weights(*(l.wo), fp); - save_connected_weights(*(l.uf), fp); - save_connected_weights(*(l.ui), fp); - save_connected_weights(*(l.ug), fp); - save_connected_weights(*(l.uo), fp); - } if (l.type 
== CONV_LSTM) { - if (l.peephole) { - save_convolutional_weights(*(l.vf), fp); - save_convolutional_weights(*(l.vi), fp); - save_convolutional_weights(*(l.vo), fp); - } - save_convolutional_weights(*(l.wf), fp); - if (!l.bottleneck) { - save_convolutional_weights(*(l.wi), fp); - save_convolutional_weights(*(l.wg), fp); - save_convolutional_weights(*(l.wo), fp); - } - save_convolutional_weights(*(l.uf), fp); - save_convolutional_weights(*(l.ui), fp); - save_convolutional_weights(*(l.ug), fp); - save_convolutional_weights(*(l.uo), fp); - } if(l.type == CRNN){ - save_convolutional_weights(*(l.input_layer), fp); - save_convolutional_weights(*(l.self_layer), fp); - save_convolutional_weights(*(l.output_layer), fp); - } if(l.type == LOCAL){ -#ifdef GPU - if(gpu_index >= 0){ - pull_local_layer(l); - } -#endif - int locations = l.out_w*l.out_h; - int size = l.size*l.size*l.c*l.n*locations; - fwrite(l.biases, sizeof(float), l.outputs, fp); - fwrite(l.weights, sizeof(float), size, fp); - } - } - fclose(fp); -} -void save_weights(network net, char *filename) -{ - save_weights_upto(net, filename, net.n, 0); -} - -void transpose_matrix(float *a, int rows, int cols) -{ - float* transpose = (float*)xcalloc(rows * cols, sizeof(float)); - int x, y; - for(x = 0; x < rows; ++x){ - for(y = 0; y < cols; ++y){ - transpose[y*rows + x] = a[x*cols + y]; - } - } - memcpy(a, transpose, rows*cols*sizeof(float)); - free(transpose); -} - -void load_connected_weights(layer l, FILE *fp, int transpose) -{ - fread(l.biases, sizeof(float), l.outputs, fp); - fread(l.weights, sizeof(float), l.outputs*l.inputs, fp); - if(transpose){ - transpose_matrix(l.weights, l.inputs, l.outputs); - } - //printf("Biases: %f mean %f variance\n", mean_array(l.biases, l.outputs), variance_array(l.biases, l.outputs)); - //printf("Weights: %f mean %f variance\n", mean_array(l.weights, l.outputs*l.inputs), variance_array(l.weights, l.outputs*l.inputs)); - if (l.batch_normalize && (!l.dontloadscales)){ - fread(l.scales, 
sizeof(float), l.outputs, fp); - fread(l.rolling_mean, sizeof(float), l.outputs, fp); - fread(l.rolling_variance, sizeof(float), l.outputs, fp); - //printf("Scales: %f mean %f variance\n", mean_array(l.scales, l.outputs), variance_array(l.scales, l.outputs)); - //printf("rolling_mean: %f mean %f variance\n", mean_array(l.rolling_mean, l.outputs), variance_array(l.rolling_mean, l.outputs)); - //printf("rolling_variance: %f mean %f variance\n", mean_array(l.rolling_variance, l.outputs), variance_array(l.rolling_variance, l.outputs)); - } -#ifdef GPU - if(gpu_index >= 0){ - push_connected_layer(l); - } -#endif -} - -void load_batchnorm_weights(layer l, FILE *fp) -{ - fread(l.biases, sizeof(float), l.c, fp); - fread(l.scales, sizeof(float), l.c, fp); - fread(l.rolling_mean, sizeof(float), l.c, fp); - fread(l.rolling_variance, sizeof(float), l.c, fp); -#ifdef GPU - if(gpu_index >= 0){ - push_batchnorm_layer(l); - } -#endif -} - -void load_convolutional_weights_binary(layer l, FILE *fp) -{ - fread(l.biases, sizeof(float), l.n, fp); - if (l.batch_normalize && (!l.dontloadscales)){ - fread(l.scales, sizeof(float), l.n, fp); - fread(l.rolling_mean, sizeof(float), l.n, fp); - fread(l.rolling_variance, sizeof(float), l.n, fp); - } - int size = (l.c / l.groups)*l.size*l.size; - int i, j, k; - for(i = 0; i < l.n; ++i){ - float mean = 0; - fread(&mean, sizeof(float), 1, fp); - for(j = 0; j < size/8; ++j){ - int index = i*size + j*8; - unsigned char c = 0; - fread(&c, sizeof(char), 1, fp); - for(k = 0; k < 8; ++k){ - if (j*8 + k >= size) break; - l.weights[index + k] = (c & 1<= 0){ - push_convolutional_layer(l); - } -#endif -} - -void load_convolutional_weights(layer l, FILE *fp) -{ - if(l.binary){ - //load_convolutional_weights_binary(l, fp); - //return; - } - int num = l.nweights; - int read_bytes; - read_bytes = fread(l.biases, sizeof(float), l.n, fp); - if (read_bytes > 0 && read_bytes < l.n) printf("\n Warning: Unexpected end of wights-file! 
l.biases - l.index = %d \n", l.index); - //fread(l.weights, sizeof(float), num, fp); // as in connected layer - if (l.batch_normalize && (!l.dontloadscales)){ - read_bytes = fread(l.scales, sizeof(float), l.n, fp); - if (read_bytes > 0 && read_bytes < l.n) printf("\n Warning: Unexpected end of wights-file! l.scales - l.index = %d \n", l.index); - read_bytes = fread(l.rolling_mean, sizeof(float), l.n, fp); - if (read_bytes > 0 && read_bytes < l.n) printf("\n Warning: Unexpected end of wights-file! l.rolling_mean - l.index = %d \n", l.index); - read_bytes = fread(l.rolling_variance, sizeof(float), l.n, fp); - if (read_bytes > 0 && read_bytes < l.n) printf("\n Warning: Unexpected end of wights-file! l.rolling_variance - l.index = %d \n", l.index); - if(0){ - int i; - for(i = 0; i < l.n; ++i){ - printf("%g, ", l.rolling_mean[i]); - } - printf("\n"); - for(i = 0; i < l.n; ++i){ - printf("%g, ", l.rolling_variance[i]); - } - printf("\n"); - } - if(0){ - fill_cpu(l.n, 0, l.rolling_mean, 1); - fill_cpu(l.n, 0, l.rolling_variance, 1); - } - } - read_bytes = fread(l.weights, sizeof(float), num, fp); - if (read_bytes > 0 && read_bytes < l.n) printf("\n Warning: Unexpected end of wights-file! l.weights - l.index = %d \n", l.index); - //if(l.adam){ - // fread(l.m, sizeof(float), num, fp); - // fread(l.v, sizeof(float), num, fp); - //} - //if(l.c == 3) scal_cpu(num, 1./256, l.weights, 1); - if (l.flipped) { - transpose_matrix(l.weights, (l.c/l.groups)*l.size*l.size, l.n); - } - //if (l.binary) binarize_weights(l.weights, l.n, (l.c/l.groups)*l.size*l.size, l.weights); -#ifdef GPU - if(gpu_index >= 0){ - push_convolutional_layer(l); - } -#endif -} - -void load_shortcut_weights(layer l, FILE *fp) -{ - int num = l.nweights; - int read_bytes; - read_bytes = fread(l.weights, sizeof(float), num, fp); - if (read_bytes > 0 && read_bytes < num) printf("\n Warning: Unexpected end of wights-file! 
l.weights - l.index = %d \n", l.index); - //for (int i = 0; i < l.nweights; ++i) printf(" %f, ", l.weights[i]); - //printf(" read_bytes = %d \n\n", read_bytes); -#ifdef GPU - if (gpu_index >= 0) { - push_shortcut_layer(l); - } -#endif -} - -void load_weights_upto(network *net, char *filename, int cutoff) -{ -#ifdef GPU - if(net->gpu_index >= 0){ - cuda_set_device(net->gpu_index); - } -#endif - fprintf(stderr, "Loading weights from %s...", filename); - fflush(stdout); - FILE *fp = fopen(filename, "rb"); - if(!fp) file_error(filename); - - int major; - int minor; - int revision; - fread(&major, sizeof(int), 1, fp); - fread(&minor, sizeof(int), 1, fp); - fread(&revision, sizeof(int), 1, fp); - if ((major * 10 + minor) >= 2) { - printf("\n seen 64"); - uint64_t iseen = 0; - fread(&iseen, sizeof(uint64_t), 1, fp); - *net->seen = iseen; - } - else { - printf("\n seen 32"); - uint32_t iseen = 0; - fread(&iseen, sizeof(uint32_t), 1, fp); - *net->seen = iseen; - } - *net->cur_iteration = get_current_batch(*net); - printf(", trained: %.0f K-images (%.0f Kilo-batches_64) \n", (float)(*net->seen / 1000), (float)(*net->seen / 64000)); - int transpose = (major > 1000) || (minor > 1000); - - int i; - for(i = 0; i < net->n && i < cutoff; ++i){ - layer l = net->layers[i]; - if (l.dontload) continue; - if(l.type == CONVOLUTIONAL && l.share_layer == NULL){ - load_convolutional_weights(l, fp); - } - if (l.type == SHORTCUT && l.nweights > 0) { - load_shortcut_weights(l, fp); - } - if(l.type == CONNECTED){ - load_connected_weights(l, fp, transpose); - } - if(l.type == BATCHNORM){ - load_batchnorm_weights(l, fp); - } - if(l.type == CRNN){ - load_convolutional_weights(*(l.input_layer), fp); - load_convolutional_weights(*(l.self_layer), fp); - load_convolutional_weights(*(l.output_layer), fp); - } - if(l.type == RNN){ - load_connected_weights(*(l.input_layer), fp, transpose); - load_connected_weights(*(l.self_layer), fp, transpose); - load_connected_weights(*(l.output_layer), fp, 
transpose); - } - if(l.type == GRU){ - load_connected_weights(*(l.input_z_layer), fp, transpose); - load_connected_weights(*(l.input_r_layer), fp, transpose); - load_connected_weights(*(l.input_h_layer), fp, transpose); - load_connected_weights(*(l.state_z_layer), fp, transpose); - load_connected_weights(*(l.state_r_layer), fp, transpose); - load_connected_weights(*(l.state_h_layer), fp, transpose); - } - if(l.type == LSTM){ - load_connected_weights(*(l.wf), fp, transpose); - load_connected_weights(*(l.wi), fp, transpose); - load_connected_weights(*(l.wg), fp, transpose); - load_connected_weights(*(l.wo), fp, transpose); - load_connected_weights(*(l.uf), fp, transpose); - load_connected_weights(*(l.ui), fp, transpose); - load_connected_weights(*(l.ug), fp, transpose); - load_connected_weights(*(l.uo), fp, transpose); - } - if (l.type == CONV_LSTM) { - if (l.peephole) { - load_convolutional_weights(*(l.vf), fp); - load_convolutional_weights(*(l.vi), fp); - load_convolutional_weights(*(l.vo), fp); - } - load_convolutional_weights(*(l.wf), fp); - if (!l.bottleneck) { - load_convolutional_weights(*(l.wi), fp); - load_convolutional_weights(*(l.wg), fp); - load_convolutional_weights(*(l.wo), fp); - } - load_convolutional_weights(*(l.uf), fp); - load_convolutional_weights(*(l.ui), fp); - load_convolutional_weights(*(l.ug), fp); - load_convolutional_weights(*(l.uo), fp); - } - if(l.type == LOCAL){ - int locations = l.out_w*l.out_h; - int size = l.size*l.size*l.c*l.n*locations; - fread(l.biases, sizeof(float), l.outputs, fp); - fread(l.weights, sizeof(float), size, fp); -#ifdef GPU - if(gpu_index >= 0){ - push_local_layer(l); - } -#endif - } - if (feof(fp)) break; - } - fprintf(stderr, "Done! 
Loaded %d layers from weights-file \n", i); - fclose(fp); -} - -void load_weights(network *net, char *filename) -{ - load_weights_upto(net, filename, net->n); -} - -// load network & force - set batch size -network *load_network_custom(char *cfg, char *weights, int clear, int batch) -{ - printf(" Try to load cfg: %s, weights: %s, clear = %d \n", cfg, weights, clear); - network* net = (network*)xcalloc(1, sizeof(network)); - *net = parse_network_cfg_custom(cfg, batch, 1); - if (weights && weights[0] != 0) { - printf(" Try to load weights: %s \n", weights); - load_weights(net, weights); - } - fuse_conv_batchnorm(*net); - if (clear) { - (*net->seen) = 0; - (*net->cur_iteration) = 0; - } - return net; -} - -// load network & get batch size from cfg-file -network *load_network(char *cfg, char *weights, int clear) -{ - printf(" Try to load cfg: %s, clear = %d \n", cfg, clear); - network* net = (network*)xcalloc(1, sizeof(network)); - *net = parse_network_cfg(cfg); - if (weights && weights[0] != 0) { - printf(" Try to load weights: %s \n", weights); - load_weights(net, weights); - } - if (clear) { - (*net->seen) = 0; - (*net->cur_iteration) = 0; - } - return net; -} diff --git a/src/Detector/darknet/src/parser.h b/src/Detector/darknet/src/parser.h deleted file mode 100644 index 05241167d..000000000 --- a/src/Detector/darknet/src/parser.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef PARSER_H -#define PARSER_H -#include "network.h" - -#ifdef __cplusplus -extern "C" { -#endif -network parse_network_cfg(char *filename); -network parse_network_cfg_custom(char *filename, int batch, int time_steps); -void save_network(network net, char *filename); -void save_weights(network net, char *filename); -void save_weights_upto(network net, char *filename, int cutoff, int save_ema); -void save_weights_double(network net, char *filename); -void load_weights(network *net, char *filename); -void load_weights_upto(network *net, char *filename, int cutoff); - -#ifdef __cplusplus -} -#endif -#endif 
diff --git a/src/Detector/darknet/src/region_layer.c b/src/Detector/darknet/src/region_layer.c deleted file mode 100644 index 7aa1a196f..000000000 --- a/src/Detector/darknet/src/region_layer.c +++ /dev/null @@ -1,596 +0,0 @@ -#include "region_layer.h" -#include "activations.h" -#include "blas.h" -#include "box.h" -#include "dark_cuda.h" -#include "utils.h" -#include -#include -#include -#include - -#define DOABS 1 - -region_layer make_region_layer(int batch, int w, int h, int n, int classes, int coords, int max_boxes) -{ - region_layer l = { (LAYER_TYPE)0 }; - l.type = REGION; - - l.n = n; - l.batch = batch; - l.h = h; - l.w = w; - l.classes = classes; - l.coords = coords; - l.cost = (float*)xcalloc(1, sizeof(float)); - l.biases = (float*)xcalloc(n * 2, sizeof(float)); - l.bias_updates = (float*)xcalloc(n * 2, sizeof(float)); - l.outputs = h*w*n*(classes + coords + 1); - l.inputs = l.outputs; - l.max_boxes = max_boxes; - l.truth_size = 4 + 2; - l.truths = max_boxes*l.truth_size; - l.delta = (float*)xcalloc(batch * l.outputs, sizeof(float)); - l.output = (float*)xcalloc(batch * l.outputs, sizeof(float)); - int i; - for(i = 0; i < n*2; ++i){ - l.biases[i] = .5; - } - - l.forward = forward_region_layer; - l.backward = backward_region_layer; -#ifdef GPU - l.forward_gpu = forward_region_layer_gpu; - l.backward_gpu = backward_region_layer_gpu; - l.output_gpu = cuda_make_array(l.output, batch*l.outputs); - l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs); -#endif - - fprintf(stderr, "detection\n"); - srand(time(0)); - - return l; -} - -void resize_region_layer(layer *l, int w, int h) -{ -#ifdef GPU - int old_w = l->w; - int old_h = l->h; -#endif - l->w = w; - l->h = h; - - l->outputs = h*w*l->n*(l->classes + l->coords + 1); - l->inputs = l->outputs; - - l->output = (float*)xrealloc(l->output, l->batch * l->outputs * sizeof(float)); - l->delta = (float*)xrealloc(l->delta, l->batch * l->outputs * sizeof(float)); - -#ifdef GPU - //if (old_w < w || old_h < h) - { - 
cuda_free(l->delta_gpu); - cuda_free(l->output_gpu); - - l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); - l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); - } -#endif -} - -box get_region_box(float *x, float *biases, int n, int index, int i, int j, int w, int h) -{ - box b; - b.x = (i + logistic_activate(x[index + 0])) / w; - b.y = (j + logistic_activate(x[index + 1])) / h; - b.w = exp(x[index + 2]) * biases[2*n]; - b.h = exp(x[index + 3]) * biases[2*n+1]; - if(DOABS){ - b.w = exp(x[index + 2]) * biases[2*n] / w; - b.h = exp(x[index + 3]) * biases[2*n+1] / h; - } - return b; -} - -float delta_region_box(box truth, float *x, float *biases, int n, int index, int i, int j, int w, int h, float *delta, float scale) -{ - box pred = get_region_box(x, biases, n, index, i, j, w, h); - float iou = box_iou(pred, truth); - - float tx = (truth.x*w - i); - float ty = (truth.y*h - j); - float tw = log(truth.w / biases[2*n]); - float th = log(truth.h / biases[2*n + 1]); - if(DOABS){ - tw = log(truth.w*w / biases[2*n]); - th = log(truth.h*h / biases[2*n + 1]); - } - - delta[index + 0] = scale * (tx - logistic_activate(x[index + 0])) * logistic_gradient(logistic_activate(x[index + 0])); - delta[index + 1] = scale * (ty - logistic_activate(x[index + 1])) * logistic_gradient(logistic_activate(x[index + 1])); - delta[index + 2] = scale * (tw - x[index + 2]); - delta[index + 3] = scale * (th - x[index + 3]); - return iou; -} - -void delta_region_class(float *output, float *delta, int index, int class_id, int classes, tree *hier, float scale, float *avg_cat, int focal_loss) -{ - int i, n; - if(hier){ - float pred = 1; - while(class_id >= 0){ - pred *= output[index + class_id]; - int g = hier->group[class_id]; - int offset = hier->group_offset[g]; - for(i = 0; i < hier->group_size[g]; ++i){ - delta[index + offset + i] = scale * (0 - output[index + offset + i]); - } - delta[index + class_id] = scale * (1 - output[index + class_id]); - - class_id = 
hier->parent[class_id]; - } - *avg_cat += pred; - } else { - // Focal loss - if (focal_loss) { - // Focal Loss - float alpha = 0.5; // 0.25 or 0.5 - //float gamma = 2; // hardcoded in many places of the grad-formula - - int ti = index + class_id; - float pt = output[ti] + 0.000000000000001F; - // http://fooplot.com/#W3sidHlwZSI6MCwiZXEiOiItKDEteCkqKDIqeCpsb2coeCkreC0xKSIsImNvbG9yIjoiIzAwMDAwMCJ9LHsidHlwZSI6MTAwMH1d - float grad = -(1 - pt) * (2 * pt*logf(pt) + pt - 1); // http://blog.csdn.net/linmingan/article/details/77885832 - //float grad = (1 - pt) * (2 * pt*logf(pt) + pt - 1); // https://github.com/unsky/focal-loss - - for (n = 0; n < classes; ++n) { - delta[index + n] = scale * (((n == class_id) ? 1 : 0) - output[index + n]); - - delta[index + n] *= alpha*grad; - - if (n == class_id) *avg_cat += output[index + n]; - } - } - else { - // default - for (n = 0; n < classes; ++n) { - delta[index + n] = scale * (((n == class_id) ? 1 : 0) - output[index + n]); - if (n == class_id) *avg_cat += output[index + n]; - } - } - } -} - -float logit(float x) -{ - return log(x/(1.-x)); -} - -float tisnan(float x) -{ - return (x != x); -} - -static int entry_index(layer l, int batch, int location, int entry) -{ - int n = location / (l.w*l.h); - int loc = location % (l.w*l.h); - return batch*l.outputs + n*l.w*l.h*(l.coords + l.classes + 1) + entry*l.w*l.h + loc; -} - -void softmax_tree(float *input, int batch, int inputs, float temp, tree *hierarchy, float *output); -void forward_region_layer(const region_layer l, network_state state) -{ - int i,j,b,t,n; - int size = l.coords + l.classes + 1; - memcpy(l.output, state.input, l.outputs*l.batch*sizeof(float)); - #ifndef GPU - flatten(l.output, l.w*l.h, size*l.n, l.batch, 1); - #endif - for (b = 0; b < l.batch; ++b){ - for(i = 0; i < l.h*l.w*l.n; ++i){ - int index = size*i + b*l.outputs; - l.output[index + 4] = logistic_activate(l.output[index + 4]); - } - } - - -#ifndef GPU - if (l.softmax_tree){ - for (b = 0; b < l.batch; ++b){ - 
for(i = 0; i < l.h*l.w*l.n; ++i){ - int index = size*i + b*l.outputs; - softmax_tree(l.output + index + 5, 1, 0, 1, l.softmax_tree, l.output + index + 5); - } - } - } else if (l.softmax){ - for (b = 0; b < l.batch; ++b){ - for(i = 0; i < l.h*l.w*l.n; ++i){ - int index = size*i + b*l.outputs; - softmax(l.output + index + 5, l.classes, 1, l.output + index + 5, 1); - } - } - } -#endif - if(!state.train) return; - memset(l.delta, 0, l.outputs * l.batch * sizeof(float)); - float avg_iou = 0; - float recall = 0; - float avg_cat = 0; - float avg_obj = 0; - float avg_anyobj = 0; - int count = 0; - int class_count = 0; - *(l.cost) = 0; - for (b = 0; b < l.batch; ++b) { - if(l.softmax_tree){ - int onlyclass_id = 0; - for(t = 0; t < l.max_boxes; ++t){ - box truth = float_to_box(state.truth + t*l.truth_size + b*l.truths); - if(!truth.x) break; // continue; - int class_id = state.truth[t*l.truth_size + b*l.truths + 4]; - float maxp = 0; - int maxi = 0; - if(truth.x > 100000 && truth.y > 100000){ - for(n = 0; n < l.n*l.w*l.h; ++n){ - int index = size*n + b*l.outputs + 5; - float scale = l.output[index-1]; - float p = scale*get_hierarchy_probability(l.output + index, l.softmax_tree, class_id); - if(p > maxp){ - maxp = p; - maxi = n; - } - } - int index = size*maxi + b*l.outputs + 5; - delta_region_class(l.output, l.delta, index, class_id, l.classes, l.softmax_tree, l.class_scale, &avg_cat, l.focal_loss); - ++class_count; - onlyclass_id = 1; - break; - } - } - if(onlyclass_id) continue; - } - for (j = 0; j < l.h; ++j) { - for (i = 0; i < l.w; ++i) { - for (n = 0; n < l.n; ++n) { - int index = size*(j*l.w*l.n + i*l.n + n) + b*l.outputs; - box pred = get_region_box(l.output, l.biases, n, index, i, j, l.w, l.h); - float best_iou = 0; - int best_class_id = -1; - for(t = 0; t < l.max_boxes; ++t){ - box truth = float_to_box(state.truth + t*l.truth_size + b*l.truths); - int class_id = state.truth[t * l.truth_size + b*l.truths + 4]; - if (class_id >= l.classes) continue; // if label 
contains class_id more than number of classes in the cfg-file - if(!truth.x) break; // continue; - float iou = box_iou(pred, truth); - if (iou > best_iou) { - best_class_id = state.truth[t*l.truth_size + b*l.truths + 4]; - best_iou = iou; - } - } - avg_anyobj += l.output[index + 4]; - l.delta[index + 4] = l.noobject_scale * ((0 - l.output[index + 4]) * logistic_gradient(l.output[index + 4])); - if(l.classfix == -1) l.delta[index + 4] = l.noobject_scale * ((best_iou - l.output[index + 4]) * logistic_gradient(l.output[index + 4])); - else{ - if (best_iou > l.thresh) { - l.delta[index + 4] = 0; - if(l.classfix > 0){ - delta_region_class(l.output, l.delta, index + 5, best_class_id, l.classes, l.softmax_tree, l.class_scale*(l.classfix == 2 ? l.output[index + 4] : 1), &avg_cat, l.focal_loss); - ++class_count; - } - } - } - - if(*(state.net.seen) < 12800){ - box truth = {0}; - truth.x = (i + .5)/l.w; - truth.y = (j + .5)/l.h; - truth.w = l.biases[2*n]; - truth.h = l.biases[2*n+1]; - if(DOABS){ - truth.w = l.biases[2*n]/l.w; - truth.h = l.biases[2*n+1]/l.h; - } - delta_region_box(truth, l.output, l.biases, n, index, i, j, l.w, l.h, l.delta, .01); - } - } - } - } - for(t = 0; t < l.max_boxes; ++t){ - box truth = float_to_box(state.truth + t*l.truth_size + b*l.truths); - int class_id = state.truth[t * l.truth_size + b*l.truths + 4]; - if (class_id >= l.classes) { - printf("\n Warning: in txt-labels class_id=%d >= classes=%d in cfg-file. 
In txt-labels class_id should be [from 0 to %d] \n", class_id, l.classes, l.classes-1); - getchar(); - continue; // if label contains class_id more than number of classes in the cfg-file - } - - if(!truth.x) break; // continue; - float best_iou = 0; - int best_index = 0; - int best_n = 0; - i = (truth.x * l.w); - j = (truth.y * l.h); - //printf("%d %f %d %f\n", i, truth.x*l.w, j, truth.y*l.h); - box truth_shift = truth; - truth_shift.x = 0; - truth_shift.y = 0; - //printf("index %d %d\n",i, j); - for(n = 0; n < l.n; ++n){ - int index = size*(j*l.w*l.n + i*l.n + n) + b*l.outputs; - box pred = get_region_box(l.output, l.biases, n, index, i, j, l.w, l.h); - if(l.bias_match){ - pred.w = l.biases[2*n]; - pred.h = l.biases[2*n+1]; - if(DOABS){ - pred.w = l.biases[2*n]/l.w; - pred.h = l.biases[2*n+1]/l.h; - } - } - //printf("pred: (%f, %f) %f x %f\n", pred.x, pred.y, pred.w, pred.h); - pred.x = 0; - pred.y = 0; - float iou = box_iou(pred, truth_shift); - if (iou > best_iou){ - best_index = index; - best_iou = iou; - best_n = n; - } - } - //printf("%d %f (%f, %f) %f x %f\n", best_n, best_iou, truth.x, truth.y, truth.w, truth.h); - - float iou = delta_region_box(truth, l.output, l.biases, best_n, best_index, i, j, l.w, l.h, l.delta, l.coord_scale); - if(iou > .5) recall += 1; - avg_iou += iou; - - //l.delta[best_index + 4] = iou - l.output[best_index + 4]; - avg_obj += l.output[best_index + 4]; - l.delta[best_index + 4] = l.object_scale * (1 - l.output[best_index + 4]) * logistic_gradient(l.output[best_index + 4]); - if (l.rescore) { - l.delta[best_index + 4] = l.object_scale * (iou - l.output[best_index + 4]) * logistic_gradient(l.output[best_index + 4]); - } - - if (l.map) class_id = l.map[class_id]; - delta_region_class(l.output, l.delta, best_index + 5, class_id, l.classes, l.softmax_tree, l.class_scale, &avg_cat, l.focal_loss); - ++count; - ++class_count; - } - } - //printf("\n"); - #ifndef GPU - flatten(l.delta, l.w*l.h, size*l.n, l.batch, 0); - #endif - *(l.cost) = 
pow(mag_array(l.delta, l.outputs * l.batch), 2); - printf("Region Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, Avg Recall: %f, count: %d\n", avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, count); -} - -void backward_region_layer(const region_layer l, network_state state) -{ - axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, state.delta, 1); -} - -void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness, int *map) -{ - int i; - float *const predictions = l.output; - #pragma omp parallel for - for (i = 0; i < l.w*l.h; ++i){ - int j, n; - int row = i / l.w; - int col = i % l.w; - for(n = 0; n < l.n; ++n){ - int index = i*l.n + n; - int p_index = index * (l.classes + 5) + 4; - float scale = predictions[p_index]; - if(l.classfix == -1 && scale < .5) scale = 0; - int box_index = index * (l.classes + 5); - boxes[index] = get_region_box(predictions, l.biases, n, box_index, col, row, l.w, l.h); - boxes[index].x *= w; - boxes[index].y *= h; - boxes[index].w *= w; - boxes[index].h *= h; - - int class_index = index * (l.classes + 5) + 5; - if(l.softmax_tree){ - - hierarchy_predictions(predictions + class_index, l.classes, l.softmax_tree, 0); - int found = 0; - if(map){ - for(j = 0; j < 200; ++j){ - float prob = scale*predictions[class_index+map[j]]; - probs[index][j] = (prob > thresh) ? prob : 0; - } - } else { - for(j = l.classes - 1; j >= 0; --j){ - if(!found && predictions[class_index + j] > .5){ - found = 1; - } else { - predictions[class_index + j] = 0; - } - float prob = predictions[class_index+j]; - probs[index][j] = (scale > thresh) ? prob : 0; - } - } - } else { - for(j = 0; j < l.classes; ++j){ - float prob = scale*predictions[class_index+j]; - probs[index][j] = (prob > thresh) ? 
prob : 0; - } - } - if(only_objectness){ - probs[index][0] = scale; - } - } - } -} - -#ifdef GPU - -void forward_region_layer_gpu(const region_layer l, network_state state) -{ - /* - if(!state.train){ - copy_ongpu(l.batch*l.inputs, state.input, 1, l.output_gpu, 1); - return; - } - */ - flatten_ongpu(state.input, l.h*l.w, l.n*(l.coords + l.classes + 1), l.batch, 1, l.output_gpu); - if(l.softmax_tree){ - int i; - int count = 5; - for (i = 0; i < l.softmax_tree->groups; ++i) { - int group_size = l.softmax_tree->group_size[i]; - softmax_gpu(l.output_gpu+count, group_size, l.classes + 5, l.w*l.h*l.n*l.batch, 1, l.output_gpu + count); - count += group_size; - } - }else if (l.softmax){ - softmax_gpu(l.output_gpu+5, l.classes, l.classes + 5, l.w*l.h*l.n*l.batch, 1, l.output_gpu + 5); - } - - float* in_cpu = (float*)xcalloc(l.batch * l.inputs, sizeof(float)); - float *truth_cpu = 0; - if(state.truth){ - int num_truth = l.batch*l.truths; - truth_cpu = (float*)xcalloc(num_truth, sizeof(float)); - cuda_pull_array(state.truth, truth_cpu, num_truth); - } - cuda_pull_array(l.output_gpu, in_cpu, l.batch*l.inputs); - //cudaStreamSynchronize(get_cuda_stream()); - network_state cpu_state = state; - cpu_state.train = state.train; - cpu_state.truth = truth_cpu; - cpu_state.input = in_cpu; - forward_region_layer(l, cpu_state); - //cuda_push_array(l.output_gpu, l.output, l.batch*l.outputs); - free(cpu_state.input); - if(!state.train) return; - cuda_push_array(l.delta_gpu, l.delta, l.batch*l.outputs); - //cudaStreamSynchronize(get_cuda_stream()); - if(cpu_state.truth) free(cpu_state.truth); -} - -void backward_region_layer_gpu(region_layer l, network_state state) -{ - flatten_ongpu(l.delta_gpu, l.h*l.w, l.n*(l.coords + l.classes + 1), l.batch, 0, state.delta); -} -#endif - - -void correct_region_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative) -{ - int i; - int new_w = 0; - int new_h = 0; - if (((float)netw / w) < ((float)neth / h)) { - new_w = netw; - new_h = 
(h * netw) / w; - } - else { - new_h = neth; - new_w = (w * neth) / h; - } - for (i = 0; i < n; ++i) { - box b = dets[i].bbox; - b.x = (b.x - (netw - new_w) / 2. / netw) / ((float)new_w / netw); - b.y = (b.y - (neth - new_h) / 2. / neth) / ((float)new_h / neth); - b.w *= (float)netw / new_w; - b.h *= (float)neth / new_h; - if (!relative) { - b.x *= w; - b.w *= w; - b.y *= h; - b.h *= h; - } - dets[i].bbox = b; - } -} - - -void get_region_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, float tree_thresh, int relative, detection *dets) -{ - int i, j, n, z; - float *predictions = l.output; - if (l.batch == 2) { - float *flip = l.output + l.outputs; - for (j = 0; j < l.h; ++j) { - for (i = 0; i < l.w / 2; ++i) { - for (n = 0; n < l.n; ++n) { - for (z = 0; z < l.classes + l.coords + 1; ++z) { - int i1 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + i; - int i2 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + (l.w - i - 1); - float swap = flip[i1]; - flip[i1] = flip[i2]; - flip[i2] = swap; - if (z == 0) { - flip[i1] = -flip[i1]; - flip[i2] = -flip[i2]; - } - } - } - } - } - for (i = 0; i < l.outputs; ++i) { - l.output[i] = (l.output[i] + flip[i]) / 2.; - } - } - for (i = 0; i < l.w*l.h; ++i) { - int row = i / l.w; - int col = i % l.w; - for (n = 0; n < l.n; ++n) { - int index = n*l.w*l.h + i; - for (j = 0; j < l.classes; ++j) { - dets[index].prob[j] = 0; - } - int obj_index = entry_index(l, 0, n*l.w*l.h + i, l.coords); - int box_index = entry_index(l, 0, n*l.w*l.h + i, 0); - int mask_index = entry_index(l, 0, n*l.w*l.h + i, 4); - float scale = l.background ? 1 : predictions[obj_index]; - dets[index].bbox = get_region_box(predictions, l.biases, n, box_index, col, row, l.w, l.h);// , l.w*l.h); - dets[index].objectness = scale > thresh ? 
scale : 0; - if (dets[index].mask) { - for (j = 0; j < l.coords - 4; ++j) { - dets[index].mask[j] = l.output[mask_index + j*l.w*l.h]; - } - } - - int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + !l.background); - if (l.softmax_tree) { - - hierarchy_predictions(predictions + class_index, l.classes, l.softmax_tree, 0);// , l.w*l.h); - if (map) { - for (j = 0; j < 200; ++j) { - int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + 1 + map[j]); - float prob = scale*predictions[class_index]; - dets[index].prob[j] = (prob > thresh) ? prob : 0; - } - } - else { - int j = hierarchy_top_prediction(predictions + class_index, l.softmax_tree, tree_thresh, l.w*l.h); - dets[index].prob[j] = (scale > thresh) ? scale : 0; - } - } - else { - if (dets[index].objectness) { - for (j = 0; j < l.classes; ++j) { - int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + 1 + j); - float prob = scale*predictions[class_index]; - dets[index].prob[j] = (prob > thresh) ? prob : 0; - } - } - } - } - } - correct_region_boxes(dets, l.w*l.h*l.n, w, h, netw, neth, relative); -} - -void zero_objectness(layer l) -{ - int i, n; - for (i = 0; i < l.w*l.h; ++i) { - for (n = 0; n < l.n; ++n) { - int obj_index = entry_index(l, 0, n*l.w*l.h + i, l.coords); - l.output[obj_index] = 0; - } - } -} diff --git a/src/Detector/darknet/src/region_layer.h b/src/Detector/darknet/src/region_layer.h deleted file mode 100644 index e616624e2..000000000 --- a/src/Detector/darknet/src/region_layer.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef REGION_LAYER_H -#define REGION_LAYER_H - -#include "layer.h" -#include "network.h" - -typedef layer region_layer; - -#ifdef __cplusplus -extern "C" { -#endif -region_layer make_region_layer(int batch, int w, int h, int n, int classes, int coords, int max_boxes); -void forward_region_layer(const region_layer l, network_state state); -void backward_region_layer(const region_layer l, network_state state); -void get_region_boxes(layer l, int w, int h, float thresh, 
float **probs, box *boxes, int only_objectness, int *map); -void resize_region_layer(layer *l, int w, int h); -void get_region_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, float tree_thresh, int relative, detection *dets); -void correct_region_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative); -void zero_objectness(layer l); - -#ifdef GPU -void forward_region_layer_gpu(const region_layer l, network_state state); -void backward_region_layer_gpu(region_layer l, network_state state); -#endif - -#ifdef __cplusplus -} -#endif -#endif diff --git a/src/Detector/darknet/src/reorg_layer.c b/src/Detector/darknet/src/reorg_layer.c deleted file mode 100644 index 7a4c0aecc..000000000 --- a/src/Detector/darknet/src/reorg_layer.c +++ /dev/null @@ -1,119 +0,0 @@ -#include "reorg_layer.h" -#include "dark_cuda.h" -#include "blas.h" -#include "utils.h" -#include - - -layer make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse) -{ - layer l = { (LAYER_TYPE)0 }; - l.type = REORG; - l.batch = batch; - l.stride = stride; - l.h = h; - l.w = w; - l.c = c; - if(reverse){ - l.out_w = w*stride; - l.out_h = h*stride; - l.out_c = c/(stride*stride); - }else{ - l.out_w = w/stride; - l.out_h = h/stride; - l.out_c = c*(stride*stride); - } - l.reverse = reverse; - fprintf(stderr, "reorg /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", stride, w, h, c, l.out_w, l.out_h, l.out_c); - l.outputs = l.out_h * l.out_w * l.out_c; - l.inputs = h*w*c; - int output_size = l.out_h * l.out_w * l.out_c * batch; - l.output = (float*)xcalloc(output_size, sizeof(float)); - l.delta = (float*)xcalloc(output_size, sizeof(float)); - - l.forward = forward_reorg_layer; - l.backward = backward_reorg_layer; -#ifdef GPU - l.forward_gpu = forward_reorg_layer_gpu; - l.backward_gpu = backward_reorg_layer_gpu; - - l.output_gpu = cuda_make_array(l.output, output_size); - l.delta_gpu = cuda_make_array(l.delta, output_size); -#endif - return l; -} - -void 
resize_reorg_layer(layer *l, int w, int h) -{ - int stride = l->stride; - int c = l->c; - - l->h = h; - l->w = w; - - if(l->reverse){ - l->out_w = w*stride; - l->out_h = h*stride; - l->out_c = c/(stride*stride); - }else{ - l->out_w = w/stride; - l->out_h = h/stride; - l->out_c = c*(stride*stride); - } - - l->outputs = l->out_h * l->out_w * l->out_c; - l->inputs = l->outputs; - int output_size = l->outputs * l->batch; - - l->output = (float*)xrealloc(l->output, output_size * sizeof(float)); - l->delta = (float*)xrealloc(l->delta, output_size * sizeof(float)); - -#ifdef GPU - cuda_free(l->output_gpu); - cuda_free(l->delta_gpu); - l->output_gpu = cuda_make_array(l->output, output_size); - l->delta_gpu = cuda_make_array(l->delta, output_size); -#endif -} - -void forward_reorg_layer(const layer l, network_state state) -{ - if (l.reverse) { - reorg_cpu(state.input, l.out_w, l.out_h, l.out_c, l.batch, l.stride, 1, l.output); - } - else { - reorg_cpu(state.input, l.out_w, l.out_h, l.out_c, l.batch, l.stride, 0, l.output); - } -} - -void backward_reorg_layer(const layer l, network_state state) -{ - if (l.reverse) { - reorg_cpu(l.delta, l.out_w, l.out_h, l.out_c, l.batch, l.stride, 0, state.delta); - } - else { - reorg_cpu(l.delta, l.out_w, l.out_h, l.out_c, l.batch, l.stride, 1, state.delta); - } -} - -#ifdef GPU -void forward_reorg_layer_gpu(layer l, network_state state) -{ - if (l.reverse) { - reorg_ongpu(state.input, l.out_w, l.out_h, l.out_c, l.batch, l.stride, 1, l.output_gpu); - } - else { - reorg_ongpu(state.input, l.out_w, l.out_h, l.out_c, l.batch, l.stride, 0, l.output_gpu); - } -} - -void backward_reorg_layer_gpu(layer l, network_state state) -{ - if (l.reverse) { - reorg_ongpu(l.delta_gpu, l.out_w, l.out_h, l.out_c, l.batch, l.stride, 0, state.delta); - } - else { - reorg_ongpu(l.delta_gpu, l.out_w, l.out_h, l.out_c, l.batch, l.stride, 1, state.delta); - } -} -#endif diff --git a/src/Detector/darknet/src/reorg_layer.h b/src/Detector/darknet/src/reorg_layer.h 
deleted file mode 100644 index 631856863..000000000 --- a/src/Detector/darknet/src/reorg_layer.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef REORG_LAYER_H -#define REORG_LAYER_H - -#include "image.h" -#include "dark_cuda.h" -#include "layer.h" -#include "network.h" - -#ifdef __cplusplus -extern "C" { -#endif -layer make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse); -void resize_reorg_layer(layer *l, int w, int h); -void forward_reorg_layer(const layer l, network_state state); -void backward_reorg_layer(const layer l, network_state state); - -#ifdef GPU -void forward_reorg_layer_gpu(layer l, network_state state); -void backward_reorg_layer_gpu(layer l, network_state state); -#endif - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/Detector/darknet/src/reorg_old_layer.c b/src/Detector/darknet/src/reorg_old_layer.c deleted file mode 100644 index cb715e6e8..000000000 --- a/src/Detector/darknet/src/reorg_old_layer.c +++ /dev/null @@ -1,119 +0,0 @@ -#include "reorg_old_layer.h" -#include "utils.h" -#include "dark_cuda.h" -#include "blas.h" -#include - - -layer make_reorg_old_layer(int batch, int w, int h, int c, int stride, int reverse) -{ - layer l = { (LAYER_TYPE)0 }; - l.type = REORG_OLD; - l.batch = batch; - l.stride = stride; - l.h = h; - l.w = w; - l.c = c; - if(reverse){ - l.out_w = w*stride; - l.out_h = h*stride; - l.out_c = c/(stride*stride); - }else{ - l.out_w = w/stride; - l.out_h = h/stride; - l.out_c = c*(stride*stride); - } - l.reverse = reverse; - fprintf(stderr, "reorg_old /%2d %4d x%4d x%4d -> %4d x%4d x%4d\n", stride, w, h, c, l.out_w, l.out_h, l.out_c); - l.outputs = l.out_h * l.out_w * l.out_c; - l.inputs = h*w*c; - int output_size = l.out_h * l.out_w * l.out_c * batch; - l.output = (float*)xcalloc(output_size, sizeof(float)); - l.delta = (float*)xcalloc(output_size, sizeof(float)); - - l.forward = forward_reorg_old_layer; - l.backward = backward_reorg_old_layer; -#ifdef GPU - l.forward_gpu = forward_reorg_old_layer_gpu; 
- l.backward_gpu = backward_reorg_old_layer_gpu; - - l.output_gpu = cuda_make_array(l.output, output_size); - l.delta_gpu = cuda_make_array(l.delta, output_size); -#endif - return l; -} - -void resize_reorg_old_layer(layer *l, int w, int h) -{ - int stride = l->stride; - int c = l->c; - - l->h = h; - l->w = w; - - if(l->reverse){ - l->out_w = w*stride; - l->out_h = h*stride; - l->out_c = c/(stride*stride); - }else{ - l->out_w = w/stride; - l->out_h = h/stride; - l->out_c = c*(stride*stride); - } - - l->outputs = l->out_h * l->out_w * l->out_c; - l->inputs = l->outputs; - int output_size = l->outputs * l->batch; - - l->output = (float*)xrealloc(l->output, output_size * sizeof(float)); - l->delta = (float*)xrealloc(l->delta, output_size * sizeof(float)); - -#ifdef GPU - cuda_free(l->output_gpu); - cuda_free(l->delta_gpu); - l->output_gpu = cuda_make_array(l->output, output_size); - l->delta_gpu = cuda_make_array(l->delta, output_size); -#endif -} - -void forward_reorg_old_layer(const layer l, network_state state) -{ - if (l.reverse) { - reorg_cpu(state.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.output); - } - else { - reorg_cpu(state.input, l.w, l.h, l.c, l.batch, l.stride, 0, l.output); - } -} - -void backward_reorg_old_layer(const layer l, network_state state) -{ - if (l.reverse) { - reorg_cpu(l.delta, l.w, l.h, l.c, l.batch, l.stride, 0, state.delta); - } - else { - reorg_cpu(l.delta, l.w, l.h, l.c, l.batch, l.stride, 1, state.delta); - } -} - -#ifdef GPU -void forward_reorg_old_layer_gpu(layer l, network_state state) -{ - if (l.reverse) { - reorg_ongpu(state.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.output_gpu); - } - else { - reorg_ongpu(state.input, l.w, l.h, l.c, l.batch, l.stride, 0, l.output_gpu); - } -} - -void backward_reorg_old_layer_gpu(layer l, network_state state) -{ - if (l.reverse) { - reorg_ongpu(l.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 0, state.delta); - } - else { - reorg_ongpu(l.delta_gpu, l.w, l.h, l.c, l.batch, l.stride, 1, 
state.delta); - } -} -#endif diff --git a/src/Detector/darknet/src/reorg_old_layer.h b/src/Detector/darknet/src/reorg_old_layer.h deleted file mode 100644 index caa8c9185..000000000 --- a/src/Detector/darknet/src/reorg_old_layer.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef REORG_OLD_LAYER_H -#define REORG_OLD_LAYER_H - -#include "image.h" -#include "dark_cuda.h" -#include "layer.h" -#include "network.h" - -#ifdef __cplusplus -extern "C" { -#endif -layer make_reorg_old_layer(int batch, int w, int h, int c, int stride, int reverse); -void resize_reorg_old_layer(layer *l, int w, int h); -void forward_reorg_old_layer(const layer l, network_state state); -void backward_reorg_old_layer(const layer l, network_state state); - -#ifdef GPU -void forward_reorg_old_layer_gpu(layer l, network_state state); -void backward_reorg_old_layer_gpu(layer l, network_state state); -#endif - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/Detector/darknet/src/rnn.c b/src/Detector/darknet/src/rnn.c deleted file mode 100644 index 53fb8f320..000000000 --- a/src/Detector/darknet/src/rnn.c +++ /dev/null @@ -1,498 +0,0 @@ -#include "network.h" -#include "cost_layer.h" -#include "utils.h" -#include "blas.h" -#include "parser.h" - -typedef struct { - float *x; - float *y; -} float_pair; - -int *read_tokenized_data(char *filename, size_t *read) -{ - size_t size = 512; - size_t count = 0; - FILE *fp = fopen(filename, "r"); - int* d = (int*)xcalloc(size, sizeof(int)); - int n, one; - one = fscanf(fp, "%d", &n); - while(one == 1){ - ++count; - if(count > size){ - size = size*2; - d = (int*)xrealloc(d, size * sizeof(int)); - } - d[count-1] = n; - one = fscanf(fp, "%d", &n); - } - fclose(fp); - d = (int*)xrealloc(d, count * sizeof(int)); - *read = count; - return d; -} - -char **read_tokens(char *filename, size_t *read) -{ - size_t size = 512; - size_t count = 0; - FILE *fp = fopen(filename, "r"); - char** d = (char**)xcalloc(size, sizeof(char*)); - char *line; - while((line=fgetl(fp)) != 0){ - 
++count; - if(count > size){ - size = size*2; - d = (char**)xrealloc(d, size * sizeof(char*)); - } - d[count-1] = line; - } - fclose(fp); - d = (char**)xrealloc(d, count * sizeof(char*)); - *read = count; - return d; -} - -float_pair get_rnn_token_data(int *tokens, size_t *offsets, int characters, size_t len, int batch, int steps) -{ - float* x = (float*)xcalloc(batch * steps * characters, sizeof(float)); - float* y = (float*)xcalloc(batch * steps * characters, sizeof(float)); - int i,j; - for(i = 0; i < batch; ++i){ - for(j = 0; j < steps; ++j){ - int curr = tokens[(offsets[i])%len]; - int next = tokens[(offsets[i] + 1)%len]; - - x[(j*batch + i)*characters + curr] = 1; - y[(j*batch + i)*characters + next] = 1; - - offsets[i] = (offsets[i] + 1) % len; - - if(curr >= characters || curr < 0 || next >= characters || next < 0){ - error("Bad char"); - } - } - } - float_pair p; - p.x = x; - p.y = y; - return p; -} - -float_pair get_rnn_data(unsigned char *text, size_t *offsets, int characters, size_t len, int batch, int steps) -{ - float* x = (float*)xcalloc(batch * steps * characters, sizeof(float)); - float* y = (float*)xcalloc(batch * steps * characters, sizeof(float)); - int i,j; - for(i = 0; i < batch; ++i){ - for(j = 0; j < steps; ++j){ - unsigned char curr = text[(offsets[i])%len]; - unsigned char next = text[(offsets[i] + 1)%len]; - - x[(j*batch + i)*characters + curr] = 1; - y[(j*batch + i)*characters + next] = 1; - - offsets[i] = (offsets[i] + 1) % len; - - if(curr > 255 || curr <= 0 || next > 255 || next <= 0){ - /*text[(index+j+2)%len] = 0; - printf("%ld %d %d %d %d\n", index, j, len, (int)text[index+j], (int)text[index+j+1]); - printf("%s", text+index); - */ - error("Bad char"); - } - } - } - float_pair p; - p.x = x; - p.y = y; - return p; -} - -void reset_rnn_state(network net, int b) -{ - int i; - for (i = 0; i < net.n; ++i) { - #ifdef GPU - layer l = net.layers[i]; - if(l.state_gpu){ - fill_ongpu(l.outputs, 0, l.state_gpu + l.outputs*b, 1); - } - #endif - 
} -} - -void train_char_rnn(char *cfgfile, char *weightfile, char *filename, int clear, int tokenized) -{ - srand(time(0)); - unsigned char *text = 0; - int *tokens = 0; - size_t size; - if(tokenized){ - tokens = read_tokenized_data(filename, &size); - } else { - FILE *fp = fopen(filename, "rb"); - - fseek(fp, 0, SEEK_END); - size = ftell(fp); - fseek(fp, 0, SEEK_SET); - - text = (unsigned char *)xcalloc(size + 1, sizeof(char)); - fread(text, 1, size, fp); - fclose(fp); - } - - char* backup_directory = "backup/"; - char *base = basecfg(cfgfile); - fprintf(stderr, "%s\n", base); - float avg_loss = -1; - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - - int inputs = get_network_input_size(net); - fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); - int batch = net.batch; - int steps = net.time_steps; - if (clear) { - *net.seen = 0; - *net.cur_iteration = 0; - } - int i = (*net.seen)/net.batch; - - int streams = batch/steps; - printf("\n batch = %d, steps = %d, streams = %d, subdivisions = %d, text_size = %ld \n", batch, steps, streams, net.subdivisions, size); - printf(" global_batch = %d \n", batch*net.subdivisions); - size_t* offsets = (size_t*)xcalloc(streams, sizeof(size_t)); - int j; - for(j = 0; j < streams; ++j){ - offsets[j] = rand_size_t()%size; - //printf(" offset[%d] = %d, ", j, offsets[j]); - } - //printf("\n"); - - clock_t time; - while(get_current_batch(net) < net.max_batches){ - i += 1; - time=clock(); - float_pair p; - if(tokenized){ - p = get_rnn_token_data(tokens, offsets, inputs, size, streams, steps); - }else{ - p = get_rnn_data(text, offsets, inputs, size, streams, steps); - } - - float loss = train_network_datum(net, p.x, p.y) / (batch); - free(p.x); - free(p.y); - if (avg_loss < 0) avg_loss = loss; - avg_loss = avg_loss*.9 + loss*.1; - - int chars = get_current_batch(net)*batch; - fprintf(stderr, "%d: %f, %f avg, %f rate, %lf seconds, 
%f epochs\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), (float) chars/size); - - for(j = 0; j < streams; ++j){ - //printf("%d\n", j); - if(rand()%10 == 0){ - //fprintf(stderr, "Reset\n"); - offsets[j] = rand_size_t()%size; - reset_rnn_state(net, j); - } - } - - if(i%1000==0){ - char buff[256]; - sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); - save_weights(net, buff); - } - if(i%10==0){ - char buff[256]; - sprintf(buff, "%s/%s.backup", backup_directory, base); - save_weights(net, buff); - } - } - char buff[256]; - sprintf(buff, "%s/%s_final.weights", backup_directory, base); - save_weights(net, buff); -} - -void print_symbol(int n, char **tokens){ - if(tokens){ - printf("%s ", tokens[n]); - } else { - printf("%c", n); - } -} - -void test_char_rnn(char *cfgfile, char *weightfile, int num, char *seed, float temp, int rseed, char *token_file) -{ - char **tokens = 0; - if(token_file){ - size_t n; - tokens = read_tokens(token_file, &n); - } - - srand(rseed); - char *base = basecfg(cfgfile); - fprintf(stderr, "%s\n", base); - - network net = parse_network_cfg_custom(cfgfile, 1, 1); // batch=1, time_steps=1 - if(weightfile){ - load_weights(&net, weightfile); - } - int inputs = get_network_input_size(net); - - int i, j; - for(i = 0; i < net.n; ++i) net.layers[i].temperature = temp; - int c = 0; - int len = strlen(seed); - float* input = (float*)xcalloc(inputs, sizeof(float)); - - /* - fill_cpu(inputs, 0, input, 1); - for(i = 0; i < 10; ++i){ - network_predict(net, input); - } - fill_cpu(inputs, 0, input, 1); - */ - - for(i = 0; i < len-1; ++i){ - c = seed[i]; - input[c] = 1; - network_predict(net, input); - input[c] = 0; - print_symbol(c, tokens); - } - if(len) c = seed[len-1]; - print_symbol(c, tokens); - for(i = 0; i < num; ++i){ - input[c] = 1; - float *out = network_predict(net, input); - input[c] = 0; - for(j = 32; j < 127; ++j){ - //printf("%d %c %f\n",j, j, out[j]); - } - for(j = 0; j < inputs; ++j){ - if (out[j] < .0001) out[j] = 
0; - } - c = sample_array(out, inputs); - //c = sample_array_custom(out, inputs); - //c = max_index(out, inputs); - //c = top_max_index(out, inputs, 2); - print_symbol(c, tokens); - } - printf("\n"); -} - -void test_tactic_rnn(char *cfgfile, char *weightfile, int num, float temp, int rseed, char *token_file) -{ - char **tokens = 0; - if(token_file){ - size_t n; - tokens = read_tokens(token_file, &n); - } - - srand(rseed); - char *base = basecfg(cfgfile); - fprintf(stderr, "%s\n", base); - - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - int inputs = get_network_input_size(net); - - int i, j; - for(i = 0; i < net.n; ++i) net.layers[i].temperature = temp; - int c = 0; - float* input = (float*)xcalloc(inputs, sizeof(float)); - float *out = 0; - - while((c = getc(stdin)) != EOF){ - input[c] = 1; - out = network_predict(net, input); - input[c] = 0; - } - for(i = 0; i < num; ++i){ - for(j = 0; j < inputs; ++j){ - if (out[j] < .0001) out[j] = 0; - } - int next = sample_array(out, inputs); - if(c == '.' 
&& next == '\n') break; - c = next; - print_symbol(c, tokens); - - input[c] = 1; - out = network_predict(net, input); - input[c] = 0; - } - printf("\n"); -} - -void valid_tactic_rnn(char *cfgfile, char *weightfile, char *seed) -{ - char *base = basecfg(cfgfile); - fprintf(stderr, "%s\n", base); - - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - int inputs = get_network_input_size(net); - - int count = 0; - int words = 1; - int c; - int len = strlen(seed); - float* input = (float*)xcalloc(inputs, sizeof(float)); - int i; - for(i = 0; i < len; ++i){ - c = seed[i]; - input[(int)c] = 1; - network_predict(net, input); - input[(int)c] = 0; - } - float sum = 0; - c = getc(stdin); - float log2 = log(2); - int in = 0; - while(c != EOF){ - int next = getc(stdin); - if(next == EOF) break; - if(next < 0 || next >= 255) error("Out of range character"); - - input[c] = 1; - float *out = network_predict(net, input); - input[c] = 0; - - if(c == '.' && next == '\n') in = 0; - if(!in) { - if(c == '>' && next == '>'){ - in = 1; - ++words; - } - c = next; - continue; - } - ++count; - sum += log(out[next])/log2; - c = next; - printf("%d %d Perplexity: %4.4f Word Perplexity: %4.4f\n", count, words, pow(2, -sum/count), pow(2, -sum/words)); - } -} - -void valid_char_rnn(char *cfgfile, char *weightfile, char *seed) -{ - char *base = basecfg(cfgfile); - fprintf(stderr, "%s\n", base); - - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - int inputs = get_network_input_size(net); - - int count = 0; - int words = 1; - int c; - int len = strlen(seed); - float* input = (float*)xcalloc(inputs, sizeof(float)); - int i; - for(i = 0; i < len; ++i){ - c = seed[i]; - input[(int)c] = 1; - network_predict(net, input); - input[(int)c] = 0; - } - float sum = 0; - c = getc(stdin); - float log2 = log(2); - while(c != EOF){ - int next = getc(stdin); - if(next == EOF) break; - if(next < 0 || next >= 255) 
error("Out of range character"); - ++count; - if(next == ' ' || next == '\n' || next == '\t') ++words; - input[c] = 1; - float *out = network_predict(net, input); - input[c] = 0; - sum += log(out[next])/log2; - c = next; - printf("%d Perplexity: %4.4f Word Perplexity: %4.4f\n", count, pow(2, -sum/count), pow(2, -sum/words)); - } -} - -void vec_char_rnn(char *cfgfile, char *weightfile, char *seed) -{ - char *base = basecfg(cfgfile); - fprintf(stderr, "%s\n", base); - - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - int inputs = get_network_input_size(net); - - int c; - int seed_len = strlen(seed); - float* input = (float*)xcalloc(inputs, sizeof(float)); - int i; - char *line; - while((line=fgetl(stdin)) != 0){ - reset_rnn_state(net, 0); - for(i = 0; i < seed_len; ++i){ - c = seed[i]; - input[(int)c] = 1; - network_predict(net, input); - input[(int)c] = 0; - } - strip(line); - int str_len = strlen(line); - for(i = 0; i < str_len; ++i){ - c = line[i]; - input[(int)c] = 1; - network_predict(net, input); - input[(int)c] = 0; - } - c = ' '; - input[(int)c] = 1; - network_predict(net, input); - input[(int)c] = 0; - - layer l = net.layers[0]; - #ifdef GPU - cuda_pull_array(l.output_gpu, l.output, l.outputs); - #endif - printf("%s", line); - for(i = 0; i < l.outputs; ++i){ - printf(",%g", l.output[i]); - } - printf("\n"); - } -} - -void run_char_rnn(int argc, char **argv) -{ - if(argc < 4){ - fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); - return; - } - char *filename = find_char_arg(argc, argv, "-file", "data/shakespeare.txt"); - char *seed = find_char_arg(argc, argv, "-seed", "\n\n"); - int len = find_int_arg(argc, argv, "-len", 1000); - float temp = find_float_arg(argc, argv, "-temp", .7); - int rseed = find_int_arg(argc, argv, "-srand", time(0)); - int clear = find_arg(argc, argv, "-clear"); - int tokenized = find_arg(argc, argv, "-tokenized"); - char *tokens = 
find_char_arg(argc, argv, "-tokens", 0); - - char *cfg = argv[3]; - char *weights = (argc > 4) ? argv[4] : 0; - if(0==strcmp(argv[2], "train")) train_char_rnn(cfg, weights, filename, clear, tokenized); - else if(0==strcmp(argv[2], "valid")) valid_char_rnn(cfg, weights, seed); - else if(0==strcmp(argv[2], "validtactic")) valid_tactic_rnn(cfg, weights, seed); - else if(0==strcmp(argv[2], "vec")) vec_char_rnn(cfg, weights, seed); - else if(0==strcmp(argv[2], "generate")) test_char_rnn(cfg, weights, len, seed, temp, rseed, tokens); - else if(0==strcmp(argv[2], "generatetactic")) test_tactic_rnn(cfg, weights, len, temp, rseed, tokens); -} diff --git a/src/Detector/darknet/src/rnn_layer.c b/src/Detector/darknet/src/rnn_layer.c deleted file mode 100644 index 98f0d489e..000000000 --- a/src/Detector/darknet/src/rnn_layer.c +++ /dev/null @@ -1,289 +0,0 @@ -#include "rnn_layer.h" -#include "connected_layer.h" -#include "utils.h" -#include "dark_cuda.h" -#include "blas.h" -#include "gemm.h" - -#include -#include -#include -#include - -static void increment_layer(layer *l, int steps) -{ - int num = l->outputs*l->batch*steps; - l->output += num; - l->delta += num; - l->x += num; - l->x_norm += num; - -#ifdef GPU - l->output_gpu += num; - l->delta_gpu += num; - l->x_gpu += num; - l->x_norm_gpu += num; -#endif -} - -layer make_rnn_layer(int batch, int inputs, int hidden, int outputs, int steps, ACTIVATION activation, int batch_normalize, int log) -{ - fprintf(stderr, "RNN Layer: %d inputs, %d outputs\n", inputs, outputs); - batch = batch / steps; - layer l = { (LAYER_TYPE)0 }; - l.batch = batch; - l.type = RNN; - l.steps = steps; - l.hidden = hidden; - l.inputs = inputs; - l.out_w = 1; - l.out_h = 1; - l.out_c = outputs; - - l.state = (float*)xcalloc(batch * hidden * (steps + 1), sizeof(float)); - - l.input_layer = (layer*)xcalloc(1, sizeof(layer)); - fprintf(stderr, "\t\t"); - *(l.input_layer) = make_connected_layer(batch, steps, inputs, hidden, activation, batch_normalize); - 
l.input_layer->batch = batch; - if (l.workspace_size < l.input_layer->workspace_size) l.workspace_size = l.input_layer->workspace_size; - - l.self_layer = (layer*)xcalloc(1, sizeof(layer)); - fprintf(stderr, "\t\t"); - *(l.self_layer) = make_connected_layer(batch, steps, hidden, hidden, (log==2)?LOGGY:(log==1?LOGISTIC:activation), batch_normalize); - l.self_layer->batch = batch; - if (l.workspace_size < l.self_layer->workspace_size) l.workspace_size = l.self_layer->workspace_size; - - l.output_layer = (layer*)xcalloc(1, sizeof(layer)); - fprintf(stderr, "\t\t"); - *(l.output_layer) = make_connected_layer(batch, steps, hidden, outputs, activation, batch_normalize); - l.output_layer->batch = batch; - if (l.workspace_size < l.output_layer->workspace_size) l.workspace_size = l.output_layer->workspace_size; - - l.outputs = outputs; - l.output = l.output_layer->output; - l.delta = l.output_layer->delta; - - l.forward = forward_rnn_layer; - l.backward = backward_rnn_layer; - l.update = update_rnn_layer; -#ifdef GPU - l.forward_gpu = forward_rnn_layer_gpu; - l.backward_gpu = backward_rnn_layer_gpu; - l.update_gpu = update_rnn_layer_gpu; - l.state_gpu = cuda_make_array(l.state, batch*hidden*(steps+1)); - l.output_gpu = l.output_layer->output_gpu; - l.delta_gpu = l.output_layer->delta_gpu; -#endif - - return l; -} - -void update_rnn_layer(layer l, int batch, float learning_rate, float momentum, float decay) -{ - update_connected_layer(*(l.input_layer), batch, learning_rate, momentum, decay); - update_connected_layer(*(l.self_layer), batch, learning_rate, momentum, decay); - update_connected_layer(*(l.output_layer), batch, learning_rate, momentum, decay); -} - -void forward_rnn_layer(layer l, network_state state) -{ - network_state s = {0}; - s.train = state.train; - s.workspace = state.workspace; - int i; - layer input_layer = *(l.input_layer); - layer self_layer = *(l.self_layer); - layer output_layer = *(l.output_layer); - - fill_cpu(l.outputs * l.batch * l.steps, 0, 
output_layer.delta, 1); - fill_cpu(l.hidden * l.batch * l.steps, 0, self_layer.delta, 1); - fill_cpu(l.hidden * l.batch * l.steps, 0, input_layer.delta, 1); - if(state.train) fill_cpu(l.hidden * l.batch, 0, l.state, 1); - - for (i = 0; i < l.steps; ++i) { - - s.input = state.input; - forward_connected_layer(input_layer, s); - - s.input = l.state; - forward_connected_layer(self_layer, s); - - float *old_state = l.state; - if(state.train) l.state += l.hidden*l.batch; - if(l.shortcut){ - copy_cpu(l.hidden * l.batch, old_state, 1, l.state, 1); - }else{ - fill_cpu(l.hidden * l.batch, 0, l.state, 1); - } - axpy_cpu(l.hidden * l.batch, 1, input_layer.output, 1, l.state, 1); - axpy_cpu(l.hidden * l.batch, 1, self_layer.output, 1, l.state, 1); - - s.input = l.state; - forward_connected_layer(output_layer, s); - - state.input += l.inputs*l.batch; - increment_layer(&input_layer, 1); - increment_layer(&self_layer, 1); - increment_layer(&output_layer, 1); - } -} - -void backward_rnn_layer(layer l, network_state state) -{ - network_state s = {0}; - s.train = state.train; - s.workspace = state.workspace; - int i; - layer input_layer = *(l.input_layer); - layer self_layer = *(l.self_layer); - layer output_layer = *(l.output_layer); - - increment_layer(&input_layer, l.steps-1); - increment_layer(&self_layer, l.steps-1); - increment_layer(&output_layer, l.steps-1); - - l.state += l.hidden*l.batch*l.steps; - for (i = l.steps-1; i >= 0; --i) { - copy_cpu(l.hidden * l.batch, input_layer.output, 1, l.state, 1); - axpy_cpu(l.hidden * l.batch, 1, self_layer.output, 1, l.state, 1); - - s.input = l.state; - s.delta = self_layer.delta; - backward_connected_layer(output_layer, s); - - l.state -= l.hidden*l.batch; - /* - if(i > 0){ - copy_cpu(l.hidden * l.batch, input_layer.output - l.hidden*l.batch, 1, l.state, 1); - axpy_cpu(l.hidden * l.batch, 1, self_layer.output - l.hidden*l.batch, 1, l.state, 1); - }else{ - fill_cpu(l.hidden * l.batch, 0, l.state, 1); - } - */ - - s.input = l.state; - 
s.delta = self_layer.delta - l.hidden*l.batch; - if (i == 0) s.delta = 0; - backward_connected_layer(self_layer, s); - - copy_cpu(l.hidden*l.batch, self_layer.delta, 1, input_layer.delta, 1); - if (i > 0 && l.shortcut) axpy_cpu(l.hidden*l.batch, 1, self_layer.delta, 1, self_layer.delta - l.hidden*l.batch, 1); - s.input = state.input + i*l.inputs*l.batch; - if(state.delta) s.delta = state.delta + i*l.inputs*l.batch; - else s.delta = 0; - backward_connected_layer(input_layer, s); - - increment_layer(&input_layer, -1); - increment_layer(&self_layer, -1); - increment_layer(&output_layer, -1); - } -} - -#ifdef GPU - -void pull_rnn_layer(layer l) -{ - pull_connected_layer(*(l.input_layer)); - pull_connected_layer(*(l.self_layer)); - pull_connected_layer(*(l.output_layer)); -} - -void push_rnn_layer(layer l) -{ - push_connected_layer(*(l.input_layer)); - push_connected_layer(*(l.self_layer)); - push_connected_layer(*(l.output_layer)); -} - -void update_rnn_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay, float loss_scale) -{ - update_connected_layer_gpu(*(l.input_layer), batch, learning_rate, momentum, decay, loss_scale); - update_connected_layer_gpu(*(l.self_layer), batch, learning_rate, momentum, decay, loss_scale); - update_connected_layer_gpu(*(l.output_layer), batch, learning_rate, momentum, decay, loss_scale); -} - -void forward_rnn_layer_gpu(layer l, network_state state) -{ - network_state s = {0}; - s.train = state.train; - s.workspace = state.workspace; - int i; - layer input_layer = *(l.input_layer); - layer self_layer = *(l.self_layer); - layer output_layer = *(l.output_layer); - - fill_ongpu(l.outputs * l.batch * l.steps, 0, output_layer.delta_gpu, 1); - fill_ongpu(l.hidden * l.batch * l.steps, 0, self_layer.delta_gpu, 1); - fill_ongpu(l.hidden * l.batch * l.steps, 0, input_layer.delta_gpu, 1); - if(state.train) fill_ongpu(l.hidden * l.batch, 0, l.state_gpu, 1); - - for (i = 0; i < l.steps; ++i) { - - s.input = state.input; - 
forward_connected_layer_gpu(input_layer, s); - - s.input = l.state_gpu; - forward_connected_layer_gpu(self_layer, s); - - float *old_state = l.state_gpu; - if(state.train) l.state_gpu += l.hidden*l.batch; - if(l.shortcut){ - copy_ongpu(l.hidden * l.batch, old_state, 1, l.state_gpu, 1); - }else{ - fill_ongpu(l.hidden * l.batch, 0, l.state_gpu, 1); - } - axpy_ongpu(l.hidden * l.batch, 1, input_layer.output_gpu, 1, l.state_gpu, 1); - axpy_ongpu(l.hidden * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1); - - s.input = l.state_gpu; - forward_connected_layer_gpu(output_layer, s); - - state.input += l.inputs*l.batch; - increment_layer(&input_layer, 1); - increment_layer(&self_layer, 1); - increment_layer(&output_layer, 1); - } -} - -void backward_rnn_layer_gpu(layer l, network_state state) -{ - network_state s = {0}; - s.train = state.train; - s.workspace = state.workspace; - int i; - layer input_layer = *(l.input_layer); - layer self_layer = *(l.self_layer); - layer output_layer = *(l.output_layer); - increment_layer(&input_layer, l.steps - 1); - increment_layer(&self_layer, l.steps - 1); - increment_layer(&output_layer, l.steps - 1); - l.state_gpu += l.hidden*l.batch*l.steps; - for (i = l.steps-1; i >= 0; --i) { - - s.input = l.state_gpu; - s.delta = self_layer.delta_gpu; - backward_connected_layer_gpu(output_layer, s); - - l.state_gpu -= l.hidden*l.batch; - - copy_ongpu(l.hidden*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1); // the same delta for Input and Self layers - - s.input = l.state_gpu; - s.delta = self_layer.delta_gpu - l.hidden*l.batch; - if (i == 0) s.delta = 0; - backward_connected_layer_gpu(self_layer, s); - - //copy_ongpu(l.hidden*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1); - if (i > 0 && l.shortcut) axpy_ongpu(l.hidden*l.batch, 1, self_layer.delta_gpu, 1, self_layer.delta_gpu - l.hidden*l.batch, 1); - s.input = state.input + i*l.inputs*l.batch; - if(state.delta) s.delta = state.delta + i*l.inputs*l.batch; - else 
s.delta = 0; - backward_connected_layer_gpu(input_layer, s); - - increment_layer(&input_layer, -1); - increment_layer(&self_layer, -1); - increment_layer(&output_layer, -1); - } -} -#endif diff --git a/src/Detector/darknet/src/rnn_layer.h b/src/Detector/darknet/src/rnn_layer.h deleted file mode 100644 index a2aa0f9e4..000000000 --- a/src/Detector/darknet/src/rnn_layer.h +++ /dev/null @@ -1,31 +0,0 @@ - -#ifndef RNN_LAYER_H -#define RNN_LAYER_H - -#include "activations.h" -#include "layer.h" -#include "network.h" -#define USET - -#ifdef __cplusplus -extern "C" { -#endif -layer make_rnn_layer(int batch, int inputs, int hidden, int outputs, int steps, ACTIVATION activation, int batch_normalize, int log); - -void forward_rnn_layer(layer l, network_state state); -void backward_rnn_layer(layer l, network_state state); -void update_rnn_layer(layer l, int batch, float learning_rate, float momentum, float decay); - -#ifdef GPU -void forward_rnn_layer_gpu(layer l, network_state state); -void backward_rnn_layer_gpu(layer l, network_state state); -void update_rnn_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay, float loss_scale); -void push_rnn_layer(layer l); -void pull_rnn_layer(layer l); -#endif - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/Detector/darknet/src/rnn_vid.c b/src/Detector/darknet/src/rnn_vid.c deleted file mode 100644 index a5ff52784..000000000 --- a/src/Detector/darknet/src/rnn_vid.c +++ /dev/null @@ -1,208 +0,0 @@ -#include "network.h" -#include "cost_layer.h" -#include "utils.h" -#include "parser.h" -#include "blas.h" - -#ifdef OPENCV -void reconstruct_picture(network net, float *features, image recon, image update, float rate, float momentum, float lambda, int smooth_size, int iters); - - -typedef struct { - float *x; - float *y; -} float_pair; - -float_pair get_rnn_vid_data(network net, char **files, int n, int batch, int steps) -{ - int b; - assert(net.batch == steps + 1); - image out_im = 
get_network_image(net); - int output_size = out_im.w*out_im.h*out_im.c; - printf("%d %d %d\n", out_im.w, out_im.h, out_im.c); - float* feats = (float*)xcalloc(net.batch * batch * output_size, sizeof(float)); - for(b = 0; b < batch; ++b){ - int input_size = net.w*net.h*net.c; - float* input = (float*)xcalloc(input_size * net.batch, sizeof(float)); - char *filename = files[rand()%n]; - cap_cv *cap = get_capture_video_stream(filename); - int frames = get_capture_frame_count_cv(cap); - int index = rand() % (frames - steps - 2); - if (frames < (steps + 4)){ - --b; - free(input); - continue; - } - - printf("frames: %d, index: %d\n", frames, index); - set_capture_position_frame_cv(cap, index); - - int i; - for(i = 0; i < net.batch; ++i){ - mat_cv *src = get_capture_frame_cv(cap); - image im = mat_to_image_cv(src); - rgbgr_image(im); - image re = resize_image(im, net.w, net.h); - //show_image(re, "loaded"); - //cvWaitKey(10); - memcpy(input + i*input_size, re.data, input_size*sizeof(float)); - free_image(im); - free_image(re); - } - float *output = network_predict(net, input); - - free(input); - - for(i = 0; i < net.batch; ++i){ - memcpy(feats + (b + i*batch)*output_size, output + i*output_size, output_size*sizeof(float)); - } - - release_capture(cap); //cvReleaseCapture(&cap); - } - - //printf("%d %d %d\n", out_im.w, out_im.h, out_im.c); - float_pair p = {0}; - p.x = feats; - p.y = feats + output_size*batch; //+ out_im.w*out_im.h*out_im.c; - - return p; -} - - -void train_vid_rnn(char *cfgfile, char *weightfile) -{ - char *train_videos = "data/vid/train.txt"; - char* backup_directory = "backup/"; - srand(time(0)); - char *base = basecfg(cfgfile); - printf("%s\n", base); - float avg_loss = -1; - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); - int imgs = net.batch*net.subdivisions; - int i = *net.seen/imgs; - - list *plist 
= get_paths(train_videos); - int N = plist->size; - char **paths = (char **)list_to_array(plist); - clock_t time; - int steps = net.time_steps; - int batch = net.batch / net.time_steps; - - network extractor = parse_network_cfg("cfg/extractor.cfg"); - load_weights(&extractor, "trained/yolo-coco.conv"); - - while(get_current_batch(net) < net.max_batches){ - i += 1; - time=clock(); - float_pair p = get_rnn_vid_data(extractor, paths, N, batch, steps); - - float loss = train_network_datum(net, p.x, p.y) / (net.batch); - - - free(p.x); - if (avg_loss < 0) avg_loss = loss; - avg_loss = avg_loss*.9 + loss*.1; - - fprintf(stderr, "%d: %f, %f avg, %f rate, %lf seconds\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time)); - if(i%100==0){ - char buff[256]; - sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); - save_weights(net, buff); - } - if(i%10==0){ - char buff[256]; - sprintf(buff, "%s/%s.backup", backup_directory, base); - save_weights(net, buff); - } - } - char buff[256]; - sprintf(buff, "%s/%s_final.weights", backup_directory, base); - save_weights(net, buff); -} - - -image save_reconstruction(network net, image *init, float *feat, char *name, int i) -{ - image recon; - if (init) { - recon = copy_image(*init); - } else { - recon = make_random_image(net.w, net.h, 3); - } - - image update = make_image(net.w, net.h, 3); - reconstruct_picture(net, feat, recon, update, .01, .9, .1, 2, 50); - char buff[256]; - sprintf(buff, "%s%d", name, i); - save_image(recon, buff); - free_image(update); - return recon; -} - -void generate_vid_rnn(char *cfgfile, char *weightfile) -{ - network extractor = parse_network_cfg("cfg/extractor.recon.cfg"); - load_weights(&extractor, "trained/yolo-coco.conv"); - - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - set_batch_network(&extractor, 1); - set_batch_network(&net, 1); - - int i; - cap_cv *cap = 
get_capture_video_stream("extra/vid/ILSVRC2015/Data/VID/snippets/val/ILSVRC2015_val_00007030.mp4"); - //CvCapture* cap = cvCaptureFromFile("extra/vid/ILSVRC2015/Data/VID/snippets/val/ILSVRC2015_val_00007030.mp4"); - float *feat; - float *next; - next = NULL; - image last; - for(i = 0; i < 25; ++i){ - image im = get_image_from_stream_cpp(cap); - image re = resize_image(im, extractor.w, extractor.h); - feat = network_predict(extractor, re.data); - if(i > 0){ - printf("%f %f\n", mean_array(feat, 14*14*512), variance_array(feat, 14*14*512)); - printf("%f %f\n", mean_array(next, 14*14*512), variance_array(next, 14*14*512)); - printf("%f\n", mse_array(feat, 14*14*512)); - axpy_cpu(14*14*512, -1, feat, 1, next, 1); - printf("%f\n", mse_array(next, 14*14*512)); - } - next = network_predict(net, feat); - - free_image(im); - - free_image(save_reconstruction(extractor, 0, feat, "feat", i)); - free_image(save_reconstruction(extractor, 0, next, "next", i)); - if (i==24) last = copy_image(re); - free_image(re); - } - for(i = 0; i < 30; ++i){ - next = network_predict(net, next); - image newimage = save_reconstruction(extractor, &last, next, "newimage", i); - free_image(last); - last = newimage; - } -} - -void run_vid_rnn(int argc, char **argv) -{ - if(argc < 4){ - fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); - return; - } - - char *cfg = argv[3]; - char *weights = (argc > 4) ? argv[4] : 0; - //char *filename = (argc > 5) ? 
argv[5]: 0; - if(0==strcmp(argv[2], "train")) train_vid_rnn(cfg, weights); - else if(0==strcmp(argv[2], "generate")) generate_vid_rnn(cfg, weights); -} -#else -void run_vid_rnn(int argc, char **argv){} -#endif diff --git a/src/Detector/darknet/src/route_layer.c b/src/Detector/darknet/src/route_layer.c deleted file mode 100644 index 2e0699d59..000000000 --- a/src/Detector/darknet/src/route_layer.c +++ /dev/null @@ -1,153 +0,0 @@ -#include "route_layer.h" -#include "utils.h" -#include "dark_cuda.h" -#include "blas.h" -#include - -route_layer make_route_layer(int batch, int n, int *input_layers, int *input_sizes, int groups, int group_id) -{ - fprintf(stderr,"route "); - route_layer l = { (LAYER_TYPE)0 }; - l.type = ROUTE; - l.batch = batch; - l.n = n; - l.input_layers = input_layers; - l.input_sizes = input_sizes; - l.groups = groups; - l.group_id = group_id; - int i; - int outputs = 0; - for(i = 0; i < n; ++i){ - fprintf(stderr," %d", input_layers[i]); - outputs += input_sizes[i]; - } - outputs = outputs / groups; - l.outputs = outputs; - l.inputs = outputs; - //fprintf(stderr, " inputs = %d \t outputs = %d, groups = %d, group_id = %d \n", l.inputs, l.outputs, l.groups, l.group_id); - l.delta = (float*)xcalloc(outputs * batch, sizeof(float)); - l.output = (float*)xcalloc(outputs * batch, sizeof(float)); - - l.forward = forward_route_layer; - l.backward = backward_route_layer; - #ifdef GPU - l.forward_gpu = forward_route_layer_gpu; - l.backward_gpu = backward_route_layer_gpu; - - l.delta_gpu = cuda_make_array(l.delta, outputs*batch); - l.output_gpu = cuda_make_array(l.output, outputs*batch); - #endif - return l; -} - -void resize_route_layer(route_layer *l, network *net) -{ - int i; - layer first = net->layers[l->input_layers[0]]; - l->out_w = first.out_w; - l->out_h = first.out_h; - l->out_c = first.out_c; - l->outputs = first.outputs; - l->input_sizes[0] = first.outputs; - for(i = 1; i < l->n; ++i){ - int index = l->input_layers[i]; - layer next = 
net->layers[index]; - l->outputs += next.outputs; - l->input_sizes[i] = next.outputs; - if(next.out_w == first.out_w && next.out_h == first.out_h){ - l->out_c += next.out_c; - }else{ - printf("Error: Different size of input layers: %d x %d, %d x %d\n", next.out_w, next.out_h, first.out_w, first.out_h); - l->out_h = l->out_w = l->out_c = 0; - exit(EXIT_FAILURE); - } - } - l->out_c = l->out_c / l->groups; - l->outputs = l->outputs / l->groups; - l->inputs = l->outputs; - l->delta = (float*)xrealloc(l->delta, l->outputs * l->batch * sizeof(float)); - l->output = (float*)xrealloc(l->output, l->outputs * l->batch * sizeof(float)); - -#ifdef GPU - cuda_free(l->output_gpu); - cuda_free(l->delta_gpu); - l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); - l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch); -#endif - -} - -void forward_route_layer(const route_layer l, network_state state) -{ - int i, j; - int offset = 0; - for(i = 0; i < l.n; ++i){ - int index = l.input_layers[i]; - float *input = state.net.layers[index].output; - int input_size = l.input_sizes[i]; - int part_input_size = input_size / l.groups; - for(j = 0; j < l.batch; ++j){ - //copy_cpu(input_size, input + j*input_size, 1, l.output + offset + j*l.outputs, 1); - copy_cpu(part_input_size, input + j*input_size + part_input_size*l.group_id, 1, l.output + offset + j*l.outputs, 1); - } - //offset += input_size; - offset += part_input_size; - } -} - -void backward_route_layer(const route_layer l, network_state state) -{ - int i, j; - int offset = 0; - for(i = 0; i < l.n; ++i){ - int index = l.input_layers[i]; - float *delta = state.net.layers[index].delta; - int input_size = l.input_sizes[i]; - int part_input_size = input_size / l.groups; - for(j = 0; j < l.batch; ++j){ - //axpy_cpu(input_size, 1, l.delta + offset + j*l.outputs, 1, delta + j*input_size, 1); - axpy_cpu(part_input_size, 1, l.delta + offset + j*l.outputs, 1, delta + j*input_size + part_input_size*l.group_id, 1); - } - 
//offset += input_size; - offset += part_input_size; - } -} - -#ifdef GPU -void forward_route_layer_gpu(const route_layer l, network_state state) -{ - int i, j; - int offset = 0; - for(i = 0; i < l.n; ++i){ - int index = l.input_layers[i]; - float *input = state.net.layers[index].output_gpu; - int input_size = l.input_sizes[i]; - int part_input_size = input_size / l.groups; - for(j = 0; j < l.batch; ++j){ - //copy_ongpu(input_size, input + j*input_size, 1, l.output_gpu + offset + j*l.outputs, 1); - //simple_copy_ongpu(input_size, input + j*input_size, l.output_gpu + offset + j*l.outputs); - simple_copy_ongpu(part_input_size, input + j*input_size + part_input_size*l.group_id, l.output_gpu + offset + j*l.outputs); - } - //offset += input_size; - offset += part_input_size; - } -} - -void backward_route_layer_gpu(const route_layer l, network_state state) -{ - int i, j; - int offset = 0; - for(i = 0; i < l.n; ++i){ - int index = l.input_layers[i]; - float *delta = state.net.layers[index].delta_gpu; - int input_size = l.input_sizes[i]; - int part_input_size = input_size / l.groups; - for(j = 0; j < l.batch; ++j){ - //axpy_ongpu(input_size, 1, l.delta_gpu + offset + j*l.outputs, 1, delta + j*input_size, 1); - axpy_ongpu(part_input_size, 1, l.delta_gpu + offset + j*l.outputs, 1, delta + j*input_size + part_input_size*l.group_id, 1); - } - //offset += input_size; - offset += part_input_size; - } -} -#endif diff --git a/src/Detector/darknet/src/route_layer.h b/src/Detector/darknet/src/route_layer.h deleted file mode 100644 index 2ebe39603..000000000 --- a/src/Detector/darknet/src/route_layer.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef ROUTE_LAYER_H -#define ROUTE_LAYER_H -#include "network.h" -#include "layer.h" - -typedef layer route_layer; - -#ifdef __cplusplus -extern "C" { -#endif -route_layer make_route_layer(int batch, int n, int *input_layers, int *input_size, int groups, int group_id); -void forward_route_layer(const route_layer l, network_state state); -void 
backward_route_layer(const route_layer l, network_state state); -void resize_route_layer(route_layer *l, network *net); - -#ifdef GPU -void forward_route_layer_gpu(const route_layer l, network_state state); -void backward_route_layer_gpu(const route_layer l, network_state state); -#endif - -#ifdef __cplusplus -} -#endif -#endif diff --git a/src/Detector/darknet/src/sam_layer.c b/src/Detector/darknet/src/sam_layer.c deleted file mode 100644 index ddb704656..000000000 --- a/src/Detector/darknet/src/sam_layer.c +++ /dev/null @@ -1,119 +0,0 @@ -#include "sam_layer.h" -#include "utils.h" -#include "dark_cuda.h" -#include "blas.h" -#include -#include - -layer make_sam_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2) -{ - fprintf(stderr,"scale Layer: %d\n", index); - layer l = { (LAYER_TYPE)0 }; - l.type = SAM; - l.batch = batch; - l.w = w; - l.h = h; - l.c = c; - - l.out_w = w2; - l.out_h = h2; - l.out_c = c2; - assert(l.out_c == l.c); - assert(l.w == l.out_w && l.h == l.out_h); - - l.outputs = l.out_w*l.out_h*l.out_c; - l.inputs = l.outputs; - l.index = index; - - l.delta = (float*)xcalloc(l.outputs * batch, sizeof(float)); - l.output = (float*)xcalloc(l.outputs * batch, sizeof(float)); - - l.forward = forward_sam_layer; - l.backward = backward_sam_layer; -#ifdef GPU - l.forward_gpu = forward_sam_layer_gpu; - l.backward_gpu = backward_sam_layer_gpu; - - l.delta_gpu = cuda_make_array(l.delta, l.outputs*batch); - l.output_gpu = cuda_make_array(l.output, l.outputs*batch); -#endif - return l; -} - -void resize_sam_layer(layer *l, int w, int h) -{ - l->out_w = w; - l->out_h = h; - l->outputs = l->out_w*l->out_h*l->out_c; - l->inputs = l->outputs; - l->delta = (float*)xrealloc(l->delta, l->outputs * l->batch * sizeof(float)); - l->output = (float*)xrealloc(l->output, l->outputs * l->batch * sizeof(float)); - -#ifdef GPU - cuda_free(l->output_gpu); - cuda_free(l->delta_gpu); - l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); - 
l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch); -#endif - -} - -void forward_sam_layer(const layer l, network_state state) -{ - int size = l.batch * l.out_c * l.out_w * l.out_h; - //int channel_size = 1; - float *from_output = state.net.layers[l.index].output; - - int i; - #pragma omp parallel for - for (i = 0; i < size; ++i) { - l.output[i] = state.input[i] * from_output[i]; - } - - activate_array(l.output, l.outputs*l.batch, l.activation); -} - -void backward_sam_layer(const layer l, network_state state) -{ - gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); - //axpy_cpu(l.outputs*l.batch, 1, l.delta, 1, state.delta, 1); - //scale_cpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta, l.w, l.h, l.c, state.net.layers[l.index].delta); - - int size = l.batch * l.out_c * l.out_w * l.out_h; - //int channel_size = 1; - float *from_output = state.net.layers[l.index].output; - float *from_delta = state.net.layers[l.index].delta; - - int i; - #pragma omp parallel for - for (i = 0; i < size; ++i) { - state.delta[i] += l.delta[i] * from_output[i]; // l.delta * from (should be divided by channel_size?) 
- - from_delta[i] = state.input[i] * l.delta[i]; // input * l.delta - } -} - -#ifdef GPU -void forward_sam_layer_gpu(const layer l, network_state state) -{ - int size = l.batch * l.out_c * l.out_w * l.out_h; - int channel_size = 1; - - sam_gpu(state.net.layers[l.index].output_gpu, size, channel_size, state.input, l.output_gpu); - - activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation); -} - -void backward_sam_layer_gpu(const layer l, network_state state) -{ - gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); - - int size = l.batch * l.out_c * l.out_w * l.out_h; - int channel_size = 1; - float *from_output = state.net.layers[l.index].output_gpu; - float *from_delta = state.net.layers[l.index].delta_gpu; - - - backward_sam_gpu(l.delta_gpu, size, channel_size, state.input, from_delta, from_output, state.delta); -} -#endif diff --git a/src/Detector/darknet/src/sam_layer.h b/src/Detector/darknet/src/sam_layer.h deleted file mode 100644 index 0fa66fa26..000000000 --- a/src/Detector/darknet/src/sam_layer.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef SAM_CHANNELS_LAYER_H -#define SAM_CHANNELS_LAYER_H - -#include "layer.h" -#include "network.h" - -#ifdef __cplusplus -extern "C" { -#endif -layer make_sam_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2); -void forward_sam_layer(const layer l, network_state state); -void backward_sam_layer(const layer l, network_state state); -void resize_sam_layer(layer *l, int w, int h); - -#ifdef GPU -void forward_sam_layer_gpu(const layer l, network_state state); -void backward_sam_layer_gpu(const layer l, network_state state); -#endif - -#ifdef __cplusplus -} -#endif -#endif // SAM_CHANNELS_LAYER_H diff --git a/src/Detector/darknet/src/scale_channels_layer.c b/src/Detector/darknet/src/scale_channels_layer.c deleted file mode 100644 index c4f64105d..000000000 --- a/src/Detector/darknet/src/scale_channels_layer.c +++ /dev/null @@ -1,150 +0,0 @@ -#include 
"scale_channels_layer.h" -#include "utils.h" -#include "dark_cuda.h" -#include "blas.h" -#include -#include - -layer make_scale_channels_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2, int scale_wh) -{ - fprintf(stderr,"scale Layer: %d\n", index); - layer l = { (LAYER_TYPE)0 }; - l.type = SCALE_CHANNELS; - l.batch = batch; - l.scale_wh = scale_wh; - l.w = w; - l.h = h; - l.c = c; - if (!l.scale_wh) assert(w == 1 && h == 1); - else assert(c == 1); - - l.out_w = w2; - l.out_h = h2; - l.out_c = c2; - if (!l.scale_wh) assert(l.out_c == l.c); - else assert(l.out_w == l.w && l.out_h == l.h); - - l.outputs = l.out_w*l.out_h*l.out_c; - l.inputs = l.outputs; - l.index = index; - - l.delta = (float*)xcalloc(l.outputs * batch, sizeof(float)); - l.output = (float*)xcalloc(l.outputs * batch, sizeof(float)); - - l.forward = forward_scale_channels_layer; - l.backward = backward_scale_channels_layer; -#ifdef GPU - l.forward_gpu = forward_scale_channels_layer_gpu; - l.backward_gpu = backward_scale_channels_layer_gpu; - - l.delta_gpu = cuda_make_array(l.delta, l.outputs*batch); - l.output_gpu = cuda_make_array(l.output, l.outputs*batch); -#endif - return l; -} - -void resize_scale_channels_layer(layer *l, network *net) -{ - layer first = net->layers[l->index]; - l->out_w = first.out_w; - l->out_h = first.out_h; - l->outputs = l->out_w*l->out_h*l->out_c; - l->inputs = l->outputs; - l->delta = (float*)xrealloc(l->delta, l->outputs * l->batch * sizeof(float)); - l->output = (float*)xrealloc(l->output, l->outputs * l->batch * sizeof(float)); - -#ifdef GPU - cuda_free(l->output_gpu); - cuda_free(l->delta_gpu); - l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); - l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch); -#endif - -} - -void forward_scale_channels_layer(const layer l, network_state state) -{ - int size = l.batch * l.out_c * l.out_w * l.out_h; - int channel_size = l.out_w * l.out_h; - int batch_size = l.out_c * l.out_w * 
l.out_h; - float *from_output = state.net.layers[l.index].output; - - if (l.scale_wh) { - int i; - #pragma omp parallel for - for (i = 0; i < size; ++i) { - int input_index = i % channel_size + (i / batch_size)*channel_size; - - l.output[i] = state.input[input_index] * from_output[i]; - } - } - else { - int i; - #pragma omp parallel for - for (i = 0; i < size; ++i) { - l.output[i] = state.input[i / channel_size] * from_output[i]; - } - } - - activate_array(l.output, l.outputs*l.batch, l.activation); -} - -void backward_scale_channels_layer(const layer l, network_state state) -{ - gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); - //axpy_cpu(l.outputs*l.batch, 1, l.delta, 1, state.delta, 1); - //scale_cpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta, l.w, l.h, l.c, state.net.layers[l.index].delta); - - int size = l.batch * l.out_c * l.out_w * l.out_h; - int channel_size = l.out_w * l.out_h; - int batch_size = l.out_c * l.out_w * l.out_h; - float *from_output = state.net.layers[l.index].output; - float *from_delta = state.net.layers[l.index].delta; - - if (l.scale_wh) { - int i; - #pragma omp parallel for - for (i = 0; i < size; ++i) { - int input_index = i % channel_size + (i / batch_size)*channel_size; - - state.delta[input_index] += l.delta[i] * from_output[i];// / l.out_c; // l.delta * from (should be divided by l.out_c?) - - from_delta[i] += state.input[input_index] * l.delta[i]; // input * l.delta - } - } - else { - int i; - #pragma omp parallel for - for (i = 0; i < size; ++i) { - state.delta[i / channel_size] += l.delta[i] * from_output[i];// / channel_size; // l.delta * from (should be divided by channel_size?) 
- - from_delta[i] += state.input[i / channel_size] * l.delta[i]; // input * l.delta - } - } -} - -#ifdef GPU -void forward_scale_channels_layer_gpu(const layer l, network_state state) -{ - int size = l.batch * l.out_c * l.out_w * l.out_h; - int channel_size = l.out_w * l.out_h; - int batch_size = l.out_c * l.out_w * l.out_h; - - scale_channels_gpu(state.net.layers[l.index].output_gpu, size, channel_size, batch_size, l.scale_wh, state.input, l.output_gpu); - - activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation); -} - -void backward_scale_channels_layer_gpu(const layer l, network_state state) -{ - gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); - - int size = l.batch * l.out_c * l.out_w * l.out_h; - int channel_size = l.out_w * l.out_h; - int batch_size = l.out_c * l.out_w * l.out_h; - float *from_output = state.net.layers[l.index].output_gpu; - float *from_delta = state.net.layers[l.index].delta_gpu; - - backward_scale_channels_gpu(l.delta_gpu, size, channel_size, batch_size, l.scale_wh, state.input, from_delta, from_output, state.delta); -} -#endif diff --git a/src/Detector/darknet/src/scale_channels_layer.h b/src/Detector/darknet/src/scale_channels_layer.h deleted file mode 100644 index c8d51ddac..000000000 --- a/src/Detector/darknet/src/scale_channels_layer.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef SCALE_CHANNELS_LAYER_H -#define SCALE_CHANNELS_LAYER_H - -#include "layer.h" -#include "network.h" - -#ifdef __cplusplus -extern "C" { -#endif -layer make_scale_channels_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2, int scale_wh); -void forward_scale_channels_layer(const layer l, network_state state); -void backward_scale_channels_layer(const layer l, network_state state); -void resize_scale_channels_layer(layer *l, network *net); - -#ifdef GPU -void forward_scale_channels_layer_gpu(const layer l, network_state state); -void backward_scale_channels_layer_gpu(const layer l, network_state 
state); -#endif - -#ifdef __cplusplus -} -#endif -#endif // SCALE_CHANNELS_LAYER_H diff --git a/src/Detector/darknet/src/shortcut_layer.c b/src/Detector/darknet/src/shortcut_layer.c deleted file mode 100644 index 87f0d7e8d..000000000 --- a/src/Detector/darknet/src/shortcut_layer.c +++ /dev/null @@ -1,293 +0,0 @@ -#include "shortcut_layer.h" -#include "convolutional_layer.h" -#include "dark_cuda.h" -#include "blas.h" -#include "utils.h" -#include "gemm.h" -#include -#include - -layer make_shortcut_layer(int batch, int n, int *input_layers, int* input_sizes, int w, int h, int c, - float **layers_output, float **layers_delta, float **layers_output_gpu, float **layers_delta_gpu, WEIGHTS_TYPE_T weights_type, WEIGHTS_NORMALIZATION_T weights_normalization, - ACTIVATION activation, int train) -{ - fprintf(stderr, "Shortcut Layer: "); - int i; - for(i = 0; i < n; ++i) fprintf(stderr, "%d, ", input_layers[i]); - - layer l = { (LAYER_TYPE)0 }; - l.train = train; - l.type = SHORTCUT; - l.batch = batch; - l.activation = activation; - l.n = n; - l.input_layers = input_layers; - l.input_sizes = input_sizes; - l.layers_output = layers_output; - l.layers_delta = layers_delta; - l.weights_type = weights_type; - l.weights_normalization = weights_normalization; - l.learning_rate_scale = 1; // not necessary - - //l.w = w2; - //l.h = h2; - //l.c = c2; - l.w = l.out_w = w; - l.h = l.out_h = h; - l.c = l.out_c = c; - l.outputs = w*h*c; - l.inputs = l.outputs; - - //if(w != w2 || h != h2 || c != c2) fprintf(stderr, " w = %d, w2 = %d, h = %d, h2 = %d, c = %d, c2 = %d \n", w, w2, h, h2, c, c2); - - l.index = l.input_layers[0]; - - - if (train) l.delta = (float*)xcalloc(l.outputs * batch, sizeof(float)); - l.output = (float*)xcalloc(l.outputs * batch, sizeof(float)); - - l.nweights = 0; - if (l.weights_type == PER_FEATURE) l.nweights = (l.n + 1); - else if (l.weights_type == PER_CHANNEL) l.nweights = (l.n + 1) * l.c; - - if (l.nweights > 0) { - l.weights = (float*)calloc(l.nweights, 
sizeof(float)); - float scale = sqrt(2. / l.nweights); - for (i = 0; i < l.nweights; ++i) l.weights[i] = 1;// +0.01*rand_uniform(-1, 1);// scale*rand_uniform(-1, 1); // rand_normal(); - - if (train) l.weight_updates = (float*)calloc(l.nweights, sizeof(float)); - l.update = update_shortcut_layer; - } - - l.forward = forward_shortcut_layer; - l.backward = backward_shortcut_layer; -#ifndef GPU - if (l.activation == SWISH || l.activation == MISH) l.activation_input = (float*)calloc(l.batch*l.outputs, sizeof(float)); -#endif // GPU - -#ifdef GPU - if (l.activation == SWISH || l.activation == MISH) l.activation_input_gpu = cuda_make_array(l.activation_input, l.batch*l.outputs); - - l.forward_gpu = forward_shortcut_layer_gpu; - l.backward_gpu = backward_shortcut_layer_gpu; - - if (l.nweights > 0) { - l.update_gpu = update_shortcut_layer_gpu; - l.weights_gpu = cuda_make_array(l.weights, l.nweights); - if (train) l.weight_updates_gpu = cuda_make_array(l.weight_updates, l.nweights); - } - - if (train) l.delta_gpu = cuda_make_array(l.delta, l.outputs*batch); - l.output_gpu = cuda_make_array(l.output, l.outputs*batch); - - l.input_sizes_gpu = cuda_make_int_array_new_api(input_sizes, l.n); - l.layers_output_gpu = (float**)cuda_make_array_pointers((void**)layers_output_gpu, l.n); - l.layers_delta_gpu = (float**)cuda_make_array_pointers((void**)layers_delta_gpu, l.n); -#endif // GPU - - l.bflops = l.out_w * l.out_h * l.out_c * l.n / 1000000000.; - if (l.weights_type) l.bflops *= 2; - fprintf(stderr, " wt = %d, wn = %d, outputs:%4d x%4d x%4d %5.3f BF\n", l.weights_type, l.weights_normalization, l.out_w, l.out_h, l.out_c, l.bflops); - return l; -} - -void resize_shortcut_layer(layer *l, int w, int h, network *net) -{ - //assert(l->w == l->out_w); - //assert(l->h == l->out_h); - l->w = l->out_w = w; - l->h = l->out_h = h; - l->outputs = w*h*l->out_c; - l->inputs = l->outputs; - if (l->train) l->delta = (float*)xrealloc(l->delta, l->outputs * l->batch * sizeof(float)); - l->output = 
(float*)xrealloc(l->output, l->outputs * l->batch * sizeof(float)); - - int i; - for (i = 0; i < l->n; ++i) { - int index = l->input_layers[i]; - l->input_sizes[i] = net->layers[index].outputs; - l->layers_output[i] = net->layers[index].output; - l->layers_delta[i] = net->layers[index].delta; - - assert(l->w == net->layers[index].out_w && l->h == net->layers[index].out_h); - } - - if (l->activation == SWISH || l->activation == MISH) l->activation_input = (float*)realloc(l->activation_input, l->batch*l->outputs * sizeof(float)); - -#ifdef GPU - cuda_free(l->output_gpu); - l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); - - if (l->train) { - cuda_free(l->delta_gpu); - l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch); - } - - float **layers_output_gpu = (float **)calloc(l->n, sizeof(float *)); - float **layers_delta_gpu = (float **)calloc(l->n, sizeof(float *)); - - for (i = 0; i < l->n; ++i) { - const int index = l->input_layers[i]; - layers_output_gpu[i] = net->layers[index].output_gpu; - layers_delta_gpu[i] = net->layers[index].delta_gpu; - } - - memcpy_ongpu(l->input_sizes_gpu, l->input_sizes, l->n * sizeof(int)); - memcpy_ongpu(l->layers_output_gpu, layers_output_gpu, l->n * sizeof(float*)); - memcpy_ongpu(l->layers_delta_gpu, layers_delta_gpu, l->n * sizeof(float*)); - - free(layers_output_gpu); - free(layers_delta_gpu); - - if (l->activation == SWISH || l->activation == MISH) { - cuda_free(l->activation_input_gpu); - l->activation_input_gpu = cuda_make_array(l->activation_input, l->batch*l->outputs); - } -#endif - -} - -void forward_shortcut_layer(const layer l, network_state state) -{ - int from_w = state.net.layers[l.index].w; - int from_h = state.net.layers[l.index].h; - int from_c = state.net.layers[l.index].c; - - if (l.nweights == 0 && l.n == 1 && from_w == l.w && from_h == l.h && from_c == l.c) { - int size = l.batch * l.w * l.h * l.c; - int i; - #pragma omp parallel for - for(i = 0; i < size; ++i) - l.output[i] = 
state.input[i] + state.net.layers[l.index].output[i]; - } - else { - shortcut_multilayer_cpu(l.outputs * l.batch, l.outputs, l.batch, l.n, l.input_sizes, l.layers_output, l.output, state.input, l.weights, l.nweights, l.weights_normalization); - } - - //copy_cpu(l.outputs*l.batch, state.input, 1, l.output, 1); - //shortcut_cpu(l.batch, from_w, from_h, from_c, state.net.layers[l.index].output, l.out_w, l.out_h, l.out_c, l.output); - - //activate_array(l.output, l.outputs*l.batch, l.activation); - if (l.activation == SWISH) activate_array_swish(l.output, l.outputs*l.batch, l.activation_input, l.output); - else if (l.activation == MISH) activate_array_mish(l.output, l.outputs*l.batch, l.activation_input, l.output); - else activate_array_cpu_custom(l.output, l.outputs*l.batch, l.activation); -} - -void backward_shortcut_layer(const layer l, network_state state) -{ - if (l.activation == SWISH) gradient_array_swish(l.output, l.outputs*l.batch, l.activation_input, l.delta); - else if (l.activation == MISH) gradient_array_mish(l.outputs*l.batch, l.activation_input, l.delta); - else gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta); - - backward_shortcut_multilayer_cpu(l.outputs * l.batch, l.outputs, l.batch, l.n, l.input_sizes, - l.layers_delta, state.delta, l.delta, l.weights, l.weight_updates, l.nweights, state.input, l.layers_output, l.weights_normalization); - - //axpy_cpu(l.outputs*l.batch, 1, l.delta, 1, state.delta, 1); - //shortcut_cpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta, l.w, l.h, l.c, state.net.layers[l.index].delta); -} - -void update_shortcut_layer(layer l, int batch, float learning_rate_init, float momentum, float decay) -{ - if (l.nweights > 0) { - float learning_rate = learning_rate_init*l.learning_rate_scale; - //float momentum = a.momentum; - //float decay = a.decay; - //int batch = a.batch; - - axpy_cpu(l.nweights, -decay*batch, l.weights, 1, l.weight_updates, 1); - axpy_cpu(l.nweights, learning_rate / batch, l.weight_updates, 1, 
l.weights, 1); - scal_cpu(l.nweights, momentum, l.weight_updates, 1); - } -} - -#ifdef GPU -void forward_shortcut_layer_gpu(const layer l, network_state state) -{ - //copy_ongpu(l.outputs*l.batch, state.input, 1, l.output_gpu, 1); - //simple_copy_ongpu(l.outputs*l.batch, state.input, l.output_gpu); - //shortcut_gpu(l.batch, l.w, l.h, l.c, state.net.layers[l.index].output_gpu, l.out_w, l.out_h, l.out_c, l.output_gpu); - - //input_shortcut_gpu(state.input, l.batch, l.w, l.h, l.c, state.net.layers[l.index].output_gpu, l.out_w, l.out_h, l.out_c, l.output_gpu); - - //----------- - //if (l.outputs == l.input_sizes[0]) - //if(l.n == 1 && l.nweights == 0) - //{ - // input_shortcut_gpu(state.input, l.batch, state.net.layers[l.index].w, state.net.layers[l.index].h, state.net.layers[l.index].c, - // state.net.layers[l.index].output_gpu, l.out_w, l.out_h, l.out_c, l.output_gpu); - //} - //else - { - shortcut_multilayer_gpu(l.outputs, l.batch, l.n, l.input_sizes_gpu, l.layers_output_gpu, l.output_gpu, state.input, l.weights_gpu, l.nweights, l.weights_normalization); - } - - if (l.activation == SWISH) activate_array_swish_ongpu(l.output_gpu, l.outputs*l.batch, l.activation_input_gpu, l.output_gpu); - else if (l.activation == MISH) activate_array_mish_ongpu(l.output_gpu, l.outputs*l.batch, l.activation_input_gpu, l.output_gpu); - else activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation); - -} - -void backward_shortcut_layer_gpu(const layer l, network_state state) -{ - if (l.activation == SWISH) gradient_array_swish_ongpu(l.output_gpu, l.outputs*l.batch, l.activation_input_gpu, l.delta_gpu); - else if (l.activation == MISH) gradient_array_mish_ongpu(l.outputs*l.batch, l.activation_input_gpu, l.delta_gpu); - else gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); - - backward_shortcut_multilayer_gpu(l.outputs, l.batch, l.n, l.input_sizes_gpu, l.layers_delta_gpu, state.delta, l.delta_gpu, - l.weights_gpu, l.weight_updates_gpu, 
l.nweights, state.input, l.layers_output_gpu, l.weights_normalization); - - //axpy_ongpu(l.outputs*l.batch, 1, l.delta_gpu, 1, state.delta, 1); - //shortcut_gpu(l.batch, l.out_w, l.out_h, l.out_c, l.delta_gpu, l.w, l.h, l.c, state.net.layers[l.index].delta_gpu); -} - -void update_shortcut_layer_gpu(layer l, int batch, float learning_rate_init, float momentum, float decay, float loss_scale) -{ - if (l.nweights > 0) { - float learning_rate = learning_rate_init*l.learning_rate_scale / loss_scale; - //float momentum = a.momentum; - //float decay = a.decay; - //int batch = a.batch; - - reset_nan_and_inf(l.weight_updates_gpu, l.nweights); - fix_nan_and_inf(l.weights_gpu, l.nweights); - - //constrain_weight_updates_ongpu(l.nweights, 1, l.weights_gpu, l.weight_updates_gpu); - constrain_ongpu(l.nweights, 1, l.weight_updates_gpu, 1); - - /* - cuda_pull_array_async(l.weights_gpu, l.weights, l.nweights); - cuda_pull_array_async(l.weight_updates_gpu, l.weight_updates, l.nweights); - CHECK_CUDA(cudaStreamSynchronize(get_cuda_stream())); - for (int i = 0; i < l.nweights; ++i) printf(" %f, ", l.weight_updates[i]); - printf(" l.nweights = %d - updates \n", l.nweights); - for (int i = 0; i < l.nweights; ++i) printf(" %f, ", l.weights[i]); - printf(" l.nweights = %d \n\n", l.nweights); - */ - - //axpy_ongpu(l.nweights, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1); - axpy_ongpu(l.nweights, learning_rate / batch, l.weight_updates_gpu, 1, l.weights_gpu, 1); - scal_ongpu(l.nweights, momentum, l.weight_updates_gpu, 1); - - //fill_ongpu(l.nweights, 0, l.weight_updates_gpu, 1); - - //if (l.clip) { - // constrain_ongpu(l.nweights, l.clip, l.weights_gpu, 1); - //} - } -} - -void pull_shortcut_layer(layer l) -{ - constrain_ongpu(l.nweights, 1, l.weight_updates_gpu, 1); - cuda_pull_array_async(l.weight_updates_gpu, l.weight_updates, l.nweights); - cuda_pull_array_async(l.weights_gpu, l.weights, l.nweights); - CHECK_CUDA(cudaPeekAtLastError()); - 
CHECK_CUDA(cudaStreamSynchronize(get_cuda_stream())); -} - -void push_shortcut_layer(layer l) -{ - cuda_push_array(l.weights_gpu, l.weights, l.nweights); - CHECK_CUDA(cudaPeekAtLastError()); -} -#endif diff --git a/src/Detector/darknet/src/shortcut_layer.h b/src/Detector/darknet/src/shortcut_layer.h deleted file mode 100644 index 89321370a..000000000 --- a/src/Detector/darknet/src/shortcut_layer.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef SHORTCUT_LAYER_H -#define SHORTCUT_LAYER_H - -#include "layer.h" -#include "network.h" - -#ifdef __cplusplus -extern "C" { -#endif -layer make_shortcut_layer(int batch, int n, int *input_layers, int* input_sizes, int w, int h, int c, - float **layers_output, float **layers_delta, float **layers_output_gpu, float **layers_delta_gpu, WEIGHTS_TYPE_T weights_type, WEIGHTS_NORMALIZATION_T weights_normalization, - ACTIVATION activation, int train); -void forward_shortcut_layer(const layer l, network_state state); -void backward_shortcut_layer(const layer l, network_state state); -void update_shortcut_layer(layer l, int batch, float learning_rate_init, float momentum, float decay); -void resize_shortcut_layer(layer *l, int w, int h, network *net); - -#ifdef GPU -void forward_shortcut_layer_gpu(const layer l, network_state state); -void backward_shortcut_layer_gpu(const layer l, network_state state); -void update_shortcut_layer_gpu(layer l, int batch, float learning_rate_init, float momentum, float decay, float loss_scale); -void pull_shortcut_layer(layer l); -void push_shortcut_layer(layer l); -#endif - -#ifdef __cplusplus -} -#endif -#endif diff --git a/src/Detector/darknet/src/softmax_layer.c b/src/Detector/darknet/src/softmax_layer.c deleted file mode 100644 index 59f51117e..000000000 --- a/src/Detector/darknet/src/softmax_layer.c +++ /dev/null @@ -1,622 +0,0 @@ -#include "softmax_layer.h" -#include "blas.h" -#include "dark_cuda.h" -#include "utils.h" -#include "blas.h" - -#include -#include -#include -#include -#include - -#define 
SECRET_NUM -1234 - -void softmax_tree(float *input, int batch, int inputs, float temp, tree *hierarchy, float *output) -{ - int b; - for (b = 0; b < batch; ++b) { - int i; - int count = 0; - for (i = 0; i < hierarchy->groups; ++i) { - int group_size = hierarchy->group_size[i]; - softmax(input + b*inputs + count, group_size, temp, output + b*inputs + count, 1); - count += group_size; - } - } -} - -softmax_layer make_softmax_layer(int batch, int inputs, int groups) -{ - assert(inputs%groups == 0); - fprintf(stderr, "softmax %4d\n", inputs); - softmax_layer l = { (LAYER_TYPE)0 }; - l.type = SOFTMAX; - l.batch = batch; - l.groups = groups; - l.inputs = inputs; - l.outputs = inputs; - l.loss = (float*)xcalloc(inputs * batch, sizeof(float)); - l.output = (float*)xcalloc(inputs * batch, sizeof(float)); - l.delta = (float*)xcalloc(inputs * batch, sizeof(float)); - l.cost = (float*)xcalloc(1, sizeof(float)); - - l.forward = forward_softmax_layer; - l.backward = backward_softmax_layer; -#ifdef GPU - l.forward_gpu = forward_softmax_layer_gpu; - l.backward_gpu = backward_softmax_layer_gpu; - - l.output_gpu = cuda_make_array(l.output, inputs*batch); - l.loss_gpu = cuda_make_array(l.loss, inputs*batch); - l.delta_gpu = cuda_make_array(l.delta, inputs*batch); -#endif - return l; -} - -void forward_softmax_layer(const softmax_layer l, network_state net) -{ - if(l.softmax_tree){ - int i; - int count = 0; - for (i = 0; i < l.softmax_tree->groups; ++i) { - int group_size = l.softmax_tree->group_size[i]; - softmax_cpu(net.input + count, group_size, l.batch, l.inputs, 1, 0, 1, l.temperature, l.output + count); - count += group_size; - } - } else { - softmax_cpu(net.input, l.inputs/l.groups, l.batch, l.inputs, l.groups, l.inputs/l.groups, 1, l.temperature, l.output); - } - - if(net.truth && !l.noloss){ - softmax_x_ent_cpu(l.batch*l.inputs, l.output, net.truth, l.delta, l.loss); - l.cost[0] = sum_array(l.loss, l.batch*l.inputs); - } -} - -void backward_softmax_layer(const softmax_layer 
l, network_state net) -{ - axpy_cpu(l.inputs*l.batch, 1, l.delta, 1, net.delta, 1); -} - -#ifdef GPU - -void pull_softmax_layer_output(const softmax_layer layer) -{ - cuda_pull_array(layer.output_gpu, layer.output, layer.inputs*layer.batch); -} - -void forward_softmax_layer_gpu(const softmax_layer l, network_state net) -{ - if(l.softmax_tree){ - softmax_tree_gpu(net.input, 1, l.batch, l.inputs, l.temperature, l.output_gpu, *l.softmax_tree); - /* - int i; - int count = 0; - for (i = 0; i < l.softmax_tree->groups; ++i) { - int group_size = l.softmax_tree->group_size[i]; - softmax_gpu(net.input_gpu + count, group_size, l.batch, l.inputs, 1, 0, 1, l.temperature, l.output_gpu + count); - count += group_size; - } - */ - } else { - if(l.spatial){ - softmax_gpu_new_api(net.input, l.c, l.batch*l.c, l.inputs/l.c, l.w*l.h, 1, l.w*l.h, 1, l.output_gpu); - }else{ - softmax_gpu_new_api(net.input, l.inputs/l.groups, l.batch, l.inputs, l.groups, l.inputs/l.groups, 1, l.temperature, l.output_gpu); - } - } - if(net.truth && !l.noloss){ - softmax_x_ent_gpu(l.batch*l.inputs, l.output_gpu, net.truth, l.delta_gpu, l.loss_gpu); - if(l.softmax_tree){ - mask_gpu_new_api(l.batch*l.inputs, l.delta_gpu, SECRET_NUM, net.truth, 0); - mask_gpu_new_api(l.batch*l.inputs, l.loss_gpu, SECRET_NUM, net.truth, 0); - } - cuda_pull_array(l.loss_gpu, l.loss, l.batch*l.inputs); - l.cost[0] = sum_array(l.loss, l.batch*l.inputs); - } -} - -void backward_softmax_layer_gpu(const softmax_layer layer, network_state state) -{ - axpy_ongpu(layer.batch*layer.inputs, state.net.loss_scale, layer.delta_gpu, 1, state.delta, 1); -} - -#endif - -// ------------------------------------- - -// Supervised Contrastive Learning: https://arxiv.org/pdf/2004.11362.pdf -contrastive_layer make_contrastive_layer(int batch, int w, int h, int c, int classes, int inputs, layer *yolo_layer) -{ - contrastive_layer l = { (LAYER_TYPE)0 }; - l.type = CONTRASTIVE; - l.batch = batch; - l.inputs = inputs; - l.w = w; - l.h = h; - l.c = c; - 
l.temperature = 1; - - l.max_boxes = 0; - if (yolo_layer) { - l.detection = 1; - l.max_boxes = yolo_layer->max_boxes; - l.labels = yolo_layer->labels; // track id - l.class_ids = yolo_layer->class_ids; // class_ids - l.n = yolo_layer->n; // num of embeddings per cell = num of anchors - l.classes = yolo_layer->classes;// num of classes - classes = l.classes; - l.embedding_size = l.inputs / (l.n*l.h*l.w); - l.truths = yolo_layer->truths; - if (l.embedding_size != yolo_layer->embedding_size) { - printf(" Error: [contrastive] embedding_size=%d isn't equal to [yolo] embedding_size=%d. They should use the same [convolutional] layer \n", l.embedding_size, yolo_layer->embedding_size); - getchar(); - exit(0); - } - if (l.inputs % (l.n*l.h*l.w) != 0) { - printf(" Warning: filters= number in the previous (embedding) layer isn't divisable by number of anchors %d \n", l.n); - getchar(); - } - } - else { - l.detection = 0; - l.labels = (int*)xcalloc(l.batch, sizeof(int)); // labels - l.n = 1; // num of embeddings per cell - l.classes = classes; // num of classes - l.embedding_size = l.c; - } - l.outputs = inputs; - - l.loss = (float*)xcalloc(1, sizeof(float)); - l.output = (float*)xcalloc(inputs * batch, sizeof(float)); - l.delta = (float*)xcalloc(inputs * batch, sizeof(float)); - l.cost = (float*)xcalloc(1, sizeof(float)); - - const size_t step = l.batch*l.n*l.h*l.w; - l.cos_sim = NULL; - l.exp_cos_sim = NULL; - l.p_constrastive = NULL; - if (!l.detection) { - l.cos_sim = (float*)xcalloc(step*step, sizeof(float)); - l.exp_cos_sim = (float*)xcalloc(step*step, sizeof(float)); - l.p_constrastive = (float*)xcalloc(step*step, sizeof(float)); - } - //l.p_constrastive = (float*)xcalloc(step*step, sizeof(float)); - //l.contrast_p_size = (int*)xcalloc(1, sizeof(int)); - //*l.contrast_p_size = step; - //l.contrast_p = (contrastive_params*)xcalloc(*l.contrast_p_size, sizeof(contrastive_params)); - - l.forward = forward_contrastive_layer; - l.backward = backward_contrastive_layer; -#ifdef 
GPU - l.forward_gpu = forward_contrastive_layer_gpu; - l.backward_gpu = backward_contrastive_layer_gpu; - - l.output_gpu = cuda_make_array(l.output, inputs*batch); - l.delta_gpu = cuda_make_array(l.delta, inputs*batch); - - const int max_contr_size = (l.max_boxes*l.batch)*(l.max_boxes*l.batch) * sizeof(contrastive_params)/4; - printf(" max_contr_size = %d MB \n", max_contr_size / (1024*1024)); - l.contrast_p_gpu = (contrastive_params *)cuda_make_array(NULL, max_contr_size); -#endif - fprintf(stderr, "contrastive %4d x%4d x%4d x emb_size %4d x batch: %4d classes = %4d, step = %4d \n", w, h, l.n, l.embedding_size, batch, l.classes, step); - if(l.detection) fprintf(stderr, "detection \n"); - return l; -} - -static inline float clip_value(float val, const float max_val) -{ - if (val > max_val) { - //printf("\n val = %f > max_val = %f \n", val, max_val); - val = max_val; - } - else if (val < -max_val) { - //printf("\n val = %f < -max_val = %f \n", val, -max_val); - val = -max_val; - } - return val; -} - -void forward_contrastive_layer(contrastive_layer l, network_state state) -{ - if (!state.train) return; - const float truth_thresh = state.net.label_smooth_eps; - - const int mini_batch = l.batch / l.steps; - - int b, n, w, h; - fill_cpu(l.batch*l.inputs, 0, l.delta, 1); - - if (!l.detection) { - - for (b = 0; b < l.batch; ++b) { - if (state.net.adversarial) l.labels[b] = b % 2; - else l.labels[b] = b / 2; - } - - // set labels - for (b = 0; b < l.batch; ++b) { - for (h = 0; h < l.h; ++h) { - for (w = 0; w < l.w; ++w) - { - // find truth with max prob (only 1 label even if mosaic is used) - float max_truth = 0; - int n; - for (n = 0; n < l.classes; ++n) { - const float truth_prob = state.truth[b*l.classes + n]; - //printf(" truth_prob = %f, ", truth_prob); - //if (truth_prob > max_truth) - if (truth_prob > truth_thresh) - { - //printf(" truth_prob = %f, max_truth = %f, n = %d; ", truth_prob, max_truth, n); - max_truth = truth_prob; - l.labels[b] = n; - } - } - 
//printf(", l.labels[b] = %d ", l.labels[b]); - } - } - } - - } - //printf("\n\n"); - - // set pointers to features - float **z = (float**)xcalloc(l.batch*l.n*l.h*l.w, sizeof(float*)); - - for (b = 0; b < l.batch; ++b) { - for (n = 0; n < l.n; ++n) { - for (h = 0; h < l.h; ++h) { - for (w = 0; w < l.w; ++w) - { - const int z_index = b*l.n*l.h*l.w + n*l.h*l.w + h*l.w + w; - if (l.labels[z_index] < 0) continue; - - //const int input_index = b*l.inputs + n*l.embedding_size*l.h*l.w + h*l.w + w; - //float *ptr = state.input + input_index; - //z[z_index] = ptr; - - z[z_index] = (float*)xcalloc(l.embedding_size, sizeof(float)); - get_embedding(state.input, l.w, l.h, l.c, l.embedding_size, w, h, n, b, z[z_index]); - } - } - } - } - - int b2, n2, h2, w2; - int contrast_p_index = 0; - - const size_t step = l.batch*l.n*l.h*l.w; - size_t contrast_p_size = step; - if (!l.detection) contrast_p_size = l.batch*l.batch; - contrastive_params *contrast_p = (contrastive_params*)xcalloc(contrast_p_size, sizeof(contrastive_params)); - - float *max_sim_same = (float *)xcalloc(l.batch*l.inputs, sizeof(float)); - float *max_sim_diff = (float *)xcalloc(l.batch*l.inputs, sizeof(float)); - fill_cpu(l.batch*l.inputs, -10, max_sim_same, 1); - fill_cpu(l.batch*l.inputs, -10, max_sim_diff, 1); - - // precalculate cosine similiraty - for (b = 0; b < l.batch; ++b) { - for (n = 0; n < l.n; ++n) { - for (h = 0; h < l.h; ++h) { - for (w = 0; w < l.w; ++w) - { - const int z_index = b*l.n*l.h*l.w + n*l.h*l.w + h*l.w + w; - if (l.labels[z_index] < 0) continue; - - for (b2 = 0; b2 < l.batch; ++b2) { - for (n2 = 0; n2 < l.n; ++n2) { - for (h2 = 0; h2 < l.h; ++h2) { - for (w2 = 0; w2 < l.w; ++w2) - { - const int z_index2 = b2*l.n*l.h*l.w + n2*l.h*l.w + h2*l.w + w2; - if (l.labels[z_index2] < 0) continue; - if (z_index == z_index2) continue; - if (l.detection) - if (l.class_ids[z_index] != l.class_ids[z_index2]) continue; - - const int time_step_i = b / mini_batch; - const int time_step_j = b2 / mini_batch; 
- if (time_step_i != time_step_j) continue; - - const size_t step = l.batch*l.n*l.h*l.w; - - const float sim = cosine_similarity(z[z_index], z[z_index2], l.embedding_size); - const float exp_sim = expf(sim / l.temperature); - if (!l.detection) { - l.cos_sim[z_index*step + z_index2] = sim; - l.exp_cos_sim[z_index*step + z_index2] = exp_sim; - } - - // calc good sim - if (l.labels[z_index] == l.labels[z_index2] && max_sim_same[z_index] < sim) max_sim_same[z_index] = sim; - if (l.labels[z_index] != l.labels[z_index2] && max_sim_diff[z_index] < sim) max_sim_diff[z_index] = sim; - //printf(" z_i = %d, z_i2 = %d, l = %d, l2 = %d, sim = %f \n", z_index, z_index2, l.labels[z_index], l.labels[z_index2], sim); - - contrast_p[contrast_p_index].sim = sim; - contrast_p[contrast_p_index].exp_sim = exp_sim; - contrast_p[contrast_p_index].i = z_index; - contrast_p[contrast_p_index].j = z_index2; - contrast_p[contrast_p_index].time_step_i = time_step_i; - contrast_p[contrast_p_index].time_step_j = time_step_j; - contrast_p_index++; - //printf(" contrast_p_index = %d, contrast_p_size = %d \n", contrast_p_index, contrast_p_size); - if ((contrast_p_index+1) >= contrast_p_size) { - contrast_p_size = contrast_p_index + 1; - //printf(" contrast_p_size = %d, z_index = %d, z_index2 = %d \n", contrast_p_size, z_index, z_index2); - contrast_p = (contrastive_params*)xrealloc(contrast_p, contrast_p_size * sizeof(contrastive_params)); - } - - if (sim > 1.001 || sim < -1.001) { - printf(" sim = %f, ", sim); getchar(); - } - } - } - } - } - } - } - } - } - - // calc contrastive accuracy - int i; - int good_sims = 0, all_sims = 0, same_sim = 0, diff_sim = 0; - for (i = 0; i < l.batch*l.inputs; ++i) { - if (max_sim_same[i] >= -1 && max_sim_diff[i] >= -1) { - if (max_sim_same[i] >= -1) same_sim++; - if (max_sim_diff[i] >= -1) diff_sim++; - ++all_sims; - //printf(" max_sim_diff[i] = %f, max_sim_same[i] = %f \n", max_sim_diff[i], max_sim_same[i]); - if (max_sim_diff[i] < max_sim_same[i]) good_sims++; 
- } - } - if (all_sims > 0) { - *l.loss = 100 * good_sims / all_sims; - } - else *l.loss = -1; - printf(" Contrast accuracy = %f %%, all = %d, good = %d, same = %d, diff = %d \n", *l.loss, all_sims, good_sims, same_sim, diff_sim); - free(max_sim_same); - free(max_sim_diff); - - - /* - // show near sim - float good_contrast = 0; - for (b = 0; b < l.batch; b += 2) { - float same = l.cos_sim[b*l.batch + b]; - float aug = l.cos_sim[b*l.batch + b + 1]; - float diff = l.cos_sim[b*l.batch + b + 2]; - good_contrast += (aug > diff); - //printf(" l.labels[b] = %d, l.labels[b+1] = %d, l.labels[b+2] = %d, b = %d \n", l.labels[b], l.labels[b + 1], l.labels[b + 2], b); - //printf(" same = %f, aug = %f, diff = %f, (aug > diff) = %d \n", same, aug, diff, (aug > diff)); - } - *l.loss = 100 * good_contrast / (l.batch / 2); - printf(" Contrast accuracy = %f %% \n", *l.loss); - */ - - /* - // precalculate P_contrastive - for (b = 0; b < l.batch; ++b) { - int b2; - for (b2 = 0; b2 < l.batch; ++b2) { - if (b != b2) { - const float P = P_constrastive(b, b2, l.labels, l.batch, z, l.embedding_size, l.temperature, l.cos_sim); - l.p_constrastive[b*l.batch + b2] = P; - if (P > 1 || P < -1) { - printf(" p = %f, ", P); getchar(); - } - } - } - } - */ - - - const size_t contr_size = contrast_p_index; - - if (l.detection) { -#ifdef GPU - const int max_contr_size = (l.max_boxes*l.batch)*(l.max_boxes*l.batch); - if (max_contr_size < contr_size) { - printf(" Error: too large number of bboxes: contr_size = %d > max_contr_size = %d \n", contr_size, max_contr_size); - exit(0); - } - int *labels = NULL; - if (contr_size > 2) { - cuda_push_array((float *)l.contrast_p_gpu, (float *)contrast_p, contr_size * sizeof(contrastive_params) / 4); - P_constrastive_f_det_gpu(labels, l.embedding_size, l.temperature, l.contrast_p_gpu, contr_size); - cuda_pull_array((float *)l.contrast_p_gpu, (float *)contrast_p, contr_size * sizeof(contrastive_params) / 4); - } -#else // GPU - int k; - //#pragma omp parallel for - 
for (k = 0; k < contr_size; ++k) { - contrast_p[k].P = P_constrastive_f_det(k, l.labels, z, l.embedding_size, l.temperature, contrast_p, contr_size); - } -#endif // GPU - } - else { - // precalculate P-contrastive - for (b = 0; b < l.batch; ++b) { - for (n = 0; n < l.n; ++n) { - for (h = 0; h < l.h; ++h) { - for (w = 0; w < l.w; ++w) - { - const int z_index = b*l.n*l.h*l.w + n*l.h*l.w + h*l.w + w; - if (l.labels[z_index] < 0) continue; - - for (b2 = 0; b2 < l.batch; ++b2) { - for (n2 = 0; n2 < l.n; ++n2) { - for (h2 = 0; h2 < l.h; ++h2) { - for (w2 = 0; w2 < l.w; ++w2) - { - const int z_index2 = b2*l.n*l.h*l.w + n2*l.h*l.w + h2*l.w + w2; - if (l.labels[z_index2] < 0) continue; - if (z_index == z_index2) continue; - if (l.detection) - if (l.class_ids[z_index] != l.class_ids[z_index2]) continue; - - const int time_step_i = b / mini_batch; - const int time_step_j = b2 / mini_batch; - if (time_step_i != time_step_j) continue; - - const size_t step = l.batch*l.n*l.h*l.w; - - float P = -10; - if (l.detection) { - P = P_constrastive_f(z_index, z_index2, l.labels, z, l.embedding_size, l.temperature, contrast_p, contr_size); - } - else { - P = P_constrastive(z_index, z_index2, l.labels, step, z, l.embedding_size, l.temperature, l.cos_sim, l.exp_cos_sim); - l.p_constrastive[z_index*step + z_index2] = P; - } - - int q; - for (q = 0; q < contr_size; ++q) - if (contrast_p[q].i == z_index && contrast_p[q].j == z_index2) { - contrast_p[q].P = P; - break; - } - - //if (q == contr_size) getchar(); - - - //if (P > 1 || P < -1) { - // printf(" p = %f, z_index = %d, z_index2 = %d ", P, z_index, z_index2); getchar(); - //} - } - } - } - } - } - } - } - } - } - - - // calc deltas - #pragma omp parallel for - for (b = 0; b < l.batch; ++b) { - for (n = 0; n < l.n; ++n) { - for (h = 0; h < l.h; ++h) { - for (w = 0; w < l.w; ++w) - { - const int z_index = b*l.n*l.h*l.w + n*l.h*l.w + h*l.w + w; - const size_t step = l.batch*l.n*l.h*l.w; - if (l.labels[z_index] < 0) continue; - - const int 
delta_index = b*l.embedding_size*l.n*l.h*l.w + n*l.embedding_size*l.h*l.w + h*l.w + w; - const int wh = l.w*l.h; - - if (l.detection) { - // detector - - // positive - grad_contrastive_loss_positive_f(z_index, l.class_ids, l.labels, step, z, l.embedding_size, l.temperature, l.delta + delta_index, wh, contrast_p, contr_size); - - // negative - grad_contrastive_loss_negative_f(z_index, l.class_ids, l.labels, step, z, l.embedding_size, l.temperature, l.delta + delta_index, wh, contrast_p, contr_size, l.contrastive_neg_max); - } - else { - // classifier - - // positive - grad_contrastive_loss_positive(z_index, l.labels, step, z, l.embedding_size, l.temperature, l.cos_sim, l.p_constrastive, l.delta + delta_index, wh); - - // negative - grad_contrastive_loss_negative(z_index, l.labels, step, z, l.embedding_size, l.temperature, l.cos_sim, l.p_constrastive, l.delta + delta_index, wh); - } - - } - } - } - } - - scal_cpu(l.inputs * l.batch, l.cls_normalizer, l.delta, 1); - - for (i = 0; i < l.inputs * l.batch; ++i) { - l.delta[i] = clip_value(l.delta[i], l.max_delta); - } - - *(l.cost) = pow(mag_array(l.delta, l.inputs * l.batch), 2); - if (state.net.adversarial) { - printf(" adversarial contrastive loss = %f \n\n", *(l.cost)); - } - else { - printf(" contrastive loss = %f \n\n", *(l.cost)); - } - - for (b = 0; b < l.batch; ++b) { - for (n = 0; n < l.n; ++n) { - for (h = 0; h < l.h; ++h) { - for (w = 0; w < l.w; ++w) - { - const int z_index = b*l.n*l.h*l.w + n*l.h*l.w + h*l.w + w; - //if (l.labels[z_index] < 0) continue; - if (z[z_index]) free(z[z_index]); - } - } - } - } - - free(contrast_p); - free(z); -} - -void backward_contrastive_layer(contrastive_layer l, network_state state) -{ - axpy_cpu(l.inputs*l.batch, 1, l.delta, 1, state.delta, 1); -} - - -#ifdef GPU - -void pull_contrastive_layer_output(const contrastive_layer l) -{ - cuda_pull_array(l.output_gpu, l.output, l.inputs*l.batch); -} - -void push_contrastive_layer_output(const contrastive_layer l) -{ - 
cuda_push_array(l.delta_gpu, l.delta, l.inputs*l.batch); -} - - -void forward_contrastive_layer_gpu(contrastive_layer l, network_state state) -{ - simple_copy_ongpu(l.batch*l.inputs, state.input, l.output_gpu); - if (!state.train) return; - - float *in_cpu = (float *)xcalloc(l.batch*l.inputs, sizeof(float)); - cuda_pull_array(l.output_gpu, l.output, l.batch*l.outputs); - memcpy(in_cpu, l.output, l.batch*l.outputs * sizeof(float)); - float *truth_cpu = 0; - if (state.truth) { - int num_truth = l.batch*l.classes; - if (l.detection) num_truth = l.batch*l.truths; - truth_cpu = (float *)xcalloc(num_truth, sizeof(float)); - cuda_pull_array(state.truth, truth_cpu, num_truth); - } - network_state cpu_state = state; - cpu_state.net = state.net; - cpu_state.index = state.index; - cpu_state.train = state.train; - cpu_state.truth = truth_cpu; - cpu_state.input = in_cpu; - - forward_contrastive_layer(l, cpu_state); - cuda_push_array(l.delta_gpu, l.delta, l.batch*l.outputs); - - free(in_cpu); - if (cpu_state.truth) free(cpu_state.truth); -} - -void backward_contrastive_layer_gpu(contrastive_layer layer, network_state state) -{ - axpy_ongpu(layer.batch*layer.inputs, state.net.loss_scale, layer.delta_gpu, 1, state.delta, 1); -} - -#endif \ No newline at end of file diff --git a/src/Detector/darknet/src/softmax_layer.h b/src/Detector/darknet/src/softmax_layer.h deleted file mode 100644 index c86997b3c..000000000 --- a/src/Detector/darknet/src/softmax_layer.h +++ /dev/null @@ -1,39 +0,0 @@ -#ifndef SOFTMAX_LAYER_H -#define SOFTMAX_LAYER_H -#include "layer.h" -#include "network.h" - -typedef layer softmax_layer; -typedef layer contrastive_layer; - -#ifdef __cplusplus -extern "C" { -#endif -void softmax_array(float *input, int n, float temp, float *output); -softmax_layer make_softmax_layer(int batch, int inputs, int groups); -void forward_softmax_layer(const softmax_layer l, network_state state); -void backward_softmax_layer(const softmax_layer l, network_state state); - -#ifdef GPU 
-void pull_softmax_layer_output(const softmax_layer l); -void forward_softmax_layer_gpu(const softmax_layer l, network_state state); -void backward_softmax_layer_gpu(const softmax_layer l, network_state state); -#endif - -//----------------------- - -contrastive_layer make_contrastive_layer(int batch, int w, int h, int n, int classes, int inputs, layer *yolo_layer); -void forward_contrastive_layer(contrastive_layer l, network_state state); -void backward_contrastive_layer(contrastive_layer l, network_state net); - -#ifdef GPU -void pull_contrastive_layer_output(const contrastive_layer l); -void push_contrastive_layer_output(const contrastive_layer l); -void forward_contrastive_layer_gpu(contrastive_layer l, network_state state); -void backward_contrastive_layer_gpu(contrastive_layer layer, network_state state); -#endif - -#ifdef __cplusplus -} -#endif -#endif diff --git a/src/Detector/darknet/src/super.c b/src/Detector/darknet/src/super.c deleted file mode 100644 index 35e7f6cf8..000000000 --- a/src/Detector/darknet/src/super.c +++ /dev/null @@ -1,128 +0,0 @@ -#include "network.h" -#include "cost_layer.h" -#include "utils.h" -#include "parser.h" - - -void train_super(char *cfgfile, char *weightfile) -{ - char* train_images = "data/imagenet/imagenet1k.train.list"; - char* backup_directory = "backup/"; - srand(time(0)); - char *base = basecfg(cfgfile); - printf("%s\n", base); - float avg_loss = -1; - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); - int imgs = net.batch*net.subdivisions; - int i = *net.seen/imgs; - data train, buffer; - - - list *plist = get_paths(train_images); - //int N = plist->size; - char **paths = (char **)list_to_array(plist); - - load_args args = {0}; - args.w = net.w; - args.h = net.h; - args.scale = 4; - args.paths = paths; - args.n = imgs; - args.m = plist->size; - args.d = &buffer; - 
args.type = SUPER_DATA; - - pthread_t load_thread = load_data_in_thread(args); - clock_t time; - //while(i*imgs < N*120){ - while(get_current_batch(net) < net.max_batches){ - i += 1; - time=clock(); - pthread_join(load_thread, 0); - train = buffer; - load_thread = load_data_in_thread(args); - - printf("Loaded: %lf seconds\n", sec(clock()-time)); - - time=clock(); - float loss = train_network(net, train); - if (avg_loss < 0) avg_loss = loss; - avg_loss = avg_loss*.9 + loss*.1; - - printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); - if(i%1000==0){ - char buff[256]; - sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); - save_weights(net, buff); - } - if(i%100==0){ - char buff[256]; - sprintf(buff, "%s/%s.backup", backup_directory, base); - save_weights(net, buff); - } - free_data(train); - } - char buff[256]; - sprintf(buff, "%s/%s_final.weights", backup_directory, base); - save_weights(net, buff); -} - -void test_super(char *cfgfile, char *weightfile, char *filename) -{ - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - set_batch_network(&net, 1); - srand(2222222); - - clock_t time; - char buff[256]; - char *input = buff; - while(1){ - if(filename){ - strncpy(input, filename, 256); - }else{ - printf("Enter Image Path: "); - fflush(stdout); - input = fgets(input, 256, stdin); - if(!input) return; - strtok(input, "\n"); - } - image im = load_image_color(input, 0, 0); - resize_network(&net, im.w, im.h); - printf("%d %d\n", im.w, im.h); - - float *X = im.data; - time=clock(); - network_predict(net, X); - image out = get_network_image(net); - printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); - save_image(out, "out"); - - free_image(im); - if (filename) break; - } -} - - -void run_super(int argc, char **argv) -{ - if(argc < 4){ - fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", 
argv[0], argv[1]); - return; - } - - char *cfg = argv[3]; - char *weights = (argc > 4) ? argv[4] : 0; - char *filename = (argc > 5) ? argv[5] : 0; - if(0==strcmp(argv[2], "train")) train_super(cfg, weights); - else if(0==strcmp(argv[2], "test")) test_super(cfg, weights, filename); - /* - else if(0==strcmp(argv[2], "valid")) validate_super(cfg, weights); - */ -} diff --git a/src/Detector/darknet/src/swag.c b/src/Detector/darknet/src/swag.c deleted file mode 100644 index 210f03f40..000000000 --- a/src/Detector/darknet/src/swag.c +++ /dev/null @@ -1,87 +0,0 @@ -#include "network.h" -#include "detection_layer.h" -#include "cost_layer.h" -#include "utils.h" -#include "parser.h" -#include "box.h" - -void train_swag(char *cfgfile, char *weightfile) -{ - char *train_images = "data/voc.0712.trainval"; - char* backup_directory = "backup/"; - srand(time(0)); - char *base = basecfg(cfgfile); - printf("%s\n", base); - float avg_loss = -1; - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); - int imgs = net.batch*net.subdivisions; - int i = *net.seen/imgs; - data train, buffer; - - layer l = net.layers[net.n - 1]; - - int side = l.side; - int classes = l.classes; - float jitter = l.jitter; - - list *plist = get_paths(train_images); - //int N = plist->size; - char **paths = (char **)list_to_array(plist); - - load_args args = {0}; - args.w = net.w; - args.h = net.h; - args.paths = paths; - args.n = imgs; - args.m = plist->size; - args.classes = classes; - args.jitter = jitter; - args.num_boxes = side; - args.d = &buffer; - args.type = REGION_DATA; - - pthread_t load_thread = load_data_in_thread(args); - clock_t time; - //while(i*imgs < N*120){ - while(get_current_batch(net) < net.max_batches){ - i += 1; - time=clock(); - pthread_join(load_thread, 0); - train = buffer; - load_thread = load_data_in_thread(args); - - printf("Loaded: %lf 
seconds\n", sec(clock()-time)); - - time=clock(); - float loss = train_network(net, train); - if (avg_loss < 0) avg_loss = loss; - avg_loss = avg_loss*.9 + loss*.1; - - printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); - if(i%1000==0 || i == 600){ - char buff[256]; - sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); - save_weights(net, buff); - } - free_data(train); - } - char buff[256]; - sprintf(buff, "%s/%s_final.weights", backup_directory, base); - save_weights(net, buff); -} - -void run_swag(int argc, char **argv) -{ - if(argc < 4){ - fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); - return; - } - - char *cfg = argv[3]; - char *weights = (argc > 4) ? argv[4] : 0; - if(0==strcmp(argv[2], "train")) train_swag(cfg, weights); -} diff --git a/src/Detector/darknet/src/tag.c b/src/Detector/darknet/src/tag.c deleted file mode 100644 index d7e1349a2..000000000 --- a/src/Detector/darknet/src/tag.c +++ /dev/null @@ -1,151 +0,0 @@ -#include "network.h" -#include "utils.h" -#include "parser.h" - -void train_tag(char *cfgfile, char *weightfile, int clear) -{ - srand(time(0)); - float avg_loss = -1; - char *base = basecfg(cfgfile); - char* backup_directory = "backup/"; - printf("%s\n", base); - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - if (clear) { - *net.seen = 0; - *net.cur_iteration = 0; - } - printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); - int imgs = 1024; - list* plist = get_paths("tag/train.list"); - char **paths = (char **)list_to_array(plist); - printf("%d\n", plist->size); - int N = plist->size; - clock_t time; - pthread_t load_thread; - data train; - data buffer; - - load_args args = {0}; - args.w = net.w; - args.h = net.h; - - args.min = net.w; - args.max = net.max_crop; - args.size = net.w; - - args.paths = 
paths; - args.classes = net.outputs; - args.n = imgs; - args.m = N; - args.d = &buffer; - args.type = TAG_DATA; - - args.angle = net.angle; - args.exposure = net.exposure; - args.saturation = net.saturation; - args.hue = net.hue; - - fprintf(stderr, "%d classes\n", net.outputs); - - load_thread = load_data_in_thread(args); - int epoch = (*net.seen)/N; - while(get_current_batch(net) < net.max_batches || net.max_batches == 0){ - time=clock(); - pthread_join(load_thread, 0); - train = buffer; - - load_thread = load_data_in_thread(args); - printf("Loaded: %lf seconds\n", sec(clock()-time)); - time=clock(); - float loss = train_network(net, train); - if(avg_loss == -1) avg_loss = loss; - avg_loss = avg_loss*.9 + loss*.1; - printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); - free_data(train); - if(*net.seen/N > epoch){ - epoch = *net.seen/N; - char buff[256]; - sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); - save_weights(net, buff); - } - if(get_current_batch(net)%100 == 0){ - char buff[256]; - sprintf(buff, "%s/%s.backup",backup_directory,base); - save_weights(net, buff); - } - } - char buff[256]; - sprintf(buff, "%s/%s.weights", backup_directory, base); - save_weights(net, buff); - - pthread_join(load_thread, 0); - free_data(buffer); - free_network(net); - free_ptrs((void**)paths, plist->size); - free_list(plist); - free(base); -} - -void test_tag(char *cfgfile, char *weightfile, char *filename) -{ - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - set_batch_network(&net, 1); - srand(2222222); - int i = 0; - char **names = get_labels("data/tags.txt"); - clock_t time; - int indexes[10]; - char buff[256]; - char *input = buff; - int size = net.w; - while(1){ - if(filename){ - strncpy(input, filename, 256); - }else{ - printf("Enter Image Path: "); - fflush(stdout); - input 
= fgets(input, 256, stdin); - if(!input) return; - strtok(input, "\n"); - } - image im = load_image_color(input, 0, 0); - image r = resize_min(im, size); - resize_network(&net, r.w, r.h); - printf("%d %d\n", r.w, r.h); - - float *X = r.data; - time=clock(); - float *predictions = network_predict(net, X); - top_predictions(net, 10, indexes); - printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); - for(i = 0; i < 10; ++i){ - int index = indexes[i]; - printf("%.1f%%: %s\n", predictions[index]*100, names[index]); - } - if(r.data != im.data) free_image(r); - free_image(im); - if (filename) break; - } -} - - -void run_tag(int argc, char **argv) -{ - if(argc < 4){ - fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); - return; - } - - int clear = find_arg(argc, argv, "-clear"); - char *cfg = argv[3]; - char *weights = (argc > 4) ? argv[4] : 0; - char *filename = (argc > 5) ? argv[5] : 0; - if(0==strcmp(argv[2], "train")) train_tag(cfg, weights, clear); - else if(0==strcmp(argv[2], "test")) test_tag(cfg, weights, filename); -} diff --git a/src/Detector/darknet/src/tree.c b/src/Detector/darknet/src/tree.c deleted file mode 100644 index 8a2c23169..000000000 --- a/src/Detector/darknet/src/tree.c +++ /dev/null @@ -1,135 +0,0 @@ -#include -#include -#include "tree.h" -#include "utils.h" -#include "data.h" - -void change_leaves(tree *t, char *leaf_list) -{ - list *llist = get_paths(leaf_list); - char **leaves = (char **)list_to_array(llist); - int n = llist->size; - int i,j; - int found = 0; - for(i = 0; i < t->n; ++i){ - t->leaf[i] = 0; - for(j = 0; j < n; ++j){ - if (0==strcmp(t->name[i], leaves[j])){ - t->leaf[i] = 1; - ++found; - break; - } - } - } - fprintf(stderr, "Found %d leaves.\n", found); -} - -float get_hierarchy_probability(float *x, tree *hier, int c) -{ - float p = 1; - while(c >= 0){ - p = p * x[c]; - c = hier->parent[c]; - } - return p; -} - -void hierarchy_predictions(float *predictions, int n, tree 
*hier, int only_leaves) -{ - int j; - for(j = 0; j < n; ++j){ - int parent = hier->parent[j]; - if(parent >= 0){ - predictions[j] *= predictions[parent]; - } - } - if(only_leaves){ - for(j = 0; j < n; ++j){ - if(!hier->leaf[j]) predictions[j] = 0; - } - } -} - -int hierarchy_top_prediction(float *predictions, tree *hier, float thresh, int stride) -{ - float p = 1; - int group = 0; - int i; - while (1) { - float max = 0; - int max_i = 0; - - for (i = 0; i < hier->group_size[group]; ++i) { - int index = i + hier->group_offset[group]; - float val = predictions[(i + hier->group_offset[group])*stride]; - if (val > max) { - max_i = index; - max = val; - } - } - if (p*max > thresh) { - p = p*max; - group = hier->child[max_i]; - if (hier->child[max_i] < 0) return max_i; - } - else if (group == 0) { - return max_i; - } - else { - return hier->parent[hier->group_offset[group]]; - } - } - return 0; -} - -tree *read_tree(char *filename) -{ - tree t = {0}; - FILE *fp = fopen(filename, "r"); - - char *line; - int last_parent = -1; - int group_size = 0; - int groups = 0; - int n = 0; - while((line=fgetl(fp)) != 0){ - char* id = (char*)xcalloc(256, sizeof(char)); - int parent = -1; - sscanf(line, "%s %d", id, &parent); - t.parent = (int*)xrealloc(t.parent, (n + 1) * sizeof(int)); - t.parent[n] = parent; - - t.name = (char**)xrealloc(t.name, (n + 1) * sizeof(char*)); - t.name[n] = id; - if(parent != last_parent){ - ++groups; - t.group_offset = (int*)xrealloc(t.group_offset, groups * sizeof(int)); - t.group_offset[groups - 1] = n - group_size; - t.group_size = (int*)xrealloc(t.group_size, groups * sizeof(int)); - t.group_size[groups - 1] = group_size; - group_size = 0; - last_parent = parent; - } - t.group = (int*)xrealloc(t.group, (n + 1) * sizeof(int)); - t.group[n] = groups; - ++n; - ++group_size; - } - ++groups; - t.group_offset = (int*)xrealloc(t.group_offset, groups * sizeof(int)); - t.group_offset[groups - 1] = n - group_size; - t.group_size = (int*)xrealloc(t.group_size, 
groups * sizeof(int)); - t.group_size[groups - 1] = group_size; - t.n = n; - t.groups = groups; - t.leaf = (int*)xcalloc(n, sizeof(int)); - int i; - for(i = 0; i < n; ++i) t.leaf[i] = 1; - for(i = 0; i < n; ++i) if(t.parent[i] >= 0) t.leaf[t.parent[i]] = 0; - - fclose(fp); - tree* tree_ptr = (tree*)xcalloc(1, sizeof(tree)); - *tree_ptr = t; - //error(0); - return tree_ptr; -} diff --git a/src/Detector/darknet/src/tree.h b/src/Detector/darknet/src/tree.h deleted file mode 100644 index 863797387..000000000 --- a/src/Detector/darknet/src/tree.h +++ /dev/null @@ -1,30 +0,0 @@ -#ifndef TREE_H -#define TREE_H -#include "darknet.h" - -//typedef struct{ -// int *leaf; -// int n; -// int *parent; -// int *child; -// int *group; -// char **name; -// -// int groups; -// int *group_size; -// int *group_offset; -//} tree; - -#ifdef __cplusplus -extern "C" { -#endif -//tree *read_tree(char *filename); -int hierarchy_top_prediction(float *predictions, tree *hier, float thresh, int stride); -void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves); -void change_leaves(tree *t, char *leaf_list); -float get_hierarchy_probability(float *x, tree *hier, int c); - -#ifdef __cplusplus -} -#endif -#endif diff --git a/src/Detector/darknet/src/upsample_layer.c b/src/Detector/darknet/src/upsample_layer.c deleted file mode 100644 index 778f5b4d5..000000000 --- a/src/Detector/darknet/src/upsample_layer.c +++ /dev/null @@ -1,107 +0,0 @@ -#include "upsample_layer.h" -#include "dark_cuda.h" -#include "utils.h" -#include "blas.h" - -#include - -layer make_upsample_layer(int batch, int w, int h, int c, int stride) -{ - layer l = { (LAYER_TYPE)0 }; - l.type = UPSAMPLE; - l.batch = batch; - l.w = w; - l.h = h; - l.c = c; - l.out_w = w*stride; - l.out_h = h*stride; - l.out_c = c; - if(stride < 0){ - stride = -stride; - l.reverse=1; - l.out_w = w/stride; - l.out_h = h/stride; - } - l.stride = stride; - l.outputs = l.out_w*l.out_h*l.out_c; - l.inputs = l.w*l.h*l.c; - l.delta = 
(float*)xcalloc(l.outputs * batch, sizeof(float)); - l.output = (float*)xcalloc(l.outputs * batch, sizeof(float)); - - l.forward = forward_upsample_layer; - l.backward = backward_upsample_layer; - #ifdef GPU - l.forward_gpu = forward_upsample_layer_gpu; - l.backward_gpu = backward_upsample_layer_gpu; - - l.delta_gpu = cuda_make_array(l.delta, l.outputs*batch); - l.output_gpu = cuda_make_array(l.output, l.outputs*batch); - #endif - if(l.reverse) fprintf(stderr, "downsample %2dx %4d x%4d x%4d -> %4d x%4d x%4d\n", stride, w, h, c, l.out_w, l.out_h, l.out_c); - else fprintf(stderr, "upsample %2dx %4d x%4d x%4d -> %4d x%4d x%4d\n", stride, w, h, c, l.out_w, l.out_h, l.out_c); - return l; -} - -void resize_upsample_layer(layer *l, int w, int h) -{ - l->w = w; - l->h = h; - l->out_w = w*l->stride; - l->out_h = h*l->stride; - if(l->reverse){ - l->out_w = w/l->stride; - l->out_h = h/l->stride; - } - l->outputs = l->out_w*l->out_h*l->out_c; - l->inputs = l->h*l->w*l->c; - l->delta = (float*)xrealloc(l->delta, l->outputs * l->batch * sizeof(float)); - l->output = (float*)xrealloc(l->output, l->outputs * l->batch * sizeof(float)); - -#ifdef GPU - cuda_free(l->output_gpu); - cuda_free(l->delta_gpu); - l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); - l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch); -#endif - -} - -void forward_upsample_layer(const layer l, network_state net) -{ - fill_cpu(l.outputs*l.batch, 0, l.output, 1); - if(l.reverse){ - upsample_cpu(l.output, l.out_w, l.out_h, l.c, l.batch, l.stride, 0, l.scale, net.input); - }else{ - upsample_cpu(net.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.scale, l.output); - } -} - -void backward_upsample_layer(const layer l, network_state state) -{ - if(l.reverse){ - upsample_cpu(l.delta, l.out_w, l.out_h, l.c, l.batch, l.stride, 1, l.scale, state.delta); - }else{ - upsample_cpu(state.delta, l.w, l.h, l.c, l.batch, l.stride, 0, l.scale, l.delta); - } -} - -#ifdef GPU -void 
forward_upsample_layer_gpu(const layer l, network_state state) -{ - fill_ongpu(l.outputs*l.batch, 0, l.output_gpu, 1); - if(l.reverse){ - upsample_gpu(l.output_gpu, l.out_w, l.out_h, l.c, l.batch, l.stride, 0, l.scale, state.input); - }else{ - upsample_gpu(state.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.scale, l.output_gpu); - } -} - -void backward_upsample_layer_gpu(const layer l, network_state state) -{ - if(l.reverse){ - upsample_gpu(l.delta_gpu, l.out_w, l.out_h, l.c, l.batch, l.stride, 1, l.scale, state.delta); - }else{ - upsample_gpu(state.delta, l.w, l.h, l.c, l.batch, l.stride, 0, l.scale, l.delta_gpu); - } -} -#endif diff --git a/src/Detector/darknet/src/upsample_layer.h b/src/Detector/darknet/src/upsample_layer.h deleted file mode 100644 index 4461cb15d..000000000 --- a/src/Detector/darknet/src/upsample_layer.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef UPSAMPLE_LAYER_H -#define UPSAMPLE_LAYER_H -#include "dark_cuda.h" -#include "layer.h" -#include "network.h" - -#ifdef __cplusplus -extern "C" { -#endif -layer make_upsample_layer(int batch, int w, int h, int c, int stride); -void forward_upsample_layer(const layer l, network_state state); -void backward_upsample_layer(const layer l, network_state state); -void resize_upsample_layer(layer *l, int w, int h); - -#ifdef GPU -void forward_upsample_layer_gpu(const layer l, network_state state); -void backward_upsample_layer_gpu(const layer l, network_state state); -#endif - -#ifdef __cplusplus -} -#endif -#endif diff --git a/src/Detector/darknet/src/utils.c b/src/Detector/darknet/src/utils.c deleted file mode 100644 index fe5c20621..000000000 --- a/src/Detector/darknet/src/utils.c +++ /dev/null @@ -1,1049 +0,0 @@ -#ifndef _GNU_SOURCE -#define _GNU_SOURCE -#endif -#include "utils.h" -#include -#include -#include -#ifndef _USE_MATH_DEFINES -#define _USE_MATH_DEFINES -#endif -#include -#include -#include -#include -#include "darkunistd.h" -#ifdef WIN32 -#include "gettimeofday.h" -#else -#include -#include -#endif 
- - -#ifndef USE_CMAKE_LIBS -#pragma warning(disable: 4996) -#endif - -void *xmalloc(size_t size) { - void *ptr=malloc(size); - if(!ptr) { - malloc_error(); - } - return ptr; -} - -void *xcalloc(size_t nmemb, size_t size) { - void *ptr=calloc(nmemb,size); - if(!ptr) { - calloc_error(); - } - return ptr; -} - -void *xrealloc(void *ptr, size_t size) { - ptr=realloc(ptr,size); - if(!ptr) { - realloc_error(); - } - return ptr; -} - -double what_time_is_it_now() -{ - struct timeval time; - if (gettimeofday(&time, NULL)) { - return 0; - } - return (double)time.tv_sec + (double)time.tv_usec * .000001; -} - -int *read_map(char *filename) -{ - int n = 0; - int *map = 0; - char *str; - FILE *file = fopen(filename, "r"); - if(!file) file_error(filename); - while((str=fgetl(file))){ - ++n; - map = (int*)xrealloc(map, n * sizeof(int)); - map[n-1] = atoi(str); - free(str); - } - if (file) fclose(file); - return map; -} - -void sorta_shuffle(void *arr, size_t n, size_t size, size_t sections) -{ - size_t i; - for(i = 0; i < sections; ++i){ - size_t start = n*i/sections; - size_t end = n*(i+1)/sections; - size_t num = end-start; - shuffle((char*)arr+(start*size), num, size); - } -} - -void shuffle(void *arr, size_t n, size_t size) -{ - size_t i; - void* swp = (void*)xcalloc(1, size); - for(i = 0; i < n-1; ++i){ - size_t j = i + random_gen()/(RAND_MAX / (n-i)+1); - memcpy(swp, (char*)arr+(j*size), size); - memcpy((char*)arr+(j*size), (char*)arr+(i*size), size); - memcpy((char*)arr+(i*size), swp, size); - } - free(swp); -} - -void del_arg(int argc, char **argv, int index) -{ - int i; - for(i = index; i < argc-1; ++i) argv[i] = argv[i+1]; - argv[i] = 0; -} - -int find_arg(int argc, char* argv[], char *arg) -{ - int i; - for(i = 0; i < argc; ++i) { - if(!argv[i]) continue; - if(0==strcmp(argv[i], arg)) { - del_arg(argc, argv, i); - return 1; - } - } - return 0; -} - -int find_int_arg(int argc, char **argv, char *arg, int def) -{ - int i; - for(i = 0; i < argc-1; ++i){ - if(!argv[i]) 
continue; - if(0==strcmp(argv[i], arg)){ - def = atoi(argv[i+1]); - del_arg(argc, argv, i); - del_arg(argc, argv, i); - break; - } - } - return def; -} - -float find_float_arg(int argc, char **argv, char *arg, float def) -{ - int i; - for(i = 0; i < argc-1; ++i){ - if(!argv[i]) continue; - if(0==strcmp(argv[i], arg)){ - def = atof(argv[i+1]); - del_arg(argc, argv, i); - del_arg(argc, argv, i); - break; - } - } - return def; -} - -char *find_char_arg(int argc, char **argv, char *arg, char *def) -{ - int i; - for(i = 0; i < argc-1; ++i){ - if(!argv[i]) continue; - if(0==strcmp(argv[i], arg)){ - def = argv[i+1]; - del_arg(argc, argv, i); - del_arg(argc, argv, i); - break; - } - } - return def; -} - - -char *basecfg(char *cfgfile) -{ - char *c = cfgfile; - char *next; - while((next = strchr(c, '/'))) - { - c = next+1; - } - if(!next) while ((next = strchr(c, '\\'))) { c = next + 1; } - c = copy_string(c); - next = strchr(c, '.'); - if (next) *next = 0; - return c; -} - -int alphanum_to_int(char c) -{ - return (c < 58) ? c - 48 : c-87; -} -char int_to_alphanum(int i) -{ - if (i == 36) return '.'; - return (i < 10) ? i + 48 : i + 87; -} - -void pm(int M, int N, float *A) -{ - int i,j; - for(i =0 ; i < M; ++i){ - printf("%d ", i+1); - for(j = 0; j < N; ++j){ - printf("%2.4f, ", A[i*N+j]); - } - printf("\n"); - } - printf("\n"); -} - -void find_replace(const char* str, char* orig, char* rep, char* output) -{ - char* buffer = (char*)calloc(8192, sizeof(char)); - char *p; - - sprintf(buffer, "%s", str); - if (!(p = strstr(buffer, orig))) { // Is 'orig' even in 'str'? 
- sprintf(output, "%s", buffer); - free(buffer); - return; - } - - *p = '\0'; - - sprintf(output, "%s%s%s", buffer, rep, p + strlen(orig)); - free(buffer); -} - -void trim(char *str) -{ - char* buffer = (char*)xcalloc(8192, sizeof(char)); - sprintf(buffer, "%s", str); - - char *p = buffer; - while (*p == ' ' || *p == '\t') ++p; - - char *end = p + strlen(p) - 1; - while (*end == ' ' || *end == '\t') { - *end = '\0'; - --end; - } - sprintf(str, "%s", p); - - free(buffer); -} - -void find_replace_extension(char *str, char *orig, char *rep, char *output) -{ - char* buffer = (char*)calloc(8192, sizeof(char)); - - sprintf(buffer, "%s", str); - char *p = strstr(buffer, orig); - int offset = (p - buffer); - int chars_from_end = strlen(buffer) - offset; - if (!p || chars_from_end != strlen(orig)) { // Is 'orig' even in 'str' AND is 'orig' found at the end of 'str'? - sprintf(output, "%s", buffer); - free(buffer); - return; - } - - *p = '\0'; - sprintf(output, "%s%s%s", buffer, rep, p + strlen(orig)); - free(buffer); -} - -void replace_image_to_label(const char* input_path, char* output_path) -{ - find_replace(input_path, "/images/train2017/", "/labels/train2017/", output_path); // COCO - find_replace(output_path, "/images/val2017/", "/labels/val2017/", output_path); // COCO - find_replace(output_path, "/JPEGImages/", "/labels/", output_path); // PascalVOC - find_replace(output_path, "\\images\\train2017\\", "\\labels\\train2017\\", output_path); // COCO - find_replace(output_path, "\\images\\val2017\\", "\\labels\\val2017\\", output_path); // COCO - - find_replace(output_path, "\\images\\train2014\\", "\\labels\\train2014\\", output_path); // COCO - find_replace(output_path, "\\images\\val2014\\", "\\labels\\val2014\\", output_path); // COCO - find_replace(output_path, "/images/train2014/", "/labels/train2014/", output_path); // COCO - find_replace(output_path, "/images/val2014/", "/labels/val2014/", output_path); // COCO - - find_replace(output_path, "\\JPEGImages\\", 
"\\labels\\", output_path); // PascalVOC - //find_replace(output_path, "/images/", "/labels/", output_path); // COCO - //find_replace(output_path, "/VOC2007/JPEGImages/", "/VOC2007/labels/", output_path); // PascalVOC - //find_replace(output_path, "/VOC2012/JPEGImages/", "/VOC2012/labels/", output_path); // PascalVOC - - //find_replace(output_path, "/raw/", "/labels/", output_path); - trim(output_path); - - // replace only ext of files - find_replace_extension(output_path, ".jpg", ".txt", output_path); - find_replace_extension(output_path, ".JPG", ".txt", output_path); // error - find_replace_extension(output_path, ".jpeg", ".txt", output_path); - find_replace_extension(output_path, ".JPEG", ".txt", output_path); - find_replace_extension(output_path, ".png", ".txt", output_path); - find_replace_extension(output_path, ".PNG", ".txt", output_path); - find_replace_extension(output_path, ".bmp", ".txt", output_path); - find_replace_extension(output_path, ".BMP", ".txt", output_path); - find_replace_extension(output_path, ".ppm", ".txt", output_path); - find_replace_extension(output_path, ".PPM", ".txt", output_path); - find_replace_extension(output_path, ".tiff", ".txt", output_path); - find_replace_extension(output_path, ".TIFF", ".txt", output_path); - - // Check file ends with txt: - if(strlen(output_path) > 4) { - char *output_path_ext = output_path + strlen(output_path) - 4; - if( strcmp(".txt", output_path_ext) != 0){ - fprintf(stderr, "Failed to infer label file name (check image extension is supported): %s \n", output_path); - } - }else{ - fprintf(stderr, "Label file name is too short: %s \n", output_path); - } -} - -float sec(clock_t clocks) -{ - return (float)clocks/CLOCKS_PER_SEC; -} - -void top_k(float *a, int n, int k, int *index) -{ - int i,j; - for(j = 0; j < k; ++j) index[j] = -1; - for(i = 0; i < n; ++i){ - int curr = i; - for(j = 0; j < k; ++j){ - if((index[j] < 0) || a[curr] > a[index[j]]){ - int swap = curr; - curr = index[j]; - index[j] = swap; - } 
- } - } -} - -void error(const char *s) -{ - perror(s); - assert(0); - exit(EXIT_FAILURE); -} - -void malloc_error() -{ - fprintf(stderr, "xMalloc error - possibly out of CPU RAM \n"); - exit(EXIT_FAILURE); -} - -void calloc_error() -{ - fprintf(stderr, "Calloc error - possibly out of CPU RAM \n"); - exit(EXIT_FAILURE); -} - -void realloc_error() -{ - fprintf(stderr, "Realloc error - possibly out of CPU RAM \n"); - exit(EXIT_FAILURE); -} - -void file_error(char *s) -{ - fprintf(stderr, "Couldn't open file: %s\n", s); - exit(EXIT_FAILURE); -} - -list *split_str(char *s, char delim) -{ - size_t i; - size_t len = strlen(s); - list *l = make_list(); - list_insert(l, s); - for(i = 0; i < len; ++i){ - if(s[i] == delim){ - s[i] = '\0'; - list_insert(l, &(s[i+1])); - } - } - return l; -} - -void strip(char *s) -{ - size_t i; - size_t len = strlen(s); - size_t offset = 0; - for(i = 0; i < len; ++i){ - char c = s[i]; - if(c==' '||c=='\t'||c=='\n'||c =='\r'||c==0x0d||c==0x0a) ++offset; - else s[i-offset] = c; - } - s[len-offset] = '\0'; -} - - -void strip_args(char *s) -{ - size_t i; - size_t len = strlen(s); - size_t offset = 0; - for (i = 0; i < len; ++i) { - char c = s[i]; - if (c == '\t' || c == '\n' || c == '\r' || c == 0x0d || c == 0x0a) ++offset; - else s[i - offset] = c; - } - s[len - offset] = '\0'; -} - -void strip_char(char *s, char bad) -{ - size_t i; - size_t len = strlen(s); - size_t offset = 0; - for(i = 0; i < len; ++i){ - char c = s[i]; - if(c==bad) ++offset; - else s[i-offset] = c; - } - s[len-offset] = '\0'; -} - -void free_ptrs(void **ptrs, int n) -{ - int i; - for(i = 0; i < n; ++i) free(ptrs[i]); - free(ptrs); -} - -char *fgetl(FILE *fp) -{ - if(feof(fp)) return 0; - size_t size = 512; - char* line = (char*)xmalloc(size * sizeof(char)); - if(!fgets(line, size, fp)){ - free(line); - return 0; - } - - size_t curr = strlen(line); - - while((line[curr-1] != '\n') && !feof(fp)){ - if(curr == size-1){ - size *= 2; - line = (char*)xrealloc(line, size * 
sizeof(char)); - } - size_t readsize = size-curr; - if(readsize > INT_MAX) readsize = INT_MAX-1; - fgets(&line[curr], readsize, fp); - curr = strlen(line); - } - if(curr >= 2) - if(line[curr-2] == 0x0d) line[curr-2] = 0x00; - - if(curr >= 1) - if(line[curr-1] == 0x0a) line[curr-1] = 0x00; - - return line; -} - -int read_int(int fd) -{ - int n = 0; - int next = read(fd, &n, sizeof(int)); - if(next <= 0) return -1; - return n; -} - -void write_int(int fd, int n) -{ - int next = write(fd, &n, sizeof(int)); - if(next <= 0) error("read failed"); -} - -int read_all_fail(int fd, char *buffer, size_t bytes) -{ - size_t n = 0; - while(n < bytes){ - int next = read(fd, buffer + n, bytes-n); - if(next <= 0) return 1; - n += next; - } - return 0; -} - -int write_all_fail(int fd, char *buffer, size_t bytes) -{ - size_t n = 0; - while(n < bytes){ - size_t next = write(fd, buffer + n, bytes-n); - if(next <= 0) return 1; - n += next; - } - return 0; -} - -void read_all(int fd, char *buffer, size_t bytes) -{ - size_t n = 0; - while(n < bytes){ - int next = read(fd, buffer + n, bytes-n); - if(next <= 0) error("read failed"); - n += next; - } -} - -void write_all(int fd, char *buffer, size_t bytes) -{ - size_t n = 0; - while(n < bytes){ - size_t next = write(fd, buffer + n, bytes-n); - if(next <= 0) error("write failed"); - n += next; - } -} - - -char *copy_string(char *s) -{ - if(!s) { - return NULL; - } - char* copy = (char*)xmalloc(strlen(s) + 1); - strncpy(copy, s, strlen(s)+1); - return copy; -} - -list *parse_csv_line(char *line) -{ - list *l = make_list(); - char *c, *p; - int in = 0; - for(c = line, p = line; *c != '\0'; ++c){ - if(*c == '"') in = !in; - else if(*c == ',' && !in){ - *c = '\0'; - list_insert(l, copy_string(p)); - p = c+1; - } - } - list_insert(l, copy_string(p)); - return l; -} - -int count_fields(char *line) -{ - int count = 0; - int done = 0; - char *c; - for(c = line; !done; ++c){ - done = (*c == '\0'); - if(*c == ',' || done) ++count; - } - return count; 
-} - -float *parse_fields(char *line, int n) -{ - float* field = (float*)xcalloc(n, sizeof(float)); - char *c, *p, *end; - int count = 0; - int done = 0; - for(c = line, p = line; !done; ++c){ - done = (*c == '\0'); - if(*c == ',' || done){ - *c = '\0'; - field[count] = strtod(p, &end); - if(p == c) field[count] = nan(""); - if(end != c && (end != c-1 || *end != '\r')) field[count] = nan(""); //DOS file formats! - p = c+1; - ++count; - } - } - return field; -} - -float sum_array(float *a, int n) -{ - int i; - float sum = 0; - for(i = 0; i < n; ++i) sum += a[i]; - return sum; -} - -float mean_array(float *a, int n) -{ - return sum_array(a,n)/n; -} - -void mean_arrays(float **a, int n, int els, float *avg) -{ - int i; - int j; - memset(avg, 0, els*sizeof(float)); - for(j = 0; j < n; ++j){ - for(i = 0; i < els; ++i){ - avg[i] += a[j][i]; - } - } - for(i = 0; i < els; ++i){ - avg[i] /= n; - } -} - -void print_statistics(float *a, int n) -{ - float m = mean_array(a, n); - float v = variance_array(a, n); - printf("MSE: %.6f, Mean: %.6f, Variance: %.6f\n", mse_array(a, n), m, v); -} - -float variance_array(float *a, int n) -{ - int i; - float sum = 0; - float mean = mean_array(a, n); - for(i = 0; i < n; ++i) sum += (a[i] - mean)*(a[i]-mean); - float variance = sum/n; - return variance; -} - -int constrain_int(int a, int min, int max) -{ - if (a < min) return min; - if (a > max) return max; - return a; -} - -float constrain(float min, float max, float a) -{ - if (a < min) return min; - if (a > max) return max; - return a; -} - -float dist_array(float *a, float *b, int n, int sub) -{ - int i; - float sum = 0; - for(i = 0; i < n; i += sub) sum += pow(a[i]-b[i], 2); - return sqrt(sum); -} - -float mse_array(float *a, int n) -{ - int i; - float sum = 0; - for(i = 0; i < n; ++i) sum += a[i]*a[i]; - return sqrt(sum/n); -} - -void normalize_array(float *a, int n) -{ - int i; - float mu = mean_array(a,n); - float sigma = sqrt(variance_array(a,n)); - for(i = 0; i < n; ++i){ - a[i] 
= (a[i] - mu)/sigma; - } - //mu = mean_array(a,n); - //sigma = sqrt(variance_array(a,n)); -} - -void translate_array(float *a, int n, float s) -{ - int i; - for(i = 0; i < n; ++i){ - a[i] += s; - } -} - -float mag_array(float *a, int n) -{ - int i; - float sum = 0; - for(i = 0; i < n; ++i){ - sum += a[i]*a[i]; - } - return sqrt(sum); -} - -// indicies to skip is a bit array -float mag_array_skip(float *a, int n, int * indices_to_skip) -{ - int i; - float sum = 0; - for (i = 0; i < n; ++i) { - if (indices_to_skip[i] != 1) { - sum += a[i] * a[i]; - } - } - return sqrt(sum); -} - -void scale_array(float *a, int n, float s) -{ - int i; - for(i = 0; i < n; ++i){ - a[i] *= s; - } -} - -int sample_array(float *a, int n) -{ - float sum = sum_array(a, n); - scale_array(a, n, 1. / sum); - float r = rand_uniform(0, 1); - int i; - for (i = 0; i < n; ++i) { - r = r - a[i]; - if (r <= 0) return i; - } - return n - 1; -} - -int sample_array_custom(float *a, int n) -{ - float sum = sum_array(a, n); - scale_array(a, n, 1./sum); - float r = rand_uniform(0, 1); - int start_index = rand_int(0, 0); - int i; - for(i = 0; i < n; ++i){ - r = r - a[(i + start_index) % n]; - if (r <= 0) return i; - } - return n-1; -} - -int max_index(float *a, int n) -{ - if(n <= 0) return -1; - int i, max_i = 0; - float max = a[0]; - for(i = 1; i < n; ++i){ - if(a[i] > max){ - max = a[i]; - max_i = i; - } - } - return max_i; -} - -int top_max_index(float *a, int n, int k) -{ - if (n <= 0) return -1; - float *values = (float*)xcalloc(k, sizeof(float)); - int *indexes = (int*)xcalloc(k, sizeof(int)); - int i, j; - for (i = 0; i < n; ++i) { - for (j = 0; j < k; ++j) { - if (a[i] > values[j]) { - values[j] = a[i]; - indexes[j] = i; - break; - } - } - } - int count = 0; - for (j = 0; j < k; ++j) if (values[j] > 0) count++; - int get_index = rand_int(0, count-1); - int val = indexes[get_index]; - free(indexes); - free(values); - return val; -} - - -int int_index(int *a, int val, int n) -{ - int i; - for (i = 0; 
i < n; ++i) { - if (a[i] == val) return i; - } - return -1; -} - -int rand_int(int min, int max) -{ - if (max < min){ - int s = min; - min = max; - max = s; - } - int r = (random_gen()%(max - min + 1)) + min; - return r; -} - -// From http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform -float rand_normal() -{ - static int haveSpare = 0; - static double rand1, rand2; - - if(haveSpare) - { - haveSpare = 0; - return sqrt(rand1) * sin(rand2); - } - - haveSpare = 1; - - rand1 = random_gen() / ((double) RAND_MAX); - if(rand1 < 1e-100) rand1 = 1e-100; - rand1 = -2 * log(rand1); - rand2 = (random_gen() / ((double)RAND_MAX)) * 2.0 * M_PI; - - return sqrt(rand1) * cos(rand2); -} - -/* - float rand_normal() - { - int n = 12; - int i; - float sum= 0; - for(i = 0; i < n; ++i) sum += (float)random_gen()/RAND_MAX; - return sum-n/2.; - } - */ - -size_t rand_size_t() -{ - return ((size_t)(random_gen()&0xff) << 56) | - ((size_t)(random_gen()&0xff) << 48) | - ((size_t)(random_gen()&0xff) << 40) | - ((size_t)(random_gen()&0xff) << 32) | - ((size_t)(random_gen()&0xff) << 24) | - ((size_t)(random_gen()&0xff) << 16) | - ((size_t)(random_gen()&0xff) << 8) | - ((size_t)(random_gen()&0xff) << 0); -} - -float rand_uniform(float min, float max) -{ - if(max < min){ - float swap = min; - min = max; - max = swap; - } - -#if (RAND_MAX < 65536) - int rnd = rand()*(RAND_MAX + 1) + rand(); - return ((float)rnd / (RAND_MAX*RAND_MAX) * (max - min)) + min; -#else - return ((float)rand() / RAND_MAX * (max - min)) + min; -#endif - //return (random_float() * (max - min)) + min; -} - -float rand_scale(float s) -{ - float scale = rand_uniform_strong(1, s); - if(random_gen()%2) return scale; - return 1./scale; -} - -float **one_hot_encode(float *a, int n, int k) -{ - int i; - float** t = (float**)xcalloc(n, sizeof(float*)); - for(i = 0; i < n; ++i){ - t[i] = (float*)xcalloc(k, sizeof(float)); - int index = (int)a[i]; - t[i][index] = 1; - } - return t; -} - -static unsigned int x = 123456789, y = 
362436069, z = 521288629; - -// Marsaglia's xorshf96 generator: period 2^96-1 -unsigned int random_gen_fast(void) -{ - unsigned int t; - x ^= x << 16; - x ^= x >> 5; - x ^= x << 1; - - t = x; - x = y; - y = z; - z = t ^ x ^ y; - - return z; -} - -float random_float_fast() -{ - return ((float)random_gen_fast() / (float)UINT_MAX); -} - -int rand_int_fast(int min, int max) -{ - if (max < min) { - int s = min; - min = max; - max = s; - } - int r = (random_gen_fast() % (max - min + 1)) + min; - return r; -} - -unsigned int random_gen() -{ - unsigned int rnd = 0; -#ifdef WIN32 - rand_s(&rnd); -#else // WIN32 - rnd = rand(); -#if (RAND_MAX < 65536) - rnd = rand()*(RAND_MAX + 1) + rnd; -#endif //(RAND_MAX < 65536) -#endif // WIN32 - return rnd; -} - -float random_float() -{ - unsigned int rnd = 0; -#ifdef WIN32 - rand_s(&rnd); - return ((float)rnd / (float)UINT_MAX); -#else // WIN32 - - rnd = rand(); -#if (RAND_MAX < 65536) - rnd = rand()*(RAND_MAX + 1) + rnd; - return((float)rnd / (float)(RAND_MAX*RAND_MAX)); -#endif //(RAND_MAX < 65536) - return ((float)rnd / (float)RAND_MAX); - -#endif // WIN32 -} - -float rand_uniform_strong(float min, float max) -{ - if (max < min) { - float swap = min; - min = max; - max = swap; - } - return (random_float() * (max - min)) + min; -} - -float rand_precalc_random(float min, float max, float random_part) -{ - if (max < min) { - float swap = min; - min = max; - max = swap; - } - return (random_part * (max - min)) + min; -} - -#define RS_SCALE (1.0 / (1.0 + RAND_MAX)) - -double double_rand(void) -{ - double d; - do { - d = (((rand() * RS_SCALE) + rand()) * RS_SCALE + rand()) * RS_SCALE; - } while (d >= 1); // Round off - return d; -} - -unsigned int uint_rand(unsigned int less_than) -{ - return (unsigned int)((less_than)* double_rand()); -} - -int check_array_is_nan(float *arr, int size) -{ - int i; - for (i = 0; i < size; ++i) { - if (isnan(arr[i])) return 1; - } - return 0; -} - -int check_array_is_inf(float *arr, int size) -{ - int i; - 
for (i = 0; i < size; ++i) { - if (isinf(arr[i])) return 1; - } - return 0; -} - -int *random_index_order(int min, int max) -{ - int *inds = (int *)xcalloc(max - min, sizeof(int)); - int i; - for (i = min; i < max; ++i) { - inds[i - min] = i; - } - for (i = min; i < max - 1; ++i) { - int swap = inds[i - min]; - int index = i + rand() % (max - i); - inds[i - min] = inds[index - min]; - inds[index - min] = swap; - } - return inds; -} - -int max_int_index(int *a, int n) -{ - if (n <= 0) return -1; - int i, max_i = 0; - int max = a[0]; - for (i = 1; i < n; ++i) { - if (a[i] > max) { - max = a[i]; - max_i = i; - } - } - return max_i; -} - - -// Absolute box from relative coordinate bounding box and image size -boxabs box_to_boxabs(const box* b, const int img_w, const int img_h, const int bounds_check) -{ - boxabs ba; - ba.left = (b->x - b->w / 2.)*img_w; - ba.right = (b->x + b->w / 2.)*img_w; - ba.top = (b->y - b->h / 2.)*img_h; - ba.bot = (b->y + b->h / 2.)*img_h; - - if (bounds_check) { - if (ba.left < 0) ba.left = 0; - if (ba.right > img_w - 1) ba.right = img_w - 1; - if (ba.top < 0) ba.top = 0; - if (ba.bot > img_h - 1) ba.bot = img_h - 1; - } - - return ba; -} - -int make_directory(char *path, int mode) -{ -#ifdef WIN32 - return _mkdir(path); -#else - return mkdir(path, mode); -#endif -} - -unsigned long custom_hash(char *str) -{ - unsigned long hash = 5381; - int c; - - while (c = *str++) - hash = ((hash << 5) + hash) + c; /* hash * 33 + c */ - - return hash; -} diff --git a/src/Detector/darknet/src/utils.h b/src/Detector/darknet/src/utils.h deleted file mode 100644 index 9a154ea62..000000000 --- a/src/Detector/darknet/src/utils.h +++ /dev/null @@ -1,106 +0,0 @@ -#ifndef UTILS_H -#define UTILS_H -#include "darknet.h" -#include "list.h" - -#include -#include - -#ifndef M_PI -#define M_PI 3.14159265358979323846 // pi -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -LIB_API void free_ptrs(void **ptrs, int n); -LIB_API void top_k(float *a, int n, int k, int 
*index); - -void *xmalloc(size_t size); -void *xcalloc(size_t nmemb, size_t size); -void *xrealloc(void *ptr, size_t size); -double what_time_is_it_now(); -int *read_map(char *filename); -void shuffle(void *arr, size_t n, size_t size); -void sorta_shuffle(void *arr, size_t n, size_t size, size_t sections); -char *basecfg(char *cfgfile); -int alphanum_to_int(char c); -char int_to_alphanum(int i); -int read_int(int fd); -void write_int(int fd, int n); -void read_all(int fd, char *buffer, size_t bytes); -void write_all(int fd, char *buffer, size_t bytes); -int read_all_fail(int fd, char *buffer, size_t bytes); -int write_all_fail(int fd, char *buffer, size_t bytes); -LIB_API void find_replace(const char* str, char* orig, char* rep, char* output); -void replace_image_to_label(const char* input_path, char* output_path); -void error(const char *s); -void malloc_error(); -void calloc_error(); -void realloc_error(); -void file_error(char *s); -void strip(char *s); -void strip_args(char *s); -void strip_char(char *s, char bad); -list *split_str(char *s, char delim); -char *fgetl(FILE *fp); -list *parse_csv_line(char *line); -char *copy_string(char *s); -int count_fields(char *line); -float *parse_fields(char *line, int n); -void normalize_array(float *a, int n); -void scale_array(float *a, int n, float s); -void translate_array(float *a, int n, float s); -int max_index(float *a, int n); -int top_max_index(float *a, int n, int k); -float constrain(float min, float max, float a); -int constrain_int(int a, int min, int max); -float mse_array(float *a, int n); -float rand_normal(); -size_t rand_size_t(); -float rand_uniform(float min, float max); -float rand_scale(float s); -int rand_int(int min, int max); -float sum_array(float *a, int n); -float mean_array(float *a, int n); -void mean_arrays(float **a, int n, int els, float *avg); -float variance_array(float *a, int n); -float mag_array(float *a, int n); -float mag_array_skip(float *a, int n, int * indices_to_skip); -float 
dist_array(float *a, float *b, int n, int sub); -float **one_hot_encode(float *a, int n, int k); -float sec(clock_t clocks); -int find_int_arg(int argc, char **argv, char *arg, int def); -float find_float_arg(int argc, char **argv, char *arg, float def); -int find_arg(int argc, char* argv[], char *arg); -char *find_char_arg(int argc, char **argv, char *arg, char *def); -int sample_array(float *a, int n); -int sample_array_custom(float *a, int n); -void print_statistics(float *a, int n); -unsigned int random_gen_fast(void); -float random_float_fast(); -int rand_int_fast(int min, int max); -unsigned int random_gen(); -float random_float(); -float rand_uniform_strong(float min, float max); -float rand_precalc_random(float min, float max, float random_part); -double double_rand(void); -unsigned int uint_rand(unsigned int less_than); -int check_array_is_nan(float *arr, int size); -int check_array_is_inf(float *arr, int size); -int int_index(int *a, int val, int n); -int *random_index_order(int min, int max); -int max_int_index(int *a, int n); -boxabs box_to_boxabs(const box* b, const int img_w, const int img_h, const int bounds_check); -int make_directory(char *path, int mode); -unsigned long custom_hash(char *str); - -#define max_val_cmp(a,b) (((a) > (b)) ? (a) : (b)) -#define min_val_cmp(a,b) (((a) < (b)) ? 
(a) : (b)) - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/Detector/darknet/src/version.h b/src/Detector/darknet/src/version.h deleted file mode 100644 index 7b103078d..000000000 --- a/src/Detector/darknet/src/version.h +++ /dev/null @@ -1,3 +0,0 @@ -#define MAJOR_VERSION 0 -#define MINOR_VERSION 2 -#define PATCH_VERSION 5 diff --git a/src/Detector/darknet/src/version.h.in b/src/Detector/darknet/src/version.h.in deleted file mode 100644 index e90811907..000000000 --- a/src/Detector/darknet/src/version.h.in +++ /dev/null @@ -1,3 +0,0 @@ -#define MAJOR_VERSION @Darknet_MAJOR_VERSION@ -#define MINOR_VERSION @Darknet_MINOR_VERSION@ -#define PATCH_VERSION @Darknet_PATCH_VERSION@ diff --git a/src/Detector/darknet/src/voxel.c b/src/Detector/darknet/src/voxel.c deleted file mode 100644 index 9f50112be..000000000 --- a/src/Detector/darknet/src/voxel.c +++ /dev/null @@ -1,164 +0,0 @@ -#include "network.h" -#include "cost_layer.h" -#include "utils.h" -#include "parser.h" - -void extract_voxel(char *lfile, char *rfile, char *prefix) -{ -#ifdef OPENCV - int w = 1920; - int h = 1080; - int shift = 0; - int count = 0; - cap_cv *lcap = get_capture_video_stream(lfile); - cap_cv *rcap = get_capture_video_stream(rfile); - while(1){ - image l = get_image_from_stream_cpp(lcap); - image r = get_image_from_stream_cpp(rcap); - if(!l.w || !r.w) break; - if(count%100 == 0) { - shift = best_3d_shift_r(l, r, -l.h/100, l.h/100); - printf("%d\n", shift); - } - image ls = crop_image(l, (l.w - w)/2, (l.h - h)/2, w, h); - image rs = crop_image(r, 105 + (r.w - w)/2, (r.h - h)/2 + shift, w, h); - char buff[256]; - sprintf(buff, "%s_%05d_l", prefix, count); - save_image(ls, buff); - sprintf(buff, "%s_%05d_r", prefix, count); - save_image(rs, buff); - free_image(l); - free_image(r); - free_image(ls); - free_image(rs); - ++count; - } - -#else - printf("need OpenCV for extraction\n"); -#endif -} - -void train_voxel(char *cfgfile, char *weightfile) -{ - char* train_images = 
"data/imagenet/imagenet1k.train.list"; - char* backup_directory = "backup/"; - srand(time(0)); - char *base = basecfg(cfgfile); - printf("%s\n", base); - float avg_loss = -1; - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); - int imgs = net.batch*net.subdivisions; - int i = *net.seen/imgs; - data train, buffer; - - - list *plist = get_paths(train_images); - //int N = plist->size; - char **paths = (char **)list_to_array(plist); - - load_args args = {0}; - args.w = net.w; - args.h = net.h; - args.scale = 4; - args.paths = paths; - args.n = imgs; - args.m = plist->size; - args.d = &buffer; - args.type = SUPER_DATA; - - pthread_t load_thread = load_data_in_thread(args); - clock_t time; - //while(i*imgs < N*120){ - while(get_current_batch(net) < net.max_batches){ - i += 1; - time=clock(); - pthread_join(load_thread, 0); - train = buffer; - load_thread = load_data_in_thread(args); - - printf("Loaded: %lf seconds\n", sec(clock()-time)); - - time=clock(); - float loss = train_network(net, train); - if (avg_loss < 0) avg_loss = loss; - avg_loss = avg_loss*.9 + loss*.1; - - printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); - if(i%1000==0){ - char buff[256]; - sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); - save_weights(net, buff); - } - if(i%100==0){ - char buff[256]; - sprintf(buff, "%s/%s.backup", backup_directory, base); - save_weights(net, buff); - } - free_data(train); - } - char buff[256]; - sprintf(buff, "%s/%s_final.weights", backup_directory, base); - save_weights(net, buff); -} - -void test_voxel(char *cfgfile, char *weightfile, char *filename) -{ - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - set_batch_network(&net, 1); - srand(2222222); - - clock_t time; - char 
buff[256]; - char *input = buff; - while(1){ - if(filename){ - strncpy(input, filename, 256); - }else{ - printf("Enter Image Path: "); - fflush(stdout); - input = fgets(input, 256, stdin); - if(!input) return; - strtok(input, "\n"); - } - image im = load_image_color(input, 0, 0); - resize_network(&net, im.w, im.h); - printf("%d %d\n", im.w, im.h); - - float *X = im.data; - time=clock(); - network_predict(net, X); - image out = get_network_image(net); - printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); - save_image(out, "out"); - - free_image(im); - if (filename) break; - } -} - - -void run_voxel(int argc, char **argv) -{ - if(argc < 4){ - fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); - return; - } - - char *cfg = argv[3]; - char *weights = (argc > 4) ? argv[4] : 0; - char *filename = (argc > 5) ? argv[5] : 0; - if(0==strcmp(argv[2], "train")) train_voxel(cfg, weights); - else if(0==strcmp(argv[2], "test")) test_voxel(cfg, weights, filename); - else if(0==strcmp(argv[2], "extract")) extract_voxel(argv[3], argv[4], argv[5]); - /* - else if(0==strcmp(argv[2], "valid")) validate_voxel(cfg, weights); - */ -} diff --git a/src/Detector/darknet/src/writing.c b/src/Detector/darknet/src/writing.c deleted file mode 100644 index 29785b7b7..000000000 --- a/src/Detector/darknet/src/writing.c +++ /dev/null @@ -1,144 +0,0 @@ -#include "network.h" -#include "utils.h" -#include "parser.h" - -void train_writing(char *cfgfile, char *weightfile) -{ - char* backup_directory = "backup/"; - srand(time(0)); - float avg_loss = -1; - char *base = basecfg(cfgfile); - printf("%s\n", base); - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); - int imgs = net.batch*net.subdivisions; - list *plist = get_paths("figures.list"); - char **paths = (char **)list_to_array(plist); - 
clock_t time; - int N = plist->size; - printf("N: %d\n", N); - image out = get_network_image(net); - - data train, buffer; - - load_args args = {0}; - args.w = net.w; - args.h = net.h; - args.out_w = out.w; - args.out_h = out.h; - args.paths = paths; - args.n = imgs; - args.m = N; - args.d = &buffer; - args.type = WRITING_DATA; - - pthread_t load_thread = load_data_in_thread(args); - int epoch = (*net.seen)/N; - while(get_current_batch(net) < net.max_batches || net.max_batches == 0){ - time=clock(); - pthread_join(load_thread, 0); - train = buffer; - load_thread = load_data_in_thread(args); - printf("Loaded %lf seconds\n",sec(clock()-time)); - - time=clock(); - float loss = train_network(net, train); - - /* - image pred = float_to_image(64, 64, 1, out); - print_image(pred); - */ - - /* - image im = float_to_image(256, 256, 3, train.X.vals[0]); - image lab = float_to_image(64, 64, 1, train.y.vals[0]); - image pred = float_to_image(64, 64, 1, out); - show_image(im, "image"); - show_image(lab, "label"); - print_image(lab); - show_image(pred, "pred"); - cvWaitKey(0); - */ - - if(avg_loss == -1) avg_loss = loss; - avg_loss = avg_loss*.9 + loss*.1; - printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen); - free_data(train); - if(get_current_batch(net)%100 == 0){ - char buff[256]; - sprintf(buff, "%s/%s_batch_%d.weights", backup_directory, base, get_current_batch(net)); - save_weights(net, buff); - } - if(*net.seen/N > epoch){ - epoch = *net.seen/N; - char buff[256]; - sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch); - save_weights(net, buff); - } - } -} - -void test_writing(char *cfgfile, char *weightfile, char *filename) -{ - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - set_batch_network(&net, 1); - srand(2222222); - clock_t time; - char buff[256]; - char *input = buff; - 
while(1){ - if(filename){ - strncpy(input, filename, 256); - }else{ - printf("Enter Image Path: "); - fflush(stdout); - input = fgets(input, 256, stdin); - if(!input) return; - strtok(input, "\n"); - } - - image im = load_image_color(input, 0, 0); - resize_network(&net, im.w, im.h); - printf("%d %d %d\n", im.h, im.w, im.c); - float *X = im.data; - time=clock(); - network_predict(net, X); - printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); - image pred = get_network_image(net); - - image upsampled = resize_image(pred, im.w, im.h); - image thresh = threshold_image(upsampled, .5); - pred = thresh; - - show_image(pred, "prediction"); - show_image(im, "orig"); - - wait_until_press_key_cv(); - destroy_all_windows_cv(); - - free_image(upsampled); - free_image(thresh); - free_image(im); - if (filename) break; - } -} - -void run_writing(int argc, char **argv) -{ - if(argc < 4){ - fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); - return; - } - - char *cfg = argv[3]; - char *weights = (argc > 4) ? argv[4] : 0; - char *filename = (argc > 5) ? 
argv[5] : 0; - if(0==strcmp(argv[2], "train")) train_writing(cfg, weights); - else if(0==strcmp(argv[2], "test")) test_writing(cfg, weights, filename); -} diff --git a/src/Detector/darknet/src/yolo.c b/src/Detector/darknet/src/yolo.c deleted file mode 100644 index 384c48729..000000000 --- a/src/Detector/darknet/src/yolo.c +++ /dev/null @@ -1,368 +0,0 @@ -#include "network.h" -#include "detection_layer.h" -#include "cost_layer.h" -#include "utils.h" -#include "parser.h" -#include "box.h" -#include "demo.h" - -char *voc_names[] = {"aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"}; - -void train_yolo(char *cfgfile, char *weightfile) -{ - char* train_images = "data/voc/train.txt"; - char* backup_directory = "backup/"; - srand(time(0)); - char *base = basecfg(cfgfile); - printf("%s\n", base); - float avg_loss = -1; - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); - int imgs = net.batch*net.subdivisions; - int i = *net.seen/imgs; - data train, buffer; - - - layer l = net.layers[net.n - 1]; - - int side = l.side; - int classes = l.classes; - float jitter = l.jitter; - - list *plist = get_paths(train_images); - //int N = plist->size; - char **paths = (char **)list_to_array(plist); - - load_args args = {0}; - args.w = net.w; - args.h = net.h; - args.paths = paths; - args.n = imgs; - args.m = plist->size; - args.classes = classes; - args.jitter = jitter; - args.num_boxes = side; - args.d = &buffer; - args.type = REGION_DATA; - - args.angle = net.angle; - args.exposure = net.exposure; - args.saturation = net.saturation; - args.hue = net.hue; - - pthread_t load_thread = load_data_in_thread(args); - clock_t time; - //while(i*imgs < N*120){ - while(get_current_batch(net) < 
net.max_batches){ - i += 1; - time=clock(); - pthread_join(load_thread, 0); - train = buffer; - load_thread = load_data_in_thread(args); - - printf("Loaded: %lf seconds\n", sec(clock()-time)); - - time=clock(); - float loss = train_network(net, train); - if (avg_loss < 0) avg_loss = loss; - avg_loss = avg_loss*.9 + loss*.1; - - printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); - if(i%1000==0 || (i < 1000 && i%100 == 0)){ - char buff[256]; - sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); - save_weights(net, buff); - } - free_data(train); - } - char buff[256]; - sprintf(buff, "%s/%s_final.weights", backup_directory, base); - save_weights(net, buff); -} - -void print_yolo_detections(FILE **fps, char *id, box *boxes, float **probs, int total, int classes, int w, int h) -{ - int i, j; - for(i = 0; i < total; ++i){ - float xmin = boxes[i].x - boxes[i].w/2.; - float xmax = boxes[i].x + boxes[i].w/2.; - float ymin = boxes[i].y - boxes[i].h/2.; - float ymax = boxes[i].y + boxes[i].h/2.; - - if (xmin < 0) xmin = 0; - if (ymin < 0) ymin = 0; - if (xmax > w) xmax = w; - if (ymax > h) ymax = h; - - for(j = 0; j < classes; ++j){ - if (probs[i][j]) fprintf(fps[j], "%s %f %f %f %f %f\n", id, probs[i][j], - xmin, ymin, xmax, ymax); - } - } -} - -void validate_yolo(char *cfgfile, char *weightfile) -{ - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - set_batch_network(&net, 1); - fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); - srand(time(0)); - - char *base = "results/comp4_det_test_"; - //list *plist = get_paths("data/voc.2007.test"); - list* plist = get_paths("data/voc/2007_test.txt"); - //list *plist = get_paths("data/voc.2012.test"); - char **paths = (char **)list_to_array(plist); - - layer l = net.layers[net.n-1]; - int classes = l.classes; - - int j; - FILE** fps = 
(FILE**)xcalloc(classes, sizeof(FILE*)); - for(j = 0; j < classes; ++j){ - char buff[1024]; - snprintf(buff, 1024, "%s%s.txt", base, voc_names[j]); - fps[j] = fopen(buff, "w"); - } - box* boxes = (box*)xcalloc(l.side * l.side * l.n, sizeof(box)); - float** probs = (float**)xcalloc(l.side * l.side * l.n, sizeof(float*)); - for(j = 0; j < l.side*l.side*l.n; ++j) probs[j] = (float*)xcalloc(classes, sizeof(float)); - - int m = plist->size; - int i=0; - int t; - - float thresh = .001; - int nms = 1; - float iou_thresh = .5; - - int nthreads = 8; - image* val = (image*)xcalloc(nthreads, sizeof(image)); - image* val_resized = (image*)xcalloc(nthreads, sizeof(image)); - image* buf = (image*)xcalloc(nthreads, sizeof(image)); - image* buf_resized = (image*)xcalloc(nthreads, sizeof(image)); - pthread_t* thr = (pthread_t*)xcalloc(nthreads, sizeof(pthread_t)); - - load_args args = {0}; - args.w = net.w; - args.h = net.h; - args.type = IMAGE_DATA; - - for(t = 0; t < nthreads; ++t){ - args.path = paths[i+t]; - args.im = &buf[t]; - args.resized = &buf_resized[t]; - thr[t] = load_data_in_thread(args); - } - time_t start = time(0); - for(i = nthreads; i < m+nthreads; i += nthreads){ - fprintf(stderr, "%d\n", i); - for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ - pthread_join(thr[t], 0); - val[t] = buf[t]; - val_resized[t] = buf_resized[t]; - } - for(t = 0; t < nthreads && i+t < m; ++t){ - args.path = paths[i+t]; - args.im = &buf[t]; - args.resized = &buf_resized[t]; - thr[t] = load_data_in_thread(args); - } - for(t = 0; t < nthreads && i+t-nthreads < m; ++t){ - char *path = paths[i+t-nthreads]; - char *id = basecfg(path); - float *X = val_resized[t].data; - network_predict(net, X); - int w = val[t].w; - int h = val[t].h; - get_detection_boxes(l, w, h, thresh, probs, boxes, 0); - if (nms) do_nms_sort_v2(boxes, probs, l.side*l.side*l.n, classes, iou_thresh); - print_yolo_detections(fps, id, boxes, probs, l.side*l.side*l.n, classes, w, h); - free(id); - free_image(val[t]); - 
free_image(val_resized[t]); - } - } - - if (fps) free(fps); - if (val) free(val); - if (val_resized) free(val_resized); - if (buf) free(buf); - if (buf_resized) free(buf_resized); - if (thr) free(thr); - - fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start)); - for(j = 0; j < classes; ++j){ - fclose(fps[j]); - } - free(fps); -} - -void validate_yolo_recall(char *cfgfile, char *weightfile) -{ - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - set_batch_network(&net, 1); - fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); - srand(time(0)); - - list *plist = get_paths("data/voc.2007.test"); - char **paths = (char **)list_to_array(plist); - - layer l = net.layers[net.n-1]; - int classes = l.classes; - int side = l.side; - - int j, k; - box* boxes = (box*)xcalloc(side * side * l.n, sizeof(box)); - float** probs = (float**)xcalloc(side * side * l.n, sizeof(float*)); - for(j = 0; j < side*side*l.n; ++j) { - probs[j] = (float*)xcalloc(classes, sizeof(float)); - } - - int m = plist->size; - int i=0; - - float thresh = .001; - float iou_thresh = .5; - float nms = 0; - - int total = 0; - int correct = 0; - int proposals = 0; - float avg_iou = 0; - - for(i = 0; i < m; ++i){ - char *path = paths[i]; - image orig = load_image_color(path, 0, 0); - image sized = resize_image(orig, net.w, net.h); - char *id = basecfg(path); - network_predict(net, sized.data); - get_detection_boxes(l, orig.w, orig.h, thresh, probs, boxes, 1); - if (nms) do_nms(boxes, probs, side*side*l.n, 1, nms); - - char labelpath[4096]; - replace_image_to_label(path, labelpath); - - int num_labels = 0; - box_label *truth = read_boxes(labelpath, &num_labels); - for(k = 0; k < side*side*l.n; ++k){ - if(probs[k][0] > thresh){ - ++proposals; - } - } - for (j = 0; j < num_labels; ++j) { - ++total; - box t = {truth[j].x, truth[j].y, truth[j].w, truth[j].h}; - float best_iou = 0; - 
for(k = 0; k < side*side*l.n; ++k){ - float iou = box_iou(boxes[k], t); - if(probs[k][0] > thresh && iou > best_iou){ - best_iou = iou; - } - } - avg_iou += best_iou; - if(best_iou > iou_thresh){ - ++correct; - } - } - - fprintf(stderr, "%5d %5d %5d\tRPs/Img: %.2f\tIOU: %.2f%%\tRecall:%.2f%%\n", i, correct, total, (float)proposals/(i+1), avg_iou*100/total, 100.*correct/total); - free(id); - free_image(orig); - free_image(sized); - } -} - -void test_yolo(char *cfgfile, char *weightfile, char *filename, float thresh) -{ - image **alphabet = load_alphabet(); - network net = parse_network_cfg(cfgfile); - if(weightfile){ - load_weights(&net, weightfile); - } - detection_layer l = net.layers[net.n-1]; - set_batch_network(&net, 1); - srand(2222222); - char buff[256]; - char *input = buff; - int j; - float nms=.4; - box* boxes = (box*)xcalloc(l.side * l.side * l.n, sizeof(box)); - float** probs = (float**)xcalloc(l.side * l.side * l.n, sizeof(float*)); - for(j = 0; j < l.side*l.side*l.n; ++j) { - probs[j] = (float*)xcalloc(l.classes, sizeof(float)); - } - while(1){ - if(filename){ - strncpy(input, filename, 256); - } else { - printf("Enter Image Path: "); - fflush(stdout); - input = fgets(input, 256, stdin); - if(!input) return; - strtok(input, "\n"); - } - image im = load_image_color(input,0,0); - image sized = resize_image(im, net.w, net.h); - float *X = sized.data; - clock_t time=clock(); - network_predict(net, X); - printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); - get_detection_boxes(l, 1, 1, thresh, probs, boxes, 0); - if (nms) do_nms_sort_v2(boxes, probs, l.side*l.side*l.n, l.classes, nms); - //draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, voc_names, alphabet, 20); - draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, voc_names, alphabet, 20); - save_image(im, "predictions"); - show_image(im, "predictions"); - - free_image(im); - free_image(sized); - - wait_until_press_key_cv(); - destroy_all_windows_cv(); - - if 
(filename) break; - } - free(boxes); - for(j = 0; j < l.side*l.side*l.n; ++j) { - free(probs[j]); - } - free(probs); -} - -void run_yolo(int argc, char **argv) -{ - int dont_show = find_arg(argc, argv, "-dont_show"); - int mjpeg_port = find_int_arg(argc, argv, "-mjpeg_port", -1); - int json_port = find_int_arg(argc, argv, "-json_port", -1); - char *out_filename = find_char_arg(argc, argv, "-out_filename", 0); - char *prefix = find_char_arg(argc, argv, "-prefix", 0); - float thresh = find_float_arg(argc, argv, "-thresh", .2); - float hier_thresh = find_float_arg(argc, argv, "-hier", .5); - int cam_index = find_int_arg(argc, argv, "-c", 0); - int frame_skip = find_int_arg(argc, argv, "-s", 0); - int ext_output = find_arg(argc, argv, "-ext_output"); - if(argc < 4){ - fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]); - return; - } - - char *cfg = argv[3]; - char *weights = (argc > 4) ? argv[4] : 0; - char *filename = (argc > 5) ? argv[5]: 0; - if(0==strcmp(argv[2], "test")) test_yolo(cfg, weights, filename, thresh); - else if(0==strcmp(argv[2], "train")) train_yolo(cfg, weights); - else if(0==strcmp(argv[2], "valid")) validate_yolo(cfg, weights); - else if(0==strcmp(argv[2], "recall")) validate_yolo_recall(cfg, weights); - else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, hier_thresh, cam_index, filename, voc_names, 20, 1, frame_skip, - prefix, out_filename, mjpeg_port, 0, json_port, dont_show, ext_output, 0, 0, 0, 0, 0); -} diff --git a/src/Detector/darknet/src/yolo_console_dll.cpp b/src/Detector/darknet/src/yolo_console_dll.cpp deleted file mode 100644 index 062860626..000000000 --- a/src/Detector/darknet/src/yolo_console_dll.cpp +++ /dev/null @@ -1,700 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include // std::mutex, std::unique_lock -#include - - -// It makes sense only for video-Camera (not for video-File) -// To use - uncomment the following 
line. Optical-flow is supported only by OpenCV 3.x - 4.x -//#define TRACK_OPTFLOW -//#define GPU - -// To use 3D-stereo camera ZED - uncomment the following line. ZED_SDK should be installed. -//#define ZED_STEREO - - -#include "yolo_v2_class.hpp" // imported functions from DLL - -#ifdef OPENCV -#ifdef ZED_STEREO -#include -#if ZED_SDK_MAJOR_VERSION == 2 -#define ZED_STEREO_2_COMPAT_MODE -#endif - -#undef GPU // avoid conflict with sl::MEM::GPU - -#ifdef ZED_STEREO_2_COMPAT_MODE -#pragma comment(lib, "sl_core64.lib") -#pragma comment(lib, "sl_input64.lib") -#endif -#pragma comment(lib, "sl_zed64.lib") - -float getMedian(std::vector &v) { - size_t n = v.size() / 2; - std::nth_element(v.begin(), v.begin() + n, v.end()); - return v[n]; -} - -std::vector get_3d_coordinates(std::vector bbox_vect, cv::Mat xyzrgba) -{ - bool valid_measure; - int i, j; - const unsigned int R_max_global = 10; - - std::vector bbox3d_vect; - - for (auto &cur_box : bbox_vect) { - - const unsigned int obj_size = std::min(cur_box.w, cur_box.h); - const unsigned int R_max = std::min(R_max_global, obj_size / 2); - int center_i = cur_box.x + cur_box.w * 0.5f, center_j = cur_box.y + cur_box.h * 0.5f; - - std::vector x_vect, y_vect, z_vect; - for (int R = 0; R < R_max; R++) { - for (int y = -R; y <= R; y++) { - for (int x = -R; x <= R; x++) { - i = center_i + x; - j = center_j + y; - sl::float4 out(NAN, NAN, NAN, NAN); - if (i >= 0 && i < xyzrgba.cols && j >= 0 && j < xyzrgba.rows) { - cv::Vec4f &elem = xyzrgba.at(j, i); // x,y,z,w - out.x = elem[0]; - out.y = elem[1]; - out.z = elem[2]; - out.w = elem[3]; - } - valid_measure = std::isfinite(out.z); - if (valid_measure) - { - x_vect.push_back(out.x); - y_vect.push_back(out.y); - z_vect.push_back(out.z); - } - } - } - } - - if (x_vect.size() * y_vect.size() * z_vect.size() > 0) - { - cur_box.x_3d = getMedian(x_vect); - cur_box.y_3d = getMedian(y_vect); - cur_box.z_3d = getMedian(z_vect); - } - else { - cur_box.x_3d = NAN; - cur_box.y_3d = NAN; - 
cur_box.z_3d = NAN; - } - - bbox3d_vect.emplace_back(cur_box); - } - - return bbox3d_vect; -} - -cv::Mat slMat2cvMat(sl::Mat &input) { - int cv_type = -1; // Mapping between MAT_TYPE and CV_TYPE - if(input.getDataType() == -#ifdef ZED_STEREO_2_COMPAT_MODE - sl::MAT_TYPE_32F_C4 -#else - sl::MAT_TYPE::F32_C4 -#endif - ) { - cv_type = CV_32FC4; - } else cv_type = CV_8UC4; // sl::Mat used are either RGBA images or XYZ (4C) point clouds - return cv::Mat(input.getHeight(), input.getWidth(), cv_type, input.getPtr( -#ifdef ZED_STEREO_2_COMPAT_MODE - sl::MEM::MEM_CPU -#else - sl::MEM::CPU -#endif - )); -} - -cv::Mat zed_capture_rgb(sl::Camera &zed) { - sl::Mat left; - zed.retrieveImage(left); - cv::Mat left_rgb; - cv::cvtColor(slMat2cvMat(left), left_rgb, CV_RGBA2RGB); - return left_rgb; -} - -cv::Mat zed_capture_3d(sl::Camera &zed) { - sl::Mat cur_cloud; - zed.retrieveMeasure(cur_cloud, -#ifdef ZED_STEREO_2_COMPAT_MODE - sl::MEASURE_XYZ -#else - sl::MEASURE::XYZ -#endif - ); - return slMat2cvMat(cur_cloud).clone(); -} - -static sl::Camera zed; // ZED-camera - -#else // ZED_STEREO -std::vector get_3d_coordinates(std::vector bbox_vect, cv::Mat xyzrgba) { - return bbox_vect; -} -#endif // ZED_STEREO - - -#include // C++ -#include -#ifndef CV_VERSION_EPOCH // OpenCV 3.x and 4.x -#include -#define OPENCV_VERSION CVAUX_STR(CV_VERSION_MAJOR)"" CVAUX_STR(CV_VERSION_MINOR)"" CVAUX_STR(CV_VERSION_REVISION) -#ifndef USE_CMAKE_LIBS -#pragma comment(lib, "opencv_world" OPENCV_VERSION ".lib") -#ifdef TRACK_OPTFLOW -/* -#pragma comment(lib, "opencv_cudaoptflow" OPENCV_VERSION ".lib") -#pragma comment(lib, "opencv_cudaimgproc" OPENCV_VERSION ".lib") -#pragma comment(lib, "opencv_core" OPENCV_VERSION ".lib") -#pragma comment(lib, "opencv_imgproc" OPENCV_VERSION ".lib") -#pragma comment(lib, "opencv_highgui" OPENCV_VERSION ".lib") -*/ -#endif // TRACK_OPTFLOW -#endif // USE_CMAKE_LIBS -#else // OpenCV 2.x -#define OPENCV_VERSION CVAUX_STR(CV_VERSION_EPOCH)"" CVAUX_STR(CV_VERSION_MAJOR)"" 
CVAUX_STR(CV_VERSION_MINOR) -#ifndef USE_CMAKE_LIBS -#pragma comment(lib, "opencv_core" OPENCV_VERSION ".lib") -#pragma comment(lib, "opencv_imgproc" OPENCV_VERSION ".lib") -#pragma comment(lib, "opencv_highgui" OPENCV_VERSION ".lib") -#pragma comment(lib, "opencv_video" OPENCV_VERSION ".lib") -#endif // USE_CMAKE_LIBS -#endif // CV_VERSION_EPOCH - - -void draw_boxes(cv::Mat mat_img, std::vector result_vec, std::vector obj_names, - int current_det_fps = -1, int current_cap_fps = -1) -{ - int const colors[6][3] = { { 1,0,1 },{ 0,0,1 },{ 0,1,1 },{ 0,1,0 },{ 1,1,0 },{ 1,0,0 } }; - - for (auto &i : result_vec) { - cv::Scalar color = obj_id_to_color(i.obj_id); - cv::rectangle(mat_img, cv::Rect(i.x, i.y, i.w, i.h), color, 2); - if (obj_names.size() > i.obj_id) { - std::string obj_name = obj_names[i.obj_id]; - if (i.track_id > 0) obj_name += " - " + std::to_string(i.track_id); - cv::Size const text_size = getTextSize(obj_name, cv::FONT_HERSHEY_COMPLEX_SMALL, 1.2, 2, 0); - int max_width = (text_size.width > i.w + 2) ? text_size.width : (i.w + 2); - max_width = std::max(max_width, (int)i.w + 2); - //max_width = std::max(max_width, 283); - std::string coords_3d; - if (!std::isnan(i.z_3d)) { - std::stringstream ss; - ss << std::fixed << std::setprecision(2) << "x:" << i.x_3d << "m y:" << i.y_3d << "m z:" << i.z_3d << "m "; - coords_3d = ss.str(); - cv::Size const text_size_3d = getTextSize(ss.str(), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.8, 1, 0); - int const max_width_3d = (text_size_3d.width > i.w + 2) ? 
text_size_3d.width : (i.w + 2); - if (max_width_3d > max_width) max_width = max_width_3d; - } - - cv::rectangle(mat_img, cv::Point2f(std::max((int)i.x - 1, 0), std::max((int)i.y - 35, 0)), - cv::Point2f(std::min((int)i.x + max_width, mat_img.cols - 1), std::min((int)i.y, mat_img.rows - 1)), - color, CV_FILLED, 8, 0); - putText(mat_img, obj_name, cv::Point2f(i.x, i.y - 16), cv::FONT_HERSHEY_COMPLEX_SMALL, 1.2, cv::Scalar(0, 0, 0), 2); - if(!coords_3d.empty()) putText(mat_img, coords_3d, cv::Point2f(i.x, i.y-1), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.8, cv::Scalar(0, 0, 0), 1); - } - } - if (current_det_fps >= 0 && current_cap_fps >= 0) { - std::string fps_str = "FPS detection: " + std::to_string(current_det_fps) + " FPS capture: " + std::to_string(current_cap_fps); - putText(mat_img, fps_str, cv::Point2f(10, 20), cv::FONT_HERSHEY_COMPLEX_SMALL, 1.2, cv::Scalar(50, 255, 0), 2); - } -} -#endif // OPENCV - - -void show_console_result(std::vector const result_vec, std::vector const obj_names, int frame_id = -1) { - if (frame_id >= 0) std::cout << " Frame: " << frame_id << std::endl; - for (auto &i : result_vec) { - if (obj_names.size() > i.obj_id) std::cout << obj_names[i.obj_id] << " - "; - std::cout << "obj_id = " << i.obj_id << ", x = " << i.x << ", y = " << i.y - << ", w = " << i.w << ", h = " << i.h - << std::setprecision(3) << ", prob = " << i.prob << std::endl; - } -} - -std::vector objects_names_from_file(std::string const filename) { - std::ifstream file(filename); - std::vector file_lines; - if (!file.is_open()) return file_lines; - for(std::string line; getline(file, line);) file_lines.push_back(line); - std::cout << "object names loaded \n"; - return file_lines; -} - -template -class send_one_replaceable_object_t { - const bool sync; - std::atomic a_ptr; -public: - - void send(T const& _obj) { - T *new_ptr = new T; - *new_ptr = _obj; - if (sync) { - while (a_ptr.load()) std::this_thread::sleep_for(std::chrono::milliseconds(3)); - } - std::unique_ptr 
old_ptr(a_ptr.exchange(new_ptr)); - } - - T receive() { - std::unique_ptr ptr; - do { - while(!a_ptr.load()) std::this_thread::sleep_for(std::chrono::milliseconds(3)); - ptr.reset(a_ptr.exchange(NULL)); - } while (!ptr); - T obj = *ptr; - return obj; - } - - bool is_object_present() { - return (a_ptr.load() != NULL); - } - - send_one_replaceable_object_t(bool _sync) : sync(_sync), a_ptr(NULL) - {} -}; - -int main(int argc, char *argv[]) -{ - std::string names_file = "data/coco.names"; - std::string cfg_file = "cfg/yolov3.cfg"; - std::string weights_file = "yolov3.weights"; - std::string filename; - - if (argc > 4) { //voc.names yolo-voc.cfg yolo-voc.weights test.mp4 - names_file = argv[1]; - cfg_file = argv[2]; - weights_file = argv[3]; - filename = argv[4]; - } - else if (argc > 1) filename = argv[1]; - - float const thresh = (argc > 5) ? std::stof(argv[5]) : 0.2; - - Detector detector(cfg_file, weights_file); - - auto obj_names = objects_names_from_file(names_file); - std::string out_videofile = "result.avi"; - bool const save_output_videofile = false; // true - for history - bool const send_network = false; // true - for remote detection - bool const use_kalman_filter = false; // true - for stationary camera - - bool detection_sync = true; // true - for video-file -#ifdef TRACK_OPTFLOW // for slow GPU - detection_sync = false; - Tracker_optflow tracker_flow; - //detector.wait_stream = true; -#endif // TRACK_OPTFLOW - - - while (true) - { - std::cout << "input image or video filename: "; - if(filename.size() == 0) std::cin >> filename; - if (filename.size() == 0) break; - - try { -#ifdef OPENCV - preview_boxes_t large_preview(100, 150, false), small_preview(50, 50, true); - bool show_small_boxes = false; - - std::string const file_ext = filename.substr(filename.find_last_of(".") + 1); - std::string const protocol = filename.substr(0, 7); - if (file_ext == "avi" || file_ext == "mp4" || file_ext == "mjpg" || file_ext == "mov" || // video file - protocol == 
"rtmp://" || protocol == "rtsp://" || protocol == "http://" || protocol == "https:/" || // video network stream - filename == "zed_camera" || file_ext == "svo" || filename == "web_camera") // ZED stereo camera - - { - if (protocol == "rtsp://" || protocol == "http://" || protocol == "https:/" || filename == "zed_camera" || filename == "web_camera") - detection_sync = false; - - cv::Mat cur_frame; - std::atomic fps_cap_counter(0), fps_det_counter(0); - std::atomic current_fps_cap(0), current_fps_det(0); - std::atomic exit_flag(false); - std::chrono::steady_clock::time_point steady_start, steady_end; - int video_fps = 25; - bool use_zed_camera = false; - - track_kalman_t track_kalman; - -#ifdef ZED_STEREO - sl::InitParameters init_params; - init_params.depth_minimum_distance = 0.5; - #ifdef ZED_STEREO_2_COMPAT_MODE - init_params.depth_mode = sl::DEPTH_MODE_ULTRA; - init_params.camera_resolution = sl::RESOLUTION_HD720;// sl::RESOLUTION_HD1080, sl::RESOLUTION_HD720 - init_params.coordinate_units = sl::UNIT_METER; - init_params.camera_buffer_count_linux = 2; - if (file_ext == "svo") init_params.svo_input_filename.set(filename.c_str()); - #else - init_params.depth_mode = sl::DEPTH_MODE::ULTRA; - init_params.camera_resolution = sl::RESOLUTION::HD720;// sl::RESOLUTION::HD1080, sl::RESOLUTION::HD720 - init_params.coordinate_units = sl::UNIT::METER; - if (file_ext == "svo") init_params.input.setFromSVOFile(filename.c_str()); - #endif - //init_params.sdk_cuda_ctx = (CUcontext)detector.get_cuda_context(); - init_params.sdk_gpu_id = detector.cur_gpu_id; - - if (filename == "zed_camera" || file_ext == "svo") { - std::cout << "ZED 3D Camera " << zed.open(init_params) << std::endl; - if (!zed.isOpened()) { - std::cout << " Error: ZED Camera should be connected to USB 3.0. And ZED_SDK should be installed. 
\n"; - getchar(); - return 0; - } - cur_frame = zed_capture_rgb(zed); - use_zed_camera = true; - } -#endif // ZED_STEREO - - cv::VideoCapture cap; - if (filename == "web_camera") { - cap.open(0); - cap >> cur_frame; - } else if (!use_zed_camera) { - cap.open(filename); - cap >> cur_frame; - } -#ifdef CV_VERSION_EPOCH // OpenCV 2.x - video_fps = cap.get(CV_CAP_PROP_FPS); -#else - video_fps = cap.get(cv::CAP_PROP_FPS); -#endif - cv::Size const frame_size = cur_frame.size(); - //cv::Size const frame_size(cap.get(CV_CAP_PROP_FRAME_WIDTH), cap.get(CV_CAP_PROP_FRAME_HEIGHT)); - std::cout << "\n Video size: " << frame_size << std::endl; - - cv::VideoWriter output_video; - if (save_output_videofile) -#ifdef CV_VERSION_EPOCH // OpenCV 2.x - output_video.open(out_videofile, CV_FOURCC('D', 'I', 'V', 'X'), std::max(35, video_fps), frame_size, true); -#else - output_video.open(out_videofile, cv::VideoWriter::fourcc('D', 'I', 'V', 'X'), std::max(35, video_fps), frame_size, true); -#endif - - struct detection_data_t { - cv::Mat cap_frame; - std::shared_ptr det_image; - std::vector result_vec; - cv::Mat draw_frame; - bool new_detection; - uint64_t frame_id; - bool exit_flag; - cv::Mat zed_cloud; - std::queue track_optflow_queue; - detection_data_t() : new_detection(false), exit_flag(false) {} - }; - - const bool sync = detection_sync; // sync data exchange - send_one_replaceable_object_t cap2prepare(sync), cap2draw(sync), - prepare2detect(sync), detect2draw(sync), draw2show(sync), draw2write(sync), draw2net(sync); - - std::thread t_cap, t_prepare, t_detect, t_post, t_draw, t_write, t_network; - - // capture new video-frame - if (t_cap.joinable()) t_cap.join(); - t_cap = std::thread([&]() - { - uint64_t frame_id = 0; - detection_data_t detection_data; - do { - detection_data = detection_data_t(); -#ifdef ZED_STEREO - if (use_zed_camera) { - while (zed.grab() != - #ifdef ZED_STEREO_2_COMPAT_MODE - sl::SUCCESS - #else - sl::ERROR_CODE::SUCCESS - #endif - ) 
std::this_thread::sleep_for(std::chrono::milliseconds(2)); - detection_data.cap_frame = zed_capture_rgb(zed); - detection_data.zed_cloud = zed_capture_3d(zed); - } - else -#endif // ZED_STEREO - { - cap >> detection_data.cap_frame; - } - fps_cap_counter++; - detection_data.frame_id = frame_id++; - if (detection_data.cap_frame.empty() || exit_flag) { - std::cout << " exit_flag: detection_data.cap_frame.size = " << detection_data.cap_frame.size() << std::endl; - detection_data.exit_flag = true; - detection_data.cap_frame = cv::Mat(frame_size, CV_8UC3); - } - - if (!detection_sync) { - cap2draw.send(detection_data); // skip detection - } - cap2prepare.send(detection_data); - } while (!detection_data.exit_flag); - std::cout << " t_cap exit \n"; - }); - - - // pre-processing video frame (resize, convertion) - t_prepare = std::thread([&]() - { - std::shared_ptr det_image; - detection_data_t detection_data; - do { - detection_data = cap2prepare.receive(); - - det_image = detector.mat_to_image_resize(detection_data.cap_frame); - detection_data.det_image = det_image; - prepare2detect.send(detection_data); // detection - - } while (!detection_data.exit_flag); - std::cout << " t_prepare exit \n"; - }); - - - // detection by Yolo - if (t_detect.joinable()) t_detect.join(); - t_detect = std::thread([&]() - { - std::shared_ptr det_image; - detection_data_t detection_data; - do { - detection_data = prepare2detect.receive(); - det_image = detection_data.det_image; - std::vector result_vec; - - if(det_image) - result_vec = detector.detect_resized(*det_image, frame_size.width, frame_size.height, thresh, true); // true - fps_det_counter++; - //std::this_thread::sleep_for(std::chrono::milliseconds(150)); - - detection_data.new_detection = true; - detection_data.result_vec = result_vec; - detect2draw.send(detection_data); - } while (!detection_data.exit_flag); - std::cout << " t_detect exit \n"; - }); - - // draw rectangles (and track objects) - t_draw = std::thread([&]() - { - 
std::queue track_optflow_queue; - detection_data_t detection_data; - do { - - // for Video-file - if (detection_sync) { - detection_data = detect2draw.receive(); - } - // for Video-camera - else - { - // get new Detection result if present - if (detect2draw.is_object_present()) { - cv::Mat old_cap_frame = detection_data.cap_frame; // use old captured frame - detection_data = detect2draw.receive(); - if (!old_cap_frame.empty()) detection_data.cap_frame = old_cap_frame; - } - // get new Captured frame - else { - std::vector old_result_vec = detection_data.result_vec; // use old detections - detection_data = cap2draw.receive(); - detection_data.result_vec = old_result_vec; - } - } - - cv::Mat cap_frame = detection_data.cap_frame; - cv::Mat draw_frame = detection_data.cap_frame.clone(); - std::vector result_vec = detection_data.result_vec; - -#ifdef TRACK_OPTFLOW - if (detection_data.new_detection) { - tracker_flow.update_tracking_flow(detection_data.cap_frame, detection_data.result_vec); - while (track_optflow_queue.size() > 0) { - draw_frame = track_optflow_queue.back(); - result_vec = tracker_flow.tracking_flow(track_optflow_queue.front(), false); - track_optflow_queue.pop(); - } - } - else { - track_optflow_queue.push(cap_frame); - result_vec = tracker_flow.tracking_flow(cap_frame, false); - } - detection_data.new_detection = true; // to correct kalman filter -#endif //TRACK_OPTFLOW - - // track ID by using kalman filter - if (use_kalman_filter) { - if (detection_data.new_detection) { - result_vec = track_kalman.correct(result_vec); - } - else { - result_vec = track_kalman.predict(); - } - } - // track ID by using custom function - else { - int frame_story = std::max(5, current_fps_cap.load()); - result_vec = detector.tracking_id(result_vec, true, frame_story, 40); - } - - if (use_zed_camera && !detection_data.zed_cloud.empty()) { - result_vec = get_3d_coordinates(result_vec, detection_data.zed_cloud); - } - - //small_preview.set(draw_frame, result_vec); - 
//large_preview.set(draw_frame, result_vec); - draw_boxes(draw_frame, result_vec, obj_names, current_fps_det, current_fps_cap); - //show_console_result(result_vec, obj_names, detection_data.frame_id); - //large_preview.draw(draw_frame); - //small_preview.draw(draw_frame, true); - - detection_data.result_vec = result_vec; - detection_data.draw_frame = draw_frame; - draw2show.send(detection_data); - if (send_network) draw2net.send(detection_data); - if (output_video.isOpened()) draw2write.send(detection_data); - } while (!detection_data.exit_flag); - std::cout << " t_draw exit \n"; - }); - - - // write frame to videofile - t_write = std::thread([&]() - { - if (output_video.isOpened()) { - detection_data_t detection_data; - cv::Mat output_frame; - do { - detection_data = draw2write.receive(); - if(detection_data.draw_frame.channels() == 4) cv::cvtColor(detection_data.draw_frame, output_frame, CV_RGBA2RGB); - else output_frame = detection_data.draw_frame; - output_video << output_frame; - } while (!detection_data.exit_flag); - output_video.release(); - } - std::cout << " t_write exit \n"; - }); - - // send detection to the network - t_network = std::thread([&]() - { - if (send_network) { - detection_data_t detection_data; - do { - detection_data = draw2net.receive(); - - detector.send_json_http(detection_data.result_vec, obj_names, detection_data.frame_id, filename); - - } while (!detection_data.exit_flag); - } - std::cout << " t_network exit \n"; - }); - - - // show detection - detection_data_t detection_data; - do { - - steady_end = std::chrono::steady_clock::now(); - float time_sec = std::chrono::duration(steady_end - steady_start).count(); - if (time_sec >= 1) { - current_fps_det = fps_det_counter.load() / time_sec; - current_fps_cap = fps_cap_counter.load() / time_sec; - steady_start = steady_end; - fps_det_counter = 0; - fps_cap_counter = 0; - } - - detection_data = draw2show.receive(); - cv::Mat draw_frame = detection_data.draw_frame; - - //if (extrapolate_flag) 
{ - // cv::putText(draw_frame, "extrapolate", cv::Point2f(10, 40), cv::FONT_HERSHEY_COMPLEX_SMALL, 1.0, cv::Scalar(50, 50, 0), 2); - //} - - cv::imshow("window name", draw_frame); - int key = cv::waitKey(3); // 3 or 16ms - if (key == 'f') show_small_boxes = !show_small_boxes; - if (key == 'p') while (true) if (cv::waitKey(100) == 'p') break; - //if (key == 'e') extrapolate_flag = !extrapolate_flag; - if (key == 27) { exit_flag = true;} - - //std::cout << " current_fps_det = " << current_fps_det << ", current_fps_cap = " << current_fps_cap << std::endl; - } while (!detection_data.exit_flag); - std::cout << " show detection exit \n"; - - cv::destroyWindow("window name"); - // wait for all threads - if (t_cap.joinable()) t_cap.join(); - if (t_prepare.joinable()) t_prepare.join(); - if (t_detect.joinable()) t_detect.join(); - if (t_post.joinable()) t_post.join(); - if (t_draw.joinable()) t_draw.join(); - if (t_write.joinable()) t_write.join(); - if (t_network.joinable()) t_network.join(); - - break; - - } - else if (file_ext == "txt") { // list of image files - std::ifstream file(filename); - if (!file.is_open()) std::cout << "File not found! 
\n"; - else - for (std::string line; file >> line;) { - std::cout << line << std::endl; - cv::Mat mat_img = cv::imread(line); - std::vector result_vec = detector.detect(mat_img); - show_console_result(result_vec, obj_names); - //draw_boxes(mat_img, result_vec, obj_names); - //cv::imwrite("res_" + line, mat_img); - } - - } - else { // image file - // to achive high performance for multiple images do these 2 lines in another thread - cv::Mat mat_img = cv::imread(filename); - auto det_image = detector.mat_to_image_resize(mat_img); - - auto start = std::chrono::steady_clock::now(); - std::vector result_vec = detector.detect_resized(*det_image, mat_img.size().width, mat_img.size().height); - auto end = std::chrono::steady_clock::now(); - std::chrono::duration spent = end - start; - std::cout << " Time: " << spent.count() << " sec \n"; - - //result_vec = detector.tracking_id(result_vec); // comment it - if track_id is not required - draw_boxes(mat_img, result_vec, obj_names); - cv::imshow("window name", mat_img); - show_console_result(result_vec, obj_names); - cv::waitKey(0); - } -#else // OPENCV - //std::vector result_vec = detector.detect(filename); - - auto img = detector.load_image(filename); - std::vector result_vec = detector.detect(img); - detector.free_image(img); - show_console_result(result_vec, obj_names); -#endif // OPENCV - } - catch (std::exception &e) { std::cerr << "exception: " << e.what() << "\n"; getchar(); } - catch (...) 
{ std::cerr << "unknown exception \n"; getchar(); } - filename.clear(); - } - - return 0; -} diff --git a/src/Detector/darknet/src/yolo_layer.c b/src/Detector/darknet/src/yolo_layer.c deleted file mode 100644 index 7b1f5f13c..000000000 --- a/src/Detector/darknet/src/yolo_layer.c +++ /dev/null @@ -1,1223 +0,0 @@ -#include "yolo_layer.h" -#include "activations.h" -#include "blas.h" -#include "box.h" -#include "dark_cuda.h" -#include "utils.h" - -#include -#include -#include -#include -#include - -extern int check_mistakes; - -layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes, int max_boxes) -{ - int i; - layer l = { (LAYER_TYPE)0 }; - l.type = YOLO; - - l.n = n; - l.total = total; - l.batch = batch; - l.h = h; - l.w = w; - l.c = n*(classes + 4 + 1); - l.out_w = l.w; - l.out_h = l.h; - l.out_c = l.c; - l.classes = classes; - l.cost = (float*)xcalloc(1, sizeof(float)); - l.biases = (float*)xcalloc(total * 2, sizeof(float)); - if(mask) l.mask = mask; - else{ - l.mask = (int*)xcalloc(n, sizeof(int)); - for(i = 0; i < n; ++i){ - l.mask[i] = i; - } - } - l.bias_updates = (float*)xcalloc(n * 2, sizeof(float)); - l.outputs = h*w*n*(classes + 4 + 1); - l.inputs = l.outputs; - l.max_boxes = max_boxes; - l.truth_size = 4 + 2; - l.truths = l.max_boxes*l.truth_size; // 90*(4 + 1); - l.labels = (int*)xcalloc(batch * l.w*l.h*l.n, sizeof(int)); - for (i = 0; i < batch * l.w*l.h*l.n; ++i) l.labels[i] = -1; - l.class_ids = (int*)xcalloc(batch * l.w*l.h*l.n, sizeof(int)); - for (i = 0; i < batch * l.w*l.h*l.n; ++i) l.class_ids[i] = -1; - - l.delta = (float*)xcalloc(batch * l.outputs, sizeof(float)); - l.output = (float*)xcalloc(batch * l.outputs, sizeof(float)); - for(i = 0; i < total*2; ++i){ - l.biases[i] = .5; - } - - l.forward = forward_yolo_layer; - l.backward = backward_yolo_layer; -#ifdef GPU - l.forward_gpu = forward_yolo_layer_gpu; - l.backward_gpu = backward_yolo_layer_gpu; - l.output_gpu = cuda_make_array(l.output, batch*l.outputs); - 
l.output_avg_gpu = cuda_make_array(l.output, batch*l.outputs); - l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs); - - free(l.output); - if (cudaSuccess == cudaHostAlloc(&l.output, batch*l.outputs*sizeof(float), cudaHostRegisterMapped)) l.output_pinned = 1; - else { - cudaGetLastError(); // reset CUDA-error - l.output = (float*)xcalloc(batch * l.outputs, sizeof(float)); - } - - free(l.delta); - if (cudaSuccess == cudaHostAlloc(&l.delta, batch*l.outputs*sizeof(float), cudaHostRegisterMapped)) l.delta_pinned = 1; - else { - cudaGetLastError(); // reset CUDA-error - l.delta = (float*)xcalloc(batch * l.outputs, sizeof(float)); - } -#endif - - fprintf(stderr, "yolo\n"); - srand(time(0)); - - return l; -} - -void resize_yolo_layer(layer *l, int w, int h) -{ - l->w = w; - l->h = h; - - l->outputs = h*w*l->n*(l->classes + 4 + 1); - l->inputs = l->outputs; - - if (l->embedding_output) l->embedding_output = (float*)xrealloc(l->output, l->batch * l->embedding_size * l->n * l->h * l->w * sizeof(float)); - if (l->labels) l->labels = (int*)xrealloc(l->labels, l->batch * l->n * l->h * l->w * sizeof(int)); - if (l->class_ids) l->class_ids = (int*)xrealloc(l->class_ids, l->batch * l->n * l->h * l->w * sizeof(int)); - - if (!l->output_pinned) l->output = (float*)xrealloc(l->output, l->batch*l->outputs * sizeof(float)); - if (!l->delta_pinned) l->delta = (float*)xrealloc(l->delta, l->batch*l->outputs*sizeof(float)); - -#ifdef GPU - if (l->output_pinned) { - CHECK_CUDA(cudaFreeHost(l->output)); - if (cudaSuccess != cudaHostAlloc(&l->output, l->batch*l->outputs * sizeof(float), cudaHostRegisterMapped)) { - cudaGetLastError(); // reset CUDA-error - l->output = (float*)xcalloc(l->batch * l->outputs, sizeof(float)); - l->output_pinned = 0; - } - } - - if (l->delta_pinned) { - CHECK_CUDA(cudaFreeHost(l->delta)); - if (cudaSuccess != cudaHostAlloc(&l->delta, l->batch*l->outputs * sizeof(float), cudaHostRegisterMapped)) { - cudaGetLastError(); // reset CUDA-error - l->delta = 
(float*)xcalloc(l->batch * l->outputs, sizeof(float)); - l->delta_pinned = 0; - } - } - - cuda_free(l->delta_gpu); - cuda_free(l->output_gpu); - cuda_free(l->output_avg_gpu); - - l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); - l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); - l->output_avg_gpu = cuda_make_array(l->output, l->batch*l->outputs); -#endif -} - -box get_yolo_box(float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, int stride, int new_coords) -{ - box b; - // ln - natural logarithm (base = e) - // x` = t.x * lw - i; // x = ln(x`/(1-x`)) // x - output of previous conv-layer - // y` = t.y * lh - i; // y = ln(y`/(1-y`)) // y - output of previous conv-layer - // w = ln(t.w * net.w / anchors_w); // w - output of previous conv-layer - // h = ln(t.h * net.h / anchors_h); // h - output of previous conv-layer - if (new_coords) { - b.x = (i + x[index + 0 * stride]) / lw; - b.y = (j + x[index + 1 * stride]) / lh; - b.w = x[index + 2 * stride] * x[index + 2 * stride] * 4 * biases[2 * n] / w; - b.h = x[index + 3 * stride] * x[index + 3 * stride] * 4 * biases[2 * n + 1] / h; - } - else { - b.x = (i + x[index + 0 * stride]) / lw; - b.y = (j + x[index + 1 * stride]) / lh; - b.w = exp(x[index + 2 * stride]) * biases[2 * n] / w; - b.h = exp(x[index + 3 * stride]) * biases[2 * n + 1] / h; - } - return b; -} - -static inline float fix_nan_inf(float val) -{ - if (isnan(val) || isinf(val)) val = 0; - return val; -} - -static inline float clip_value(float val, const float max_val) -{ - if (val > max_val) { - //printf("\n val = %f > max_val = %f \n", val, max_val); - val = max_val; - } - else if (val < -max_val) { - //printf("\n val = %f < -max_val = %f \n", val, -max_val); - val = -max_val; - } - return val; -} - -ious delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride, float iou_normalizer, IOU_LOSS iou_loss, 
int accumulate, float max_delta, int *rewritten_bbox, int new_coords) -{ - if (delta[index + 0 * stride] || delta[index + 1 * stride] || delta[index + 2 * stride] || delta[index + 3 * stride]) { - (*rewritten_bbox)++; - } - - ious all_ious = { 0 }; - // i - step in layer width - // j - step in layer height - // Returns a box in absolute coordinates - box pred = get_yolo_box(x, biases, n, index, i, j, lw, lh, w, h, stride, new_coords); - all_ious.iou = box_iou(pred, truth); - all_ious.giou = box_giou(pred, truth); - all_ious.diou = box_diou(pred, truth); - all_ious.ciou = box_ciou(pred, truth); - // avoid nan in dx_box_iou - if (pred.w == 0) { pred.w = 1.0; } - if (pred.h == 0) { pred.h = 1.0; } - if (iou_loss == MSE) // old loss - { - float tx = (truth.x*lw - i); - float ty = (truth.y*lh - j); - float tw = log(truth.w*w / biases[2 * n]); - float th = log(truth.h*h / biases[2 * n + 1]); - - if (new_coords) { - //tx = (truth.x*lw - i + 0.5) / 2; - //ty = (truth.y*lh - j + 0.5) / 2; - tw = sqrt(truth.w*w / (4 * biases[2 * n])); - th = sqrt(truth.h*h / (4 * biases[2 * n + 1])); - } - - //printf(" tx = %f, ty = %f, tw = %f, th = %f \n", tx, ty, tw, th); - //printf(" x = %f, y = %f, w = %f, h = %f \n", x[index + 0 * stride], x[index + 1 * stride], x[index + 2 * stride], x[index + 3 * stride]); - - // accumulate delta - delta[index + 0 * stride] += scale * (tx - x[index + 0 * stride]) * iou_normalizer; - delta[index + 1 * stride] += scale * (ty - x[index + 1 * stride]) * iou_normalizer; - delta[index + 2 * stride] += scale * (tw - x[index + 2 * stride]) * iou_normalizer; - delta[index + 3 * stride] += scale * (th - x[index + 3 * stride]) * iou_normalizer; - } - else { - // https://github.com/generalized-iou/g-darknet - // https://arxiv.org/abs/1902.09630v2 - // https://giou.stanford.edu/ - all_ious.dx_iou = dx_box_iou(pred, truth, iou_loss); - - // jacobian^t (transpose) - //float dx = (all_ious.dx_iou.dl + all_ious.dx_iou.dr); - //float dy = (all_ious.dx_iou.dt + 
all_ious.dx_iou.db); - //float dw = ((-0.5 * all_ious.dx_iou.dl) + (0.5 * all_ious.dx_iou.dr)); - //float dh = ((-0.5 * all_ious.dx_iou.dt) + (0.5 * all_ious.dx_iou.db)); - - // jacobian^t (transpose) - float dx = all_ious.dx_iou.dt; - float dy = all_ious.dx_iou.db; - float dw = all_ious.dx_iou.dl; - float dh = all_ious.dx_iou.dr; - - - // predict exponential, apply gradient of e^delta_t ONLY for w,h - if (new_coords) { - //dw *= 8 * x[index + 2 * stride]; - //dh *= 8 * x[index + 3 * stride]; - //dw *= 8 * x[index + 2 * stride] * biases[2 * n] / w; - //dh *= 8 * x[index + 3 * stride] * biases[2 * n + 1] / h; - - //float grad_w = 8 * exp(-x[index + 2 * stride]) / pow(exp(-x[index + 2 * stride]) + 1, 3); - //float grad_h = 8 * exp(-x[index + 3 * stride]) / pow(exp(-x[index + 3 * stride]) + 1, 3); - //dw *= grad_w; - //dh *= grad_h; - } - else { - dw *= exp(x[index + 2 * stride]); - dh *= exp(x[index + 3 * stride]); - } - - - //dw *= exp(x[index + 2 * stride]); - //dh *= exp(x[index + 3 * stride]); - - // normalize iou weight - dx *= iou_normalizer; - dy *= iou_normalizer; - dw *= iou_normalizer; - dh *= iou_normalizer; - - - dx = fix_nan_inf(dx); - dy = fix_nan_inf(dy); - dw = fix_nan_inf(dw); - dh = fix_nan_inf(dh); - - if (max_delta != FLT_MAX) { - dx = clip_value(dx, max_delta); - dy = clip_value(dy, max_delta); - dw = clip_value(dw, max_delta); - dh = clip_value(dh, max_delta); - } - - - if (!accumulate) { - delta[index + 0 * stride] = 0; - delta[index + 1 * stride] = 0; - delta[index + 2 * stride] = 0; - delta[index + 3 * stride] = 0; - } - - // accumulate delta - delta[index + 0 * stride] += dx; - delta[index + 1 * stride] += dy; - delta[index + 2 * stride] += dw; - delta[index + 3 * stride] += dh; - } - - return all_ious; -} - -void averages_yolo_deltas(int class_index, int box_index, int stride, int classes, float *delta) -{ - - int classes_in_one_box = 0; - int c; - for (c = 0; c < classes; ++c) { - if (delta[class_index + stride*c] > 0) 
classes_in_one_box++; - } - - if (classes_in_one_box > 0) { - delta[box_index + 0 * stride] /= classes_in_one_box; - delta[box_index + 1 * stride] /= classes_in_one_box; - delta[box_index + 2 * stride] /= classes_in_one_box; - delta[box_index + 3 * stride] /= classes_in_one_box; - } -} - -void delta_yolo_class(float *output, float *delta, int index, int class_id, int classes, int stride, float *avg_cat, int focal_loss, float label_smooth_eps, float *classes_multipliers, float cls_normalizer) -{ - int n; - if (delta[index + stride*class_id]){ - float y_true = 1; - if(label_smooth_eps) y_true = y_true * (1 - label_smooth_eps) + 0.5*label_smooth_eps; - float result_delta = y_true - output[index + stride*class_id]; - if(!isnan(result_delta) && !isinf(result_delta)) delta[index + stride*class_id] = result_delta; - //delta[index + stride*class_id] = 1 - output[index + stride*class_id]; - - if (classes_multipliers) delta[index + stride*class_id] *= classes_multipliers[class_id]; - if(avg_cat) *avg_cat += output[index + stride*class_id]; - return; - } - // Focal loss - if (focal_loss) { - // Focal Loss - float alpha = 0.5; // 0.25 or 0.5 - //float gamma = 2; // hardcoded in many places of the grad-formula - - int ti = index + stride*class_id; - float pt = output[ti] + 0.000000000000001F; - // http://fooplot.com/#W3sidHlwZSI6MCwiZXEiOiItKDEteCkqKDIqeCpsb2coeCkreC0xKSIsImNvbG9yIjoiIzAwMDAwMCJ9LHsidHlwZSI6MTAwMH1d - float grad = -(1 - pt) * (2 * pt*logf(pt) + pt - 1); // http://blog.csdn.net/linmingan/article/details/77885832 - //float grad = (1 - pt) * (2 * pt*logf(pt) + pt - 1); // https://github.com/unsky/focal-loss - - for (n = 0; n < classes; ++n) { - delta[index + stride*n] = (((n == class_id) ? 1 : 0) - output[index + stride*n]); - - delta[index + stride*n] *= alpha*grad; - - if (n == class_id && avg_cat) *avg_cat += output[index + stride*n]; - } - } - else { - // default - for (n = 0; n < classes; ++n) { - float y_true = ((n == class_id) ? 
1 : 0); - if (label_smooth_eps) y_true = y_true * (1 - label_smooth_eps) + 0.5*label_smooth_eps; - float result_delta = y_true - output[index + stride*n]; - if (!isnan(result_delta) && !isinf(result_delta)) delta[index + stride*n] = result_delta; - - if (classes_multipliers && n == class_id) delta[index + stride*class_id] *= classes_multipliers[class_id] * cls_normalizer; - if (n == class_id && avg_cat) *avg_cat += output[index + stride*n]; - } - } -} - -int compare_yolo_class(float *output, int classes, int class_index, int stride, float objectness, int class_id, float conf_thresh) -{ - int j; - for (j = 0; j < classes; ++j) { - //float prob = objectness * output[class_index + stride*j]; - float prob = output[class_index + stride*j]; - if (prob > conf_thresh) { - return 1; - } - } - return 0; -} - -static int entry_index(layer l, int batch, int location, int entry) -{ - int n = location / (l.w*l.h); - int loc = location % (l.w*l.h); - return batch*l.outputs + n*l.w*l.h*(4+l.classes+1) + entry*l.w*l.h + loc; -} - -typedef struct train_yolo_args { - layer l; - network_state state; - int b; - - float tot_iou; - int count; - int class_count; -} train_yolo_args; - -void *process_batch(void* ptr) -{ - { - train_yolo_args *args = (train_yolo_args*)ptr; - const layer l = args->l; - network_state state = args->state; - int b = args->b; - - int i, j, t, n; - - //printf(" b = %d \n", b, b); - - //float tot_iou = 0; - float tot_giou = 0; - float tot_diou = 0; - float tot_ciou = 0; - float tot_iou_loss = 0; - float tot_giou_loss = 0; - float tot_diou_loss = 0; - float tot_ciou_loss = 0; - float recall = 0; - float recall75 = 0; - float avg_cat = 0; - float avg_obj = 0; - float avg_anyobj = 0; - //int count = 0; - //int class_count = 0; - - for (j = 0; j < l.h; ++j) { - for (i = 0; i < l.w; ++i) { - for (n = 0; n < l.n; ++n) { - const int class_index = entry_index(l, b, n * l.w * l.h + j * l.w + i, 4 + 1); - const int obj_index = entry_index(l, b, n * l.w * l.h + j * l.w + i, 
4); - const int box_index = entry_index(l, b, n * l.w * l.h + j * l.w + i, 0); - const int stride = l.w * l.h; - box pred = get_yolo_box(l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.w * l.h, l.new_coords); - float best_match_iou = 0; - int best_match_t = 0; - float best_iou = 0; - int best_t = 0; - for (t = 0; t < l.max_boxes; ++t) { - box truth = float_to_box_stride(state.truth + t * l.truth_size + b * l.truths, 1); - if (!truth.x) break; // continue; - int class_id = state.truth[t * l.truth_size + b * l.truths + 4]; - if (class_id >= l.classes || class_id < 0) { - printf("\n Warning: in txt-labels class_id=%d >= classes=%d in cfg-file. In txt-labels class_id should be [from 0 to %d] \n", class_id, l.classes, l.classes - 1); - printf("\n truth.x = %f, truth.y = %f, truth.w = %f, truth.h = %f, class_id = %d \n", truth.x, truth.y, truth.w, truth.h, class_id); - if (check_mistakes) getchar(); - continue; // if label contains class_id more than number of classes in the cfg-file and class_id check garbage value - } - - float objectness = l.output[obj_index]; - if (isnan(objectness) || isinf(objectness)) l.output[obj_index] = 0; - int class_id_match = compare_yolo_class(l.output, l.classes, class_index, l.w * l.h, objectness, class_id, 0.25f); - - float iou = box_iou(pred, truth); - if (iou > best_match_iou && class_id_match == 1) { - best_match_iou = iou; - best_match_t = t; - } - if (iou > best_iou) { - best_iou = iou; - best_t = t; - } - } - - avg_anyobj += l.output[obj_index]; - l.delta[obj_index] = l.obj_normalizer * (0 - l.output[obj_index]); - if (best_match_iou > l.ignore_thresh) { - if (l.objectness_smooth) { - const float delta_obj = l.obj_normalizer * (best_match_iou - l.output[obj_index]); - if (delta_obj > l.delta[obj_index]) l.delta[obj_index] = delta_obj; - - } - else l.delta[obj_index] = 0; - } - else if (state.net.adversarial) { - int stride = l.w * l.h; - float scale = pred.w * pred.h; - if (scale > 0) scale = 
sqrt(scale); - l.delta[obj_index] = scale * l.obj_normalizer * (0 - l.output[obj_index]); - int cl_id; - int found_object = 0; - for (cl_id = 0; cl_id < l.classes; ++cl_id) { - if (l.output[class_index + stride * cl_id] * l.output[obj_index] > 0.25) { - l.delta[class_index + stride * cl_id] = scale * (0 - l.output[class_index + stride * cl_id]); - found_object = 1; - } - } - if (found_object) { - // don't use this loop for adversarial attack drawing - for (cl_id = 0; cl_id < l.classes; ++cl_id) - if (l.output[class_index + stride * cl_id] * l.output[obj_index] < 0.25) - l.delta[class_index + stride * cl_id] = scale * (1 - l.output[class_index + stride * cl_id]); - - l.delta[box_index + 0 * stride] += scale * (0 - l.output[box_index + 0 * stride]); - l.delta[box_index + 1 * stride] += scale * (0 - l.output[box_index + 1 * stride]); - l.delta[box_index + 2 * stride] += scale * (0 - l.output[box_index + 2 * stride]); - l.delta[box_index + 3 * stride] += scale * (0 - l.output[box_index + 3 * stride]); - } - } - if (best_iou > l.truth_thresh) { - const float iou_multiplier = best_iou * best_iou;// (best_iou - l.truth_thresh) / (1.0 - l.truth_thresh); - if (l.objectness_smooth) l.delta[obj_index] = l.obj_normalizer * (iou_multiplier - l.output[obj_index]); - else l.delta[obj_index] = l.obj_normalizer * (1 - l.output[obj_index]); - //l.delta[obj_index] = l.obj_normalizer * (1 - l.output[obj_index]); - - int class_id = state.truth[best_t * l.truth_size + b * l.truths + 4]; - if (l.map) class_id = l.map[class_id]; - delta_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w * l.h, 0, l.focal_loss, l.label_smooth_eps, l.classes_multipliers, l.cls_normalizer); - const float class_multiplier = (l.classes_multipliers) ? 
l.classes_multipliers[class_id] : 1.0f; - if (l.objectness_smooth) l.delta[class_index + stride * class_id] = class_multiplier * (iou_multiplier - l.output[class_index + stride * class_id]); - box truth = float_to_box_stride(state.truth + best_t * l.truth_size + b * l.truths, 1); - delta_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w * truth.h), l.w * l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1, l.max_delta, state.net.rewritten_bbox, l.new_coords); - (*state.net.total_bbox)++; - } - } - } - } - for (t = 0; t < l.max_boxes; ++t) { - box truth = float_to_box_stride(state.truth + t * l.truth_size + b * l.truths, 1); - if (!truth.x) break; // continue; - if (truth.x < 0 || truth.y < 0 || truth.x > 1 || truth.y > 1 || truth.w < 0 || truth.h < 0) { - char buff[256]; - printf(" Wrong label: truth.x = %f, truth.y = %f, truth.w = %f, truth.h = %f \n", truth.x, truth.y, truth.w, truth.h); - sprintf(buff, "echo \"Wrong label: truth.x = %f, truth.y = %f, truth.w = %f, truth.h = %f\" >> bad_label.list", - truth.x, truth.y, truth.w, truth.h); - system(buff); - } - int class_id = state.truth[t * l.truth_size + b * l.truths + 4]; - if (class_id >= l.classes || class_id < 0) continue; // if label contains class_id more than number of classes in the cfg-file and class_id check garbage value - - float best_iou = 0; - int best_n = 0; - i = (truth.x * l.w); - j = (truth.y * l.h); - box truth_shift = truth; - truth_shift.x = truth_shift.y = 0; - for (n = 0; n < l.total; ++n) { - box pred = { 0 }; - pred.w = l.biases[2 * n] / state.net.w; - pred.h = l.biases[2 * n + 1] / state.net.h; - float iou = box_iou(pred, truth_shift); - if (iou > best_iou) { - best_iou = iou; - best_n = n; - } - } - - int mask_n = int_index(l.mask, best_n, l.n); - if (mask_n >= 0) { - int class_id = state.truth[t * l.truth_size + b * l.truths + 4]; - if (l.map) class_id = l.map[class_id]; - - int box_index = entry_index(l, b, 
mask_n * l.w * l.h + j * l.w + i, 0); - const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f; - ious all_ious = delta_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w * truth.h), l.w * l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1, l.max_delta, state.net.rewritten_bbox, l.new_coords); - (*state.net.total_bbox)++; - - const int truth_in_index = t * l.truth_size + b * l.truths + 5; - const int track_id = state.truth[truth_in_index]; - const int truth_out_index = b * l.n * l.w * l.h + mask_n * l.w * l.h + j * l.w + i; - l.labels[truth_out_index] = track_id; - l.class_ids[truth_out_index] = class_id; - //printf(" track_id = %d, t = %d, b = %d, truth_in_index = %d, truth_out_index = %d \n", track_id, t, b, truth_in_index, truth_out_index); - - // range is 0 <= 1 - args->tot_iou += all_ious.iou; - tot_iou_loss += 1 - all_ious.iou; - // range is -1 <= giou <= 1 - tot_giou += all_ious.giou; - tot_giou_loss += 1 - all_ious.giou; - - tot_diou += all_ious.diou; - tot_diou_loss += 1 - all_ious.diou; - - tot_ciou += all_ious.ciou; - tot_ciou_loss += 1 - all_ious.ciou; - - int obj_index = entry_index(l, b, mask_n * l.w * l.h + j * l.w + i, 4); - avg_obj += l.output[obj_index]; - if (l.objectness_smooth) { - float delta_obj = class_multiplier * l.obj_normalizer * (1 - l.output[obj_index]); - if (l.delta[obj_index] == 0) l.delta[obj_index] = delta_obj; - } - else l.delta[obj_index] = class_multiplier * l.obj_normalizer * (1 - l.output[obj_index]); - - int class_index = entry_index(l, b, mask_n * l.w * l.h + j * l.w + i, 4 + 1); - delta_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w * l.h, &avg_cat, l.focal_loss, l.label_smooth_eps, l.classes_multipliers, l.cls_normalizer); - - //printf(" label: class_id = %d, truth.x = %f, truth.y = %f, truth.w = %f, truth.h = %f \n", class_id, truth.x, truth.y, truth.w, truth.h); - //printf(" mask_n = %d, 
l.output[obj_index] = %f, l.output[class_index + class_id] = %f \n\n", mask_n, l.output[obj_index], l.output[class_index + class_id]); - - ++(args->count); - ++(args->class_count); - if (all_ious.iou > .5) recall += 1; - if (all_ious.iou > .75) recall75 += 1; - } - - // iou_thresh - for (n = 0; n < l.total; ++n) { - int mask_n = int_index(l.mask, n, l.n); - if (mask_n >= 0 && n != best_n && l.iou_thresh < 1.0f) { - box pred = { 0 }; - pred.w = l.biases[2 * n] / state.net.w; - pred.h = l.biases[2 * n + 1] / state.net.h; - float iou = box_iou_kind(pred, truth_shift, l.iou_thresh_kind); // IOU, GIOU, MSE, DIOU, CIOU - // iou, n - - if (iou > l.iou_thresh) { - int class_id = state.truth[t * l.truth_size + b * l.truths + 4]; - if (l.map) class_id = l.map[class_id]; - - int box_index = entry_index(l, b, mask_n * l.w * l.h + j * l.w + i, 0); - const float class_multiplier = (l.classes_multipliers) ? l.classes_multipliers[class_id] : 1.0f; - ious all_ious = delta_yolo_box(truth, l.output, l.biases, n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2 - truth.w * truth.h), l.w * l.h, l.iou_normalizer * class_multiplier, l.iou_loss, 1, l.max_delta, state.net.rewritten_bbox, l.new_coords); - (*state.net.total_bbox)++; - - // range is 0 <= 1 - args->tot_iou += all_ious.iou; - tot_iou_loss += 1 - all_ious.iou; - // range is -1 <= giou <= 1 - tot_giou += all_ious.giou; - tot_giou_loss += 1 - all_ious.giou; - - tot_diou += all_ious.diou; - tot_diou_loss += 1 - all_ious.diou; - - tot_ciou += all_ious.ciou; - tot_ciou_loss += 1 - all_ious.ciou; - - int obj_index = entry_index(l, b, mask_n * l.w * l.h + j * l.w + i, 4); - avg_obj += l.output[obj_index]; - if (l.objectness_smooth) { - float delta_obj = class_multiplier * l.obj_normalizer * (1 - l.output[obj_index]); - if (l.delta[obj_index] == 0) l.delta[obj_index] = delta_obj; - } - else l.delta[obj_index] = class_multiplier * l.obj_normalizer * (1 - l.output[obj_index]); - - int class_index = entry_index(l, b, mask_n 
* l.w * l.h + j * l.w + i, 4 + 1); - delta_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w * l.h, &avg_cat, l.focal_loss, l.label_smooth_eps, l.classes_multipliers, l.cls_normalizer); - - ++(args->count); - ++(args->class_count); - if (all_ious.iou > .5) recall += 1; - if (all_ious.iou > .75) recall75 += 1; - } - } - } - } - - if (l.iou_thresh < 1.0f) { - // averages the deltas obtained by the function: delta_yolo_box()_accumulate - for (j = 0; j < l.h; ++j) { - for (i = 0; i < l.w; ++i) { - for (n = 0; n < l.n; ++n) { - int obj_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4); - int box_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 0); - int class_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4 + 1); - const int stride = l.w*l.h; - - if (l.delta[obj_index] != 0) - averages_yolo_deltas(class_index, box_index, stride, l.classes, l.delta); - } - } - } - } - - } - - return 0; -} - - - -void forward_yolo_layer(const layer l, network_state state) -{ - //int i, j, b, t, n; - memcpy(l.output, state.input, l.outputs*l.batch * sizeof(float)); - int b, n; - -#ifndef GPU - for (b = 0; b < l.batch; ++b) { - for (n = 0; n < l.n; ++n) { - int index = entry_index(l, b, n*l.w*l.h, 0); - if (l.new_coords) { - //activate_array(l.output + index, 4 * l.w*l.h, LOGISTIC); // x,y,w,h - } - else { - activate_array(l.output + index, 2 * l.w*l.h, LOGISTIC); // x,y, - index = entry_index(l, b, n*l.w*l.h, 4); - activate_array(l.output + index, (1 + l.classes)*l.w*l.h, LOGISTIC); - } - scal_add_cpu(2 * l.w*l.h, l.scale_x_y, -0.5*(l.scale_x_y - 1), l.output + index, 1); // scale x,y - } - } -#endif - - // delta is zeroed - memset(l.delta, 0, l.outputs * l.batch * sizeof(float)); - if (!state.train) return; - - int i; - for (i = 0; i < l.batch * l.w*l.h*l.n; ++i) l.labels[i] = -1; - for (i = 0; i < l.batch * l.w*l.h*l.n; ++i) l.class_ids[i] = -1; - //float avg_iou = 0; - float tot_iou = 0; - float tot_giou = 0; - float tot_diou = 0; - float tot_ciou = 0; - float 
tot_iou_loss = 0; - float tot_giou_loss = 0; - float tot_diou_loss = 0; - float tot_ciou_loss = 0; - float recall = 0; - float recall75 = 0; - float avg_cat = 0; - float avg_obj = 0; - float avg_anyobj = 0; - int count = 0; - int class_count = 0; - *(l.cost) = 0; - - - int num_threads = l.batch; - pthread_t* threads = (pthread_t*)calloc(num_threads, sizeof(pthread_t)); - - struct train_yolo_args* yolo_args = (train_yolo_args*)xcalloc(l.batch, sizeof(struct train_yolo_args)); - - for (b = 0; b < l.batch; b++) - { - yolo_args[b].l = l; - yolo_args[b].state = state; - yolo_args[b].b = b; - - yolo_args[b].tot_iou = 0; - yolo_args[b].count = 0; - yolo_args[b].class_count = 0; - - if (pthread_create(&threads[b], 0, process_batch, &(yolo_args[b]))) error("Thread creation failed"); - } - - for (b = 0; b < l.batch; b++) - { - pthread_join(threads[b], 0); - - tot_iou += yolo_args[b].tot_iou; - count += yolo_args[b].count; - class_count += yolo_args[b].class_count; - } - - free(yolo_args); - free(threads); - - // Search for an equidistant point from the distant boundaries of the local minimum - int iteration_num = get_current_iteration(state.net); - const int start_point = state.net.max_batches * 3 / 4; - //printf(" equidistant_point ep = %d, it = %d \n", state.net.equidistant_point, iteration_num); - - if ((state.net.badlabels_rejection_percentage && start_point < iteration_num) || - (state.net.num_sigmas_reject_badlabels && start_point < iteration_num) || - (state.net.equidistant_point && state.net.equidistant_point < iteration_num)) - { - const float progress_it = iteration_num - state.net.equidistant_point; - const float progress = progress_it / (state.net.max_batches - state.net.equidistant_point); - float ep_loss_threshold = (*state.net.delta_rolling_avg) * progress; - - float cur_max = 0; - float cur_avg = 0; - float counter = 0; - for (i = 0; i < l.batch * l.outputs; ++i) { - - if (l.delta[i] != 0) { - counter++; - cur_avg += fabs(l.delta[i]); - - if (cur_max < 
fabs(l.delta[i])) - cur_max = fabs(l.delta[i]); - } - } - - cur_avg = cur_avg / counter; - - if (*state.net.delta_rolling_max == 0) *state.net.delta_rolling_max = cur_max; - *state.net.delta_rolling_max = *state.net.delta_rolling_max * 0.99 + cur_max * 0.01; - *state.net.delta_rolling_avg = *state.net.delta_rolling_avg * 0.99 + cur_avg * 0.01; - - // reject high loss to filter bad labels - if (state.net.num_sigmas_reject_badlabels && start_point < iteration_num) - { - const float rolling_std = (*state.net.delta_rolling_std); - const float rolling_max = (*state.net.delta_rolling_max); - const float rolling_avg = (*state.net.delta_rolling_avg); - const float progress_badlabels = (float)(iteration_num - start_point) / (start_point); - - float cur_std = 0; - float counter = 0; - for (i = 0; i < l.batch * l.outputs; ++i) { - if (l.delta[i] != 0) { - counter++; - cur_std += pow(l.delta[i] - rolling_avg, 2); - } - } - cur_std = sqrt(cur_std / counter); - - *state.net.delta_rolling_std = *state.net.delta_rolling_std * 0.99 + cur_std * 0.01; - - float final_badlebels_threshold = rolling_avg + rolling_std * state.net.num_sigmas_reject_badlabels; - float badlabels_threshold = rolling_max - progress_badlabels * fabs(rolling_max - final_badlebels_threshold); - badlabels_threshold = max_val_cmp(final_badlebels_threshold, badlabels_threshold); - for (i = 0; i < l.batch * l.outputs; ++i) { - if (fabs(l.delta[i]) > badlabels_threshold) - l.delta[i] = 0; - } - printf(" rolling_std = %f, rolling_max = %f, rolling_avg = %f \n", rolling_std, rolling_max, rolling_avg); - printf(" badlabels loss_threshold = %f, start_it = %d, progress = %f \n", badlabels_threshold, start_point, progress_badlabels *100); - - ep_loss_threshold = min_val_cmp(final_badlebels_threshold, rolling_avg) * progress; - } - - - // reject some percent of the highest deltas to filter bad labels - if (state.net.badlabels_rejection_percentage && start_point < iteration_num) { - if (*state.net.badlabels_reject_threshold 
== 0) - *state.net.badlabels_reject_threshold = *state.net.delta_rolling_max; - - printf(" badlabels_reject_threshold = %f \n", *state.net.badlabels_reject_threshold); - - const float num_deltas_per_anchor = (l.classes + 4 + 1); - float counter_reject = 0; - float counter_all = 0; - for (i = 0; i < l.batch * l.outputs; ++i) { - if (l.delta[i] != 0) { - counter_all++; - if (fabs(l.delta[i]) > (*state.net.badlabels_reject_threshold)) { - counter_reject++; - l.delta[i] = 0; - } - } - } - float cur_percent = 100 * (counter_reject*num_deltas_per_anchor / counter_all); - if (cur_percent > state.net.badlabels_rejection_percentage) { - *state.net.badlabels_reject_threshold += 0.01; - printf(" increase!!! \n"); - } - else if (*state.net.badlabels_reject_threshold > 0.01) { - *state.net.badlabels_reject_threshold -= 0.01; - printf(" decrease!!! \n"); - } - - printf(" badlabels_reject_threshold = %f, cur_percent = %f, badlabels_rejection_percentage = %f, delta_rolling_max = %f \n", - *state.net.badlabels_reject_threshold, cur_percent, state.net.badlabels_rejection_percentage, *state.net.delta_rolling_max); - } - - - // reject low loss to find equidistant point - if (state.net.equidistant_point && state.net.equidistant_point < iteration_num) { - printf(" equidistant_point loss_threshold = %f, start_it = %d, progress = %3.1f %% \n", ep_loss_threshold, state.net.equidistant_point, progress * 100); - for (i = 0; i < l.batch * l.outputs; ++i) { - if (fabs(l.delta[i]) < ep_loss_threshold) - l.delta[i] = 0; - } - } - } - - if (count == 0) count = 1; - if (class_count == 0) class_count = 1; - - if (l.show_details == 0) { - float loss = pow(mag_array(l.delta, l.outputs * l.batch), 2); - *(l.cost) = loss; - - loss /= l.batch; - - fprintf(stderr, "v3 (%s loss, Normalizer: (iou: %.2f, obj: %.2f, cls: %.2f) Region %d Avg (IOU: %f), count: %d, total_loss = %f \n", - (l.iou_loss == MSE ? "mse" : (l.iou_loss == GIOU ? 
"giou" : "iou")), l.iou_normalizer, l.obj_normalizer, l.cls_normalizer, state.index, tot_iou / count, count, loss); - } - else { - // show detailed output - - int stride = l.w*l.h; - float* no_iou_loss_delta = (float *)calloc(l.batch * l.outputs, sizeof(float)); - memcpy(no_iou_loss_delta, l.delta, l.batch * l.outputs * sizeof(float)); - - - int j, n; - for (b = 0; b < l.batch; ++b) { - for (j = 0; j < l.h; ++j) { - for (i = 0; i < l.w; ++i) { - for (n = 0; n < l.n; ++n) { - int index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 0); - no_iou_loss_delta[index + 0 * stride] = 0; - no_iou_loss_delta[index + 1 * stride] = 0; - no_iou_loss_delta[index + 2 * stride] = 0; - no_iou_loss_delta[index + 3 * stride] = 0; - } - } - } - } - - float classification_loss = l.obj_normalizer * pow(mag_array(no_iou_loss_delta, l.outputs * l.batch), 2); - free(no_iou_loss_delta); - float loss = pow(mag_array(l.delta, l.outputs * l.batch), 2); - float iou_loss = loss - classification_loss; - - float avg_iou_loss = 0; - *(l.cost) = loss; - /* - // gIOU loss + MSE (objectness) loss - if (l.iou_loss == MSE) { - *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); - } - else { - // Always compute classification loss both for iou + cls loss and for logging with mse loss - // TODO: remove IOU loss fields before computing MSE on class - // probably split into two arrays - if (l.iou_loss == GIOU) { - avg_iou_loss = count > 0 ? l.iou_normalizer * (tot_giou_loss / count) : 0; - } - else { - avg_iou_loss = count > 0 ? l.iou_normalizer * (tot_iou_loss / count) : 0; - } - *(l.cost) = avg_iou_loss + classification_loss; - } - */ - - loss /= l.batch; - classification_loss /= l.batch; - iou_loss /= l.batch; - - fprintf(stderr, "v3 (%s loss, Normalizer: (iou: %.2f, obj: %.2f, cls: %.2f) Region %d Avg (IOU: %f), count: %d, class_loss = %f, iou_loss = %f, total_loss = %f \n", - (l.iou_loss == MSE ? "mse" : (l.iou_loss == GIOU ? 
"giou" : "iou")), l.iou_normalizer, l.obj_normalizer, l.cls_normalizer, state.index, tot_iou / count, count, classification_loss, iou_loss, loss); - - //fprintf(stderr, "v3 (%s loss, Normalizer: (iou: %.2f, cls: %.2f) Region %d Avg (IOU: %f, GIOU: %f), Class: %f, Obj: %f, No Obj: %f, .5R: %f, .75R: %f, count: %d, class_loss = %f, iou_loss = %f, total_loss = %f \n", - // (l.iou_loss == MSE ? "mse" : (l.iou_loss == GIOU ? "giou" : "iou")), l.iou_normalizer, l.obj_normalizer, state.index, tot_iou / count, tot_giou / count, avg_cat / class_count, avg_obj / count, avg_anyobj / (l.w*l.h*l.n*l.batch), recall / count, recall75 / count, count, - // classification_loss, iou_loss, loss); - } -} - -void backward_yolo_layer(const layer l, network_state state) -{ - axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, state.delta, 1); -} - -// Converts output of the network to detection boxes -// w,h: image width,height -// netw,neth: network width,height -// relative: 1 (all callers seems to pass TRUE) -void correct_yolo_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative, int letter) -{ - int i; - // network height (or width) - int new_w = 0; - // network height (or width) - int new_h = 0; - // Compute scale given image w,h vs network w,h - // I think this "rotates" the image to match network to input image w/h ratio - // new_h and new_w are really just network width and height - if (letter) { - if (((float)netw / w) < ((float)neth / h)) { - new_w = netw; - new_h = (h * netw) / w; - } - else { - new_h = neth; - new_w = (w * neth) / h; - } - } - else { - new_w = netw; - new_h = neth; - } - // difference between network width and "rotated" width - float deltaw = netw - new_w; - // difference between network height and "rotated" height - float deltah = neth - new_h; - // ratio between rotated network width and network width - float ratiow = (float)new_w / netw; - // ratio between rotated network width and network width - float ratioh = (float)new_h / neth; - for (i = 
0; i < n; ++i) { - - box b = dets[i].bbox; - // x = ( x - (deltaw/2)/netw ) / ratiow; - // x - [(1/2 the difference of the network width and rotated width) / (network width)] - b.x = (b.x - deltaw / 2. / netw) / ratiow; - b.y = (b.y - deltah / 2. / neth) / ratioh; - // scale to match rotation of incoming image - b.w *= 1 / ratiow; - b.h *= 1 / ratioh; - - // relative seems to always be == 1, I don't think we hit this condition, ever. - if (!relative) { - b.x *= w; - b.w *= w; - b.y *= h; - b.h *= h; - } - - dets[i].bbox = b; - } -} - -/* -void correct_yolo_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative, int letter) -{ - int i; - int new_w=0; - int new_h=0; - if (letter) { - if (((float)netw / w) < ((float)neth / h)) { - new_w = netw; - new_h = (h * netw) / w; - } - else { - new_h = neth; - new_w = (w * neth) / h; - } - } - else { - new_w = netw; - new_h = neth; - } - for (i = 0; i < n; ++i){ - box b = dets[i].bbox; - b.x = (b.x - (netw - new_w)/2./netw) / ((float)new_w/netw); - b.y = (b.y - (neth - new_h)/2./neth) / ((float)new_h/neth); - b.w *= (float)netw/new_w; - b.h *= (float)neth/new_h; - if(!relative){ - b.x *= w; - b.w *= w; - b.y *= h; - b.h *= h; - } - dets[i].bbox = b; - } -} -*/ - -int yolo_num_detections(layer l, float thresh) -{ - int i, n; - int count = 0; - for(n = 0; n < l.n; ++n){ - for (i = 0; i < l.w*l.h; ++i) { - int obj_index = entry_index(l, 0, n*l.w*l.h + i, 4); - if(l.output[obj_index] > thresh){ - ++count; - } - } - } - return count; -} - -int yolo_num_detections_batch(layer l, float thresh, int batch) -{ - int i, n; - int count = 0; - for (i = 0; i < l.w*l.h; ++i){ - for(n = 0; n < l.n; ++n){ - int obj_index = entry_index(l, batch, n*l.w*l.h + i, 4); - if(l.output[obj_index] > thresh){ - ++count; - } - } - } - return count; -} - -void avg_flipped_yolo(layer l) -{ - int i,j,n,z; - float *flip = l.output + l.outputs; - for (j = 0; j < l.h; ++j) { - for (i = 0; i < l.w/2; ++i) { - for (n = 0; n < l.n; ++n) { - 
for(z = 0; z < l.classes + 4 + 1; ++z){ - int i1 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + i; - int i2 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + (l.w - i - 1); - float swap = flip[i1]; - flip[i1] = flip[i2]; - flip[i2] = swap; - if(z == 0){ - flip[i1] = -flip[i1]; - flip[i2] = -flip[i2]; - } - } - } - } - } - for(i = 0; i < l.outputs; ++i){ - l.output[i] = (l.output[i] + flip[i])/2.; - } -} - -int get_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, int relative, detection *dets, int letter) -{ - //printf("\n l.batch = %d, l.w = %d, l.h = %d, l.n = %d \n", l.batch, l.w, l.h, l.n); - int i,j,n; - float *predictions = l.output; - // This snippet below is not necessary - // Need to comment it in order to batch processing >= 2 images - //if (l.batch == 2) avg_flipped_yolo(l); - int count = 0; - for (i = 0; i < l.w*l.h; ++i){ - int row = i / l.w; - int col = i % l.w; - for(n = 0; n < l.n; ++n){ - int obj_index = entry_index(l, 0, n*l.w*l.h + i, 4); - float objectness = predictions[obj_index]; - //if(objectness <= thresh) continue; // incorrect behavior for Nan values - if (objectness > thresh) { - //printf("\n objectness = %f, thresh = %f, i = %d, n = %d \n", objectness, thresh, i, n); - int box_index = entry_index(l, 0, n*l.w*l.h + i, 0); - dets[count].bbox = get_yolo_box(predictions, l.biases, l.mask[n], box_index, col, row, l.w, l.h, netw, neth, l.w*l.h, l.new_coords); - dets[count].objectness = objectness; - dets[count].classes = l.classes; - if (l.embedding_output) { - get_embedding(l.embedding_output, l.w, l.h, l.n*l.embedding_size, l.embedding_size, col, row, n, 0, dets[count].embeddings); - } - - for (j = 0; j < l.classes; ++j) { - int class_index = entry_index(l, 0, n*l.w*l.h + i, 4 + 1 + j); - float prob = objectness*predictions[class_index]; - dets[count].prob[j] = (prob > thresh) ? 
prob : 0; - } - ++count; - } - } - } - correct_yolo_boxes(dets, count, w, h, netw, neth, relative, letter); - return count; -} - -int get_yolo_detections_batch(layer l, int w, int h, int netw, int neth, float thresh, int *map, int relative, detection *dets, int letter, int batch) -{ - int i,j,n; - float *predictions = l.output; - //if (l.batch == 2) avg_flipped_yolo(l); - int count = 0; - for (i = 0; i < l.w*l.h; ++i){ - int row = i / l.w; - int col = i % l.w; - for(n = 0; n < l.n; ++n){ - int obj_index = entry_index(l, batch, n*l.w*l.h + i, 4); - float objectness = predictions[obj_index]; - //if(objectness <= thresh) continue; // incorrect behavior for Nan values - if (objectness > thresh) { - //printf("\n objectness = %f, thresh = %f, i = %d, n = %d \n", objectness, thresh, i, n); - int box_index = entry_index(l, batch, n*l.w*l.h + i, 0); - dets[count].bbox = get_yolo_box(predictions, l.biases, l.mask[n], box_index, col, row, l.w, l.h, netw, neth, l.w*l.h, l.new_coords); - dets[count].objectness = objectness; - dets[count].classes = l.classes; - if (l.embedding_output) { - get_embedding(l.embedding_output, l.w, l.h, l.n*l.embedding_size, l.embedding_size, col, row, n, batch, dets[count].embeddings); - } - - for (j = 0; j < l.classes; ++j) { - int class_index = entry_index(l, batch, n*l.w*l.h + i, 4 + 1 + j); - float prob = objectness*predictions[class_index]; - dets[count].prob[j] = (prob > thresh) ? 
prob : 0; - } - ++count; - } - } - } - correct_yolo_boxes(dets, count, w, h, netw, neth, relative, letter); - return count; -} - -#ifdef GPU - -void forward_yolo_layer_gpu(const layer l, network_state state) -{ - if (l.embedding_output) { - layer le = state.net.layers[l.embedding_layer_id]; - cuda_pull_array_async(le.output_gpu, l.embedding_output, le.batch*le.outputs); - } - - //copy_ongpu(l.batch*l.inputs, state.input, 1, l.output_gpu, 1); - simple_copy_ongpu(l.batch*l.inputs, state.input, l.output_gpu); - int b, n; - for (b = 0; b < l.batch; ++b){ - for(n = 0; n < l.n; ++n){ - int index = entry_index(l, b, n*l.w*l.h, 0); - // y = 1./(1. + exp(-x)) - // x = ln(y/(1-y)) // ln - natural logarithm (base = e) - // if(y->1) x -> inf - // if(y->0) x -> -inf - if (l.new_coords) { - //activate_array_ongpu(l.output_gpu + index, 4 * l.w*l.h, LOGISTIC); // x,y,w,h - } - else { - activate_array_ongpu(l.output_gpu + index, 2 * l.w*l.h, LOGISTIC); // x,y - - index = entry_index(l, b, n*l.w*l.h, 4); - activate_array_ongpu(l.output_gpu + index, (1 + l.classes)*l.w*l.h, LOGISTIC); // classes and objectness - } - if (l.scale_x_y != 1) scal_add_ongpu(2 * l.w*l.h, l.scale_x_y, -0.5*(l.scale_x_y - 1), l.output_gpu + index, 1); // scale x,y - } - } - if(!state.train || l.onlyforward){ - //cuda_pull_array(l.output_gpu, l.output, l.batch*l.outputs); - if (l.mean_alpha && l.output_avg_gpu) mean_array_gpu(l.output_gpu, l.batch*l.outputs, l.mean_alpha, l.output_avg_gpu); - cuda_pull_array_async(l.output_gpu, l.output, l.batch*l.outputs); - CHECK_CUDA(cudaPeekAtLastError()); - return; - } - - float *in_cpu = (float *)xcalloc(l.batch*l.inputs, sizeof(float)); - cuda_pull_array(l.output_gpu, l.output, l.batch*l.outputs); - memcpy(in_cpu, l.output, l.batch*l.outputs*sizeof(float)); - float *truth_cpu = 0; - if (state.truth) { - int num_truth = l.batch*l.truths; - truth_cpu = (float *)xcalloc(num_truth, sizeof(float)); - cuda_pull_array(state.truth, truth_cpu, num_truth); - } - network_state 
cpu_state = state; - cpu_state.net = state.net; - cpu_state.index = state.index; - cpu_state.train = state.train; - cpu_state.truth = truth_cpu; - cpu_state.input = in_cpu; - forward_yolo_layer(l, cpu_state); - //forward_yolo_layer(l, state); - cuda_push_array(l.delta_gpu, l.delta, l.batch*l.outputs); - free(in_cpu); - if (cpu_state.truth) free(cpu_state.truth); -} - -void backward_yolo_layer_gpu(const layer l, network_state state) -{ - axpy_ongpu(l.batch*l.inputs, state.net.loss_scale * l.delta_normalizer, l.delta_gpu, 1, state.delta, 1); -} -#endif diff --git a/src/Detector/darknet/src/yolo_layer.h b/src/Detector/darknet/src/yolo_layer.h deleted file mode 100644 index 08883b0f2..000000000 --- a/src/Detector/darknet/src/yolo_layer.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef YOLO_LAYER_H -#define YOLO_LAYER_H - -//#include "darknet.h" -#include "layer.h" -#include "network.h" - -#ifdef __cplusplus -extern "C" { -#endif -layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes, int max_boxes); -void forward_yolo_layer(const layer l, network_state state); -void backward_yolo_layer(const layer l, network_state state); -void resize_yolo_layer(layer *l, int w, int h); -int yolo_num_detections(layer l, float thresh); -int yolo_num_detections_batch(layer l, float thresh, int batch); -int get_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, int relative, detection *dets, int letter); -int get_yolo_detections_batch(layer l, int w, int h, int netw, int neth, float thresh, int *map, int relative, detection *dets, int letter, int batch); -void correct_yolo_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative, int letter); - -#ifdef GPU -void forward_yolo_layer_gpu(const layer l, network_state state); -void backward_yolo_layer_gpu(const layer l, network_state state); -#endif - -#ifdef __cplusplus -} -#endif -#endif diff --git a/src/Detector/darknet/src/yolo_v2_class.cpp 
b/src/Detector/darknet/src/yolo_v2_class.cpp deleted file mode 100644 index fbe7cd3f2..000000000 --- a/src/Detector/darknet/src/yolo_v2_class.cpp +++ /dev/null @@ -1,481 +0,0 @@ -#include "darknet.h" -#include "yolo_v2_class.hpp" - -#include "network.h" - -extern "C" { -#include "detection_layer.h" -#include "region_layer.h" -#include "cost_layer.h" -#include "utils.h" -#include "parser.h" -#include "box.h" -#include "image.h" -#include "demo.h" -#include "option_list.h" -#include "stb_image.h" -} -//#include - -#include -#include -#include -#include - -#define NFRAMES 3 - -//static Detector* detector = NULL; -static std::unique_ptr detector; - -int init(const char *configurationFilename, const char *weightsFilename, int gpu, int batch_size) -{ - detector.reset(new Detector(configurationFilename, weightsFilename, gpu, batch_size)); - return 1; -} - -int detect_image(const char *filename, bbox_t_container &container) -{ - std::vector detection = detector->detect(filename); - for (size_t i = 0; i < detection.size() && i < C_SHARP_MAX_OBJECTS; ++i) - container.candidates[i] = detection[i]; - return detection.size(); -} - -int detect_mat(const uint8_t* data, const size_t data_length, bbox_t_container &container) { -#ifdef OPENCV - std::vector vdata(data, data + data_length); - cv::Mat image = imdecode(cv::Mat(vdata), 1); - - std::vector detection = detector->detect(image); - for (size_t i = 0; i < detection.size() && i < C_SHARP_MAX_OBJECTS; ++i) - container.candidates[i] = detection[i]; - return detection.size(); -#else - return -1; -#endif // OPENCV -} - -int dispose() { - //if (detector != NULL) delete detector; - //detector = NULL; - detector.reset(); - return 1; -} - -int get_device_count() { -#ifdef GPU - int count = 0; - cudaGetDeviceCount(&count); - return count; -#else - return -1; -#endif // GPU -} - -bool built_with_cuda(){ -#ifdef GPU - return true; -#else - return false; -#endif -} - -bool built_with_cudnn(){ -#ifdef CUDNN - return true; -#else - return 
false; -#endif -} - -bool built_with_opencv(){ -#ifdef OPENCV - return true; -#else - return false; -#endif -} - - -int get_device_name(int gpu, char* deviceName) { -#ifdef GPU - cudaDeviceProp prop; - cudaGetDeviceProperties(&prop, gpu); - std::string result = prop.name; - std::copy(result.begin(), result.end(), deviceName); - return 1; -#else - return -1; -#endif // GPU -} - -#ifdef GPU -void check_cuda(cudaError_t status) { - if (status != cudaSuccess) { - const char *s = cudaGetErrorString(status); - printf("CUDA Error Prev: %s\n", s); - } -} -#endif - -struct detector_gpu_t { - network net; - image images[NFRAMES]; - float *avg; - float* predictions[NFRAMES]; - int demo_index; - unsigned int *track_id; -}; - -LIB_API Detector::Detector(std::string cfg_filename, std::string weight_filename, int gpu_id, int batch_size) - : cur_gpu_id(gpu_id) -{ - wait_stream = 0; -#ifdef GPU - int old_gpu_index; - check_cuda( cudaGetDevice(&old_gpu_index) ); -#endif - - detector_gpu_ptr = std::make_shared(); - detector_gpu_t &detector_gpu = *static_cast(detector_gpu_ptr.get()); - -#ifdef GPU - //check_cuda( cudaSetDevice(cur_gpu_id) ); - cuda_set_device(cur_gpu_id); - printf(" Used GPU %d \n", cur_gpu_id); -#endif - network &net = detector_gpu.net; - net.gpu_index = cur_gpu_id; - //gpu_index = i; - - _cfg_filename = cfg_filename; - _weight_filename = weight_filename; - - char *cfgfile = const_cast(_cfg_filename.c_str()); - char *weightfile = const_cast(_weight_filename.c_str()); - - net = parse_network_cfg_custom(cfgfile, batch_size, batch_size); - if (weightfile) { - load_weights(&net, weightfile); - } - set_batch_network(&net, batch_size); - net.gpu_index = cur_gpu_id; - fuse_conv_batchnorm(net); - - layer l = net.layers[net.n - 1]; - int j; - - detector_gpu.avg = (float *)calloc(l.outputs, sizeof(float)); - for (j = 0; j < NFRAMES; ++j) detector_gpu.predictions[j] = (float*)calloc(l.outputs, sizeof(float)); - for (j = 0; j < NFRAMES; ++j) detector_gpu.images[j] = 
make_image(1, 1, 3); - - detector_gpu.track_id = (unsigned int *)calloc(l.classes, sizeof(unsigned int)); - for (j = 0; j < l.classes; ++j) detector_gpu.track_id[j] = 1; - -#ifdef GPU - check_cuda( cudaSetDevice(old_gpu_index) ); -#endif -} - - -LIB_API Detector::~Detector() -{ - detector_gpu_t &detector_gpu = *static_cast(detector_gpu_ptr.get()); - //layer l = detector_gpu.net.layers[detector_gpu.net.n - 1]; - - free(detector_gpu.track_id); - - free(detector_gpu.avg); - for (int j = 0; j < NFRAMES; ++j) free(detector_gpu.predictions[j]); - for (int j = 0; j < NFRAMES; ++j) if (detector_gpu.images[j].data) free(detector_gpu.images[j].data); - -#ifdef GPU - int old_gpu_index; - cudaGetDevice(&old_gpu_index); - cuda_set_device(detector_gpu.net.gpu_index); -#endif - - free_network(detector_gpu.net); - -#ifdef GPU - cudaSetDevice(old_gpu_index); -#endif -} - -LIB_API int Detector::get_net_width() const { - detector_gpu_t &detector_gpu = *static_cast(detector_gpu_ptr.get()); - return detector_gpu.net.w; -} -LIB_API int Detector::get_net_height() const { - detector_gpu_t &detector_gpu = *static_cast(detector_gpu_ptr.get()); - return detector_gpu.net.h; -} -LIB_API int Detector::get_net_color_depth() const { - detector_gpu_t &detector_gpu = *static_cast(detector_gpu_ptr.get()); - return detector_gpu.net.c; -} - - -LIB_API std::vector Detector::detect(std::string image_filename, float thresh, bool use_mean) -{ - std::shared_ptr image_ptr(new image_t, [](image_t *img) { if (img->data) free(img->data); delete img; }); - *image_ptr = load_image(image_filename); - return detect(*image_ptr, thresh, use_mean); -} - -static image load_image_stb(char *filename, int channels) -{ - int w, h, c; - unsigned char *data = stbi_load(filename, &w, &h, &c, channels); - if (!data) - throw std::runtime_error("file not found"); - if (channels) c = channels; - int i, j, k; - image im = make_image(w, h, c); - for (k = 0; k < c; ++k) { - for (j = 0; j < h; ++j) { - for (i = 0; i < w; ++i) { - 
int dst_index = i + w*j + w*h*k; - int src_index = k + c*i + c*w*j; - im.data[dst_index] = (float)data[src_index] / 255.; - } - } - } - free(data); - return im; -} - -LIB_API image_t Detector::load_image(std::string image_filename) -{ - char *input = const_cast(image_filename.c_str()); - image im = load_image_stb(input, 3); - - image_t img; - img.c = im.c; - img.data = im.data; - img.h = im.h; - img.w = im.w; - - return img; -} - - -LIB_API void Detector::free_image(image_t m) -{ - if (m.data) { - free(m.data); - } -} - -LIB_API std::vector Detector::detect(image_t img, float thresh, bool use_mean) -{ - detector_gpu_t &detector_gpu = *static_cast(detector_gpu_ptr.get()); - network &net = detector_gpu.net; -#ifdef GPU - int old_gpu_index; - cudaGetDevice(&old_gpu_index); - if(cur_gpu_id != old_gpu_index) - cudaSetDevice(net.gpu_index); - - net.wait_stream = wait_stream; // 1 - wait CUDA-stream, 0 - not to wait -#endif - //std::cout << "net.gpu_index = " << net.gpu_index << std::endl; - - layer l = net.layers[net.n - 1]; - - float *X = img.data; - - float *prediction = network_predict(net, X); - - if (use_mean) { - memcpy(detector_gpu.predictions[detector_gpu.demo_index], prediction, l.outputs * sizeof(float)); - mean_arrays(detector_gpu.predictions, NFRAMES, l.outputs, detector_gpu.avg); - l.output = detector_gpu.avg; - detector_gpu.demo_index = (detector_gpu.demo_index + 1) % NFRAMES; - } - //get_region_boxes(l, 1, 1, thresh, detector_gpu.probs, detector_gpu.boxes, 0, 0); - //if (nms) do_nms_sort(detector_gpu.boxes, detector_gpu.probs, l.w*l.h*l.n, l.classes, nms); - - int nboxes = 0; - int letterbox = 0; - float hier_thresh = 0.5; - detection *dets = get_network_boxes(&net, img.w, img.h, thresh, hier_thresh, 0, 1, &nboxes, letterbox); - if (nms) do_nms_sort(dets, nboxes, l.classes, nms); - - std::vector bbox_vec; - - for (int i = 0; i < nboxes; ++i) { - box b = dets[i].bbox; - int const obj_id = max_index(dets[i].prob, l.classes); - float const prob = 
dets[i].prob[obj_id]; - - if (prob > thresh) - { - bbox_t bbox; - bbox.x = std::max((double)0, (b.x - b.w / 2.)* img.w); - bbox.y = std::max((double)0, (b.y - b.h / 2.)* img.h); - bbox.w = b.w* img.w; - bbox.h = b.h* img.h; - bbox.obj_id = obj_id; - bbox.prob = prob; - bbox.track_id = 0; - bbox.frames_counter = 0; - bbox.x_3d = NAN; - bbox.y_3d = NAN; - bbox.z_3d = NAN; - - bbox_vec.push_back(bbox); - } - } - - free_detections(dets, nboxes); - -#ifdef GPU - if (cur_gpu_id != old_gpu_index) - cudaSetDevice(old_gpu_index); -#endif - - return bbox_vec; -} - -LIB_API std::vector> Detector::detectBatch(image_t img, int batch_size, int width, int height, float thresh) -{ - detector_gpu_t &detector_gpu = *static_cast(detector_gpu_ptr.get()); - network &net = detector_gpu.net; -#ifdef GPU - int old_gpu_index; - cudaGetDevice(&old_gpu_index); - if(cur_gpu_id != old_gpu_index) - cudaSetDevice(net.gpu_index); - - net.wait_stream = wait_stream; // 1 - wait CUDA-stream, 0 - not to wait -#endif - //std::cout << "net.gpu_index = " << net.gpu_index << std::endl; - - layer l = net.layers[net.n - 1]; - - float hier_thresh = 0.5; - image in_img; - in_img.c = img.c; - in_img.w = img.w; - in_img.h = img.h; - in_img.data = img.data; - det_num_pair* prediction = network_predict_batch(&net, in_img, batch_size, width, height, thresh, hier_thresh, 0, 0, 0); - - std::vector> bbox_vec(batch_size); - - for (int bi = 0; bi < batch_size; ++bi) - { - auto dets = prediction[bi].dets; - for (int i = 0; i < prediction[bi].num; ++i) - { - box b = dets[i].bbox; - int const obj_id = max_index(dets[i].prob, l.classes); - float const prob = dets[i].prob[obj_id]; - - if (prob > thresh) - { - bbox_t bbox; - bbox.x = std::max((double)0, (b.x - b.w / 2.)); - bbox.y = std::max((double)0, (b.y - b.h / 2.)); - bbox.w = b.w; - bbox.h = b.h; - bbox.obj_id = obj_id; - bbox.prob = prob; - bbox.track_id = 0; - bbox.frames_counter = 0; - bbox.x_3d = NAN; - bbox.y_3d = NAN; - bbox.z_3d = NAN; - - 
bbox_vec[bi].push_back(bbox); - } - } - } - free_batch_detections(prediction, batch_size); - -#ifdef GPU - if (cur_gpu_id != old_gpu_index) - cudaSetDevice(old_gpu_index); -#endif - - return bbox_vec; -} - -LIB_API std::vector Detector::tracking_id(std::vector cur_bbox_vec, bool const change_history, - int const frames_story, int const max_dist) -{ - detector_gpu_t &det_gpu = *static_cast(detector_gpu_ptr.get()); - - bool prev_track_id_present = false; - for (auto &i : prev_bbox_vec_deque) - if (i.size() > 0) prev_track_id_present = true; - - if (!prev_track_id_present) { - for (size_t i = 0; i < cur_bbox_vec.size(); ++i) - cur_bbox_vec[i].track_id = det_gpu.track_id[cur_bbox_vec[i].obj_id]++; - prev_bbox_vec_deque.push_front(cur_bbox_vec); - if (prev_bbox_vec_deque.size() > frames_story) prev_bbox_vec_deque.pop_back(); - return cur_bbox_vec; - } - - std::vector dist_vec(cur_bbox_vec.size(), std::numeric_limits::max()); - - for (auto &prev_bbox_vec : prev_bbox_vec_deque) { - for (auto &i : prev_bbox_vec) { - int cur_index = -1; - for (size_t m = 0; m < cur_bbox_vec.size(); ++m) { - bbox_t const& k = cur_bbox_vec[m]; - if (i.obj_id == k.obj_id) { - float center_x_diff = (float)(i.x + i.w/2) - (float)(k.x + k.w/2); - float center_y_diff = (float)(i.y + i.h/2) - (float)(k.y + k.h/2); - unsigned int cur_dist = sqrt(center_x_diff*center_x_diff + center_y_diff*center_y_diff); - if (cur_dist < max_dist && (k.track_id == 0 || dist_vec[m] > cur_dist)) { - dist_vec[m] = cur_dist; - cur_index = m; - } - } - } - - bool track_id_absent = !std::any_of(cur_bbox_vec.begin(), cur_bbox_vec.end(), - [&i](bbox_t const& b) { return b.track_id == i.track_id && b.obj_id == i.obj_id; }); - - if (cur_index >= 0 && track_id_absent){ - cur_bbox_vec[cur_index].track_id = i.track_id; - cur_bbox_vec[cur_index].w = (cur_bbox_vec[cur_index].w + i.w) / 2; - cur_bbox_vec[cur_index].h = (cur_bbox_vec[cur_index].h + i.h) / 2; - } - } - } - - for (size_t i = 0; i < cur_bbox_vec.size(); ++i) - if 
(cur_bbox_vec[i].track_id == 0) - cur_bbox_vec[i].track_id = det_gpu.track_id[cur_bbox_vec[i].obj_id]++; - - if (change_history) { - prev_bbox_vec_deque.push_front(cur_bbox_vec); - if (prev_bbox_vec_deque.size() > frames_story) prev_bbox_vec_deque.pop_back(); - } - - return cur_bbox_vec; -} - - -void *Detector::get_cuda_context() -{ -#ifdef GPU - int old_gpu_index; - cudaGetDevice(&old_gpu_index); - if (cur_gpu_id != old_gpu_index) - cudaSetDevice(cur_gpu_id); - - void *cuda_context = cuda_get_context(); - - if (cur_gpu_id != old_gpu_index) - cudaSetDevice(old_gpu_index); - - return cuda_context; -#else // GPU - return NULL; -#endif // GPU -} diff --git a/src/Detector/pedestrians/c4-pedestrian-detector.cpp b/src/Detector/pedestrians/c4-pedestrian-detector.cpp deleted file mode 100644 index cc4febaac..000000000 --- a/src/Detector/pedestrians/c4-pedestrian-detector.cpp +++ /dev/null @@ -1,291 +0,0 @@ -#include "c4-pedestrian-detector.h" - -/*****************************************/ -// Pedestrian_ICRA.cpp -/*****************************************/ - -// --------------------------------------------------------------------- -// Helper functions - -// compute the Sobel image "ct" from "original" -void ComputeCT(IntImage& original,IntImage& ct) -{ - ct.Create(original.nrow,original.ncol); - for(int i=2; i& result) -{ - std::ifstream in(modelfile); - if(in.good()==false) - { - std::cout<<"SVM model "<>buffer; - assert(buffer=="nr_feature"); - int num_dim = m; - in>>num_dim; - assert(num_dim>0 && num_dim==m); - std::getline(in,buffer); // end of line 4 - in>>buffer; - assert(buffer=="bias"); - int bias; - in>>bias; - std::getline(in,buffer); //end of line 5; - in>>buffer; - assert(buffer=="w"); - std::getline(in,buffer); //end of line 6 - result.Create(1,num_dim); - for(int i=0; i>result.buf[i]; - double rho = 0; - if(bias>=0) in>>rho; - in.close(); - return rho; -} - -// Load SVM models -- Histogram Intersectin Kernel SVM trained by libHIK -double 
UseSVM_CD_FastEvaluationStructure(const char* modelfile, const int m, const int upper_bound, Array2dC& result) -{ - - std::ifstream fs(modelfile, std::fstream::binary); - if( !fs.is_open() ) - { - std::cout << "SVM model " << modelfile << " can not be loaded." << std::endl; - exit(-1); - } - // Header - int rows, cols, type, channels; - fs.read((char*)&rows, sizeof(int)); // rows - fs.read((char*)&cols, sizeof(int)); // cols - fs.read((char*)&type, sizeof(int)); // type - fs.read((char*)&channels, sizeof(int)); // channels - - // Data - cv::Mat mat(rows, cols, type); - fs.read((char*)mat.data, CV_ELEM_SIZE(type) * static_cast(rows) * static_cast(cols)); - - int num_dim = m; - - result.Create(num_dim, upper_bound); - for(int i=0; i(i, j); - } - - return -0.00455891; -} - -// End of Helper functions -// --------------------------------------------------------------------- - -// --------------------------------------------------------------------- -// Functions that load the two classifiers -void LoadCascade(std::string cascade1, std::string cascade2, DetectionScanner& ds) -{ - std::vector types; - std::vector upper_bounds; - std::vector filenames; - - types.push_back(NodeDetector::CD_LIN); // first node - upper_bounds.push_back(100); - filenames.push_back(cascade1); - types.push_back(NodeDetector::CD_HIK); // second node - upper_bounds.push_back(353); - filenames.push_back(cascade2); - - ds.LoadDetector(types,upper_bounds,filenames); - // You can adjust these parameters for different speed, accuracy etc - ds.cascade->nodes[0]->thresh += 0.8; - ds.cascade->nodes[1]->thresh -= 0.095; -} - -void DetectionScanner::LoadDetector(std::vector& types,std::vector& upper_bounds,std::vector& filenames) -{ - size_t depth = types.size(); - assert(depth>0 && depth==upper_bounds.size() && depth==filenames.size()); - if(cascade) - delete cascade; - cascade = new CascadeDetector; - assert(xdiv>0 && ydiv>0); - for(size_t i=0; 
iAddNode(types[i],(xdiv-EXT)*(ydiv-EXT)*baseflength,upper_bounds[i],filenames[i].c_str()); - - hist.Create(1,baseflength*(xdiv-EXT)*(ydiv-EXT)); -} - -void NodeDetector::Load(const NodeType _type,const int _featurelength,const int _upper_bound,const int _index,const char* _filename) -{ - type = _type; - index = _index; - filename = _filename; - featurelength = _featurelength; - upper_bound = _upper_bound; - if(type==CD_LIN) - thresh = UseSVM_CD_FastEvaluationStructure(_filename,_featurelength,classifier); - else if(type==CD_HIK) - thresh = UseSVM_CD_FastEvaluationStructure(_filename,_featurelength,upper_bound,classifier); - - if(type==CD_LIN) type = LINEAR; - if(type==CD_HIK) type = HISTOGRAM; -} - -void CascadeDetector::AddNode(const NodeDetector::NodeType _type,const int _featurelength,const int _upper_bound,const char* _filename) -{ - if(length==size) - { - int newsize = size * 2; - NodeDetector** p = new NodeDetector*[newsize]; - assert(p!=NULL); - std::copy(nodes,nodes+size,p); - size = newsize; - delete[] nodes; - nodes = p; - } - nodes[length] = new NodeDetector(_type,_featurelength,_upper_bound,length,_filename); - length++; -} - -// End of functions that load the two classifiers -// --------------------------------------------------------------------- - -// --------------------------------------------------------------------- -// Detection functions - -// initialization -- compute the Census Tranform image for CENTRIST -void DetectionScanner::InitImage(IntImage& original) -{ - image = original; - image.Sobel(sobel,false,false); - ComputeCT(sobel,ct); -} - -// combine the (xdiv-1)*(ydiv-1) integral images into a single one -void DetectionScanner::InitIntegralImages(const int stepsize) -{ - if(cascade->nodes[0]->type!=NodeDetector::LINEAR) - return; // No need to prepare integral images - - const int hd = height/xdiv*2-2; - const int wd = width/ydiv*2-2; - scores.Create(ct.nrow,ct.ncol); - scores.Zero(cascade->nodes[0]->thresh/hd/wd); - double* linearweights 
= cascade->nodes[0]->classifier.buf; - for(int i=0; i& original,std::vector& results,const int stepsize) -{ - if(original.nrownodes[1]; - double** pc = node->classifier.p; - int oheight = original.nrow, owidth = original.ncol; - cv::Rect rect; - while(image.nrow>=height && image.ncol>=width) - { - InitIntegralImages(stepsize); - for(int i=2; i+heightthresh; - for(int k=0; kclassifier.nrow; k++) score += pc[k][hist.buf[k]]; - if(score>0) - { - rect.y = i * oheight / image.nrow; - rect.height = (oheight * height) / image.nrow + 1; - rect.x = j * owidth / image.ncol; - rect.width = (width * owidth) /image.ncol + 1; - results.push_back(rect); - } - } - } - ResizeImage(); - } - return 0; -} - -// End of Detection functions -// --------------------------------------------------------------------- diff --git a/src/Detector/pedestrians/c4-pedestrian-detector.h b/src/Detector/pedestrians/c4-pedestrian-detector.h deleted file mode 100644 index 713579781..000000000 --- a/src/Detector/pedestrians/c4-pedestrian-detector.h +++ /dev/null @@ -1,561 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#define USE_DOUBLE - -#ifdef USE_DOUBLE -typedef double REAL; -#else -typedef float REAL; -#endif - -template class Array2dC; - -template -class Array2d -{ -public: - int nrow; - int ncol; - T** p; -public: - Array2d():nrow(0),ncol(0),p(NULL) { } - Array2d(const int nrow,const int ncol):nrow(0),ncol(0),p(NULL) - { - Create(nrow,ncol); - } - Array2d(const Array2d& source); - virtual ~Array2d() - { - Clear(); - } - - Array2d& operator=(const Array2d& source); - void Create(const int _nrow,const int _ncol); - void Swap(Array2d& array2); - void Clear(); - void Zero(const T t = 0); -}; - -template -class Array2dC -{ -public: - int nrow; - int ncol; - T** p; - T* buf; -public: - Array2dC():nrow(0),ncol(0),p(NULL),buf(NULL) {} - Array2dC(const int nrow,const int ncol):nrow(0),ncol(0),p(NULL),buf(NULL) - { - Create(nrow,ncol); 
- } - Array2dC(const Array2dC& source); - virtual ~Array2dC() - { - Clear(); - } - - Array2dC& operator=(const Array2dC& source); - void Create(const int _nrow,const int _ncol); - void Swap(Array2dC& array2); - void Zero(const T t = 0); - void Clear(); -}; - -template -Array2d::Array2d(const Array2d& source):nrow(0),ncol(0),p(NULL) -{ - if(source.p!=NULL) - { - Create(source.nrow,source.ncol); - for(int i=0; i -Array2d& Array2d::operator=(const Array2d& source) -{ - if(source.p!=NULL) - { - Create(source.nrow,source.ncol); - for(int i=0; i -void Array2d::Create(const int _nrow,const int _ncol) -{ - assert(_nrow>0 && _ncol>0); - Clear(); - nrow = _nrow; - ncol = _ncol; - p = new T*[nrow]; - assert(p!=NULL); - for(int i=0; i -void Array2d::Swap(Array2d& array2) -{ - std::swap(nrow,array2.nrow); - std::swap(ncol,array2.ncol); - std::swap(p,array2.p); -} - -template -void Array2d::Zero(const T t) -{ - if(nrow>0) - { - for(int i=0; i -void Array2d::Clear() -{ - for(int i=0; i -Array2dC::Array2dC(const Array2dC& source):nrow(0),ncol(0),p(NULL),buf(NULL) -{ - if(source.buf!=NULL) - { - Create(source.nrow,source.ncol); - std::copy(source.buf,source.buf+nrow*ncol,buf); - } -} - -template -Array2dC& Array2dC::operator=(const Array2dC& source) -{ - if(source.buf!=NULL) - { - Create(source.nrow,source.ncol); - std::copy(source.buf,source.buf+nrow*ncol,buf); - } - else - Clear(); - return *this; -} - -template -void Array2dC::Create(const int _nrow,const int _ncol) -{ - assert(_nrow>0 && _ncol>0); - if(nrow==_nrow && ncol==_ncol) return; - Clear(); - nrow = _nrow; - ncol = _ncol; - buf = new T[nrow*ncol]; - assert(buf!=NULL); - p = new T*[nrow]; - assert(p!=NULL); - for(int i=0; i -void Array2dC::Swap(Array2dC& array2) -{ - std::swap(nrow,array2.nrow); - std::swap(ncol,array2.ncol); - std::swap(p,array2.p); - std::swap(buf,array2.buf); -} - -template -void Array2dC::Zero(const T t) -{ - if(nrow>0) std::fill(buf,buf+nrow*ncol,t); -} - -template -void Array2dC::Clear() -{ - 
delete[] buf; - buf = NULL; - delete[] p; - p = NULL; - nrow = ncol = 0; -} - - -/*****************************************/ -// IntImage.h -/*****************************************/ - -template -class IntImage:public Array2dC -{ -private: - IntImage(const IntImage &source) { } // prohibit copy constructor - -public: - IntImage():variance(0.0),label(-1) { } - virtual ~IntImage() - { - Clear(); - } - - virtual void Clear(void); - inline void SetSize(const int h, const int w); - bool Load(cv::Mat img); - void Save(const std::string& filename) const; - void Swap(IntImage& image2); - - void CalcIntegralImageInPlace(void); - void Resize(IntImage &result,const REAL ratio) const; - void Resize(IntImage& result,const int height,const int width) const; - - IntImage& operator=(const IntImage& source); - - void Sobel(IntImage& result,const bool useSqrt,const bool normalize); -public: - using Array2dC::nrow; - using Array2dC::ncol; - using Array2dC::buf; - using Array2dC::p; - REAL variance; - int label; -}; - -template -void IntImage::Clear(void) -{ - Array2dC::Clear(); - variance = 0.0; - label = -1; -} - -template -bool IntImage::Load(cv::Mat img) -{ - if (img.empty()) return false; - - SetSize(img.rows, img.cols); - for(int i=0,ih=img.rows,iw=img.cols; i(img.data+img.step*i); - for(int j=0; j -void IntImage::Save(const std::string& filename) const -{ -#if (CV_VERSION_MAJOR < 4) - IplImage* img; - - img = cvCreateImage(cvSize(ncol,nrow),IPL_DEPTH_8U,1); - for(int i=0,ih=img->height,iw=img->width; i(img->imageData+img->widthStep*i); - for(int j=0; j -void IntImage::SetSize(const int h,const int w) -{ - if((h == nrow) && (w == ncol)) return; - Clear(); - Array2dC::Create(h,w); -} - -template -IntImage& IntImage::operator=(const IntImage& source) -{ - if(&source==this) return *this; - SetSize(source.nrow,source.ncol); - std::copy(source.buf,source.buf+nrow*ncol,buf); - label = source.label; - variance = source.variance; - return *this; -} - -template -void 
IntImage::Resize(IntImage &result,const REAL ratio) const -{ - Resize(result,int(nrow*ratio),int(ncol*ratio)); -} - -template -void IntImage::Resize(IntImage& result,const int height,const int width) const -{ - assert(height>0 && width>0); - result.SetSize(height,width); - REAL ixratio = nrow*1.0/height, iyratio = ncol*1.0/width; - - REAL* p_y = new REAL[result.ncol]; - assert(p_y!=NULL); - int* p_y0 = new int[result.ncol]; - assert(p_y0!=NULL); - for(int i=0; i -void IntImage::CalcIntegralImageInPlace(void) -// We pad a zero column and a zero row, so 24*24 image will be 25*25 in size -// if the input image is not padded, the results on 1st row will be problematic -{ - for(int i=1; i -void IntImage::Swap(IntImage& image2) -{ - Array2dC::Swap(image2); - std::swap(variance,image2.variance); - std::swap(label,image2.label); -} - -template -void IntImage::Sobel(IntImage& result,const bool useSqrt,const bool normalize) -{ - // compute the Sobel gradient. For now, we just use the very inefficient way. 
Optimization can be done later -// if useSqrt = true, we compute the real Sobel gradient; otherwise, the square of it -// if normalize = true, the numbers are normalized to be in 0..255 - result.Create(nrow,ncol); - for(int i=0; imaxv) - maxv = result.p[i][j]; - } - } - for(int i=0; i classifier; - double thresh; - int featurelength; - int upper_bound; - int index; - std::string filename; -public: - NodeDetector(const NodeType _type,const int _featurelength,const int _upper_bound,const int _index,const char* _filename) - { - Load(_type,_featurelength,_upper_bound,_index,_filename); - minvalue = DBL_MAX; - maxvalue = -minvalue; - } - ~NodeDetector() = default; - - void Load(const NodeType _type,const int _featurelength,const int _upper_bound,const int _index,const char* _filename); - bool Classify(int* f); -private: - double minvalue; - double maxvalue; -public: - void SetValues(const double v) - { - if(v>maxvalue) maxvalue = v; - if(v& types,std::vector& upper_bounds,std::vector& filenames); - - int Scan(IntImage& original,std::vector& results,const int stepsize,const int round,std::ofstream* out,const int upper_bound); - int FastScan(IntImage& original,std::vector& results,const int stepsize); - int FeatureLength() const - { - return (xdiv-1)*(ydiv-1)*baseflength; - } - - - int height; - int width; - int xdiv; - int ydiv; - int baseflength; - double ratio; - static const int EXT = 1; - - CascadeDetector* cascade; - -private: - IntImage* integrals; - IntImage image,sobel; - IntImage ct; - Array2dC hist; - IntImage scores; - - void InitImage(IntImage& original); - void InitIntegralImages(const int stepsize); - void ResizeImage(); -}; - -void LoadCascade(std::string cascade1, std::string cascade2, DetectionScanner& ds); diff --git a/src/Detector/tensorrt_yolo/API.h b/src/Detector/tensorrt_onnx/API.h similarity index 100% rename from src/Detector/tensorrt_yolo/API.h rename to src/Detector/tensorrt_onnx/API.h diff --git a/src/Detector/tensorrt_yolo/CMakeLists.txt 
b/src/Detector/tensorrt_onnx/CMakeLists.txt similarity index 65% rename from src/Detector/tensorrt_yolo/CMakeLists.txt rename to src/Detector/tensorrt_onnx/CMakeLists.txt index c3b4ecec2..257dd3e28 100644 --- a/src/Detector/tensorrt_yolo/CMakeLists.txt +++ b/src/Detector/tensorrt_onnx/CMakeLists.txt @@ -30,35 +30,46 @@ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Werror ${CUDA_WARNING} -restrict") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_61,code=sm_61") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_52,code=sm_52") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_50,code=sm_50") -set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_35,code=sm_35") -set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_30,code=sm_30") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_75,code=sm_75") -set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_75,code=compute_75") +set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_70,code=sm_70") +set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_72,code=sm_72") +set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_80,code=sm_80") +set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_86,code=sm_86") +set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_86,code=sm_89") +set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_89,code=sm_90") +set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_90,code=compute_75") SET(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake) find_package(CUDNN REQUIRED) find_package(TensorRT REQUIRED) +message("TensorRT version: " ${TensorRT_VERSION}) + include_directories(${OpenCV_INCLUDE_DIRS}) include_directories(${CUDA_INCLUDE_DIRS}) include_directories(${CUDNN_INCLUDE_DIR}) include_directories(${TensorRT_INCLUDE_DIRS}) +include_directories(${PROJECT_SOURCE_DIR}/../../mtracking) -file(GLOB TENSORRT_SOURCE_FILES 
*.cpp) -file(GLOB TENSORRT_HEADER_FILES *.h) +file(GLOB TENSORRT_SOURCE_FILES *.cpp common/*.cpp) +file(GLOB TENSORRT_HEADER_FILES *.h* common/*.h*) file(GLOB TENSORRT_CUDA_FILES *.cu) cuda_add_library(${libname_rt} SHARED ${TENSORRT_CUDA_FILES} ${TENSORRT_SOURCE_FILES} - ${TENSORRT_HEADER_FILES} -) + ${TENSORRT_HEADER_FILES}) #message("TensorRT OpenCV libraries:") #message("${OpenCV_LIBS}") #message(${OpenCV_DIR}) +set(TensorRT_LIBRARIES ${TensorRT_LIBRARIES} ${TensorRT_nvinfer_LIBRARY} ${TensorRT_nvinfer_plugin_LIBRARY} ${TensorRT_nvonnxparser_LIBRARY}) + +message("TensorRT_LIBRARIES: ${TensorRT_LIBRARIES}") + + set(TENSORRT_LIBS ${OpenCV_LIBS} #${CUDA_LIBRARIES} @@ -68,11 +79,21 @@ set(TENSORRT_LIBS ${CUDA_curand_LIBRARY} ${CUDNN_LIBRARY} # ${LIB_PTHREAD} - ${TensorRT_LIBRARIES} -) + ${TensorRT_LIBRARIES}) if (CMAKE_COMPILER_IS_GNUCXX) - set(TENSORRT_LIBS ${TENSORRT_LIBS} stdc++fs nvinfer_plugin) + set(TENSORRT_LIBS ${TENSORRT_LIBS} stdc++fs nvinfer_plugin nvonnxparser) endif(CMAKE_COMPILER_IS_GNUCXX) +message("TENSORRT_LIBS: ${TENSORRT_LIBS}") + target_link_libraries(${libname_rt} ${TENSORRT_LIBS}) + +install(TARGETS ${libname_rt} + EXPORT MTTrackingExports + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib + RUNTIME DESTINATION bin + PUBLIC_HEADER DESTINATION include/${PROJECT_NAME}) + +set_target_properties(${PROJECT_NAME} PROPERTIES FOLDER "libs") diff --git a/src/Detector/tensorrt_onnx/DFINE_bb.hpp b/src/Detector/tensorrt_onnx/DFINE_bb.hpp new file mode 100644 index 000000000..a75bae2b9 --- /dev/null +++ b/src/Detector/tensorrt_onnx/DFINE_bb.hpp @@ -0,0 +1,121 @@ +#pragma once + +#include "YoloONNX.hpp" + +#include "nms.h" + +/// +/// \brief The DFINE_bb_onnx class +/// +class DFINE_bb_onnx : public YoloONNX +{ +public: + DFINE_bb_onnx(std::vector& inputTensorNames, std::vector& outputTensorNames) + { + inputTensorNames.push_back("images"); + inputTensorNames.push_back("orig_target_sizes"); + outputTensorNames.push_back("scores"); + 
outputTensorNames.push_back("labels"); + outputTensorNames.push_back("boxes"); + } + +protected: + /// + /// \brief GetResult + /// \param output + /// \return + /// + std::vector GetResult(size_t imgIdx, int /*keep_topk*/, const std::vector& outputs, cv::Size frameSize) + { + std::vector tmpBoxes; + + //0: name: images, size : 1x3x640x640 + //1: name: orig_target_sizes, size : 1x2 + //2: name: labels, size : 1x300 + //3: name: boxes, size : 1x300x4 + //4: name: scores, size : 1x300 + + const float fw = static_cast(frameSize.width) / static_cast(m_resizedROI.width); + const float fh = static_cast(frameSize.height) / static_cast(m_resizedROI.height); + + //std::cout << "m_resizedROI: " << m_resizedROI << ", frameSize: " << frameSize << ", fw_h: " << cv::Size2f(fw, fh) << ", m_inputDims: " << cv::Point3i(m_inputDims.d[1], m_inputDims.d[2], m_inputDims.d[3]) << std::endl; + + auto labels = (const int64_t*)outputs[1]; + auto boxes = outputs[2]; + auto scores = outputs[0]; + +#if 0 + std::cout << "scores mem:\n"; + for (size_t ii = 0; ii < 15; ++ii) + { + std::cout << ii << ": "; + for (size_t jj = 0; jj < 20; ++jj) + { + std::cout << scores[ii * 20 + jj] << " "; + } + std::cout << ";" << std::endl; + } + std::cout << std::endl; + + std::cout << "labels mem:\n"; + for (size_t ii = 0; ii < 15; ++ii) + { + std::cout << ii << ": "; + for (size_t jj = 0; jj < 20; ++jj) + { + std::cout << labels[ii * 20 + jj] << " "; + } + std::cout << ";" << std::endl; + } + std::cout << std::endl; + + std::cout << "boxes mem:\n"; + for (size_t ii = 0; ii < 15; ++ii) + { + std::cout << ii << ": "; + for (size_t jj = 0; jj < 20; ++jj) + { + std::cout << boxes[ii * 20 + jj] << " "; + } + std::cout << ";" << std::endl; + } + std::cout << std::endl; + + std::cout << "m_outpuDims[0].d[1] = " << m_outpuDims[0].d[1] << std::endl; +#endif + + for (size_t i = 0; i < static_cast(m_outpuDims[0].d[1]); ++i) + { + float classConf = scores[i]; + int64_t classId = labels[i]; + + //if (classId > 0) + // 
--classId; + + if (classConf >= m_params.m_confThreshold) + { + auto ind = i * m_outpuDims[1].d[2]; + float x = fw * (boxes[ind + 0] - m_resizedROI.x); + float y = fh * (boxes[ind + 1] - m_resizedROI.y); + float width = fw * (boxes[ind + 2] - boxes[ind + 0]); + float height = fh * (boxes[ind + 3] - boxes[ind + 1]); + + //std::cout << "ind = " << ind << ", boxes[0] = " << boxes[ind + 0] << ", boxes[1] = " << boxes[ind + 1] << ", boxes[2] = " << boxes[ind + 2] << ", boxes[3] = " << boxes[ind + 3] << std::endl; + //std::cout << "ind = " << ind << ", x = " << x << ", y = " << y << ", width = " << width << ", height = " << height << std::endl; + + tmpBoxes.emplace_back(classId, classConf, cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height))); + } + } + + std::vector resBoxes; + resBoxes.reserve(tmpBoxes.size()); + + nms3(tmpBoxes, resBoxes, static_cast(0.3), + [](const tensor_rt::Result& reg) { return reg.m_brect; }, + [](const tensor_rt::Result& reg) { return reg.m_prob; }, + [](const tensor_rt::Result& reg) { return reg.m_id; }, + 0, static_cast(0)); + + return resBoxes; + } +}; diff --git a/src/Detector/tensorrt_onnx/DFINE_is.hpp b/src/Detector/tensorrt_onnx/DFINE_is.hpp new file mode 100644 index 000000000..84c46ed9f --- /dev/null +++ b/src/Detector/tensorrt_onnx/DFINE_is.hpp @@ -0,0 +1,194 @@ +#pragma once + +#include "YoloONNX.hpp" + +/// +/// \brief The DFINE_is_onnx class +/// +class DFINE_is_onnx : public YoloONNX +{ +public: + DFINE_is_onnx(std::vector& inputTensorNames, std::vector& outputTensorNames) + { + inputTensorNames.push_back("input"); + outputTensorNames.push_back("logits"); + outputTensorNames.push_back("boxes"); + outputTensorNames.push_back("mask_probs"); + } + +protected: + /// + /// \brief GetResult + /// \param output + /// \return + /// + std::vector GetResult(size_t imgIdx, int /*keep_topk*/, const std::vector& outputs, cv::Size frameSize) + { + std::vector resBoxes; + + //0: name: input, size: 1x3x640x640 + //1: name: logits, 
size: 1x300x80 + //2: name: boxes, size: 1x300x4 + //3: name: mask_probs, size: 1x300x160x160 + + const float fw = static_cast(frameSize.width) / static_cast(m_resizedROI.width); + const float fh = static_cast(frameSize.height) / static_cast(m_resizedROI.height); + + cv::Size inputSize(static_cast(m_inputDims[0].d[3]), static_cast(m_inputDims[0].d[2])); + cv::Size2f inputSizef(static_cast(inputSize.width), static_cast(inputSize.height)); + + //std::cout << "m_resizedROI: " << m_resizedROI << ", frameSize: " << frameSize << ", fw_h: " << cv::Size2f(fw, fh) << ", m_inputDims: " << cv::Point3i(m_inputDims.d[1], m_inputDims.d[2], m_inputDims.d[3]) << std::endl; + + int labelsInd = 0; + int detsInd = 1; + int segInd = 2; + + auto dets = outputs[detsInd]; + auto labels = outputs[labelsInd]; + + auto masks = outputs[segInd]; + + size_t ncInd = 2; + size_t lenInd = 1; + + + size_t nc = m_outpuDims[labelsInd].d[ncInd]; + size_t len = static_cast(m_outpuDims[detsInd].d[lenInd]) / m_params.m_explicitBatchSize; + auto volume0 = len * m_outpuDims[detsInd].d[ncInd]; // Volume(m_outpuDims[0]); + dets += volume0 * imgIdx; + auto volume1 = len * m_outpuDims[labelsInd].d[ncInd]; // Volume(m_outpuDims[0]); + labels += volume1 * imgIdx; + + int segChannels = static_cast(m_outpuDims[segInd].d[1]); + int segWidth = static_cast(m_outpuDims[segInd].d[2]); + int segHeight = static_cast(m_outpuDims[segInd].d[3]); + masks += imgIdx * segChannels * segWidth * segHeight; + + cv::Mat binaryMask8U(segHeight, segWidth, CV_8UC1); + + //std::cout << "len = " << len << ", nc = " << nc << ", m_params.confThreshold = " << m_params.m_confThreshold << ", volume0 = " << volume0 << ", volume1 = " << volume1 << std::endl; + + auto L2Conf = [](float v) + { + return 1.f / (1.f + std::exp(-v)); + }; + + for (size_t i = 0; i < len; ++i) + { + float classConf = L2Conf(labels[0]); + size_t classId = 0; + for (size_t cli = 1; cli < nc; ++cli) + { + auto conf = L2Conf(labels[cli]); + if (classConf < conf) + { + 
classConf = conf; + classId = cli; + } + } + + if (classConf >= m_params.m_confThreshold) + { + float d0 = dets[0]; + float d1 = dets[1]; + float d2 = dets[2]; + float d3 = dets[3]; + + float x = fw * (inputSizef.width * (d0 - d2 / 2.f) - m_resizedROI.x); + float y = fh * (inputSizef.height * (d1 - d3 / 2.f) - m_resizedROI.y); + float width = fw * inputSizef.width * d2; + float height = fh * inputSizef.height * d3; + + //if (i == 0) + //{ + // std::cout << i << ": classConf = " << classConf << ", classId = " << classId << " (" << labels[classId] << "), rect = " << cv::Rect2f(x, y, width, height) << std::endl; + // std::cout << "dets = " << d0 << ", " << d1 << ", " << d2 << ", " << d3 << std::endl; + //} + resBoxes.emplace_back(classId, classConf, cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height))); + + double maskThreshold = 0.1; + for (int row = 0; row < segHeight; ++row) + { + const float* maskPtr = masks + row * segWidth; + uchar* binMaskPtr = binaryMask8U.ptr(row); + + for (int col = 0; col < segWidth; ++col) + { + binMaskPtr[col] = (maskPtr[col] > maskThreshold) ? 
255 : 0; + } + } + + tensor_rt::Result& resObj = resBoxes.back(); + + cv::Rect smallRect; + smallRect.x = cvRound(segHeight * (d0 - d2 / 2.f)); + smallRect.y = cvRound(segHeight * (d1 - d3 / 2.f)); + smallRect.width = cvRound(segHeight * d2); + smallRect.height = cvRound(segHeight * d3); + smallRect = Clamp(smallRect, cv::Size(segWidth, segHeight)); + + if (smallRect.area() > 0) + { + cv::resize(binaryMask8U(smallRect), resObj.m_boxMask, resObj.m_brect.size(), 0, 0, cv::INTER_NEAREST); + +#if 0 + static int globalObjInd = 0; + SaveMat(mask, std::to_string(globalObjInd) + "_mask", ".png", "tmp", true); + SaveMat(binaryMask, std::to_string(globalObjInd) + "_bin_mask", ".png", "tmp", true); + SaveMat(binaryMask8U, std::to_string(globalObjInd) + "_bin_mask_8u", ".png", "tmp", true); + SaveMat(resObj.m_boxMask, std::to_string(globalObjInd++) + "_obj_mask", ".png", "tmp", true); + std::cout << "inputSize: " << inputSize << ", localRect: " << localRect << std::endl; +#endif + +#if 0 + std::vector> contours; +#if ((CV_VERSION_MAJOR > 4) || ((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 9))) + cv::findContoursLinkRuns(resObj.m_boxMask, contours); +#else + cv::findContours(resObj.m_boxMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); +#endif + for (const auto& contour : contours) + { + cv::Rect br = cv::boundingRect(contour); + + if (br.width >= 4 && + br.height >= 4) + { + int dx = resObj.m_brect.x; + int dy = resObj.m_brect.y; + + cv::RotatedRect rr = (contour.size() < 5) ? 
cv::minAreaRect(contour) : cv::fitEllipse(contour); + rr.center.x = rr.center.x * fw + dx; + rr.center.y = rr.center.y * fw + dy; + rr.size.width *= fw; + rr.size.height *= fh; + + br.x = cvRound(dx + br.x * fw); + br.y = cvRound(dy + br.y * fh); + br.width = cvRound(br.width * fw); + br.height = cvRound(br.height * fh); + + resObj.m_brect = br; + //resObj.m_rrect = rr; + + //std::cout << "resBoxes[" << i << "] br: " << br << ", rr: (" << rr.size << " from " << rr.center << ", " << rr.angle << ")" << std::endl; + + break; + } + } +#endif + } + else + { + resObj.m_boxMask = cv::Mat(resObj.m_brect.size(), CV_8UC1, cv::Scalar(255)); + } + } + + dets += m_outpuDims[detsInd].d[ncInd]; + labels += m_outpuDims[labelsInd].d[ncInd]; + masks += segWidth * segHeight; + } + + return resBoxes; + } +}; diff --git a/src/Detector/tensorrt_onnx/RFDETR_bb.hpp b/src/Detector/tensorrt_onnx/RFDETR_bb.hpp new file mode 100644 index 000000000..ea2c86ffc --- /dev/null +++ b/src/Detector/tensorrt_onnx/RFDETR_bb.hpp @@ -0,0 +1,113 @@ +#pragma once + +#include "YoloONNX.hpp" + +/// +/// \brief The RFDETR_bb_onnx class +/// +class RFDETR_bb_onnx : public YoloONNX +{ +public: + RFDETR_bb_onnx(std::vector& inputTensorNames, std::vector& outputTensorNames) + { + inputTensorNames.push_back("input"); + outputTensorNames.push_back("dets"); + outputTensorNames.push_back("labels"); + } + +protected: + /// + /// \brief GetResult + /// \param output + /// \return + /// + std::vector GetResult(size_t imgIdx, int /*keep_topk*/, const std::vector& outputs, cv::Size frameSize) + { + std::vector resBoxes; + + //0: name: input, size : 1x3x560x560 + //1: name: dets, size : 1x300x4 + //2: name: labels, size : 1x300x91 + + const float fw = static_cast(frameSize.width) / static_cast(m_resizedROI.width); + const float fh = static_cast(frameSize.height) / static_cast(m_resizedROI.height); + + //std::cout << "m_resizedROI: " << m_resizedROI << ", frameSize: " << frameSize << ", fw_h: " << cv::Size2f(fw, fh) << ", 
m_inputDims: " << cv::Point3i(m_inputDims.d[1], m_inputDims.d[2], m_inputDims.d[3]) << std::endl; + + auto dets = outputs[0]; + auto labels = outputs[1]; + + size_t ncInd = 2; + size_t lenInd = 1; + + size_t nc = m_outpuDims[1].d[ncInd]; + size_t len = static_cast(m_outpuDims[0].d[lenInd]) / m_params.m_explicitBatchSize; + auto volume0 = len * m_outpuDims[0].d[ncInd]; // Volume(m_outpuDims[0]); + dets += volume0 * imgIdx; + auto volume1 = len * m_outpuDims[1].d[ncInd]; // Volume(m_outpuDims[0]); + labels += volume1 * imgIdx; + + + //std::cout << "len = " << len << ", nc = " << nc << ", m_params.confThreshold = " << m_params.confThreshold << ", volume0 = " << volume0 << ", volume1 = " << volume1 << std::endl; + + //for (size_t i = 0; i < len; ++i) + //{ + // std::cout << "labels: "; + // for (size_t j = 0; j < m_outpuDims[1].d[ncInd]; ++j) + // { + // std::cout << labels[j] << " | "; + // } + // std::cout << std::endl; + // + // std::cout << "dets: "; + // for (size_t j = 0; j < m_outpuDims[0].d[ncInd]; ++j) + // { + // std::cout << dets[j] << " | "; + // } + // std::cout << std::endl; + //} + + + auto L2Conf = [](float v) + { + return 1.f / (1.f + std::exp(-v)); + }; + + for (size_t i = 0; i < len; ++i) + { + float classConf = L2Conf(labels[0]); + size_t classId = 0; + for (size_t cli = 1; cli < nc; ++cli) + { + auto conf = L2Conf(labels[cli]); + if (classConf < conf) + { + classConf = conf; + classId = cli; + } + } + if (classId > 0) + --classId; + + if (classConf >= m_params.m_confThreshold) + { + float x = fw * (m_inputDims[0].d[2] * (dets[0] - dets[2] / 2.f) - m_resizedROI.x); + float y = fh * (m_inputDims[0].d[3] * (dets[1] - dets[3] / 2.f) - m_resizedROI.y); + float width = fw * m_inputDims[0].d[2] * dets[2]; + float height = fh * m_inputDims[0].d[3] * dets[3]; + + //if (i == 0) + //{ + // std::cout << i << ": classConf = " << classConf << ", classId = " << classId << " (" << labels[classId] << "), rect = " << cv::Rect(cvRound(x), cvRound(y), cvRound(width), 
cvRound(height)) << std::endl; + // std::cout << "dets = " << dets[0] << ", " << dets[1] << ", " << dets[2] << ", " << dets[3] << std::endl; + //} + resBoxes.emplace_back(classId, classConf, cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height))); + } + + dets += m_outpuDims[0].d[ncInd]; + labels += m_outpuDims[1].d[ncInd]; + } + + return resBoxes; + } +}; diff --git a/src/Detector/tensorrt_onnx/RFDETR_is.hpp b/src/Detector/tensorrt_onnx/RFDETR_is.hpp new file mode 100644 index 000000000..d2f1988ed --- /dev/null +++ b/src/Detector/tensorrt_onnx/RFDETR_is.hpp @@ -0,0 +1,188 @@ +#pragma once + +#include "YoloONNX.hpp" + +/// +/// \brief The RFDETR_is_onnx class +/// +class RFDETR_is_onnx : public YoloONNX +{ +public: + RFDETR_is_onnx(std::vector& inputTensorNames, std::vector& outputTensorNames) + { + inputTensorNames.push_back("input"); + outputTensorNames.push_back("dets"); + outputTensorNames.push_back("labels"); + outputTensorNames.push_back("4245"); + } + +protected: + /// + /// \brief GetResult + /// \param output + /// \return + /// + std::vector GetResult(size_t imgIdx, int /*keep_topk*/, const std::vector& outputs, cv::Size frameSize) + { + std::vector resBoxes; + + //0: name: input, size: 1x3x432x432 + //1: name: dets, size: 1x200x4 + //2: name: labels, size: 1x200x91 + //3: name: 4245, size: 1x200x108x108 + + const float fw = static_cast(frameSize.width) / static_cast(m_resizedROI.width); + const float fh = static_cast(frameSize.height) / static_cast(m_resizedROI.height); + + cv::Size inputSize(static_cast(m_inputDims[0].d[3]), static_cast(m_inputDims[0].d[2])); + cv::Size2f inputSizef(static_cast(inputSize.width), static_cast(inputSize.height)); + + //std::cout << "m_resizedROI: " << m_resizedROI << ", frameSize: " << frameSize << ", fw_h: " << cv::Size2f(fw, fh) << ", m_inputDims: " << cv::Point3i(m_inputDims.d[1], m_inputDims.d[2], m_inputDims.d[3]) << std::endl; + + auto dets = outputs[0]; + auto labels = outputs[1]; + + int segInd = 2; + 
auto masks = outputs[segInd]; + + size_t ncInd = 2; + size_t lenInd = 1; + + + size_t nc = m_outpuDims[1].d[ncInd]; + size_t len = static_cast(m_outpuDims[0].d[lenInd]) / m_params.m_explicitBatchSize; + auto volume0 = len * m_outpuDims[0].d[ncInd]; // Volume(m_outpuDims[0]); + dets += volume0 * imgIdx; + auto volume1 = len * m_outpuDims[1].d[ncInd]; // Volume(m_outpuDims[0]); + labels += volume1 * imgIdx; + + int segChannels = static_cast(m_outpuDims[segInd].d[1]); + int segWidth = static_cast(m_outpuDims[segInd].d[2]); + int segHeight = static_cast(m_outpuDims[segInd].d[3]); + masks += imgIdx * segChannels * segWidth * segHeight; + + cv::Mat binaryMask8U(segHeight, segWidth, CV_8UC1); + + //std::cout << "len = " << len << ", nc = " << nc << ", m_params.confThreshold = " << m_params.confThreshold << ", volume0 = " << volume0 << ", volume1 = " << volume1 << std::endl; + + auto L2Conf = [](float v) + { + return 1.f / (1.f + std::exp(-v)); + }; + + for (size_t i = 0; i < len; ++i) + { + float classConf = L2Conf(labels[0]); + size_t classId = 0; + for (size_t cli = 1; cli < nc; ++cli) + { + auto conf = L2Conf(labels[cli]); + if (classConf < conf) + { + classConf = conf; + classId = cli; + } + } + if (classId > 0) + --classId; + + if (classConf >= m_params.m_confThreshold) + { + float x = fw * (inputSizef.width * (dets[0] - dets[2] / 2.f) - m_resizedROI.x); + float y = fh * (inputSizef.height * (dets[1] - dets[3] / 2.f) - m_resizedROI.y); + float width = fw * inputSizef.width * dets[2]; + float height = fh * inputSizef.height * dets[3]; + + //if (i == 0) + //{ + // std::cout << i << ": classConf = " << classConf << ", classId = " << classId << " (" << labels[classId] << "), rect = " << cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height)) << std::endl; + // std::cout << "dets = " << dets[0] << ", " << dets[1] << ", " << dets[2] << ", " << dets[3] << std::endl; + //} + resBoxes.emplace_back(classId, classConf, cv::Rect(cvRound(x), cvRound(y), cvRound(width), 
cvRound(height))); + + double maskThreshold = 0.1; + for (int row = 0; row < segHeight; ++row) + { + const float* maskPtr = masks + row * segWidth; + uchar* binMaskPtr = binaryMask8U.ptr(row); + + for (int col = 0; col < segWidth; ++col) + { + binMaskPtr[col] = (maskPtr[col] > maskThreshold) ? 255 : 0; + } + } + + tensor_rt::Result& resObj = resBoxes.back(); + + cv::Rect smallRect; + smallRect.x = cvRound(segHeight * (dets[0] - dets[2] / 2.f)); + smallRect.y = cvRound(segHeight * (dets[1] - dets[3] / 2.f)); + smallRect.width = cvRound(segHeight * dets[2]); + smallRect.height = cvRound(segHeight * dets[3]); + smallRect = Clamp(smallRect, cv::Size(segWidth, segHeight)); + + if (smallRect.area() > 0) + { + cv::resize(binaryMask8U(smallRect), resObj.m_boxMask, resObj.m_brect.size(), 0, 0, cv::INTER_NEAREST); + +#if 0 + static int globalObjInd = 0; + SaveMat(mask, std::to_string(globalObjInd) + "_mask", ".png", "tmp", true); + SaveMat(binaryMask, std::to_string(globalObjInd) + "_bin_mask", ".png", "tmp", true); + SaveMat(binaryMask8U, std::to_string(globalObjInd) + "_bin_mask_8u", ".png", "tmp", true); + SaveMat(resObj.m_boxMask, std::to_string(globalObjInd++) + "_obj_mask", ".png", "tmp", true); + std::cout << "inputSize: " << inputSize << ", localRect: " << localRect << std::endl; +#endif + +#if 0 + std::vector> contours; +#if ((CV_VERSION_MAJOR > 4) || ((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 9))) + cv::findContoursLinkRuns(resObj.m_boxMask, contours); +#else + cv::findContours(resObj.m_boxMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); +#endif + for (const auto& contour : contours) + { + cv::Rect br = cv::boundingRect(contour); + + if (br.width >= 4 && + br.height >= 4) + { + int dx = resObj.m_brect.x; + int dy = resObj.m_brect.y; + + cv::RotatedRect rr = (contour.size() < 5) ? 
cv::minAreaRect(contour) : cv::fitEllipse(contour); + rr.center.x = rr.center.x * fw + dx; + rr.center.y = rr.center.y * fw + dy; + rr.size.width *= fw; + rr.size.height *= fh; + + br.x = cvRound(dx + br.x * fw); + br.y = cvRound(dy + br.y * fh); + br.width = cvRound(br.width * fw); + br.height = cvRound(br.height * fh); + + resObj.m_brect = br; + //resObj.m_rrect = rr; + + //std::cout << "resBoxes[" << i << "] br: " << br << ", rr: (" << rr.size << " from " << rr.center << ", " << rr.angle << ")" << std::endl; + + break; + } + } +#endif + } + else + { + resObj.m_boxMask = cv::Mat(resObj.m_brect.size(), CV_8UC1, cv::Scalar(255)); + } + } + + dets += m_outpuDims[0].d[ncInd]; + labels += m_outpuDims[1].d[ncInd]; + masks += segWidth * segHeight; + } + + return resBoxes; + } +}; diff --git a/src/Detector/tensorrt_onnx/YoloONNX.cpp b/src/Detector/tensorrt_onnx/YoloONNX.cpp new file mode 100644 index 000000000..bec31df90 --- /dev/null +++ b/src/Detector/tensorrt_onnx/YoloONNX.cpp @@ -0,0 +1,505 @@ +#include + +#define DEFINE_TRT_ENTRYPOINTS 1 + +#include "YoloONNX.hpp" +#include "../../mtracking/defines.h" + +//! +//! \brief Creates the network, configures the builder and creates the network engine +//! +//! \details This function creates the YOLO network by parsing the ONNX model and builds +//! the engine that will be used to run YOLO (m_engine) +//! +//! \return Returns true if the engine was created successfully and false otherwise +//! 
+bool YoloONNX::Init(const SampleYoloParams& params) +{ + bool res = false; + + m_params = params; + + sample::setReportableSeverity(sample::Logger::Severity::kINFO); + initLibNvInferPlugins(&sample::gLogger.getTRTLogger(), ""); + + auto GetBindings = [&]() + { + auto numBindings = m_engine->getNbIOTensors(); + + m_inputDims.clear(); + m_outpuDims.clear(); + + std::cout << "** Bindings: " << numBindings << " **" << std::endl; + for (int32_t i = 0; i < numBindings; ++i) + { + std::string bindName = m_engine->getIOTensorName(i); + nvinfer1::Dims dim = m_engine->getTensorShape(bindName.c_str()); + + for (const auto& inName : m_params.m_inputTensorNames) + { + if (bindName == inName) + { + m_inputDims.emplace_back(dim); + break; + } + } + + for (const auto& outName : m_params.m_outputTensorNames) + { + if (bindName == outName) + { + m_outpuDims.emplace_back(dim); + break; + } + } + + std::cout << i << ": name: " << bindName; + std::cout << ", size: "; + for (int j = 0; j < dim.nbDims; ++j) + { + std::cout << dim.d[j]; + if (j < dim.nbDims - 1) + std::cout << "x"; + } + std::cout << std::endl; + } + }; + + if (fs::exists(m_params.m_engineFileName)) + { + std::vector trtModelStream; + size_t size{0}; + std::ifstream file(m_params.m_engineFileName, std::ios::binary); + if (file.good()) + { + file.seekg(0, file.end); + size = file.tellg(); + file.seekg(0, file.beg); + trtModelStream.resize(size); + file.read(trtModelStream.data(), size); + file.close(); + } + + m_inferRuntime = std::shared_ptr(nvinfer1::createInferRuntime(sample::gLogger)); + if (m_params.m_dlaCore >= 0) + m_inferRuntime->setDLACore(m_params.m_dlaCore); + + m_engine = std::shared_ptr(m_inferRuntime->deserializeCudaEngine(trtModelStream.data(), size), samplesCommon::InferDeleter()); +#if (NV_TENSORRT_MAJOR < 8) + m_inferRuntime->destroy(); + m_inferRuntime.reset(); +#else + //m_inferRuntime.reset(); +#endif + + if (m_engine) + { + GetBindings(); + res = true; + } + else + { + res = true; + } + 
sample::gLogInfo << "TRT Engine loaded from: " << m_params.m_engineFileName << " with res = " << res << std::endl; + } + else + { + auto builder = YoloONNXUniquePtr(nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger())); + if (!builder) + return false; + + const auto explicitBatch = 1U << static_cast(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH); + auto network = YoloONNXUniquePtr(builder->createNetworkV2(explicitBatch)); + if (!network) + return false; + + auto parser = YoloONNXUniquePtr(nvonnxparser::createParser(*network, sample::gLogger.getTRTLogger())); + if (!parser) + return false; + + auto config = YoloONNXUniquePtr(builder->createBuilderConfig()); + if (!config) + return false; + + auto constructed = ConstructNetwork(builder, network, config, parser); + if (!constructed) + return false; + + GetBindings(); + assert(m_inputDims[0].nbDims == 4); + + res = true; + } + + std::cout << "YoloONNX::Init: loaded = " << res << std::endl; + + if (res) + { + m_buffers = std::make_unique(m_engine, 0/*m_params.batchSize*/); + std::cout << "YoloONNX::Init: m_buffers = " << (m_buffers != nullptr) << std::endl; + m_context = YoloONNXUniquePtr(m_engine->createExecutionContext()); + std::cout << "YoloONNX::Init: m_context = " << (m_context != nullptr) << std::endl; + if (!m_context) + res = false; + } + + std::cout << "YoloONNX::Init: res = " << res << std::endl; + + return res; +} + +//! +//! \brief Uses an onnx parser to create the YOLO Network and marks the +//! output layers +//! +//! \param network Pointer to the network that will be populated with the YOLO network +//! +//! \param builder Pointer to the engine builder +//! 
+bool YoloONNX::ConstructNetwork(YoloONNXUniquePtr& builder, + YoloONNXUniquePtr& network, + YoloONNXUniquePtr& config, + YoloONNXUniquePtr& parser) +{ + bool res = false; + + // Parse ONNX model file to populate TensorRT INetwork + //int verbosity = (int) nvinfer1::ILogger::Severity::kERROR; + int verbosity = (int)nvinfer1::ILogger::Severity::kVERBOSE; + + sample::gLogInfo << "Parsing ONNX file: " << m_params.m_onnxFileName << std::endl; + + if (!parser->parseFromFile(m_params.m_onnxFileName.c_str(), verbosity)) + { + sample::gLogError << "Unable to parse ONNX model file: " << m_params.m_onnxFileName << std::endl; + return res; + } + + { + nvinfer1::IOptimizationProfile* profile = builder->createOptimizationProfile(); + + //std::cout << "m_params.inputTensorNames.size = " << m_params.inputTensorNames.size() << ", m_inputDims.size = " << m_inputDims.size() << std::endl; + if (m_params.m_inputTensorNames.size() > 0) + { + nvinfer1::Dims dim = network->getInput(0)->getDimensions(); + //std::cout << "dim[0] = " << dim.nbDims << ": [" << dim.d[0] << ", " << dim.d[1] << ", " << dim.d[2] << ", " << dim.d[3] << "]" << std::endl; + if (dim.d[0] < 1) + dim.d[0] = 1; // batch size + if (dim.d[1] < 1) + dim.d[1] = 3; // Channels + if (dim.d[2] < 1) + dim.d[2] = 640; // Width + if (dim.d[3] < 1) + dim.d[3] = 640; // Height + + profile->setDimensions(m_params.m_inputTensorNames[0].c_str(), nvinfer1::OptProfileSelector::kMIN, dim); + profile->setDimensions(m_params.m_inputTensorNames[0].c_str(), nvinfer1::OptProfileSelector::kOPT, dim); + profile->setDimensions(m_params.m_inputTensorNames[0].c_str(), nvinfer1::OptProfileSelector::kMAX, dim); + } + + // For D-FINE + if (m_params.m_inputTensorNames.size() > 1) + { + nvinfer1::Dims dim = network->getInput(1)->getDimensions(); + //std::cout << "dim[1] = " << dim.nbDims << ": [" << dim.d[0] << ", " << dim.d[1] << "]" << std::endl; + if (dim.d[0] < 1) + dim.d[0] = 1; // batch size + if (dim.d[1] < 1) + dim.d[1] = 2; // Input size + + 
profile->setDimensions(m_params.m_inputTensorNames[1].c_str(), nvinfer1::OptProfileSelector::kMIN, dim); + profile->setDimensions(m_params.m_inputTensorNames[1].c_str(), nvinfer1::OptProfileSelector::kOPT, dim); + profile->setDimensions(m_params.m_inputTensorNames[1].c_str(), nvinfer1::OptProfileSelector::kMAX, dim); + } + + config->addOptimizationProfile(profile); + } + +#if (NV_TENSORRT_MAJOR < 8) + builder->setMaxBatchSize(m_params.batchSize); + config->setMaxWorkspaceSize(m_params.videoMemory ? m_params.videoMemory : 4096_MiB); +#else + size_t workspaceSize = config->getMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE); + size_t dlaManagedSRAMSize = config->getMemoryPoolLimit(nvinfer1::MemoryPoolType::kDLA_MANAGED_SRAM); + size_t dlaLocalDRAMSize = config->getMemoryPoolLimit(nvinfer1::MemoryPoolType::kDLA_LOCAL_DRAM); + size_t dlaGlobalDRAMSize = config->getMemoryPoolLimit(nvinfer1::MemoryPoolType::kDLA_GLOBAL_DRAM); + std::cout << "m_params.videoMemory = " << m_params.m_videoMemory << ", workspaceSize = " << workspaceSize << ", dlaManagedSRAMSize = " << dlaManagedSRAMSize << ", dlaLocalDRAMSize = " << dlaLocalDRAMSize << ", dlaGlobalDRAMSize = " << dlaGlobalDRAMSize << std::endl; + + config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, m_params.m_videoMemory ? m_params.m_videoMemory : workspaceSize); +#endif + + config->setFlag(nvinfer1::BuilderFlag::kGPU_FALLBACK); + + switch (m_params.m_precision) + { + case tensor_rt::Precision::FP16: + config->setFlag(nvinfer1::BuilderFlag::kFP16); + sample::gLogInfo << "config->setFlag(nvinfer1::BuilderFlag::kFP16)" << std::endl; + break; + + case tensor_rt::Precision::FP8: + config->setFlag(nvinfer1::BuilderFlag::kFP8); + sample::gLogInfo << "config->setFlag(nvinfer1::BuilderFlag::kFP8)" << std::endl; + break; + + case tensor_rt::Precision::INT8: + { + // Calibrator life time needs to last until after the engine is built. 
+ std::unique_ptr calibrator; + + BatchStream calibrationStream(m_params.m_explicitBatchSize, m_params.m_nbCalBatches, m_params.m_calibrationBatches, m_params.m_dataDirs); + calibrator.reset(new Int8EntropyCalibrator2(calibrationStream, 0, "Yolo", m_params.m_inputTensorNames[0].c_str())); + config->setFlag(nvinfer1::BuilderFlag::kINT8); + sample::gLogInfo << "config->setFlag(nvinfer1::BuilderFlag::kINT8)" << std::endl; + config->setInt8Calibrator(calibrator.get()); + } + break; + + default: + break; + } + + // Enable DLA if mParams.dlaCore is true + samplesCommon::enableDLA(builder.get(), config.get(), m_params.m_dlaCore); + + sample::gLogInfo << "Building TensorRT engine: " << m_params.m_engineFileName << std::endl; + +#if (NV_TENSORRT_MAJOR < 8) + m_engine = std::shared_ptr(builder->buildEngineWithConfig(*network, *config), samplesCommon::InferDeleter()); +#else + nvinfer1::IRuntime* infer = nvinfer1::createInferRuntime(sample::gLogger); + if (m_params.m_dlaCore >= 0) + infer->setDLACore(m_params.m_dlaCore); + nvinfer1::IHostMemory* mem = builder->buildSerializedNetwork(*network, *config); + if (mem) + m_engine = std::shared_ptr(infer->deserializeCudaEngine(mem->data(), mem->size()), samplesCommon::InferDeleter()); + else + sample::gLogError << "Unable to buildSerializedNetwork" << std::endl; + delete infer; +#endif + + if (!m_engine) + return res; + + if (m_params.m_engineFileName.size() > 0) + { + std::ofstream p(m_params.m_engineFileName, std::ios::binary); + if (!p) + return false; + + nvinfer1::IHostMemory* ptr = m_engine->serialize(); + assert(ptr); + p.write(reinterpret_cast(ptr->data()), ptr->size()); +#if (NV_TENSORRT_MAJOR < 8) + ptr->destroy(); +#else + delete ptr; +#endif + p.close(); + sample::gLogInfo << "TRT Engine file saved to: " << m_params.m_engineFileName << std::endl; + } + res = true; + + return res; +} + +/// +/// \brief YoloONNX::Detect +/// \param frames +/// \param bboxes +/// \return +/// +bool YoloONNX::Detect(const std::vector& 
frames, std::vector& bboxes) +{ + // Read the input data into the managed buffers + if (!ProcessInputAspectRatio(frames)) + return false; + + // Memcpy from host input buffers to device input buffers + m_buffers->copyInputToDevice(); + + bool status = m_context->executeV2(m_buffers->getDeviceBindings().data()); + if (!status) + return false; + + // Memcpy from device output buffers to host output buffers + m_buffers->copyOutputToHost(); + + // Post-process detections and verify results + bboxes.resize(frames.size()); + for (size_t i = 0; i < bboxes.size(); ++i) + { + VerifyOutputAspectRatio(i, bboxes[i], frames[i].size()); + } + + return true; +} + +/// +/// \brief YoloONNX::GetInputSize +/// \return Return input size +/// +cv::Size YoloONNX::GetInputSize() const +{ + return cv::Size(static_cast(m_inputDims[0].d[3]), static_cast(m_inputDims[0].d[2])); +} + +/// +/// \brief YoloONNX::GetNumClasses +/// \return +/// +size_t YoloONNX::GetNumClasses() const +{ + if (m_outpuDims[0].nbDims == 2) // with nms + { + return 0; + } + else + { + size_t ncInd = 2; + int nc = static_cast(m_outpuDims[0].d[ncInd] - 5); + return (size_t)nc; + } +} + +//! +//! \brief Reads the input and mean data, preprocesses, and stores the result in a managed buffer +//! 
+bool YoloONNX::ProcessInputAspectRatio(const std::vector& sampleImages) +{ + const int inputB = static_cast(m_inputDims[0].d[0]); + const int inputC = static_cast(m_inputDims[0].d[1]); + const int inputH = static_cast(m_inputDims[0].d[2]); + const int inputW = static_cast(m_inputDims[0].d[3]); + + float* hostInputBuffer = nullptr; + if (m_params.m_inputTensorNames[0].empty()) + hostInputBuffer = static_cast(m_buffers->getHostBuffer(0)); + else + hostInputBuffer = static_cast(m_buffers->getHostBuffer(m_params.m_inputTensorNames[0])); + + if (static_cast(m_inputChannels.size()) < inputB) + { + for (int b = 0; b < inputB; ++b) + { + m_inputChannels.push_back(std::vector {static_cast(inputC)}); + } + } + + m_resizedROI = cv::Rect(0, 0, inputW, inputH); + +#if 1 + // resize the image with scale + const float imgHeight = static_cast(sampleImages[0].rows); + const float imgWidth = static_cast(sampleImages[0].cols); + float dim = std::max(imgHeight, imgWidth); + int resizeH = cvRound((imgHeight * inputH) / dim); + int resizeW = cvRound((imgWidth * inputW) / dim); + //float scalingFactor = static_cast(resizeH) / imgHeight; + + // Additional checks for images with non even dims + if ((inputW - resizeW) % 2) + resizeW--; + if ((inputH - resizeH) % 2) + resizeH--; + assert((inputW - resizeW) % 2 == 0); + assert((inputH - resizeH) % 2 == 0); + + float xOffset = (inputW - resizeW) / 2.f; + float yOffset = (inputH - resizeH) / 2.f; + + assert(2 * xOffset + resizeW == inputW); + assert(2 * yOffset + resizeH == inputH); + + cv::Size scaleSize(inputW, inputH); + m_resizedROI = cv::Rect(cvRound(xOffset), cvRound(yOffset), resizeW, resizeH); + + //std::cout << "m_resizedROI: " << m_resizedROI << ", frameSize: " << sampleImages[0].size() << ", resizeW_H: " << cv::Size2f(resizeW, resizeH) << std::endl; + + if (m_resizedBatch.size() < sampleImages.size()) + m_resizedBatch.resize(sampleImages.size()); + + // Each element in batch share the same image matrix + for (int b = 0; b < inputB; 
++b) + { + if (m_resizedBatch[b].size() != scaleSize) + m_resizedBatch[b] = cv::Mat(scaleSize, sampleImages[b].type(), cv::Scalar::all(128)); + cv::resize(sampleImages[b], cv::Mat(m_resizedBatch[b], m_resizedROI), m_resizedROI.size(), 0, 0, cv::INTER_LINEAR); + cv::split(m_resizedBatch[b], m_inputChannels[b]); + std::swap(m_inputChannels[b][0], m_inputChannels[b][2]); + } +#else + auto scaleSize = cv::Size(inputW, inputH); + + if (m_resizedBatch.size() < sampleImages.size()) + m_resizedBatch.resize(sampleImages.size()); + + // Each element in batch share the same image matrix + for (int b = 0; b < inputB; ++b) + { + cv::resize(sampleImages[b], m_resizedBatch[b], scaleSize, 0, 0, cv::INTER_LINEAR); + cv::split(m_resizedBatch[b], m_inputChannels[b]); + std::swap(m_inputChannels[b][0], m_inputChannels[b][2]); + } +#endif + + int volBatch = inputC * inputH * inputW; + int volChannel = inputH * inputW; + + constexpr float to1 = 1.f / 255.0f; + + int d_batch_pos = 0; + for (int b = 0; b < inputB; ++b) + { + int d_c_pos = d_batch_pos; + for (int c = 0; c < inputC; ++c) + { + m_inputChannels[b][c].convertTo(cv::Mat(inputH, inputW, CV_32FC1, &hostInputBuffer[d_c_pos]), CV_32FC1, to1, 0); + d_c_pos += volChannel; + } + d_batch_pos += volBatch; + } + + // For D-FINE + if (m_params.m_inputTensorNames.size() > 1) + { + int64_t* hostInput2 = static_cast(m_buffers->getHostBuffer(m_params.m_inputTensorNames[1])); + hostInput2[0] = inputW; + hostInput2[1] = inputH; + } + return true; +} + +//! +//! \brief Filters output detections and verify result +//! +//! \return whether the detection output matches expectations +//! 
+bool YoloONNX::VerifyOutputAspectRatio(size_t imgIdx, std::vector& nms_bboxes, cv::Size frameSize) +{ + std::vector outputs; + for (size_t i = 0; i < m_params.m_outputTensorNames.size();) + { + float* output = static_cast(m_buffers->getHostBuffer(m_params.m_outputTensorNames[i])); +#if 0 + if (output) + outputs.push_back(output); +#else + if (!output) + { + std::cout << i << " output tensor \"" << m_params.m_outputTensorNames[i] << "\" is null, will be removed" << std::endl; + m_params.m_outputTensorNames.erase(std::begin(m_params.m_outputTensorNames) + i); + } + else + { + outputs.push_back(output); + ++i; + } +#endif + } + if (!outputs.empty()) + nms_bboxes = GetResult(imgIdx, m_params.m_keepTopK, outputs, frameSize); + + return !outputs.empty(); +} diff --git a/src/Detector/tensorrt_onnx/YoloONNX.hpp b/src/Detector/tensorrt_onnx/YoloONNX.hpp new file mode 100644 index 000000000..cae188c54 --- /dev/null +++ b/src/Detector/tensorrt_onnx/YoloONNX.hpp @@ -0,0 +1,114 @@ +#pragma once + +#include "common/BatchStream.h" +#include "common/EntropyCalibrator.h" +#include "common/buffers.h" +#include "common/common.h" +#include "common/logger.h" + +#include "NvOnnxParser.h" +#include "NvInfer.h" +#include +#include +#include +#include +#include +#include + +#include +#include "class_detector.h" + +//! +//! \brief The SampleYoloParams structure groups the additional parameters required by +//! the SSD sample. +//! 
+struct SampleYoloParams +{ + int m_keepTopK = 1000; //!< The maximum number of detection post-NMS + int m_nbCalBatches = 100; //!< The number of batches for calibration + float m_confThreshold = 0.3; + float m_nmsThreshold = 0.5; + + size_t m_videoMemory = 0 ; //!< If zero then will use default value + + int m_explicitBatchSize = 1; + std::string m_calibrationBatches; //!< The path to calibration batches + std::string m_engineFileName; + + std::string m_onnxFileName; //!< Filename of ONNX file of a network + int32_t m_dlaCore{-1}; //!< Specify the DLA core to run network on. + tensor_rt::ModelType m_netType { tensor_rt::ModelType::YOLOV7 }; + tensor_rt::Precision m_precision { tensor_rt::Precision::FP32 }; //!< Allow runnning the network in Int8 mode. + std::vector m_dataDirs; //!< Directory paths where sample data files are stored + std::vector m_inputTensorNames; + std::vector m_outputTensorNames; +}; + +/// +/// \brief The YoloONNX class +/// +class YoloONNX +{ + template + using YoloONNXUniquePtr = std::unique_ptr; + +public: + YoloONNX() = default; + virtual ~YoloONNX() = default; + + //! + //! \brief Function builds the network engine + //! + bool Init(const SampleYoloParams& params); + + //! + //! \brief Runs the TensorRT inference engine for this sample + //! + bool Detect(const std::vector& frames, std::vector& bboxes); + + //! + //! \brief Return input size + //! + cv::Size GetInputSize() const; + + //! + //! \brief Return classes count + //! 
+ size_t GetNumClasses() const; + +protected: + SampleYoloParams m_params; //!< The parameters for the sample + std::vector m_inputDims; //!< The dimensions of the input to the network + std::vector m_outpuDims; //!< The dimensions of the input to the network + cv::Rect m_resizedROI; //!< Input frame resized into input dimensions with the frame aspect ratio + + virtual std::vector GetResult(size_t imgIdx, int keep_topk, const std::vector& outputs, cv::Size frameSize) = 0; + +private: + std::shared_ptr m_engine; //!< The TensorRT engine used to run the network + std::shared_ptr m_inferRuntime; + + cv::Mat m_resized; + std::vector m_resizedBatch; + std::vector> m_inputChannels; + + std::unique_ptr m_buffers; + YoloONNXUniquePtr m_context; + + //! + //! \brief Parses an ONNX model for YOLO and creates a TensorRT network + //! + bool ConstructNetwork(YoloONNXUniquePtr& builder, + YoloONNXUniquePtr& network, YoloONNXUniquePtr& config, + YoloONNXUniquePtr& parser); + + //! + //! \brief Reads the input and mean data, preprocesses, and stores the result in a managed buffer + //! + bool ProcessInputAspectRatio(const std::vector& sampleImages); + + //! + //! \brief Filters output detections and verify results + //! 
+ bool VerifyOutputAspectRatio(size_t imgIdx, std::vector& nms_bboxes, cv::Size frameSize); +}; diff --git a/src/Detector/tensorrt_onnx/YoloONNXv10_bb.hpp b/src/Detector/tensorrt_onnx/YoloONNXv10_bb.hpp new file mode 100644 index 000000000..6fdabb43c --- /dev/null +++ b/src/Detector/tensorrt_onnx/YoloONNXv10_bb.hpp @@ -0,0 +1,92 @@ +#pragma once + +#include "YoloONNX.hpp" + +/// +/// \brief The YOLOv10_bb_onnx class +/// +class YOLOv10_bb_onnx : public YoloONNX +{ +public: + YOLOv10_bb_onnx(std::vector& inputTensorNames, std::vector& outputTensorNames) + { + inputTensorNames.push_back("images"); + outputTensorNames.push_back("output0"); + } + +protected: + /// + /// \brief GetResult + /// \param output + /// \return + /// + std::vector GetResult(size_t imgIdx, int /*keep_topk*/, const std::vector& outputs, cv::Size frameSize) + { + std::vector resBoxes; + + //0: name: images, size: 1x3x640x640 + //1: name: output0, size: 1x300x6 + + const float fw = static_cast(frameSize.width) / static_cast(m_resizedROI.width); + const float fh = static_cast(frameSize.height) / static_cast(m_resizedROI.height); + + auto output = outputs[0]; + + size_t ncInd = 2; + size_t lenInd = 1; + size_t len = static_cast(m_outpuDims[0].d[lenInd]);// / m_params.explicitBatchSize; + //auto Volume = [](const nvinfer1::Dims& d) + //{ + // return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies()); + //}; + auto volume = len * m_outpuDims[0].d[ncInd]; // Volume(m_outpuDims[0]); + output += volume * imgIdx; + //std::cout << "len = " << len << ", nc = " << nc << ", m_params.confThreshold = " << m_params.confThreshold << ", volume = " << volume << std::endl; + + std::vector classIds; + std::vector confidences; + std::vector rectBoxes; + classIds.reserve(len); + confidences.reserve(len); + rectBoxes.reserve(len); + + for (size_t i = 0; i < len; ++i) + { + // Box + size_t k = i * 6; + + //if (i == 0) + // std::cout << i << ": " << output[k + 0] << " " << output[k + 1] << " " << output[k + 2] << 
" " << output[k + 3] << " " << output[k + 4] << " " << output[k + 5] << std::endl; + + float x = fw * (output[k + 0] - m_resizedROI.x); + float y = fh * (output[k + 1] - m_resizedROI.y); + float width = fw * (output[k + 2] - output[k + 0]); + float height = fh * (output[k + 3] - output[k + 1]); + float objectConf = output[k + 4]; + int classId = cvRound(output[k + 5]); + //if (i == 0) + // std::cout << i << ": object_conf = " << objectConf << ", classId = " << classId << ", rect = " << cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height)) << std::endl; + + if (objectConf >= m_params.m_confThreshold) + { + classIds.push_back(classId); + confidences.push_back(objectConf); + + // (center x, center y, width, height) to (x, y, w, h) + rectBoxes.emplace_back(cvRound(x), cvRound(y), cvRound(width), cvRound(height)); + } + } + + // Non-maximum suppression to eliminate redudant overlapping boxes + std::vector indices; + cv::dnn::NMSBoxes(rectBoxes, confidences, m_params.m_confThreshold, m_params.m_nmsThreshold, indices); + resBoxes.reserve(indices.size()); + + for (size_t bi = 0; bi < indices.size(); ++bi) + { + resBoxes.emplace_back(classIds[indices[bi]], confidences[indices[bi]], rectBoxes[indices[bi]]); + } + + return resBoxes; + } +}; diff --git a/src/Detector/tensorrt_onnx/YoloONNXv11_bb.hpp b/src/Detector/tensorrt_onnx/YoloONNXv11_bb.hpp new file mode 100644 index 000000000..972ab2f55 --- /dev/null +++ b/src/Detector/tensorrt_onnx/YoloONNXv11_bb.hpp @@ -0,0 +1,118 @@ +#pragma once + +#include "YoloONNX.hpp" + +/// +/// \brief The YOLOv11_bb_onnx class +/// +class YOLOv11_bb_onnx : public YoloONNX +{ +public: + YOLOv11_bb_onnx(std::vector& inputTensorNames, std::vector& outputTensorNames) + { + inputTensorNames.push_back("images"); + outputTensorNames.push_back("output0"); + } + +protected: + /// + /// \brief GetResult + /// \param output + /// \return + /// + std::vector GetResult(size_t imgIdx, int /*keep_topk*/, const std::vector& outputs, cv::Size 
frameSize) + { + std::vector resBoxes; + + //0: name: images, size: 1x3x640x640 + //1: name: output0, size: 1x84x8400 + + const float fw = static_cast(frameSize.width) / static_cast(m_resizedROI.width); + const float fh = static_cast(frameSize.height) / static_cast(m_resizedROI.height); + + auto output = outputs[0]; + + size_t ncInd = 1; + size_t lenInd = 2; + int nc = static_cast(m_outpuDims[0].d[ncInd] - 4); + int dimensions = nc + 4; + size_t len = static_cast(m_outpuDims[0].d[lenInd]);// / m_params.explicitBatchSize; + //auto Volume = [](const nvinfer1::Dims& d) + //{ + // return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies()); + //}; + auto volume = len * m_outpuDims[0].d[ncInd]; // Volume(m_outpuDims[0]); + output += volume * imgIdx; + //std::cout << "len = " << len << ", nc = " << nc << ", m_params.confThreshold = " << m_params.confThreshold << ", volume = " << volume << std::endl; + + cv::Mat rawMemory(1, dimensions * static_cast(len), CV_32FC1, output); + rawMemory = rawMemory.reshape(1, dimensions); + cv::transpose(rawMemory, rawMemory); + output = (float*)rawMemory.data; + + //std::cout << "output[0] mem:\n"; + //for (size_t ii = 0; ii < 100; ++ii) + //{ + // std::cout << ii << ": "; + // for (size_t jj = 0; jj < 20; ++jj) + // { + // std::cout << output[ii * 20 + jj] << " "; + // } + // std::cout << ";" << std::endl; + //} + //std::cout << ";" << std::endl; + + std::vector classIds; + std::vector confidences; + std::vector rectBoxes; + classIds.reserve(len); + confidences.reserve(len); + rectBoxes.reserve(len); + + for (size_t i = 0; i < len; ++i) + { + // Box + size_t k = i * (nc + 4); + + int classId = -1; + float objectConf = 0.f; + for (int j = 0; j < nc; ++j) + { + const float classConf = output[k + 4 + j]; + if (classConf > objectConf) + { + classId = j; + objectConf = classConf; + } + } + + //if (i == 0) + // std::cout << i << ": object_conf = " << object_conf << ", class_conf = " << class_conf << ", classId = " << classId << ", rect = 
" << cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height)) << std::endl; + + if (objectConf >= m_params.m_confThreshold) + { + classIds.push_back(classId); + confidences.push_back(objectConf); + + // (center x, center y, width, height) to (x, y, w, h) + float x = fw * (output[k] - output[k + 2] / 2 - m_resizedROI.x); + float y = fh * (output[k + 1] - output[k + 3] / 2 - m_resizedROI.y); + float width = fw * output[k + 2]; + float height = fh * output[k + 3]; + rectBoxes.emplace_back(cvRound(x), cvRound(y), cvRound(width), cvRound(height)); + } + } + + // Non-maximum suppression to eliminate redudant overlapping boxes + std::vector indices; + cv::dnn::NMSBoxes(rectBoxes, confidences, m_params.m_confThreshold, m_params.m_nmsThreshold, indices); + resBoxes.reserve(indices.size()); + + for (size_t bi = 0; bi < indices.size(); ++bi) + { + resBoxes.emplace_back(classIds[indices[bi]], confidences[indices[bi]], rectBoxes[indices[bi]]); + } + + return resBoxes; + } +}; diff --git a/src/Detector/tensorrt_onnx/YoloONNXv11_instance.hpp b/src/Detector/tensorrt_onnx/YoloONNXv11_instance.hpp new file mode 100644 index 000000000..641c5c7dc --- /dev/null +++ b/src/Detector/tensorrt_onnx/YoloONNXv11_instance.hpp @@ -0,0 +1,308 @@ +#pragma once + +#include "YoloONNX.hpp" +#include "../../mtracking/defines.h" + +/// +/// \brief The YOLOv11_instance_onnx class +/// +class YOLOv11_instance_onnx : public YoloONNX +{ +public: + YOLOv11_instance_onnx(std::vector& inputTensorNames, std::vector& outputTensorNames) + { + inputTensorNames.push_back("images"); + outputTensorNames.push_back("output0"); + outputTensorNames.push_back("output1"); + } + +protected: + /// + /// \brief GetResult + /// \param output + /// \return + /// + std::vector GetResult(size_t imgIdx, int /*keep_topk*/, const std::vector& outputs, cv::Size frameSize) + { + std::vector resBoxes; + + const float fw = static_cast(frameSize.width) / static_cast(m_resizedROI.width); + const float fh = 
static_cast(frameSize.height) / static_cast(m_resizedROI.height); + + size_t outInd = 0; + size_t segInd = 1; + + auto output = outputs[outInd]; + + //std::cout << "output[1] mem:\n"; + //auto output1 = outputs[1]; + //for (size_t ii = 0; ii < 100; ++ii) + //{ + // std::cout << ii << ": "; + // for (size_t jj = 0; jj < 20; ++jj) + // { + // std::cout << output1[ii * 20 + jj] << " "; + // } + // std::cout << ";" << std::endl; + //} + //std::cout << ";" << std::endl; + + //0: name: images, size: 1x3x640x640 + //1: name: output0, size: 1x116x8400 + //2: name: output1, size: 1x32x160x160 + // 25200 = 3x80x80 + 3x40x40 + 3x20x20 + // 116 = x, y, w, h, 80 classes, 32 seg ancors + // 80 * 8 = 640, 40 * 16 = 640, 20 * 32 = 640 + + size_t ncInd = 1; + size_t lenInd = 2; + int nc = static_cast(m_outpuDims[outInd].d[ncInd] - 4 - 32); + int dimensions = nc + 32 + 4; + size_t len = static_cast(m_outpuDims[outInd].d[lenInd]);// / m_params.explicitBatchSize; + //auto Volume = [](const nvinfer1::Dims& d) + //{ + // return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies()); + //}; + auto volume = len * m_outpuDims[outInd].d[ncInd]; // Volume(m_outpuDims[0]); + output += volume * imgIdx; + //std::cout << "len = " << len << ", nc = " << nc << ", m_params.confThreshold = " << m_params.confThreshold << ", volume = " << volume << std::endl; + + cv::Mat rawMemory(1, dimensions * static_cast(len), CV_32FC1, output); + rawMemory = rawMemory.reshape(1, dimensions); + cv::transpose(rawMemory, rawMemory); + output = (float*)rawMemory.data; + + //std::cout << "output[0] mem:\n"; + //for (size_t ii = 0; ii < 100; ++ii) + //{ + // std::cout << ii << ": "; + // for (size_t jj = 0; jj < 20; ++jj) + // { + // std::cout << output[ii * 20 + jj] << " "; + // } + // std::cout << ";" << std::endl; + //} + //std::cout << ";" << std::endl; + +#if 1 + int segWidth = 160; + int segHeight = 160; + int segChannels = 32; + + if (outputs.size() > 1) + { + //std::cout << "output1 nbDims: " << 
m_outpuDims[segInd].nbDims << ", "; + //for (size_t i = 0; i < m_outpuDims[segInd].nbDims; ++i) + //{ + // std::cout << m_outpuDims[segInd].d[i]; + // if (i + 1 != m_outpuDims[segInd].nbDims) + // std::cout << "x"; + //} + //std::cout << std::endl; + //std::cout << "output nbDims: " << m_outpuDims[outInd].nbDims << ", "; + //for (size_t i = 0; i < m_outpuDims[outInd].nbDims; ++i) + //{ + // std::cout << m_outpuDims[outInd].d[i]; + // if (i + 1 != m_outpuDims[outInd].nbDims) + // std::cout << "x"; + //} + //std::cout << std::endl; + + segChannels = static_cast(m_outpuDims[segInd].d[1]); + segWidth = static_cast(m_outpuDims[segInd].d[2]); + segHeight = static_cast(m_outpuDims[segInd].d[3]); + } + cv::Mat maskProposals; + std::vector> picked_proposals; + int net_width = nc + 4 + segChannels; +#endif + + std::vector classIds; + std::vector confidences; + std::vector rectBoxes; + classIds.reserve(len); + confidences.reserve(len); + rectBoxes.reserve(len); + + for (size_t i = 0; i < len; ++i) + { + // Box + size_t k = i * (nc + 4 + 32); + + int classId = -1; + float objectConf = 0.f; + for (int j = 0; j < nc; ++j) + { + const float classConf = output[k + 4 + j]; + if (classConf > objectConf) + { + classId = j; + objectConf = classConf; + } + } + + //if (i == 0) + //{ + // std::cout << "without nms: mem" << i << ": "; + // for (size_t ii = 0; ii < 4; ++ii) + // { + // std::cout << output[k + ii] << " "; + // } + // std::cout << ";" << std::endl; + // for (size_t ii = 4; ii < nc + 4; ++ii) + // { + // std::cout << output[k + ii] << " "; + // } + // std::cout << ";" << std::endl; + // for (size_t ii = nc + 4; ii < nc + 4 + 32; ++ii) + // { + // std::cout << output[k + ii] << " "; + // } + // std::cout << ";" << std::endl; + //} + + if (objectConf >= m_params.m_confThreshold) + { + // (center x, center y, width, height) to (x, y, w, h) + float x = output[k] - output[k + 2] / 2; + float y = output[k + 1] - output[k + 3] / 2; + float width = output[k + 2]; + float height = 
output[k + 3]; + + //auto ClampToFrame = [](float& v, float& size, int hi) -> int + //{ + // int res = 0; +// + // if (size < 1) + // size = 0; +// + // if (v < 0) + // { + // res = v; + // v = 0; + // return res; + // } + // else if (v + size > hi - 1) + // { + // res = v; + // v = hi - 1 - size; + // if (v < 0) + // { + // size += v; + // v = 0; + // } + // res -= v; + // return res; + // } + // return res; + //}; + //ClampToFrame(x, width, frameSize.width); + //ClampToFrame(y, height, frameSize.height); + + //if (i == 0) + // std::cout << i << ": object_conf = " << object_conf << ", class_conf = " << class_conf << ", classId = " << classId << ", rect = " << cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height)) << std::endl; + + if (width > 4 && height > 4) + { + classIds.push_back(classId); + confidences.push_back(objectConf); + rectBoxes.emplace_back(cvRound(x), cvRound(y), cvRound(width), cvRound(height)); + + std::vector temp_proto(output + k + 4 + nc, output + k + net_width); + picked_proposals.push_back(temp_proto); + } + } + } + + // Non-maximum suppression to eliminate redudant overlapping boxes + std::vector indices; + cv::dnn::NMSBoxes(rectBoxes, confidences, m_params.m_confThreshold, m_params.m_nmsThreshold, indices); + resBoxes.reserve(indices.size()); + + for (size_t bi = 0; bi < indices.size(); ++bi) + { + resBoxes.emplace_back(classIds[indices[bi]], confidences[indices[bi]], Clamp(rectBoxes[indices[bi]], frameSize)); + maskProposals.push_back(cv::Mat(picked_proposals[indices[bi]]).t()); + } + + if (!maskProposals.empty()) + { + // Mask processing + const float* pdata = outputs[segInd]; + std::vector maskFloat(pdata, pdata + segChannels * segWidth * segHeight); + + int INPUT_W = static_cast(m_inputDims[0].d[3]); + int INPUT_H = static_cast(m_inputDims[0].d[2]); + static constexpr float MASK_THRESHOLD = 0.5; + + cv::Mat mask_protos = cv::Mat(maskFloat); + cv::Mat protos = mask_protos.reshape(0, { segChannels, segWidth * segHeight }); + + 
cv::Mat matmulRes = (maskProposals * protos).t();//n*32 32*25600 + cv::Mat masks = matmulRes.reshape(static_cast(resBoxes.size()), { segWidth, segHeight }); + std::vector maskChannels; + split(masks, maskChannels); + for (size_t i = 0; i < resBoxes.size(); ++i) + { + cv::Mat dest; + cv::Mat mask; + //sigmoid + cv::exp(-maskChannels[i], dest); + dest = 1.0 / (1.0 + dest);//160*160 + + int padw = 0; + int padh = 0; + cv::Rect roi(int((float)padw / INPUT_W * segWidth), int((float)padh / INPUT_H * segHeight), int(segWidth - padw / 2), int(segHeight - padh / 2)); + dest = dest(roi); + + cv::resize(dest, mask, cv::Size(INPUT_W, INPUT_H), cv::INTER_NEAREST); + + resBoxes[i].m_boxMask = mask(resBoxes[i].m_brect) > MASK_THRESHOLD; + +#if 0 + static int globalObjInd = 0; + SaveMat(resBoxes[i].m_boxMask, std::to_string(globalObjInd++), ".png", "tmp", true); +#endif + +#if 1 + std::vector> contours; +#if ((CV_VERSION_MAJOR > 4) || ((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 9))) + cv::findContoursLinkRuns(resBoxes[i].m_boxMask, contours); +#else + cv::findContours(resBoxes[i].m_boxMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); +#endif + for (const auto& contour : contours) + { + cv::Rect br = cv::boundingRect(contour); + + if (br.width >= 4 && + br.height >= 4) + { + int dx = resBoxes[i].m_brect.x; + int dy = resBoxes[i].m_brect.y; + + cv::RotatedRect rr = (contour.size() < 5) ? 
cv::minAreaRect(contour) : cv::fitEllipse(contour); + rr.center.x = (rr.center.x + dx - m_resizedROI.x) * fw; + rr.center.y = (rr.center.y + dy - m_resizedROI.y) * fw; + rr.size.width *= fw; + rr.size.height *= fh; + + br.x = cvRound((dx + br.x - m_resizedROI.x) * fw); + br.y = cvRound((dy + br.y - m_resizedROI.y) * fh); + br.width = cvRound(br.width * fw); + br.height = cvRound(br.height * fh); + + resBoxes[i].m_brect = br; + resBoxes[i].m_rrect = rr; + + //std::cout << "resBoxes[" << i << "] br: " << br << ", rr: (" << rr.size << " from " << rr.center << ", " << rr.angle << ")" << std::endl; + + break; + } + } +#endif + } + } + return resBoxes; + } +}; diff --git a/src/Detector/tensorrt_onnx/YoloONNXv11_obb.hpp b/src/Detector/tensorrt_onnx/YoloONNXv11_obb.hpp new file mode 100644 index 000000000..e32903f14 --- /dev/null +++ b/src/Detector/tensorrt_onnx/YoloONNXv11_obb.hpp @@ -0,0 +1,131 @@ +#pragma once + +#include "YoloONNX.hpp" + +/// +/// \brief The YOLOv11_obb_onnx class +/// +class YOLOv11_obb_onnx : public YoloONNX +{ +public: + YOLOv11_obb_onnx(std::vector& inputTensorNames, std::vector& outputTensorNames) + { + inputTensorNames.push_back("images"); + outputTensorNames.push_back("output0"); + } + +protected: + /// + /// \brief GetResult + /// \param output + /// \return + /// + std::vector GetResult(size_t imgIdx, int /*keep_topk*/, const std::vector& outputs, cv::Size frameSize) + { + std::vector resBoxes; + + //0: name: images, size: 1x3x1024x1024 + //1: name: output0, size: 1x20x21504 + //20: 15 DOTA classes + x + y + w + h + a + constexpr int shapeDataSize = 5; + + const float fw = static_cast(frameSize.width) / static_cast(m_resizedROI.width); + const float fh = static_cast(frameSize.height) / static_cast(m_resizedROI.height); + + auto output = outputs[0]; + + size_t ncInd = 1; + size_t lenInd = 2; + int nc = static_cast(m_outpuDims[0].d[ncInd] - shapeDataSize); + int dimensions = nc + shapeDataSize; + size_t len = 
static_cast(m_outpuDims[0].d[lenInd]);// / m_params.explicitBatchSize; + //auto Volume = [](const nvinfer1::Dims& d) + //{ + // return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies()); + //}; + auto volume = len * m_outpuDims[0].d[ncInd]; // Volume(m_outpuDims[0]); + output += volume * imgIdx; + //std::cout << "len = " << len << ", nc = " << nc << ", m_params.confThreshold = " << m_params.confThreshold << ", volume = " << volume << std::endl; + + cv::Mat rawMemory(1, dimensions * static_cast(len), CV_32FC1, output); + rawMemory = rawMemory.reshape(1, dimensions); + cv::transpose(rawMemory, rawMemory); + output = (float*)rawMemory.data; + + //std::cout << "output[0] mem:\n"; + //for (size_t ii = 0; ii < 100; ++ii) + //{ + // std::cout << ii << ": "; + // for (size_t jj = 0; jj < 20; ++jj) + // { + // std::cout << output[ii * 20 + jj] << " "; + // } + // std::cout << ";" << std::endl; + //} + //std::cout << ";" << std::endl; + + std::vector classIds; + std::vector confidences; + std::vector rectBoxes; + classIds.reserve(len); + confidences.reserve(len); + rectBoxes.reserve(len); + + for (size_t i = 0; i < len; ++i) + { + // Box + size_t k = i * (nc + shapeDataSize); + + int classId = -1; + float objectConf = 0.f; + for (int j = 0; j < nc; ++j) + { + const float classConf = output[k + 4 + j]; + if (classConf > objectConf) + { + classId = j; + objectConf = classConf; + } + } + + //if (i == 0) + //{ + // for (int jj = 0; jj < 20; ++jj) + // { + // std::cout << output[jj] << " "; + // } + // std::cout << std::endl; + //} + + if (objectConf >= m_params.m_confThreshold) + { + classIds.push_back(classId); + confidences.push_back(objectConf); + + // (center x, center y, width, height) + float cx = fw * (output[k] - m_resizedROI.x); + float cy = fh * (output[k + 1] - m_resizedROI.y); + float width = fw * output[k + 2]; + float height = fh * output[k + 3]; + float angle = 180.f * output[k + nc + shapeDataSize - 1] / static_cast(M_PI); + 
rectBoxes.emplace_back(cv::Point2f(cx, cy), cv::Size2f(width, height), angle); + + //if (rectBoxes.size() == 1) + // std::cout << i << ": object_conf = " << objectConf << ", classId = " << classId << ", rect = " << rectBoxes.back().boundingRect() << ", angle = " << angle << std::endl; + } + } + + // Non-maximum suppression to eliminate redudant overlapping boxes + //std::vector indices; + //cv::dnn::NMSBoxes(rectBoxes, confidences, m_params.confThreshold, m_params.nmsThreshold, indices); + //resBoxes.reserve(indices.size()); + + resBoxes.reserve(rectBoxes.size()); + for (size_t bi = 0; bi < rectBoxes.size(); ++bi) + { + resBoxes.emplace_back(classIds[bi], confidences[bi], rectBoxes[bi]); + } + + return resBoxes; + } +}; diff --git a/src/Detector/tensorrt_onnx/YoloONNXv12_bb.hpp b/src/Detector/tensorrt_onnx/YoloONNXv12_bb.hpp new file mode 100644 index 000000000..afa08e1af --- /dev/null +++ b/src/Detector/tensorrt_onnx/YoloONNXv12_bb.hpp @@ -0,0 +1,16 @@ +#pragma once + +#include "YoloONNX.hpp" + +/// +/// \brief The YOLOv12_bb_onnx class +/// +class YOLOv12_bb_onnx : public YOLOv11_bb_onnx +{ +public: + YOLOv12_bb_onnx(std::vector& inputTensorNames, std::vector& outputTensorNames) + : YOLOv11_bb_onnx(inputTensorNames, outputTensorNames) + { + } + +}; diff --git a/src/Detector/tensorrt_onnx/YoloONNXv13_bb.hpp b/src/Detector/tensorrt_onnx/YoloONNXv13_bb.hpp new file mode 100644 index 000000000..ca4512581 --- /dev/null +++ b/src/Detector/tensorrt_onnx/YoloONNXv13_bb.hpp @@ -0,0 +1,118 @@ +#pragma once + +#include "YoloONNX.hpp" + +/// +/// \brief The YOLOv13_bb_onnx class +/// +class YOLOv13_bb_onnx : public YoloONNX +{ +public: + YOLOv13_bb_onnx(std::vector& inputTensorNames, std::vector& outputTensorNames) + { + inputTensorNames.push_back("images"); + outputTensorNames.push_back("output0"); + } + +protected: + /// + /// \brief GetResult + /// \param output + /// \return + /// + std::vector GetResult(size_t imgIdx, int /*keep_topk*/, const std::vector& outputs, 
cv::Size frameSize) + { + std::vector resBoxes; + + //0: name: images, size: 1x3x640x640 + //1: name: output0, size: 1x84x8400 + + const float fw = static_cast(frameSize.width) / static_cast(m_resizedROI.width); + const float fh = static_cast(frameSize.height) / static_cast(m_resizedROI.height); + + auto output = outputs[0]; + + size_t ncInd = 1; + size_t lenInd = 2; + int nc = static_cast(m_outpuDims[0].d[ncInd] - 4); + int dimensions = nc + 4; + size_t len = static_cast(m_outpuDims[0].d[lenInd]);// / m_params.explicitBatchSize; + //auto Volume = [](const nvinfer1::Dims& d) + //{ + // return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies()); + //}; + auto volume = len * m_outpuDims[0].d[ncInd]; // Volume(m_outpuDims[0]); + output += volume * imgIdx; + //std::cout << "len = " << len << ", nc = " << nc << ", m_params.confThreshold = " << m_params.confThreshold << ", volume = " << volume << std::endl; + + cv::Mat rawMemory(1, dimensions * static_cast(len), CV_32FC1, output); + rawMemory = rawMemory.reshape(1, dimensions); + cv::transpose(rawMemory, rawMemory); + output = (float*)rawMemory.data; + + //std::cout << "output[0] mem:\n"; + //for (size_t ii = 0; ii < 100; ++ii) + //{ + // std::cout << ii << ": "; + // for (size_t jj = 0; jj < 20; ++jj) + // { + // std::cout << output[ii * 20 + jj] << " "; + // } + // std::cout << ";" << std::endl; + //} + //std::cout << ";" << std::endl; + + std::vector classIds; + std::vector confidences; + std::vector rectBoxes; + classIds.reserve(len); + confidences.reserve(len); + rectBoxes.reserve(len); + + for (size_t i = 0; i < len; ++i) + { + // Box + size_t k = i * (nc + 4); + + int classId = -1; + float objectConf = 0.f; + for (int j = 0; j < nc; ++j) + { + const float classConf = output[k + 4 + j]; + if (classConf > objectConf) + { + classId = j; + objectConf = classConf; + } + } + + //if (i == 0) + // std::cout << i << ": object_conf = " << object_conf << ", class_conf = " << class_conf << ", classId = " << classId << 
", rect = " << cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height)) << std::endl; + + if (objectConf >= m_params.m_confThreshold) + { + classIds.push_back(classId); + confidences.push_back(objectConf); + + // (center x, center y, width, height) to (x, y, w, h) + float x = fw * (output[k] - output[k + 2] / 2 - m_resizedROI.x); + float y = fh * (output[k + 1] - output[k + 3] / 2 - m_resizedROI.y); + float width = fw * output[k + 2]; + float height = fh * output[k + 3]; + rectBoxes.emplace_back(cvRound(x), cvRound(y), cvRound(width), cvRound(height)); + } + } + + // Non-maximum suppression to eliminate redudant overlapping boxes + std::vector indices; + cv::dnn::NMSBoxes(rectBoxes, confidences, m_params.m_confThreshold, m_params.m_nmsThreshold, indices); + resBoxes.reserve(indices.size()); + + for (size_t bi = 0; bi < indices.size(); ++bi) + { + resBoxes.emplace_back(classIds[indices[bi]], confidences[indices[bi]], rectBoxes[indices[bi]]); + } + + return resBoxes; + } +}; diff --git a/src/Detector/tensorrt_onnx/YoloONNXv26_bb.hpp b/src/Detector/tensorrt_onnx/YoloONNXv26_bb.hpp new file mode 100644 index 000000000..19cdd67a6 --- /dev/null +++ b/src/Detector/tensorrt_onnx/YoloONNXv26_bb.hpp @@ -0,0 +1,64 @@ +#pragma once + +#include "YoloONNX.hpp" + +/// +/// \brief The YOLOv26_bb_onnx class +/// +class YOLOv26_bb_onnx : public YoloONNX +{ +public: + YOLOv26_bb_onnx(std::vector& inputTensorNames, std::vector& outputTensorNames) + { + inputTensorNames.push_back("images"); + outputTensorNames.push_back("output0"); + } + +protected: + /// + /// \brief GetResult + /// \param output + /// \return + /// + std::vector GetResult(size_t imgIdx, int /*keep_topk*/, const std::vector& outputs, cv::Size frameSize) + { + std::vector resBoxes; + + //0: name: images, size: 1x3x640x640 + //1: name: output0, size: 1x300x6 + + const float fw = static_cast(frameSize.width) / static_cast(m_resizedROI.width); + const float fh = static_cast(frameSize.height) / 
static_cast(m_resizedROI.height); + + auto output = outputs[0]; + + size_t lenInd = 1; + size_t len = static_cast(m_outpuDims[0].d[lenInd]); + auto volume = len * m_outpuDims[0].d[2]; + output += volume * imgIdx; + //std::cout << "len = " << len << ", confThreshold = " << m_params.m_confThreshold << ", volume = " << volume << std::endl; + + for (size_t i = 0; i < len; ++i) + { + auto ind = i * m_outpuDims[0].d[2]; + + float classConf = output[ind + 4]; + int classId = static_cast(output[ind + 5]); + + if (classConf >= m_params.m_confThreshold) + { + float x = fw * (output[ind + 0] - m_resizedROI.x); + float y = fh * (output[ind + 1] - m_resizedROI.y); + float width = fw * (output[ind + 2] - output[ind + 0]); + float height = fh * (output[ind + 3] - output[ind + 1]); + + //std::cout << "ind = " << ind << ", output[0] = " << output[ind + 0] << ", output[1] = " << output[ind + 1] << ", output[2] = " << output[ind + 2] << ", output[3] = " << output[ind + 3] << std::endl; + //std::cout << "ind = " << ind << ", classConf = " << classConf << ", classId = " << classId << ", x = " << x << ", y = " << y << ", width = " << width << ", height = " << height << std::endl; + + resBoxes.emplace_back(classId, classConf, cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height))); + } + } + + return resBoxes; + } +}; diff --git a/src/Detector/tensorrt_onnx/YoloONNXv26_instance.hpp b/src/Detector/tensorrt_onnx/YoloONNXv26_instance.hpp new file mode 100644 index 000000000..9ec2d27ef --- /dev/null +++ b/src/Detector/tensorrt_onnx/YoloONNXv26_instance.hpp @@ -0,0 +1,175 @@ +#pragma once + +#include "YoloONNX.hpp" +#include "../../mtracking/defines.h" + +/// +/// \brief The YOLOv26_instance_onnx class +/// +class YOLOv26_instance_onnx : public YoloONNX +{ +public: + YOLOv26_instance_onnx(std::vector& inputTensorNames, std::vector& outputTensorNames) + { + inputTensorNames.push_back("images"); + outputTensorNames.push_back("output0"); + outputTensorNames.push_back("output1"); + } + 
+protected: + /// + /// \brief GetResult + /// \param output + /// \return + /// + std::vector GetResult(size_t imgIdx, int /*keep_topk*/, const std::vector& outputs, cv::Size frameSize) + { + std::vector resBoxes; + + const float fw = static_cast(frameSize.width) / static_cast(m_resizedROI.width); + const float fh = static_cast(frameSize.height) / static_cast(m_resizedROI.height); + + size_t outInd = 0; + size_t segInd = 1; + + auto output = outputs[outInd]; + + //0: name: images, size: 1x3x640x640 + //1: name: output0, size: 1x300x38 + //2: name: output1, size: 1x32x160x160 + + size_t dimInd = 2; + size_t lenInd = 1; + int dimensions = static_cast(m_outpuDims[outInd].d[dimInd]); + size_t len = static_cast(m_outpuDims[outInd].d[lenInd]); + auto volume = len * dimensions; + output += volume * imgIdx; + //std::cout << "len = " << len << ", nc = " << nc << ", m_params.confThreshold = " << m_params.confThreshold << ", volume = " << volume << std::endl; + + int segWidth = 160; + int segHeight = 160; + int segChannels = 32; + + if (outputs.size() > 1) + { + segChannels = static_cast(m_outpuDims[segInd].d[1]); + segWidth = static_cast(m_outpuDims[segInd].d[2]); + segHeight = static_cast(m_outpuDims[segInd].d[3]); + } + cv::Mat maskProposals; + int netWidth = 6 + segChannels; + + for (size_t i = 0; i < len; ++i) + { + // Box + size_t k = i * dimensions; + + float objectConf = output[k + 4]; + int classId = static_cast(output[k + 5]); + + if (objectConf >= m_params.m_confThreshold) + { + // (center x, center y, width, height) to (x, y, w, h) + float x = output[k]; + float y = output[k + 1]; + float width = output[k + 2] - output[k]; + float height = output[k + 3] - output[k + 1]; + + if (width > 4 && height > 4) + { + resBoxes.emplace_back(classId, objectConf, cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height))); + + std::vector tempProto(output + k + 6, output + k + netWidth); + maskProposals.push_back(cv::Mat(tempProto).t()); + } + } + } + + //std::cout << 
"maskProposals.size = " << maskProposals.size() << std::endl; + if (!maskProposals.empty()) + { + // Mask processing + const float* pdata = outputs[segInd]; + std::vector maskFloat(pdata, pdata + segChannels * segWidth * segHeight); + + int INPUT_W = static_cast(m_inputDims[0].d[3]); + int INPUT_H = static_cast(m_inputDims[0].d[2]); + static constexpr float MASK_THRESHOLD = 0.5; + + cv::Mat mask_protos = cv::Mat(maskFloat); + cv::Mat protos = mask_protos.reshape(0, { segChannels, segWidth * segHeight }); + + cv::Mat matmulRes = (maskProposals * protos).t();//n*32 32*25600 + cv::Mat masks = matmulRes.reshape(static_cast(resBoxes.size()), { segWidth, segHeight }); + std::vector maskChannels; + split(masks, maskChannels); + for (size_t i = 0; i < resBoxes.size(); ++i) + { + cv::Mat dest; + cv::Mat mask; + //sigmoid + cv::exp(-maskChannels[i], dest); + dest = 1.0 / (1.0 + dest);//160*160 + + int padw = 0; + int padh = 0; + cv::Rect roi(int((float)padw / INPUT_W * segWidth), int((float)padh / INPUT_H * segHeight), int(segWidth - padw / 2), int(segHeight - padh / 2)); + dest = dest(roi); + + cv::resize(dest, mask, cv::Size(INPUT_W, INPUT_H), cv::INTER_NEAREST); + + //std::cout << "m_brect = " << resBoxes[i].m_brect << ", dest = " << dest.size() << ", mask = " << mask.size() << std::endl; + + resBoxes[i].m_boxMask = mask(resBoxes[i].m_brect) > MASK_THRESHOLD; + + //std::cout << "m_boxMask = " << resBoxes[i].m_boxMask.size() << ", m_brect = " << resBoxes[i].m_brect << ", dest = " << dest.size() << ", mask = " << mask.size() << std::endl; + +#if 0 + static int globalObjInd = 0; + SaveMat(resBoxes[i].m_boxMask, std::to_string(globalObjInd++), ".png", "tmp", true); +#endif + +#if 1 + std::vector> contours; +#if ((CV_VERSION_MAJOR > 4) || ((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 9))) + cv::findContoursLinkRuns(resBoxes[i].m_boxMask, contours); +#else + cv::findContours(resBoxes[i].m_boxMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); +#endif + 
for (const auto& contour : contours) + { + cv::Rect br = cv::boundingRect(contour); + + if (br.width >= 4 && + br.height >= 4) + { + int dx = resBoxes[i].m_brect.x; + int dy = resBoxes[i].m_brect.y; + + cv::RotatedRect rr = (contour.size() < 5) ? cv::minAreaRect(contour) : cv::fitEllipse(contour); + rr.center.x = (rr.center.x + dx - m_resizedROI.x) * fw; + rr.center.y = (rr.center.y + dy - m_resizedROI.y) * fw; + rr.size.width *= fw; + rr.size.height *= fh; + + br.x = cvRound((dx + br.x - m_resizedROI.x) * fw); + br.y = cvRound((dy + br.y - m_resizedROI.y) * fh); + br.width = cvRound(br.width * fw); + br.height = cvRound(br.height * fh); + + resBoxes[i].m_brect = br; + resBoxes[i].m_rrect = rr; + + cv::resize(resBoxes[i].m_boxMask, resBoxes[i].m_boxMask, resBoxes[i].m_brect.size(), 0, 0, cv::INTER_NEAREST); + + //std::cout << "resBoxes[" << i << "] br: " << br << ", rr: (" << rr.size << " from " << rr.center << ", " << rr.angle << ")" << std::endl; + + break; + } + } +#endif + } + } + return resBoxes; + } +}; diff --git a/src/Detector/tensorrt_onnx/YoloONNXv26_obb.hpp b/src/Detector/tensorrt_onnx/YoloONNXv26_obb.hpp new file mode 100644 index 000000000..8a097f2f3 --- /dev/null +++ b/src/Detector/tensorrt_onnx/YoloONNXv26_obb.hpp @@ -0,0 +1,64 @@ +#pragma once + +#include "YoloONNX.hpp" + +/// +/// \brief The YOLOv26_obb_onnx class +/// +class YOLOv26_obb_onnx : public YoloONNX +{ +public: + YOLOv26_obb_onnx(std::vector& inputTensorNames, std::vector& outputTensorNames) + { + inputTensorNames.push_back("images"); + outputTensorNames.push_back("output0"); + } + +protected: + /// + /// \brief GetResult + /// \param output + /// \return + /// + std::vector GetResult(size_t imgIdx, int /*keep_topk*/, const std::vector& outputs, cv::Size frameSize) + { + std::vector resBoxes; + + //0: name: images, size: 1x3x1024x1024 + //1: name: output0, size: 1x300x7 + + const float fw = static_cast(frameSize.width) / static_cast(m_resizedROI.width); + const float fh = 
static_cast(frameSize.height) / static_cast(m_resizedROI.height); + + auto output = outputs[0]; + + size_t lenInd = 1; + size_t len = static_cast(m_outpuDims[0].d[lenInd]); + auto volume = len * m_outpuDims[0].d[2]; + output += volume * imgIdx; + //std::cout << "len = " << len << ", confThreshold = " << m_params.m_confThreshold << ", volume = " << volume << std::endl; + + for (size_t i = 0; i < len; ++i) + { + auto ind = i * m_outpuDims[0].d[2]; + + float classConf = output[ind + 4]; + int classId = static_cast(output[ind + 5]); + + if (classConf >= m_params.m_confThreshold) + { + float x = fw * (output[ind + 0] - m_resizedROI.x); + float y = fh * (output[ind + 1] - m_resizedROI.y); + float width = fw * output[ind + 2]; + float height = fh * output[ind + 3]; + float angle = 180.f * output[ind + 6] / static_cast(M_PI); + //std::cout << "ind = " << ind << ", output[0] = " << output[ind + 0] << ", output[1] = " << output[ind + 1] << ", output[2] = " << output[ind + 2] << ", output[3] = " << output[ind + 3] << std::endl; + //std::cout << "ind = " << ind << ", classConf = " << classConf << ", classId = " << classId << ", x = " << x << ", y = " << y << ", width = " << width << ", height = " << height << ", angle = " << angle << std::endl; + + resBoxes.emplace_back(classId, classConf, cv::RotatedRect(cv::Point2f(x, y), cv::Size2f(width, height), angle)); + } + } + + return resBoxes; + } +}; diff --git a/src/Detector/tensorrt_onnx/YoloONNXv5_bb.hpp b/src/Detector/tensorrt_onnx/YoloONNXv5_bb.hpp new file mode 100644 index 000000000..afd0e3151 --- /dev/null +++ b/src/Detector/tensorrt_onnx/YoloONNXv5_bb.hpp @@ -0,0 +1,118 @@ +#pragma once + +#include "YoloONNX.hpp" + +/// +/// \brief The YOLOv5_bb_onnx class +/// +class YOLOv5_bb_onnx : public YoloONNX +{ +public: + YOLOv5_bb_onnx(std::vector& inputTensorNames, std::vector& outputTensorNames) + { + inputTensorNames.push_back("images"); + outputTensorNames.push_back("output0"); + } + +protected: + /// + /// \brief GetResult + 
/// \param output + /// \return + /// + std::vector GetResult(size_t imgIdx, int /*keep_topk*/, const std::vector& outputs, cv::Size frameSize) + { + std::vector resBoxes; + + //0: name: images, size: 1x3x640x640 + //1: name: output0, size: 1x84x8400 + + const float fw = static_cast(frameSize.width) / static_cast(m_resizedROI.width); + const float fh = static_cast(frameSize.height) / static_cast(m_resizedROI.height); + + auto output = outputs[0]; + + size_t ncInd = 1; + size_t lenInd = 2; + int nc = static_cast(m_outpuDims[0].d[ncInd] - 4); + int dimensions = nc + 4; + size_t len = static_cast(m_outpuDims[0].d[lenInd]);// / m_params.explicitBatchSize; + //auto Volume = [](const nvinfer1::Dims& d) + //{ + // return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies()); + //}; + auto volume = len * m_outpuDims[0].d[ncInd]; // Volume(m_outpuDims[0]); + output += volume * imgIdx; + //std::cout << "len = " << len << ", nc = " << nc << ", m_params.confThreshold = " << m_params.confThreshold << ", volume = " << volume << std::endl; + + cv::Mat rawMemory(1, dimensions * static_cast(len), CV_32FC1, output); + rawMemory = rawMemory.reshape(1, dimensions); + cv::transpose(rawMemory, rawMemory); + output = (float*)rawMemory.data; + + //std::cout << "output[0] mem:\n"; + //for (size_t ii = 0; ii < 100; ++ii) + //{ + // std::cout << ii << ": "; + // for (size_t jj = 0; jj < 20; ++jj) + // { + // std::cout << output[ii * 20 + jj] << " "; + // } + // std::cout << ";" << std::endl; + //} + //std::cout << ";" << std::endl; + + std::vector classIds; + std::vector confidences; + std::vector rectBoxes; + classIds.reserve(len); + confidences.reserve(len); + rectBoxes.reserve(len); + + for (size_t i = 0; i < len; ++i) + { + // Box + size_t k = i * (nc + 4); + + int classId = -1; + float objectConf = 0.f; + for (int j = 0; j < nc; ++j) + { + const float classConf = output[k + 4 + j]; + if (classConf > objectConf) + { + classId = j; + objectConf = classConf; + } + } + + //if (i == 0) 
+ // std::cout << i << ": object_conf = " << object_conf << ", class_conf = " << class_conf << ", classId = " << classId << ", rect = " << cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height)) << std::endl; + + if (objectConf >= m_params.m_confThreshold) + { + classIds.push_back(classId); + confidences.push_back(objectConf); + + // (center x, center y, width, height) to (x, y, w, h) + float x = fw * (output[k] - output[k + 2] / 2 - m_resizedROI.x); + float y = fh * (output[k + 1] - output[k + 3] / 2 - m_resizedROI.y); + float width = fw * output[k + 2]; + float height = fh * output[k + 3]; + rectBoxes.emplace_back(cvRound(x), cvRound(y), cvRound(width), cvRound(height)); + } + } + + // Non-maximum suppression to eliminate redudant overlapping boxes + std::vector indices; + cv::dnn::NMSBoxes(rectBoxes, confidences, m_params.m_confThreshold, m_params.m_nmsThreshold, indices); + resBoxes.reserve(indices.size()); + + for (size_t bi = 0; bi < indices.size(); ++bi) + { + resBoxes.emplace_back(classIds[indices[bi]], confidences[indices[bi]], rectBoxes[indices[bi]]); + } + + return resBoxes; + } +}; diff --git a/src/Detector/tensorrt_onnx/YoloONNXv6_bb.hpp b/src/Detector/tensorrt_onnx/YoloONNXv6_bb.hpp new file mode 100644 index 000000000..5493f55d0 --- /dev/null +++ b/src/Detector/tensorrt_onnx/YoloONNXv6_bb.hpp @@ -0,0 +1,200 @@ +#pragma once + +#include "YoloONNX.hpp" + +/// +/// \brief The YOLOv6_bb_onnx class +/// +class YOLOv6_bb_onnx : public YoloONNX +{ +public: + YOLOv6_bb_onnx(std::vector& inputTensorNames, std::vector& outputTensorNames) + { + inputTensorNames.push_back("image_arrays"); + outputTensorNames.push_back("outputs"); + } + +protected: + /// + /// \brief GetResult + /// \param output + /// \return + /// + std::vector GetResult(size_t imgIdx, int /*keep_topk*/, const std::vector& outputs, cv::Size frameSize) + { + std::vector resBoxes; + + const float fw = static_cast(frameSize.width) / static_cast(m_resizedROI.width); + const float fh = 
static_cast(frameSize.height) / static_cast(m_resizedROI.height); + + if (outputs.size() == 4) + { + auto dets = reinterpret_cast(outputs[0]); + auto boxes = outputs[1]; + auto scores = outputs[2]; + auto classes = reinterpret_cast(outputs[3]); + + int objectsCount = static_cast(m_outpuDims[1].d[1]); + + //std::cout << "Dets[" << imgIdx << "] = " << dets[imgIdx] << ", objectsCount = " << objectsCount << std::endl; + + const size_t step1 = imgIdx * objectsCount; + const size_t step2 = 4 * imgIdx * objectsCount; + for (size_t i = 0; i < static_cast(dets[imgIdx]); ++i) + { + // Box + const size_t k = i * 4; + float class_conf = scores[i + step1]; + int classId = classes[i + step1]; + if (class_conf >= m_params.m_confThreshold) + { + float x = fw * (boxes[k + 0 + step2] - m_resizedROI.x); + float y = fh * (boxes[k + 1 + step2] - m_resizedROI.y); + float width = fw * boxes[k + 2 + step2] - x; + float height = fh * boxes[k + 3 + step2] - y; + + //if (i == 0) + //{ + // std::cout << i << ": class_conf = " << class_conf << ", classId = " << classId << " (" << classes[i + step1] << "), rect = " << cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height)) << std::endl; + // std::cout << "boxes = " << boxes[k + 0 + step2] << ", " << boxes[k + 1 + step2] << ", " << boxes[k + 2 + step2] << ", " << boxes[k + 3 + step2] << std::endl; + //} + resBoxes.emplace_back(classId, class_conf, cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height))); + } + } + } + else if (outputs.size() == 1) + { + auto output = outputs[0]; + + size_t ncInd = 2; + size_t lenInd = 1; + if (m_outpuDims[0].nbDims == 2) + { + ncInd = 1; + lenInd = 0; + } + int nc = static_cast(m_outpuDims[0].d[ncInd] - 5); + size_t len = static_cast(m_outpuDims[0].d[lenInd]);// / m_params.explicitBatchSize; + //auto Volume = [](const nvinfer1::Dims& d) + //{ + // return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies()); + //}; + auto volume = len * m_outpuDims[0].d[ncInd]; // Volume(m_outpuDims[0]); 
+ output += volume * imgIdx; + //std::cout << "len = " << len << ", nc = " << nc << ", m_params.confThreshold = " << m_params.confThreshold << ", volume = " << volume << std::endl; + + if (m_outpuDims[0].nbDims == 2) // With nms + { + std::vector classIds; + std::vector confidences; + std::vector rectBoxes; + classIds.reserve(len); + confidences.reserve(len); + rectBoxes.reserve(len); + + for (size_t i = 0; i < len; ++i) + { + // Box + size_t k = i * 7; + float class_conf = output[k + 6]; + int classId = cvRound(output[k + 5]); + if (class_conf >= m_params.m_confThreshold) + { + float x = fw * (output[k + 1] - m_resizedROI.x); + float y = fh * (output[k + 2] - m_resizedROI.y); + float width = fw * (output[k + 3] - output[k + 1]); + float height = fh * (output[k + 4] - output[k + 2]); + + //if (i == 0) + // std::cout << i << ": class_conf = " << class_conf << ", classId = " << classId << ", rect = " << cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height)) << std::endl; + + classIds.push_back(classId); + confidences.push_back(class_conf); + rectBoxes.emplace_back(cvRound(x), cvRound(y), cvRound(width), cvRound(height)); + + //bboxes.emplace_back(classId, class_conf, cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height))); + } + } + + // Non-maximum suppression to eliminate redudant overlapping boxes + std::vector indices; + cv::dnn::NMSBoxes(rectBoxes, confidences, m_params.m_confThreshold, m_params.m_nmsThreshold, indices); + resBoxes.reserve(indices.size()); + + for (size_t bi = 0; bi < indices.size(); ++bi) + { + resBoxes.emplace_back(classIds[indices[bi]], confidences[indices[bi]], rectBoxes[indices[bi]]); + } + } + else // Without nms + { + std::vector classIds; + std::vector confidences; + std::vector rectBoxes; + classIds.reserve(len); + confidences.reserve(len); + rectBoxes.reserve(len); + + for (size_t i = 0; i < len; ++i) + { + // Box + size_t k = i * (nc + 5); + float object_conf = output[k + 4]; + + //if (i == 0) + //{ + // 
std::cout << "mem" << i << ": "; + // for (size_t ii = 0; ii < nc + 5; ++ii) + // { + // std::cout << output[k + ii] << " "; + // } + // std::cout << std::endl; + //} + + if (object_conf >= m_params.m_confThreshold) + { + // (center x, center y, width, height) to (x, y, w, h) + float x = fw * (output[k] - output[k + 2] / 2 - m_resizedROI.x); + float y = fh * (output[k + 1] - output[k + 3] / 2 - m_resizedROI.y); + float width = fw * output[k + 2]; + float height = fh * output[k + 3]; + + // Classes + float class_conf = output[k + 5]; + int classId = 0; + + for (int j = 1; j < nc; ++j) + { + if (class_conf < output[k + 5 + j]) + { + classId = j; + class_conf = output[k + 5 + j]; + } + } + + class_conf *= object_conf; + + //if (i == 0) + // std::cout << i << ": object_conf = " << object_conf << ", class_conf = " << class_conf << ", classId = " << classId << ", rect = " << cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height)) << std::endl; + + classIds.push_back(classId); + confidences.push_back(class_conf); + rectBoxes.emplace_back(cvRound(x), cvRound(y), cvRound(width), cvRound(height)); + } + } + + // Non-maximum suppression to eliminate redudant overlapping boxes + std::vector indices; + cv::dnn::NMSBoxes(rectBoxes, confidences, m_params.m_confThreshold, m_params.m_nmsThreshold, indices); + resBoxes.reserve(indices.size()); + + for (size_t bi = 0; bi < indices.size(); ++bi) + { + resBoxes.emplace_back(classIds[indices[bi]], confidences[indices[bi]], rectBoxes[indices[bi]]); + } + } + } + + return resBoxes; + } +}; diff --git a/src/Detector/tensorrt_onnx/YoloONNXv7_bb.hpp b/src/Detector/tensorrt_onnx/YoloONNXv7_bb.hpp new file mode 100644 index 000000000..1d6b72601 --- /dev/null +++ b/src/Detector/tensorrt_onnx/YoloONNXv7_bb.hpp @@ -0,0 +1,204 @@ +#pragma once + +#include "YoloONNX.hpp" + +/// +/// \brief The YOLOv7_bb_onnx class +/// +class YOLOv7_bb_onnx : public YoloONNX +{ +public: + YOLOv7_bb_onnx(std::vector& inputTensorNames, std::vector& 
outputTensorNames) + { + inputTensorNames.push_back("images"); + outputTensorNames.push_back("output"); + outputTensorNames.push_back("num_dets"); // batch x 1 + outputTensorNames.push_back("det_boxes"); // batch x 100 x 4 + outputTensorNames.push_back("det_scores"); // batch x 100 + outputTensorNames.push_back("det_classes"); // batch x 100 + } + +protected: + /// + /// \brief GetResult + /// \param output + /// \return + /// + std::vector GetResult(size_t imgIdx, int /*keep_topk*/, const std::vector& outputs, cv::Size frameSize) + { + std::vector resBoxes; + + const float fw = static_cast(frameSize.width) / static_cast(m_resizedROI.width); + const float fh = static_cast(frameSize.height) / static_cast(m_resizedROI.height); + + if (outputs.size() == 4) + { + auto dets = reinterpret_cast(outputs[0]); + auto boxes = outputs[1]; + auto scores = outputs[2]; + auto classes = reinterpret_cast(outputs[3]); + + int objectsCount = static_cast(m_outpuDims[1].d[1]); + + //std::cout << "Dets[" << imgIdx << "] = " << dets[imgIdx] << ", objectsCount = " << objectsCount << std::endl; + + const size_t step1 = imgIdx * objectsCount; + const size_t step2 = 4 * imgIdx * objectsCount; + for (size_t i = 0; i < static_cast(dets[imgIdx]); ++i) + { + // Box + const size_t k = i * 4; + float class_conf = scores[i + step1]; + int classId = classes[i + step1]; + if (class_conf >= m_params.m_confThreshold) + { + float x = fw * (boxes[k + 0 + step2] - m_resizedROI.x); + float y = fh * (boxes[k + 1 + step2] - m_resizedROI.y); + float width = fw * boxes[k + 2 + step2] - x; + float height = fh * boxes[k + 3 + step2] - y; + + //if (i == 0) + //{ + // std::cout << i << ": class_conf = " << class_conf << ", classId = " << classId << " (" << classes[i + step1] << "), rect = " << cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height)) << std::endl; + // std::cout << "boxes = " << boxes[k + 0 + step2] << ", " << boxes[k + 1 + step2] << ", " << boxes[k + 2 + step2] << ", " << boxes[k + 3 + 
step2] << std::endl; + //} + resBoxes.emplace_back(classId, class_conf, cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height))); + } + } + } + else if (outputs.size() == 1) + { + auto output = outputs[0]; + + size_t ncInd = 2; + size_t lenInd = 1; + if (m_outpuDims[0].nbDims == 2) + { + ncInd = 1; + lenInd = 0; + } + int nc = static_cast(m_outpuDims[0].d[ncInd] - 5); + size_t len = static_cast(m_outpuDims[0].d[lenInd]) / m_params.m_explicitBatchSize; + //auto Volume = [](const nvinfer1::Dims& d) + //{ + // return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies()); + //}; + auto volume = len * m_outpuDims[0].d[ncInd]; // Volume(m_outpuDims[0]); + output += volume * imgIdx; + //std::cout << "len = " << len << ", nc = " << nc << ", m_params.confThreshold = " << m_params.confThreshold << ", volume = " << volume << std::endl; + + if (m_outpuDims[0].nbDims == 2) // With nms + { + std::vector classIds; + std::vector confidences; + std::vector rectBoxes; + classIds.reserve(len); + confidences.reserve(len); + rectBoxes.reserve(len); + + for (size_t i = 0; i < len; ++i) + { + // Box + size_t k = i * 7; + float class_conf = output[k + 6]; + int classId = cvRound(output[k + 5]); + if (class_conf >= m_params.m_confThreshold) + { + float x = fw * (output[k + 1] - m_resizedROI.x); + float y = fh * (output[k + 2] - m_resizedROI.y); + float width = fw * (output[k + 3] - output[k + 1]); + float height = fh * (output[k + 4] - output[k + 2]); + + //if (i == 0) + // std::cout << i << ": class_conf = " << class_conf << ", classId = " << classId << ", rect = " << cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height)) << std::endl; + + classIds.push_back(classId); + confidences.push_back(class_conf); + rectBoxes.emplace_back(cvRound(x), cvRound(y), cvRound(width), cvRound(height)); + + //bboxes.emplace_back(classId, class_conf, cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height))); + } + } + + // Non-maximum suppression to eliminate redudant 
overlapping boxes + std::vector indices; + cv::dnn::NMSBoxes(rectBoxes, confidences, m_params.m_confThreshold, m_params.m_nmsThreshold, indices); + resBoxes.reserve(indices.size()); + + for (size_t bi = 0; bi < indices.size(); ++bi) + { + resBoxes.emplace_back(classIds[indices[bi]], confidences[indices[bi]], rectBoxes[indices[bi]]); + } + } + else // Without nms + { + std::vector classIds; + std::vector confidences; + std::vector rectBoxes; + classIds.reserve(len); + confidences.reserve(len); + rectBoxes.reserve(len); + + for (size_t i = 0; i < len; ++i) + { + // Box + size_t k = i * (nc + 5); + float object_conf = output[k + 4]; + + //if (i == 0) + //{ + // std::cout << "mem" << i << ": "; + // for (size_t ii = 0; ii < nc + 5; ++ii) + // { + // std::cout << output[k + ii] << " "; + // } + // std::cout << std::endl; + //} + + if (object_conf >= m_params.m_confThreshold) + { + // (center x, center y, width, height) to (x, y, w, h) + float x = fw * (output[k] - output[k + 2] / 2 - m_resizedROI.x); + float y = fh * (output[k + 1] - output[k + 3] / 2 - m_resizedROI.y); + float width = fw * output[k + 2]; + float height = fh * output[k + 3]; + + // Classes + float class_conf = output[k + 5]; + int classId = 0; + + for (int j = 1; j < nc; ++j) + { + if (class_conf < output[k + 5 + j]) + { + classId = j; + class_conf = output[k + 5 + j]; + } + } + + class_conf *= object_conf; + + //if (i == 0) + // std::cout << i << ": object_conf = " << object_conf << ", class_conf = " << class_conf << ", classId = " << classId << ", rect = " << cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height)) << std::endl; + + classIds.push_back(classId); + confidences.push_back(class_conf); + rectBoxes.emplace_back(cvRound(x), cvRound(y), cvRound(width), cvRound(height)); + } + } + + // Non-maximum suppression to eliminate redudant overlapping boxes + std::vector indices; + cv::dnn::NMSBoxes(rectBoxes, confidences, m_params.m_confThreshold, m_params.m_nmsThreshold, indices); + 
resBoxes.reserve(indices.size()); + + for (size_t bi = 0; bi < indices.size(); ++bi) + { + resBoxes.emplace_back(classIds[indices[bi]], confidences[indices[bi]], rectBoxes[indices[bi]]); + } + } + } + + return resBoxes; + } +}; diff --git a/src/Detector/tensorrt_onnx/YoloONNXv7_instance.hpp b/src/Detector/tensorrt_onnx/YoloONNXv7_instance.hpp new file mode 100644 index 000000000..247e352c9 --- /dev/null +++ b/src/Detector/tensorrt_onnx/YoloONNXv7_instance.hpp @@ -0,0 +1,260 @@ +#pragma once + +#include "YoloONNX.hpp" +#include "../../mtracking/defines.h" + +/// +/// \brief The YOLOv7_instance_onnx class +/// +class YOLOv7_instance_onnx : public YoloONNX +{ +public: + YOLOv7_instance_onnx(std::vector& inputTensorNames, std::vector& outputTensorNames) + { + inputTensorNames.push_back("images"); + outputTensorNames.push_back("output"); + outputTensorNames.push_back("516"); + } + +protected: + /// + /// \brief GetResult + /// \param output + /// \return + /// + std::vector GetResult(size_t imgIdx, int /*keep_topk*/, const std::vector& outputs, cv::Size frameSize) + { + std::vector resBoxes; + + const float fw = static_cast(frameSize.width) / static_cast(m_resizedROI.width); + const float fh = static_cast(frameSize.height) / static_cast(m_resizedROI.height); + + size_t outInd = (outputs.size() == 0) ? 0 : 1; + size_t segInd = (outputs.size() == 0) ? 
1 : 0; + + auto output = outputs[0]; + + //0: name: images, size : 1x3x640x640 + //1 : name : 516, size : 1x32x160x160 + //2 : name : onnx::Slice_542, size : 1x3x80x80x117 + //3 : name : onnx::Slice_710, size : 1x3x40x40x117 + //4 : name : onnx::Slice_878, size : 1x3x20x20x117 + //5 : name : output, size : 1x25200x117 + // 25200 = 3x80x80 + 3x40x40 + 3x20x20 + // 117 = x, y, w, h, c, 80 classes, 32 seg ancors + // 80 * 8 = 640, 40 * 16 = 640, 20 * 32 = 640 + + size_t ncInd = 2; + size_t lenInd = 1; + if (m_outpuDims[outInd].nbDims == 2) + { + ncInd = 1; + lenInd = 0; + } + int nc = static_cast(m_outpuDims[outInd].d[ncInd] - 5 - 32); + size_t len = static_cast(m_outpuDims[outInd].d[lenInd]);// / m_params.explicitBatchSize; + //auto Volume = [](const nvinfer1::Dims& d) + //{ + // return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies()); + //}; + auto volume = len * m_outpuDims[outInd].d[ncInd]; // Volume(m_outpuDims[0]); + output += volume * imgIdx; + //std::cout << "len = " << len << ", nc = " << nc << ", m_params.confThreshold = " << m_params.confThreshold << ", volume = " << volume << std::endl; + +#if 1 + int segWidth = 160; + int segHeight = 160; + int segChannels = 32; + + if (outputs.size() > 1) + { + //std::cout << "516 nbDims: " << m_outpuDims[segInd].nbDims << ", "; + //for (size_t i = 0; i < m_outpuDims[segInd].nbDims; ++i) + //{ + // std::cout << m_outpuDims[segInd].d[i]; + // if (i + 1 != m_outpuDims[segInd].nbDims) + // std::cout << "x"; + //} + //std::cout << std::endl; +// + //std::cout << "output nbDims: " << m_outpuDims[outInd].nbDims << ", "; + //for (size_t i = 0; i < m_outpuDims[outInd].nbDims; ++i) + //{ + // std::cout << m_outpuDims[outInd].d[i]; + // if (i + 1 != m_outpuDims[outInd].nbDims) + // std::cout << "x"; + //} + //std::cout << std::endl; + + segChannels = static_cast(m_outpuDims[segInd].d[1]); + segWidth = static_cast(m_outpuDims[segInd].d[2]); + segHeight = static_cast(m_outpuDims[segInd].d[3]); + } + cv::Mat maskProposals; 
+ std::vector> picked_proposals; + int net_width = nc + 5 + segChannels; +#endif + + std::vector classIds; + std::vector confidences; + std::vector rectBoxes; + classIds.reserve(len); + confidences.reserve(len); + rectBoxes.reserve(len); + + for (size_t i = 0; i < len; ++i) + { + // Box + size_t k = i * (nc + 5 + 32); + float object_conf = output[k + 4]; + + if (i == 0) + { + //std::cout << "without nms: mem" << i << ": "; + //for (size_t ii = 0; ii < nc; ++ii) + //{ + // std::cout << output[k + ii] << " "; + //} + //std::cout << ";" << std::endl; + //for (size_t ii = nc; ii < nc + 4; ++ii) + //{ + // std::cout << output[k + ii] << " "; + //} + //std::cout << ";" << std::endl; + //for (size_t ii = nc + 4; ii < nc + 4 + 32; ++ii) + //{ + // std::cout << output[k + ii] << " "; + //} + //std::cout << ";" << std::endl; + } + + if (object_conf >= m_params.m_confThreshold) + { + // (center x, center y, width, height) to (x, y, w, h) + float x = output[k] - output[k + 2] / 2; + float y = output[k + 1] - output[k + 3] / 2; + float width = output[k + 2]; + float height = output[k + 3]; + + // Classes + float class_conf = output[k + 5]; + int classId = 0; + + for (int j = 1; j < nc; ++j) + { + if (class_conf < output[k + 5 + j]) + { + classId = j; + class_conf = output[k + 5 + j]; + } + } + + class_conf *= object_conf; + + //if (i == 0) + // std::cout << i << ": object_conf = " << object_conf << ", class_conf = " << class_conf << ", classId = " << classId << ", rect = " << cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height)) << std::endl; + + classIds.push_back(classId); + confidences.push_back(class_conf); + rectBoxes.emplace_back(cvRound(x), cvRound(y), cvRound(width), cvRound(height)); + + std::vector temp_proto(output + k + 5 + nc, output + k + net_width); + picked_proposals.push_back(temp_proto); + + if (rectBoxes.size() >= CV_CN_MAX - 1) + break; + } + } + + // Non-maximum suppression to eliminate redudant overlapping boxes + std::vector indices; + 
cv::dnn::NMSBoxes(rectBoxes, confidences, m_params.m_confThreshold, m_params.m_nmsThreshold, indices); + resBoxes.reserve(indices.size()); + + for (size_t bi = 0; bi < indices.size(); ++bi) + { + resBoxes.emplace_back(classIds[indices[bi]], confidences[indices[bi]], Clamp(rectBoxes[indices[bi]], frameSize)); + maskProposals.push_back(cv::Mat(picked_proposals[indices[bi]]).t()); + } + + if (!maskProposals.empty()) + { + // Mask processing + const float* pdata = outputs[1]; + std::vector maskFloat(pdata, pdata + segChannels * segWidth * segHeight); + + int INPUT_W = static_cast(m_inputDims[0].d[3]); + int INPUT_H = static_cast(m_inputDims[0].d[2]); + static constexpr float MASK_THRESHOLD = 0.5; + + cv::Mat mask_protos = cv::Mat(maskFloat); + cv::Mat protos = mask_protos.reshape(0, { segChannels, segWidth * segHeight }); + + cv::Mat matmulRes = (maskProposals * protos).t();//n*32 32*25600 + cv::Mat masks = matmulRes.reshape(static_cast(resBoxes.size()), { segWidth, segHeight }); + std::vector maskChannels; + split(masks, maskChannels); + for (size_t i = 0; i < resBoxes.size(); ++i) + { + cv::Mat dest; + cv::Mat mask; + //sigmoid + cv::exp(-maskChannels[i], dest); + dest = 1.0 / (1.0 + dest);//160*160 + + int padw = 0; + int padh = 0; + cv::Rect roi(int((float)padw / INPUT_W * segWidth), int((float)padh / INPUT_H * segHeight), int(segWidth - padw / 2), int(segHeight - padh / 2)); + dest = dest(roi); + + cv::resize(dest, mask, cv::Size(INPUT_W, INPUT_H), cv::INTER_NEAREST); + + resBoxes[i].m_boxMask = mask(resBoxes[i].m_brect) > MASK_THRESHOLD; + +#if 0 + static int globalObjInd = 0; + SaveMat(resBoxes[i].m_boxMask, std::to_string(globalObjInd++), ".png", "tmp", true); +#endif + +#if 1 + std::vector> contours; +#if ((CV_VERSION_MAJOR > 4) || ((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 9))) + cv::findContoursLinkRuns(resBoxes[i].m_boxMask, contours); +#else + cv::findContours(resBoxes[i].m_boxMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); 
+#endif + for (const auto& contour : contours) + { + cv::Rect br = cv::boundingRect(contour); + + if (br.width >= 4 && + br.height >= 4) + { + int dx = resBoxes[i].m_brect.x; + int dy = resBoxes[i].m_brect.y; + + cv::RotatedRect rr = (contour.size() < 5) ? cv::minAreaRect(contour) : cv::fitEllipse(contour); + rr.center.x = (rr.center.x + dx - m_resizedROI.x) * fw; + rr.center.y = (rr.center.y + dy - m_resizedROI.y) * fw; + rr.size.width *= fw; + rr.size.height *= fh; + + br.x = cvRound((dx + br.x - m_resizedROI.x) * fw); + br.y = cvRound((dy + br.y - m_resizedROI.y) * fh); + br.width = cvRound(br.width * fw); + br.height = cvRound(br.height * fh); + + resBoxes[i].m_brect = br; + resBoxes[i].m_rrect = rr; + + //std::cout << "resBoxes[" << i << "] br: " << br << ", rr: (" << rr.size << " from " << rr.center << ", " << rr.angle << ")" << std::endl; + + break; + } + } +#endif + } + } + return resBoxes; + } +}; diff --git a/src/Detector/tensorrt_onnx/YoloONNXv8_bb.hpp b/src/Detector/tensorrt_onnx/YoloONNXv8_bb.hpp new file mode 100644 index 000000000..0efdbdd04 --- /dev/null +++ b/src/Detector/tensorrt_onnx/YoloONNXv8_bb.hpp @@ -0,0 +1,118 @@ +#pragma once + +#include "YoloONNX.hpp" + +/// +/// \brief The YOLOv8_bb_onnx class +/// +class YOLOv8_bb_onnx : public YoloONNX +{ +public: + YOLOv8_bb_onnx(std::vector& inputTensorNames, std::vector& outputTensorNames) + { + inputTensorNames.push_back("images"); + outputTensorNames.push_back("output0"); + } + +protected: + /// + /// \brief GetResult + /// \param output + /// \return + /// + std::vector GetResult(size_t imgIdx, int /*keep_topk*/, const std::vector& outputs, cv::Size frameSize) + { + std::vector resBoxes; + + //0: name: images, size: 1x3x640x640 + //1: name: output0, size: 1x84x8400 + + const float fw = static_cast(frameSize.width) / static_cast(m_resizedROI.width); + const float fh = static_cast(frameSize.height) / static_cast(m_resizedROI.height); + + auto output = outputs[0]; + + size_t ncInd = 1; + size_t 
lenInd = 2; + int nc = static_cast(m_outpuDims[0].d[ncInd] - 4); + int dimensions = nc + 4; + size_t len = static_cast(m_outpuDims[0].d[lenInd]);// / m_params.explicitBatchSize; + //auto Volume = [](const nvinfer1::Dims& d) + //{ + // return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies()); + //}; + auto volume = len * m_outpuDims[0].d[ncInd]; // Volume(m_outpuDims[0]); + output += volume * imgIdx; + //std::cout << "len = " << len << ", nc = " << nc << ", m_params.confThreshold = " << m_params.confThreshold << ", volume = " << volume << std::endl; + + cv::Mat rawMemory(1, dimensions * static_cast(len), CV_32FC1, output); + rawMemory = rawMemory.reshape(1, dimensions); + cv::transpose(rawMemory, rawMemory); + output = (float*)rawMemory.data; + + //std::cout << "output[0] mem:\n"; + //for (size_t ii = 0; ii < 100; ++ii) + //{ + // std::cout << ii << ": "; + // for (size_t jj = 0; jj < 20; ++jj) + // { + // std::cout << output[ii * 20 + jj] << " "; + // } + // std::cout << ";" << std::endl; + //} + //std::cout << ";" << std::endl; + + std::vector classIds; + std::vector confidences; + std::vector rectBoxes; + classIds.reserve(len); + confidences.reserve(len); + rectBoxes.reserve(len); + + for (size_t i = 0; i < len; ++i) + { + // Box + size_t k = i * (nc + 4); + + int classId = -1; + float objectConf = 0.f; + for (int j = 0; j < nc; ++j) + { + const float classConf = output[k + 4 + j]; + if (classConf > objectConf) + { + classId = j; + objectConf = classConf; + } + } + + //if (i == 0) + // std::cout << i << ": object_conf = " << object_conf << ", class_conf = " << class_conf << ", classId = " << classId << ", rect = " << cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height)) << std::endl; + + if (objectConf >= m_params.m_confThreshold) + { + classIds.push_back(classId); + confidences.push_back(objectConf); + + // (center x, center y, width, height) to (x, y, w, h) + float x = fw * (output[k] - output[k + 2] / 2 - m_resizedROI.x); + float y = fh * 
(output[k + 1] - output[k + 3] / 2 - m_resizedROI.y); + float width = fw * output[k + 2]; + float height = fh * output[k + 3]; + rectBoxes.emplace_back(cvRound(x), cvRound(y), cvRound(width), cvRound(height)); + } + } + + // Non-maximum suppression to eliminate redudant overlapping boxes + std::vector indices; + cv::dnn::NMSBoxes(rectBoxes, confidences, m_params.m_confThreshold, m_params.m_nmsThreshold, indices); + resBoxes.reserve(indices.size()); + + for (size_t bi = 0; bi < indices.size(); ++bi) + { + resBoxes.emplace_back(classIds[indices[bi]], confidences[indices[bi]], rectBoxes[indices[bi]]); + } + + return resBoxes; + } +}; diff --git a/src/Detector/tensorrt_onnx/YoloONNXv8_instance.hpp b/src/Detector/tensorrt_onnx/YoloONNXv8_instance.hpp new file mode 100644 index 000000000..6422cc47c --- /dev/null +++ b/src/Detector/tensorrt_onnx/YoloONNXv8_instance.hpp @@ -0,0 +1,312 @@ +#pragma once + +#include "YoloONNX.hpp" +#include "../../mtracking/defines.h" + +/// +/// \brief The YOLOv8_instance_onnx class +/// +class YOLOv8_instance_onnx : public YoloONNX +{ +public: + YOLOv8_instance_onnx(std::vector& inputTensorNames, std::vector& outputTensorNames) + { + inputTensorNames.push_back("images"); + outputTensorNames.push_back("output0"); + outputTensorNames.push_back("output1"); + } + +protected: + /// + /// \brief GetResult + /// \param output + /// \return + /// + std::vector GetResult(size_t imgIdx, int /*keep_topk*/, const std::vector& outputs, cv::Size frameSize) + { + std::vector resBoxes; + + const float fw = static_cast(frameSize.width) / static_cast(m_resizedROI.width); + const float fh = static_cast(frameSize.height) / static_cast(m_resizedROI.height); + + size_t outInd = 0; + size_t segInd = 1; + + auto output = outputs[0]; + + //std::cout << "output[1] mem:\n"; + //auto output1 = outputs[1]; + //for (size_t ii = 0; ii < 100; ++ii) + //{ + // std::cout << ii << ": "; + // for (size_t jj = 0; jj < 20; ++jj) + // { + // std::cout << output1[ii * 20 + jj] << 
" "; + // } + // std::cout << ";" << std::endl; + //} + //std::cout << ";" << std::endl; + + //0: name: images, size: 1x3x640x640 + //1: name: output0, size: 1x116x8400 + //2: name: output1, size: 1x32x160x160 + // 25200 = 3x80x80 + 3x40x40 + 3x20x20 + // 116 = x, y, w, h, 80 classes, 32 seg ancors + // 80 * 8 = 640, 40 * 16 = 640, 20 * 32 = 640 + + size_t ncInd = 1; + size_t lenInd = 2; + int nc = static_cast(m_outpuDims[outInd].d[ncInd] - 4 - 32); + int dimensions = nc + 32 + 4; + size_t len = static_cast(m_outpuDims[outInd].d[lenInd]);// / m_params.explicitBatchSize; + //auto Volume = [](const nvinfer1::Dims& d) + //{ + // return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies()); + //}; + auto volume = len * m_outpuDims[outInd].d[ncInd]; // Volume(m_outpuDims[0]); + output += volume * imgIdx; + //std::cout << "len = " << len << ", nc = " << nc << ", m_params.confThreshold = " << m_params.confThreshold << ", volume = " << volume << std::endl; + + cv::Mat rawMemory(1, dimensions * static_cast(len), CV_32FC1, output); + rawMemory = rawMemory.reshape(1, dimensions); + cv::transpose(rawMemory, rawMemory); + output = (float*)rawMemory.data; + + //std::cout << "output[0] mem:\n"; + //for (size_t ii = 0; ii < 100; ++ii) + //{ + // std::cout << ii << ": "; + // for (size_t jj = 0; jj < 20; ++jj) + // { + // std::cout << output[ii * 20 + jj] << " "; + // } + // std::cout << ";" << std::endl; + //} + //std::cout << ";" << std::endl; + +#if 1 + int segWidth = 160; + int segHeight = 160; + int segChannels = 32; + + if (outputs.size() > 1) + { + //std::cout << "output1 nbDims: " << m_outpuDims[segInd].nbDims << ", "; + //for (size_t i = 0; i < m_outpuDims[segInd].nbDims; ++i) + //{ + // std::cout << m_outpuDims[segInd].d[i]; + // if (i + 1 != m_outpuDims[segInd].nbDims) + // std::cout << "x"; + //} + //std::cout << std::endl; + //std::cout << "output nbDims: " << m_outpuDims[outInd].nbDims << ", "; + //for (size_t i = 0; i < m_outpuDims[outInd].nbDims; ++i) + //{ + 
// std::cout << m_outpuDims[outInd].d[i]; + // if (i + 1 != m_outpuDims[outInd].nbDims) + // std::cout << "x"; + //} + //std::cout << std::endl; + + segChannels = static_cast(m_outpuDims[segInd].d[1]); + segWidth = static_cast(m_outpuDims[segInd].d[2]); + segHeight = static_cast(m_outpuDims[segInd].d[3]); + } + cv::Mat maskProposals; + std::vector> picked_proposals; + int net_width = nc + 4 + segChannels; +#endif + + std::vector classIds; + std::vector confidences; + std::vector rectBoxes; + classIds.reserve(len); + confidences.reserve(len); + rectBoxes.reserve(len); + + for (size_t i = 0; i < len; ++i) + { + // Box + size_t k = i * (nc + 4 + 32); + + int classId = -1; + float objectConf = 0.f; + for (int j = 0; j < nc; ++j) + { + const float classConf = output[k + 4 + j]; + if (classConf > objectConf) + { + classId = j; + objectConf = classConf; + } + } + + //if (objectConf > 0.1) + // std::cout << i << ": objectConf = " << objectConf << ", classId = " << classId << std::endl; + + //if (i == 0) + //{ + // std::cout << "without nms: mem" << i << ": "; + // for (size_t ii = 0; ii < 4; ++ii) + // { + // std::cout << output[k + ii] << " "; + // } + // std::cout << ";" << std::endl; + // for (size_t ii = 4; ii < nc + 4; ++ii) + // { + // std::cout << output[k + ii] << " "; + // } + // std::cout << ";" << std::endl; + // for (size_t ii = nc + 4; ii < nc + 4 + 32; ++ii) + // { + // std::cout << output[k + ii] << " "; + // } + // std::cout << ";" << std::endl; + //} + + if (objectConf >= m_params.m_confThreshold) + { + // (center x, center y, width, height) to (x, y, w, h) + float x = output[k] - output[k + 2] / 2; + float y = output[k + 1] - output[k + 3] / 2; + float width = output[k + 2]; + float height = output[k + 3]; + + //auto ClampToFrame = [](float& v, float& size, int hi) -> int + //{ + // int res = 0; +// + // if (size < 1) + // size = 0; +// + // if (v < 0) + // { + // res = v; + // v = 0; + // return res; + // } + // else if (v + size > hi - 1) + // { + // 
res = v; + // v = hi - 1 - size; + // if (v < 0) + // { + // size += v; + // v = 0; + // } + // res -= v; + // return res; + // } + // return res; + //}; + //ClampToFrame(x, width, frameSize.width); + //ClampToFrame(y, height, frameSize.height); + + if (width > 4 && height > 4) + { + classIds.push_back(classId); + confidences.push_back(objectConf); + rectBoxes.emplace_back(cvRound(x), cvRound(y), cvRound(width), cvRound(height)); + + std::vector temp_proto(output + k + 4 + nc, output + k + net_width); + picked_proposals.push_back(temp_proto); + } + } + } + + //std::cout << "rectBoxes.size = " << rectBoxes.size() << std::endl; + + // Non-maximum suppression to eliminate redudant overlapping boxes + std::vector indices; + cv::dnn::NMSBoxes(rectBoxes, confidences, m_params.m_confThreshold, m_params.m_nmsThreshold, indices); + resBoxes.reserve(indices.size()); + + for (size_t bi = 0; bi < indices.size(); ++bi) + { + resBoxes.emplace_back(classIds[indices[bi]], confidences[indices[bi]], Clamp(rectBoxes[indices[bi]], frameSize)); + maskProposals.push_back(cv::Mat(picked_proposals[indices[bi]]).t()); + } + + if (!maskProposals.empty()) + { + // Mask processing + const float* pdata = outputs[1]; + std::vector maskFloat(pdata, pdata + segChannels * segWidth * segHeight); + + int INPUT_W = static_cast(m_inputDims[0].d[3]); + int INPUT_H = static_cast(m_inputDims[0].d[2]); + static constexpr float MASK_THRESHOLD = 0.5; + + cv::Mat mask_protos = cv::Mat(maskFloat); + cv::Mat protos = mask_protos.reshape(0, { segChannels, segWidth * segHeight }); + + cv::Mat matmulRes = (maskProposals * protos).t();//n*32 32*25600 + cv::Mat masks = matmulRes.reshape(static_cast(resBoxes.size()), { segWidth, segHeight }); + std::vector maskChannels; + split(masks, maskChannels); + for (size_t i = 0; i < resBoxes.size(); ++i) + { + cv::Mat dest; + cv::Mat mask; + //sigmoid + cv::exp(-maskChannels[i], dest); + dest = 1.0 / (1.0 + dest);//160*160 + + int padw = 0; + int padh = 0; + cv::Rect 
roi(int((float)padw / INPUT_W * segWidth), int((float)padh / INPUT_H * segHeight), int(segWidth - padw / 2), int(segHeight - padh / 2)); + dest = dest(roi); + + cv::resize(dest, mask, cv::Size(INPUT_W, INPUT_H), cv::INTER_NEAREST); + + resBoxes[i].m_boxMask = mask(resBoxes[i].m_brect) > MASK_THRESHOLD; + +#if 0 + static int globalObjInd = 0; + SaveMat(resBoxes[i].m_boxMask, std::to_string(globalObjInd++), ".png", "tmp", true); +#endif + +#if 1 + std::vector> contours; +#if ((CV_VERSION_MAJOR > 4) || ((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 9))) + cv::findContoursLinkRuns(resBoxes[i].m_boxMask, contours); +#else + cv::findContours(resBoxes[i].m_boxMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE, cv::Point()); +#endif + for (const auto& contour : contours) + { + cv::Rect br = cv::boundingRect(contour); + + //std::cout << "contour br: " << br << std::endl; + + if (br.width >= 4 && + br.height >= 4) + { + int dx = resBoxes[i].m_brect.x; + int dy = resBoxes[i].m_brect.y; + + cv::RotatedRect rr = (contour.size() < 5) ? 
cv::minAreaRect(contour) : cv::fitEllipse(contour); + rr.center.x = (rr.center.x + dx - m_resizedROI.x) * fw; + rr.center.y = (rr.center.y + dy - m_resizedROI.y) * fw; + rr.size.width *= fw; + rr.size.height *= fh; + + br.x = cvRound((dx + br.x - m_resizedROI.x) * fw); + br.y = cvRound((dy + br.y - m_resizedROI.y) * fh); + br.width = cvRound(br.width * fw); + br.height = cvRound(br.height * fh); + + resBoxes[i].m_brect = br; + resBoxes[i].m_rrect = rr; + + //std::cout << "resBoxes[" << i << "] br: " << br << ", rr: (" << rr.size << " from " << rr.center << ", " << rr.angle << ")" << std::endl; + + break; + } + } +#endif + } + } + return resBoxes; + } +}; diff --git a/src/Detector/tensorrt_onnx/YoloONNXv8_obb.hpp b/src/Detector/tensorrt_onnx/YoloONNXv8_obb.hpp new file mode 100644 index 000000000..ed44eb4a2 --- /dev/null +++ b/src/Detector/tensorrt_onnx/YoloONNXv8_obb.hpp @@ -0,0 +1,131 @@ +#pragma once + +#include "YoloONNX.hpp" + +/// +/// \brief The YOLOv8_obb_onnx class +/// +class YOLOv8_obb_onnx : public YoloONNX +{ +public: + YOLOv8_obb_onnx(std::vector& inputTensorNames, std::vector& outputTensorNames) + { + inputTensorNames.push_back("images"); + outputTensorNames.push_back("output0"); + } + +protected: + /// + /// \brief GetResult + /// \param output + /// \return + /// + std::vector GetResult(size_t imgIdx, int /*keep_topk*/, const std::vector& outputs, cv::Size frameSize) + { + std::vector resBoxes; + + //0: name: images, size: 1x3x1024x1024 + //1: name: output0, size: 1x20x21504 + //20: 15 DOTA classes + x + y + w + h + a + constexpr int shapeDataSize = 5; + + const float fw = static_cast(frameSize.width) / static_cast(m_resizedROI.width); + const float fh = static_cast(frameSize.height) / static_cast(m_resizedROI.height); + + auto output = outputs[0]; + + size_t ncInd = 1; + size_t lenInd = 2; + int nc = static_cast(m_outpuDims[0].d[ncInd] - shapeDataSize); + int dimensions = nc + shapeDataSize; + size_t len = static_cast(m_outpuDims[0].d[lenInd]);// / 
m_params.explicitBatchSize; + //auto Volume = [](const nvinfer1::Dims& d) + //{ + // return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies()); + //}; + auto volume = len * m_outpuDims[0].d[ncInd]; // Volume(m_outpuDims[0]); + output += volume * imgIdx; + //std::cout << "len = " << len << ", nc = " << nc << ", m_params.confThreshold = " << m_params.confThreshold << ", volume = " << volume << std::endl; + + cv::Mat rawMemory(1, dimensions * static_cast(len), CV_32FC1, output); + rawMemory = rawMemory.reshape(1, dimensions); + cv::transpose(rawMemory, rawMemory); + output = (float*)rawMemory.data; + + //std::cout << "output[0] mem:\n"; + //for (size_t ii = 0; ii < 100; ++ii) + //{ + // std::cout << ii << ": "; + // for (size_t jj = 0; jj < 20; ++jj) + // { + // std::cout << output[ii * 20 + jj] << " "; + // } + // std::cout << ";" << std::endl; + //} + //std::cout << ";" << std::endl; + + std::vector classIds; + std::vector confidences; + std::vector rectBoxes; + classIds.reserve(len); + confidences.reserve(len); + rectBoxes.reserve(len); + + for (size_t i = 0; i < len; ++i) + { + // Box + size_t k = i * (nc + shapeDataSize); + + int classId = -1; + float objectConf = 0.f; + for (int j = 0; j < nc; ++j) + { + const float classConf = output[k + 4 + j]; + if (classConf > objectConf) + { + classId = j; + objectConf = classConf; + } + } + + //if (i == 0) + //{ + // for (int jj = 0; jj < 20; ++jj) + // { + // std::cout << output[jj] << " "; + // } + // std::cout << std::endl; + //} + + if (objectConf >= m_params.m_confThreshold) + { + classIds.push_back(classId); + confidences.push_back(objectConf); + + // (center x, center y, width, height) + float cx = fw * (output[k] - m_resizedROI.x); + float cy = fh * (output[k + 1] - m_resizedROI.y); + float width = fw * output[k + 2]; + float height = fh * output[k + 3]; + float angle = 180.f * output[k + nc + shapeDataSize - 1] / static_cast(M_PI); + rectBoxes.emplace_back(cv::Point2f(cx, cy), cv::Size2f(width, height), 
angle); + + //if (rectBoxes.size() == 1) + // std::cout << i << ": object_conf = " << objectConf << ", classId = " << classId << ", rect = " << rectBoxes.back().boundingRect() << ", angle = " << angle << std::endl; + } + } + + // Non-maximum suppression to eliminate redudant overlapping boxes + //std::vector indices; + //cv::dnn::NMSBoxes(rectBoxes, confidences, m_params.confThreshold, m_params.nmsThreshold, indices); + //resBoxes.reserve(indices.size()); + + resBoxes.reserve(rectBoxes.size()); + for (size_t bi = 0; bi < rectBoxes.size(); ++bi) + { + resBoxes.emplace_back(classIds[bi], confidences[bi], rectBoxes[bi]); + } + + return resBoxes; + } +}; diff --git a/src/Detector/tensorrt_onnx/YoloONNXv9_bb.hpp b/src/Detector/tensorrt_onnx/YoloONNXv9_bb.hpp new file mode 100644 index 000000000..8d456abdd --- /dev/null +++ b/src/Detector/tensorrt_onnx/YoloONNXv9_bb.hpp @@ -0,0 +1,120 @@ +#pragma once + +#include "YoloONNX.hpp" + +/// +/// \brief The YOLOv9_bb_onnx class +/// +class YOLOv9_bb_onnx : public YoloONNX +{ +public: + YOLOv9_bb_onnx(std::vector& inputTensorNames, std::vector& outputTensorNames) + { + inputTensorNames.push_back("images"); + outputTensorNames.push_back("output0"); + } + +protected: + /// + /// \brief GetResult + /// \param output + /// \return + /// + std::vector GetResult(size_t imgIdx, int /*keep_topk*/, const std::vector& outputs, cv::Size frameSize) + { + std::vector resBoxes; + + //0: name: images, size: 1x3x640x640 + //1: name: output0, size: 1x84x8400 + //84: 80 COCO classes + x + y + w + h + constexpr int shapeDataSize = 4; + + const float fw = static_cast(frameSize.width) / static_cast(m_resizedROI.width); + const float fh = static_cast(frameSize.height) / static_cast(m_resizedROI.height); + + auto output = outputs[0]; + + size_t ncInd = 1; + size_t lenInd = 2; + int nc = static_cast(m_outpuDims[0].d[ncInd] - shapeDataSize); + int dimensions = nc + shapeDataSize; + size_t len = static_cast(m_outpuDims[0].d[lenInd]);// / 
m_params.explicitBatchSize; + //auto Volume = [](const nvinfer1::Dims& d) + //{ + // return std::accumulate(d.d, d.d + d.nbDims, 1, std::multiplies()); + //}; + auto volume = len * m_outpuDims[0].d[ncInd]; // Volume(m_outpuDims[0]); + output += volume * imgIdx; + //std::cout << "len = " << len << ", nc = " << nc << ", m_params.confThreshold = " << m_params.confThreshold << ", volume = " << volume << std::endl; + + cv::Mat rawMemory(1, dimensions * static_cast(len), CV_32FC1, output); + rawMemory = rawMemory.reshape(1, dimensions); + cv::transpose(rawMemory, rawMemory); + output = (float*)rawMemory.data; + + //std::cout << "output[0] mem:\n"; + //for (size_t ii = 0; ii < 100; ++ii) + //{ + // std::cout << ii << ": "; + // for (size_t jj = 0; jj < 20; ++jj) + // { + // std::cout << output[ii * 20 + jj] << " "; + // } + // std::cout << ";" << std::endl; + //} + //std::cout << ";" << std::endl; + + std::vector classIds; + std::vector confidences; + std::vector rectBoxes; + classIds.reserve(len); + confidences.reserve(len); + rectBoxes.reserve(len); + + for (size_t i = 0; i < len; ++i) + { + // Box + size_t k = i * (nc + shapeDataSize); + + int classId = -1; + float objectConf = 0.f; + for (int j = 0; j < nc; ++j) + { + const float classConf = output[k + shapeDataSize + j]; + if (classConf > objectConf) + { + classId = j; + objectConf = classConf; + } + } + + //if (i == 0) + // std::cout << i << ": object_conf = " << object_conf << ", class_conf = " << class_conf << ", classId = " << classId << ", rect = " << cv::Rect(cvRound(x), cvRound(y), cvRound(width), cvRound(height)) << std::endl; + + if (objectConf >= m_params.m_confThreshold) + { + classIds.push_back(classId); + confidences.push_back(objectConf); + + // (center x, center y, width, height) to (x, y, w, h) + float x = fw * (output[k] - output[k + 2] / 2 - m_resizedROI.x); + float y = fh * (output[k + 1] - output[k + 3] / 2 - m_resizedROI.y); + float width = fw * output[k + 2]; + float height = fh * output[k + 3]; 
+ rectBoxes.emplace_back(cvRound(x), cvRound(y), cvRound(width), cvRound(height)); + } + } + + // Non-maximum suppression to eliminate redudant overlapping boxes + std::vector indices; + cv::dnn::NMSBoxes(rectBoxes, confidences, m_params.m_confThreshold, m_params.m_nmsThreshold, indices); + resBoxes.reserve(indices.size()); + + for (size_t bi = 0; bi < indices.size(); ++bi) + { + resBoxes.emplace_back(classIds[indices[bi]], confidences[indices[bi]], rectBoxes[indices[bi]]); + } + + return resBoxes; + } +}; diff --git a/src/Detector/tensorrt_onnx/class_detector.cpp b/src/Detector/tensorrt_onnx/class_detector.cpp new file mode 100644 index 000000000..f5f4fb666 --- /dev/null +++ b/src/Detector/tensorrt_onnx/class_detector.cpp @@ -0,0 +1,216 @@ +#include "class_detector.h" +#include "YoloONNX.hpp" + +#include "YoloONNXv5_bb.hpp" +#include "YoloONNXv6_bb.hpp" +#include "YoloONNXv7_bb.hpp" +#include "YoloONNXv7_instance.hpp" +#include "YoloONNXv8_bb.hpp" +#include "YoloONNXv8_obb.hpp" +#include "YoloONNXv8_instance.hpp" +#include "YoloONNXv9_bb.hpp" +#include "YoloONNXv10_bb.hpp" +#include "YoloONNXv11_bb.hpp" +#include "YoloONNXv11_obb.hpp" +#include "YoloONNXv11_instance.hpp" +#include "YoloONNXv12_bb.hpp" +#include "RFDETR_bb.hpp" +#include "RFDETR_is.hpp" +#include "DFINE_bb.hpp" +#include "YoloONNXv13_bb.hpp" +#include "DFINE_is.hpp" +#include "YoloONNXv26_bb.hpp" +#include "YoloONNXv26_obb.hpp" +#include "YoloONNXv26_instance.hpp" + +namespace tensor_rt +{ + /// + /// \brief The Detector::Impl class + /// + class Detector::Impl + { + public: + Impl() = default; + virtual ~Impl() = default; + + virtual bool Init(const Config& config) = 0; + virtual void Detect(const std::vector& mat_image, std::vector& vec_batch_result) = 0; + virtual cv::Size GetInputSize() const = 0; + }; + + /// + /// \brief The YoloDectectorImpl class + /// + class YoloONNXImpl final : public Detector::Impl + { + public: + bool Init(const Config& config) override + { + // The onnx file to load + 
m_params.m_onnxFileName = config.m_fileModelCfg.empty() ? config.m_fileModelWeights : config.m_fileModelCfg; //"yolov6s.onnx" + + switch (config.m_netType) + { + case ModelType::YOLOV5: + m_detector = std::make_unique(m_params.m_inputTensorNames, m_params.m_outputTensorNames); + break; + case ModelType::YOLOV6: + m_detector = std::make_unique(m_params.m_inputTensorNames, m_params.m_outputTensorNames); + break; + case ModelType::YOLOV7: + m_detector = std::make_unique(m_params.m_inputTensorNames, m_params.m_outputTensorNames); + break; + case ModelType::YOLOV7Mask: + m_detector = std::make_unique(m_params.m_inputTensorNames, m_params.m_outputTensorNames); + break; + case ModelType::YOLOV8: + m_detector = std::make_unique(m_params.m_inputTensorNames, m_params.m_outputTensorNames); + break; + case ModelType::YOLOV8_OBB: + m_detector = std::make_unique(m_params.m_inputTensorNames, m_params.m_outputTensorNames); + break; + case ModelType::YOLOV8Mask: + m_detector = std::make_unique(m_params.m_inputTensorNames, m_params.m_outputTensorNames); + break; + case ModelType::YOLOV9: + m_detector = std::make_unique(m_params.m_inputTensorNames, m_params.m_outputTensorNames); + break; + case ModelType::YOLOV10: + m_detector = std::make_unique(m_params.m_inputTensorNames, m_params.m_outputTensorNames); + break; + case ModelType::YOLOV11: + m_detector = std::make_unique(m_params.m_inputTensorNames, m_params.m_outputTensorNames); + break; + case ModelType::YOLOV11_OBB: + m_detector = std::make_unique(m_params.m_inputTensorNames, m_params.m_outputTensorNames); + break; + case ModelType::YOLOV11Mask: + m_detector = std::make_unique(m_params.m_inputTensorNames, m_params.m_outputTensorNames); + break; + case ModelType::YOLOV26: + m_detector = std::make_unique(m_params.m_inputTensorNames, m_params.m_outputTensorNames); + break; + case ModelType::YOLOV26_OBB: + m_detector = std::make_unique(m_params.m_inputTensorNames, m_params.m_outputTensorNames); + break; + case ModelType::YOLOV26Mask: 
+ m_detector = std::make_unique(m_params.m_inputTensorNames, m_params.m_outputTensorNames); + break; + case ModelType::YOLOV12: + m_detector = std::make_unique(m_params.m_inputTensorNames, m_params.m_outputTensorNames); + break; + case ModelType::RFDETR: + m_detector = std::make_unique(m_params.m_inputTensorNames, m_params.m_outputTensorNames); + break; + case ModelType::RFDETR_IS: + m_detector = std::make_unique(m_params.m_inputTensorNames, m_params.m_outputTensorNames); + break; + case ModelType::DFINE: + m_detector = std::make_unique(m_params.m_inputTensorNames, m_params.m_outputTensorNames); + break; + case ModelType::YOLOV13: + m_detector = std::make_unique(m_params.m_inputTensorNames, m_params.m_outputTensorNames); + break; + case ModelType::DFINE_IS: + m_detector = std::make_unique(m_params.m_inputTensorNames, m_params.m_outputTensorNames); + break; + } + + // Threshold values + m_params.m_confThreshold = config.m_detectThresh; + m_params.m_nmsThreshold = 0.5; + + m_params.m_videoMemory = config.m_videoMemory; + + // Batch size, you can modify to other batch size values if needed + m_params.m_explicitBatchSize = config.m_batchSize; + + m_params.m_precision = config.m_inferencePrecision; + m_params.m_netType = config.m_netType; + + std::string precisionStr; + std::map dictprecision; + dictprecision[tensor_rt::INT8] = "kINT8"; + dictprecision[tensor_rt::FP16] = "kHALF"; + dictprecision[tensor_rt::FP32] = "kFLOAT"; + dictprecision[tensor_rt::FP8] = "kFP8"; + + auto precision = dictprecision.find(m_params.m_precision); + if (precision != dictprecision.end()) + precisionStr = precision->second; + m_params.m_engineFileName = config.m_fileModelCfg + "-" + precisionStr + "-batch" + std::to_string(config.m_batchSize) + ".engine"; + + return m_detector->Init(m_params); + } + + void Detect(const std::vector& mat_image, std::vector& vec_batch_result) override + { + vec_batch_result.clear(); + if (vec_batch_result.capacity() < mat_image.size()) + 
vec_batch_result.reserve(mat_image.size()); + + m_detector->Detect(mat_image, vec_batch_result); + } + + cv::Size GetInputSize() const override + { + return m_detector->GetInputSize(); + } + + private: + std::unique_ptr m_detector; + SampleYoloParams m_params; + }; + + /// + /// \brief Detector::Detector + /// + Detector::Detector() noexcept + { + } + + /// + /// \brief Detector::~Detector + /// + Detector::~Detector() + { + if (m_impl) + delete m_impl; + } + + /// + /// \brief Detector::Init + /// \param config + /// + bool Detector::Init(const Config& config) + { + if (m_impl) + delete m_impl; + + m_impl = new YoloONNXImpl(); + + bool res = m_impl->Init(config); + assert(res); + return res; + } + + /// + /// \brief Detector::Detect + /// \param mat_image + /// \param vec_batch_result + /// + void Detector::Detect(const std::vector& mat_image, std::vector& vec_batch_result) + { + m_impl->Detect(mat_image, vec_batch_result); + } + + /// + /// \brief Detector::GetInputSize + /// \return + /// + cv::Size Detector::GetInputSize() const + { + return m_impl->GetInputSize(); + } +} diff --git a/src/Detector/tensorrt_onnx/class_detector.h b/src/Detector/tensorrt_onnx/class_detector.h new file mode 100644 index 000000000..7ea989bcf --- /dev/null +++ b/src/Detector/tensorrt_onnx/class_detector.h @@ -0,0 +1,125 @@ +#ifndef CLASS_DETECTOR_H_ +#define CLASS_DETECTOR_H_ + +#include "API.h" +#include +#include + +namespace tensor_rt +{ + /// + /// \brief The Result struct + /// + struct Result + { + cv::RotatedRect m_rrect; + cv::Rect m_brect; + int m_id = -1; + float m_prob = 0.f; + cv::Mat m_boxMask; + + Result(int id, float prob, const cv::Rect& brect) + : m_brect(brect), m_id(id), m_prob(prob) + { + m_rrect = cv::RotatedRect(m_brect.tl(), cv::Point2f(static_cast(m_brect.x + m_brect.width), static_cast(m_brect.y)), m_brect.br()); + if (m_rrect.size.width < 1) + m_rrect.size.width = 1; + if (m_rrect.size.height < 1) + m_rrect.size.height = 1; + } + + Result(int id, float prob, 
const cv::RotatedRect& rrect) + : m_rrect(rrect), m_id(id), m_prob(prob) + { + m_brect = m_rrect.boundingRect(); + } + }; + + using BatchResult = std::vector; + + /// + /// \brief The ModelType enum + /// + enum ModelType + { + YOLOV3, + YOLOV3_TINY, + YOLOV4, + YOLOV4_TINY, + YOLOV5, + YOLOV6, + YOLOV7, + YOLOV7Mask, + YOLOV8, + YOLOV8_OBB, + YOLOV8Mask, + YOLOV9, + YOLOV10, + YOLOV11, + YOLOV11_OBB, + YOLOV11Mask, + YOLOV12, + RFDETR, + RFDETR_IS, + DFINE, + YOLOV13, + DFINE_IS, + YOLOV26, + YOLOV26_OBB, + YOLOV26Mask + }; + + /// + /// \brief The Precision enum + /// + enum Precision + { + INT8 = 0, + FP16, + FP32, + FP8 + }; + + /// + /// \brief The Config struct + /// + struct Config + { + std::string m_fileModelCfg = "yolov4.cfg"; + std::string m_fileModelWeights = "yolov4.weights"; + float m_detectThresh = 0.5f; + ModelType m_netType = YOLOV4; + Precision m_inferencePrecision = FP32; + int m_gpuInd = 0; + size_t m_videoMemory = 0; + uint32_t m_batchSize = 1; + std::string m_calibrationImageListFileTxt = "configs/calibration_images.txt"; + }; + + /// + /// \brief The Detector class + /// + class API Detector + { + public: + explicit Detector() noexcept; + ~Detector(); + + bool Init(const Config& config); + + void Detect(const std::vector& mat_image, std::vector& vec_batch_result); + + cv::Size GetInputSize() const; + + class Impl; + + private: + Detector(const Detector &); + const Detector& operator =(const Detector&) + { + } + + Impl* m_impl = nullptr; + }; +} +#endif // !CLASS_QH_DETECTOR_H_ diff --git a/src/Detector/tensorrt_onnx/cmake/FindCUDNN.cmake b/src/Detector/tensorrt_onnx/cmake/FindCUDNN.cmake new file mode 100644 index 000000000..7a692b055 --- /dev/null +++ b/src/Detector/tensorrt_onnx/cmake/FindCUDNN.cmake @@ -0,0 +1,104 @@ +# Distributed under the OSI-approved BSD 3-Clause License. 
+# Copyright Stefano Sinigardi + +#.rst: +# FindCUDNN +# -------- +# +# Result Variables +# ^^^^^^^^^^^^^^^^ +# +# This module will set the following variables in your project:: +# +# ``CUDNN_FOUND`` +# True if CUDNN found on the local system +# +# ``CUDNN_INCLUDE_DIRS`` +# Location of CUDNN header files. +# +# ``CUDNN_LIBRARIES`` +# The CUDNN libraries. +# +# ``CuDNN::CuDNN`` +# The CUDNN target +# + +include(FindPackageHandleStandardArgs) + +find_path(CUDNN_INCLUDE_DIR NAMES cudnn.h cudnn_v8.h cudnn_v7.h + HINTS $ENV{CUDA_PATH} $ENV{CUDA_TOOLKIT_ROOT_DIR} $ENV{CUDA_HOME} $ENV{CUDNN_ROOT_DIR} /usr/include + PATH_SUFFIXES cuda/include include) +find_library(CUDNN_LIBRARY NAMES cudnn cudnn8 cudnn7 + HINTS $ENV{CUDA_PATH} $ENV{CUDA_TOOLKIT_ROOT_DIR} $ENV{CUDA_HOME} $ENV{CUDNN_ROOT_DIR} /usr/lib/x86_64-linux-gnu/ + PATH_SUFFIXES lib lib64 cuda/lib cuda/lib64 lib/x64 cuda/lib/x64) +if(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn.h") + file(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_HEADER_CONTENTS) +elseif(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn_v8.h") + file(READ ${CUDNN_INCLUDE_DIR}/cudnn_v8.h CUDNN_HEADER_CONTENTS) +elseif(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn_v7.h") + file(READ ${CUDNN_INCLUDE_DIR}/cudnn_v7.h CUDNN_HEADER_CONTENTS) +endif() +if(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn_version.h") + file(READ "${CUDNN_INCLUDE_DIR}/cudnn_version.h" CUDNN_VERSION_H_CONTENTS) + string(APPEND CUDNN_HEADER_CONTENTS "${CUDNN_VERSION_H_CONTENTS}") + unset(CUDNN_VERSION_H_CONTENTS) +elseif(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn_version_v8.h") + file(READ "${CUDNN_INCLUDE_DIR}/cudnn_version_v8.h" CUDNN_VERSION_H_CONTENTS) + string(APPEND CUDNN_HEADER_CONTENTS "${CUDNN_VERSION_H_CONTENTS}") + unset(CUDNN_VERSION_H_CONTENTS) +elseif(EXISTS "${CUDNN_INCLUDE_DIR}/cudnn_version_v7.h") + file(READ "${CUDNN_INCLUDE_DIR}/cudnn_version_v7.h" CUDNN_VERSION_H_CONTENTS) + string(APPEND CUDNN_HEADER_CONTENTS "${CUDNN_VERSION_H_CONTENTS}") + unset(CUDNN_VERSION_H_CONTENTS) +endif() +if(CUDNN_HEADER_CONTENTS) + 
string(REGEX MATCH "define CUDNN_MAJOR * +([0-9]+)" + _CUDNN_VERSION_MAJOR "${CUDNN_HEADER_CONTENTS}") + string(REGEX REPLACE "define CUDNN_MAJOR * +([0-9]+)" "\\1" + _CUDNN_VERSION_MAJOR "${_CUDNN_VERSION_MAJOR}") + string(REGEX MATCH "define CUDNN_MINOR * +([0-9]+)" + _CUDNN_VERSION_MINOR "${CUDNN_HEADER_CONTENTS}") + string(REGEX REPLACE "define CUDNN_MINOR * +([0-9]+)" "\\1" + _CUDNN_VERSION_MINOR "${_CUDNN_VERSION_MINOR}") + string(REGEX MATCH "define CUDNN_PATCHLEVEL * +([0-9]+)" + _CUDNN_VERSION_PATCH "${CUDNN_HEADER_CONTENTS}") + string(REGEX REPLACE "define CUDNN_PATCHLEVEL * +([0-9]+)" "\\1" + _CUDNN_VERSION_PATCH "${_CUDNN_VERSION_PATCH}") + if(NOT _CUDNN_VERSION_MAJOR) + set(CUDNN_VERSION "?") + else() + set(CUDNN_VERSION "${_CUDNN_VERSION_MAJOR}.${_CUDNN_VERSION_MINOR}.${_CUDNN_VERSION_PATCH}") + endif() +endif() + +set(CUDNN_INCLUDE_DIRS ${CUDNN_INCLUDE_DIR}) +set(CUDNN_LIBRARIES ${CUDNN_LIBRARY}) +mark_as_advanced(CUDNN_LIBRARY CUDNN_INCLUDE_DIR) + +find_package_handle_standard_args(CUDNN + REQUIRED_VARS CUDNN_INCLUDE_DIR CUDNN_LIBRARY + VERSION_VAR CUDNN_VERSION +) + +if(WIN32) + set(CUDNN_DLL_DIR ${CUDNN_INCLUDE_DIR}) + list(TRANSFORM CUDNN_DLL_DIR APPEND "/../bin") + find_file(CUDNN_LIBRARY_DLL NAMES cudnn64_${CUDNN_VERSION_MAJOR}.dll PATHS ${CUDNN_DLL_DIR}) +endif() + +if( CUDNN_FOUND AND NOT TARGET CuDNN::CuDNN ) + if( EXISTS "${CUDNN_LIBRARY_DLL}" ) + add_library( CuDNN::CuDNN SHARED IMPORTED ) + set_target_properties( CuDNN::CuDNN PROPERTIES + IMPORTED_LOCATION "${CUDNN_LIBRARY_DLL}" + IMPORTED_IMPLIB "${CUDNN_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${CUDNN_INCLUDE_DIR}" + IMPORTED_LINK_INTERFACE_LANGUAGES "C" ) + else() + add_library( CuDNN::CuDNN UNKNOWN IMPORTED ) + set_target_properties( CuDNN::CuDNN PROPERTIES + IMPORTED_LOCATION "${CUDNN_LIBRARY}" + INTERFACE_INCLUDE_DIRECTORIES "${CUDNN_INCLUDE_DIR}" + IMPORTED_LINK_INTERFACE_LANGUAGES "C" ) + endif() +endif() diff --git a/src/Detector/darknet/cmake/FindCUDNN.cmake 
b/src/Detector/tensorrt_onnx/cmake/FindCUDNN.cmake_ similarity index 100% rename from src/Detector/darknet/cmake/FindCUDNN.cmake rename to src/Detector/tensorrt_onnx/cmake/FindCUDNN.cmake_ diff --git a/src/Detector/tensorrt_yolo/cmake/FindTensorRT.cmake b/src/Detector/tensorrt_onnx/cmake/FindTensorRT.cmake similarity index 80% rename from src/Detector/tensorrt_yolo/cmake/FindTensorRT.cmake rename to src/Detector/tensorrt_onnx/cmake/FindTensorRT.cmake index 7ec8d9980..fc9e92d72 100644 --- a/src/Detector/tensorrt_yolo/cmake/FindTensorRT.cmake +++ b/src/Detector/tensorrt_onnx/cmake/FindTensorRT.cmake @@ -43,15 +43,18 @@ endif() mark_as_advanced(TensorRT_INCLUDE_DIR) -if(TensorRT_INCLUDE_DIR AND EXISTS "${TensorRT_INCLUDE_DIR}/NvInfer.h") - file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInfer.h" TensorRT_MAJOR REGEX "^#define NV_TENSORRT_MAJOR [0-9]+.*$") - file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInfer.h" TensorRT_MINOR REGEX "^#define NV_TENSORRT_MINOR [0-9]+.*$") - file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInfer.h" TensorRT_PATCH REGEX "^#define NV_TENSORRT_PATCH [0-9]+.*$") +if(TensorRT_INCLUDE_DIR AND EXISTS "${TensorRT_INCLUDE_DIR}/NvInferVersion.h") + file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_MAJOR REGEX "^#define NV_TENSORRT_MAJOR [0-9]+.*$") + file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_MINOR REGEX "^#define NV_TENSORRT_MINOR [0-9]+.*$") + file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_PATCH REGEX "^#define NV_TENSORRT_PATCH [0-9]+.*$") string(REGEX REPLACE "^#define NV_TENSORRT_MAJOR ([0-9]+).*$" "\\1" TensorRT_VERSION_MAJOR "${TensorRT_MAJOR}") string(REGEX REPLACE "^#define NV_TENSORRT_MINOR ([0-9]+).*$" "\\1" TensorRT_VERSION_MINOR "${TensorRT_MINOR}") string(REGEX REPLACE "^#define NV_TENSORRT_PATCH ([0-9]+).*$" "\\1" TensorRT_VERSION_PATCH "${TensorRT_PATCH}") set(TensorRT_VERSION_STRING "${TensorRT_VERSION_MAJOR}.${TensorRT_VERSION_MINOR}.${TensorRT_VERSION_PATCH}") + message("TensorRT_VERSION_STRING 
from ${TensorRT_INCLUDE_DIR}/NvInferVersion.h: ${TensorRT_VERSION_STRING}") +else() + message("${TensorRT_INCLUDE_DIR}/NvInferVersion.h not found") endif() include(FindPackageHandleStandardArgs) diff --git a/src/Detector/tensorrt_onnx/cmake/FindTensorRT.cmake_ b/src/Detector/tensorrt_onnx/cmake/FindTensorRT.cmake_ new file mode 100644 index 000000000..f4f9f42c0 --- /dev/null +++ b/src/Detector/tensorrt_onnx/cmake/FindTensorRT.cmake_ @@ -0,0 +1,124 @@ +# ~~~ +# Copyright 2021 Olivier Le Doeuff +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# This module defines the following variables: +# +# - TensorRT_FOUND: A boolean specifying whether or not TensorRT was found. +# - TensorRT_VERSION: The exact version of TensorRT found +# - TensorRT_VERSION_MAJOR: The major version of TensorRT. +# - TensorRT_VERSION_MINOR: The minor version of TensorRT. +# - TensorRT_VERSION_PATCH: The patch version of TensorRT. +# - TensorRT_VERSION_TWEAK: The tweak version of TensorRT. 
+# - TensorRT_INCLUDE_DIRS: The path to TensorRT ``include`` folder containing the header files required to compile a project linking against TensorRT. +# - TensorRT_LIBRARY_DIRS: The path to TensorRT library directory that contains libraries. +# +# This module create following targets: +# - trt::nvinfer +# - trt::nvinfer_plugin +# - trt::nvonnxparser +# - trt::nvparsers +# This script was inspired from https://github.com/NicolasIRAGNE/CMakeScripts +# This script was inspired from https://github.com/NVIDIA/tensorrt-laboratory/blob/master/cmake/FindTensorRT.cmake +# +# Hints +# ^^^^^ +# A user may set ``TensorRT_ROOT`` to an installation root to tell this module where to look. +# ~~~ + +if(NOT TensorRT_FIND_COMPONENTS) + set(TensorRT_FIND_COMPONENTS nvinfer nvinfer_plugin nvonnxparser) +endif() +set(TensorRT_LIBRARIES) + +# find the include directory of TensorRT +find_path( + TensorRT_INCLUDE_DIR + NAMES NvInfer.h + PATHS ${TensorRT_ROOT} ENV TensorRT_ROOT + PATH_SUFFIXES include +) + +string(FIND ${TensorRT_INCLUDE_DIR} "NOTFOUND" _include_dir_notfound) +if(NOT _include_dir_notfound EQUAL -1) + if(TensorRT_FIND_REQUIRED) + message(FATAL_ERROR "Fail to find TensorRT, please set TensorRT_ROOT. 
Include path not found.") + endif() + return() + +if(NOT TensorRT_LIBRARY) + foreach(search ${_TensorRT_SEARCHES}) + find_library(TRT_NVONNX_PARSER NAMES nvonnxparser ${${search}} PATH_SUFFIXES lib lib64 lib/x64) + find_library(TRT_NVINFER NAMES nvinfer ${${search}} PATH_SUFFIXES lib lib64 lib/x64) + find_library(TRT_NVINFER_PLUGIN NAMES nvinfer_plugin ${${search}} PATH_SUFFIXES lib lib64 lib/x64) + endforeach() + list(APPEND TensorRT_LIBRARY ${TRT_NVINFER} ${TRT_NVINFER_PLUGIN} ${TRT_NVONNX_PARSER}) + +endif() +set(TensorRT_INCLUDE_DIRS ${TensorRT_INCLUDE_DIR}) + +# Extract version of tensorrt +if(EXISTS "${TensorRT_INCLUDE_DIR}/NvInferVersion.h") + file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_MAJOR REGEX "^#define NV_TENSORRT_MAJOR [0-9]+.*$") + file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_MINOR REGEX "^#define NV_TENSORRT_MINOR [0-9]+.*$") + file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_PATCH REGEX "^#define NV_TENSORRT_PATCH [0-9]+.*$") + file(STRINGS "${TensorRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_TWEAK REGEX "^#define NV_TENSORRT_BUILD [0-9]+.*$") + + string(REGEX REPLACE "^#define NV_TENSORRT_MAJOR ([0-9]+).*$" "\\1" TensorRT_VERSION_MAJOR "${TensorRT_MAJOR}") + string(REGEX REPLACE "^#define NV_TENSORRT_MINOR ([0-9]+).*$" "\\1" TensorRT_VERSION_MINOR "${TensorRT_MINOR}") + string(REGEX REPLACE "^#define NV_TENSORRT_PATCH ([0-9]+).*$" "\\1" TensorRT_VERSION_PATCH "${TensorRT_PATCH}") + string(REGEX REPLACE "^#define NV_TENSORRT_BUILD ([0-9]+).*$" "\\1" TensorRT_VERSION_TWEAK "${TensorRT_TWEAK}") + set(TensorRT_VERSION "${TensorRT_VERSION_MAJOR}.${TensorRT_VERSION_MINOR}.${TensorRT_VERSION_PATCH}.${TensorRT_VERSION_TWEAK}") +endif() + +function(_find_trt_component component) + + # Find library for component (ie nvinfer, nvparsers, etc...) 
+ find_library( + TensorRT_${component}_LIBRARY + NAMES ${component} + PATHS ${TensorRT_ROOT} ${TENSORRT_LIBRARY_DIR} ENV TensorRT_ROOT + ) + + string(FIND ${TensorRT_${component}_LIBRARY} "NOTFOUND" _library_not_found) + + if(NOT TensorRT_LIBRARY_DIR) + get_filename_component(_path ${TensorRT_${component}_LIBRARY} DIRECTORY) + set(TensorRT_LIBRARY_DIR + "${_path}" + CACHE INTERNAL "TensorRT_LIBRARY_DIR" + ) + endif() + + if(NOT TensorRT_LIBRARY_DIRS) + get_filename_component(_path ${TensorRT_${component}_LIBRARY} DIRECTORY) + set(TensorRT_LIBRARY_DIRS + "${_path}" + CACHE INTERNAL "TensorRT_LIBRARY_DIRS" + ) + endif() + + # Library found, and doesn't already exists + if(_library_not_found EQUAL -1 AND NOT TARGET trt::${component}) + set(TensorRT_${component}_FOUND + TRUE + CACHE INTERNAL "Found ${component}" + ) + + # Create a target + add_library(trt::${component} IMPORTED INTERFACE) + target_include_directories(trt::${component} SYSTEM INTERFACE "${TensorRT_INCLUDE_DIRS}") + target_link_libraries(trt::${component} INTERFACE "${TensorRT_${component}_LIBRARY}") + set(TensorRT_LIBRARIES ${TensorRT_LIBRARIES} ${TensorRT_${component}_LIBRARY}) + endif() + +endfunction() + +# Find each components +foreach(component IN LISTS TensorRT_FIND_COMPONENTS) + _find_trt_component(${component}) +endforeach() + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(TensorRT HANDLE_COMPONENTS VERSION_VAR TensorRT_VERSION REQUIRED_VARS TensorRT_INCLUDE_DIR) diff --git a/src/Detector/tensorrt_onnx/common/BatchStream.h b/src/Detector/tensorrt_onnx/common/BatchStream.h new file mode 100644 index 000000000..7bbb89ea7 --- /dev/null +++ b/src/Detector/tensorrt_onnx/common/BatchStream.h @@ -0,0 +1,380 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef BATCH_STREAM_H +#define BATCH_STREAM_H + +#include "NvInfer.h" +#include "common.h" +#include +#include +#include + +class IBatchStream +{ +public: + virtual void reset(int firstBatch) = 0; + virtual bool next() = 0; + virtual void skip(int skipCount) = 0; + virtual float* getBatch() = 0; + virtual float* getLabels() = 0; + virtual int getBatchesRead() const = 0; + virtual int getBatchSize() const = 0; + virtual nvinfer1::Dims getDims() const = 0; +}; + +class MNISTBatchStream : public IBatchStream +{ +public: + MNISTBatchStream(int batchSize, int maxBatches, const std::string& dataFile, const std::string& labelsFile, + const std::vector& directories) + : mBatchSize{batchSize} + , mMaxBatches{maxBatches} + , mDims{3, {1, 28, 28}} //!< We already know the dimensions of MNIST images. 
+ { + readDataFile(locateFile(dataFile, directories)); + readLabelsFile(locateFile(labelsFile, directories)); + } + + void reset(int firstBatch) override + { + mBatchCount = firstBatch; + } + + bool next() override + { + if (mBatchCount >= mMaxBatches) + { + return false; + } + ++mBatchCount; + return true; + } + + void skip(int skipCount) override + { + mBatchCount += skipCount; + } + + float* getBatch() override + { + return mData.data() + (mBatchCount * mBatchSize * samplesCommon::volume(mDims)); + } + + float* getLabels() override + { + return mLabels.data() + (mBatchCount * mBatchSize); + } + + int getBatchesRead() const override + { + return mBatchCount; + } + + int getBatchSize() const override + { + return mBatchSize; + } + + nvinfer1::Dims getDims() const override + { + return nvinfer1::Dims{4, {mBatchSize, mDims.d[0], mDims.d[1], mDims.d[2]}}; + } + +private: + void readDataFile(const std::string& dataFilePath) + { + std::ifstream file{dataFilePath.c_str(), std::ios::binary}; + + int magicNumber, numImages, imageH, imageW; + file.read(reinterpret_cast(&magicNumber), sizeof(magicNumber)); + // All values in the MNIST files are big endian. + magicNumber = samplesCommon::swapEndianness(magicNumber); + ASSERT(magicNumber == 2051 && "Magic Number does not match the expected value for an MNIST image set"); + + // Read number of images and dimensions + file.read(reinterpret_cast(&numImages), sizeof(numImages)); + file.read(reinterpret_cast(&imageH), sizeof(imageH)); + file.read(reinterpret_cast(&imageW), sizeof(imageW)); + + numImages = samplesCommon::swapEndianness(numImages); + imageH = samplesCommon::swapEndianness(imageH); + imageW = samplesCommon::swapEndianness(imageW); + + // The MNIST data is made up of unsigned bytes, so we need to cast to float and normalize. 
+ int numElements = numImages * imageH * imageW; + std::vector rawData(numElements); + file.read(reinterpret_cast(rawData.data()), numElements * sizeof(uint8_t)); + mData.resize(numElements); + std::transform( + rawData.begin(), rawData.end(), mData.begin(), [](uint8_t val) { return static_cast(val) / 255.F; }); + } + + void readLabelsFile(const std::string& labelsFilePath) + { + std::ifstream file{labelsFilePath.c_str(), std::ios::binary}; + int magicNumber, numImages; + file.read(reinterpret_cast(&magicNumber), sizeof(magicNumber)); + // All values in the MNIST files are big endian. + magicNumber = samplesCommon::swapEndianness(magicNumber); + ASSERT(magicNumber == 2049 && "Magic Number does not match the expected value for an MNIST labels file"); + + file.read(reinterpret_cast(&numImages), sizeof(numImages)); + numImages = samplesCommon::swapEndianness(numImages); + + std::vector rawLabels(numImages); + file.read(reinterpret_cast(rawLabels.data()), numImages * sizeof(uint8_t)); + mLabels.resize(numImages); + std::transform( + rawLabels.begin(), rawLabels.end(), mLabels.begin(), [](uint8_t val) { return static_cast(val); }); + } + + int mBatchSize{0}; + int mBatchCount{0}; //!< The batch that will be read on the next invocation of next() + int mMaxBatches{0}; + nvinfer1::Dims mDims{}; + std::vector mData{}; + std::vector mLabels{}; +}; + +class BatchStream : public IBatchStream +{ +public: + BatchStream(int batchSize, int maxBatches, std::string const& prefix, std::string const& suffix, + std::vector const& directories) + : mBatchSize(batchSize) + , mMaxBatches(maxBatches) + , mPrefix(prefix) + , mSuffix(suffix) + , mDataDir(directories) + { + std::ifstream file(locateFile(mPrefix + std::string("0") + mSuffix, mDataDir).c_str(), std::ios::binary); + ASSERT(file.good()); + int d[4]; + file.read(reinterpret_cast(d), 4 * sizeof(int32_t)); + mDims.nbDims = 4; // The number of dimensions. 
+ mDims.d[0] = d[0]; // Batch Size + mDims.d[1] = d[1]; // Channels + mDims.d[2] = d[2]; // Height + mDims.d[3] = d[3]; // Width + ASSERT(mDims.d[0] > 0 && mDims.d[1] > 0 && mDims.d[2] > 0 && mDims.d[3] > 0); + + mImageSize = static_cast(mDims.d[1] * mDims.d[2] * mDims.d[3]); + mBatch.resize(mBatchSize * mImageSize, 0); + mLabels.resize(mBatchSize, 0); + mFileBatch.resize(mDims.d[0] * mImageSize, 0); + mFileLabels.resize(mDims.d[0], 0); + } + + BatchStream(int batchSize, int maxBatches, std::string const& prefix, std::vector const& directories) + : BatchStream(batchSize, maxBatches, prefix, ".batch", directories) + { + } + + BatchStream(int batchSize, int maxBatches, nvinfer1::Dims const& dims, std::string const& listFile, + std::vector const& directories) + : mBatchSize(batchSize) + , mMaxBatches(maxBatches) + , mDims(dims) + , mListFile(listFile) + , mDataDir(directories) + { + mImageSize = static_cast(mDims.d[1] * mDims.d[2] * mDims.d[3]); + mBatch.resize(mBatchSize * mImageSize, 0); + mLabels.resize(mBatchSize, 0); + mFileBatch.resize(mDims.d[0] * mImageSize, 0); + mFileLabels.resize(mDims.d[0], 0); + } + + // Resets data members + void reset(int firstBatch) override + { + mBatchCount = 0; + mFileCount = 0; + mFileBatchPos = static_cast(mDims.d[0]); + skip(firstBatch); + } + + // Advance to next batch and return true, or return false if there is no batch left. + bool next() override + { + if (mBatchCount == mMaxBatches) + { + return false; + } + + for (int64_t csize = 1, batchPos = 0; batchPos < mBatchSize; batchPos += csize, mFileBatchPos += csize) + { + ASSERT(mFileBatchPos > 0 && mFileBatchPos <= mDims.d[0]); + if (mFileBatchPos == mDims.d[0] && !update()) + { + return false; + } + + // copy the smaller of: elements left to fulfill the request, or elements left in the file buffer. 
+ csize = std::min(mBatchSize - batchPos, mDims.d[0] - mFileBatchPos); + std::copy_n( + getFileBatch() + mFileBatchPos * mImageSize, csize * mImageSize, getBatch() + batchPos * mImageSize); + std::copy_n(getFileLabels() + mFileBatchPos, csize, getLabels() + batchPos); + } + mBatchCount++; + return true; + } + + // Skips the batches + void skip(int skipCount) override + { + if (mBatchSize >= mDims.d[0] && mBatchSize % mDims.d[0] == 0 && mFileBatchPos == mDims.d[0]) + { + mFileCount += skipCount * mBatchSize / mDims.d[0]; + return; + } + + int x = mBatchCount; + for (int i = 0; i < skipCount; i++) + { + next(); + } + mBatchCount = x; + } + + float* getBatch() override + { + return mBatch.data(); + } + + float* getLabels() override + { + return mLabels.data(); + } + + int getBatchesRead() const override + { + return mBatchCount; + } + + int getBatchSize() const override + { + return static_cast(mBatchSize); + } + + nvinfer1::Dims getDims() const override + { + return mDims; + } + +private: + float* getFileBatch() + { + return mFileBatch.data(); + } + + float* getFileLabels() + { + return mFileLabels.data(); + } + + bool update() + { + if (mListFile.empty()) + { + std::string inputFileName = locateFile(mPrefix + std::to_string(mFileCount++) + mSuffix, mDataDir); + std::ifstream file(inputFileName.c_str(), std::ios::binary); + if (!file) + { + return false; + } + int d[4]; + file.read(reinterpret_cast(d), 4 * sizeof(int32_t)); + ASSERT(mDims.d[0] == d[0] && mDims.d[1] == d[1] && mDims.d[2] == d[2] && mDims.d[3] == d[3]); + file.read(reinterpret_cast(getFileBatch()), sizeof(float) * mDims.d[0] * mImageSize); + file.read(reinterpret_cast(getFileLabels()), sizeof(float) * mDims.d[0]); + } + else + { + std::vector fNames; + std::ifstream file(locateFile(mListFile, mDataDir), std::ios::binary); + if (!file) + { + return false; + } + + sample::gLogInfo << "Batch #" << mFileCount << std::endl; + file.seekg(((mBatchCount * mBatchSize)) * 7); + + for (int i = 1; i <= mBatchSize; 
i++) + { + std::string sName; + std::getline(file, sName); + sName = sName + ".ppm"; + sample::gLogInfo << "Calibrating with file " << sName << std::endl; + fNames.emplace_back(sName); + } + + mFileCount++; + + const int imageC = 3; + const int imageH = 300; + const int imageW = 300; + std::vector> ppms(fNames.size()); + for (uint32_t i = 0; i < fNames.size(); ++i) + { + readPPMFile(locateFile(fNames[i], mDataDir), ppms[i]); + } + + std::vector data(samplesCommon::volume(mDims)); + const float scale = 2.0 / 255.0; + const float bias = 1.0; + long int volChl = static_cast(mDims.d[2] * mDims.d[3]); + + // Normalize input data + int64_t volImg = mDims.d[1] * mDims.d[2] * mDims.d[3]; + for (int i = 0; i < mBatchSize; ++i) + { + for (int c = 0; c < mDims.d[1]; ++c) + { + for (int j = 0; j < volChl; ++j) + { + data[i * volImg + c * volChl + j] = scale * float(ppms[i].buffer[j * mDims.d[1] + c]) - bias; + } + } + } + + std::copy_n(data.data(), mDims.d[0] * mImageSize, getFileBatch()); + } + + mFileBatchPos = 0; + return true; + } + + int64_t mBatchSize{0}; + int mMaxBatches{0}; + int mBatchCount{0}; + int mFileCount{0}; + int mFileBatchPos{0}; + int mImageSize{0}; + std::vector mBatch; //!< Data for the batch + std::vector mLabels; //!< Labels for the batch + std::vector mFileBatch; //!< List of image files + std::vector mFileLabels; //!< List of label files + std::string mPrefix; //!< Batch file name prefix + std::string mSuffix; //!< Batch file name suffix + nvinfer1::Dims mDims; //!< Input dimensions + std::string mListFile; //!< File name of the list of image names + std::vector mDataDir; //!< Directories where the files can be found +}; + +#endif diff --git a/src/Detector/tensorrt_onnx/common/EntropyCalibrator.h b/src/Detector/tensorrt_onnx/common/EntropyCalibrator.h new file mode 100644 index 000000000..67a0130ee --- /dev/null +++ b/src/Detector/tensorrt_onnx/common/EntropyCalibrator.h @@ -0,0 +1,136 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA 
CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ENTROPY_CALIBRATOR_H +#define ENTROPY_CALIBRATOR_H + +#include "BatchStream.h" +#include "NvInfer.h" + +//! \class EntropyCalibratorImpl +//! +//! \brief Implements common functionality for Entropy calibrators. +//! +template +class EntropyCalibratorImpl +{ +public: + EntropyCalibratorImpl(TBatchStream const& stream, int firstBatch, std::string const& networkName, + const char* inputBlobName, bool readCache = true) + : mStream{stream} + , mCalibrationTableName("CalibrationTable" + networkName) + , mInputBlobName(inputBlobName) + , mReadCache(readCache) + { + nvinfer1::Dims dims = mStream.getDims(); + mInputCount = samplesCommon::volume(dims); + CHECK(cudaMalloc(&mDeviceInput, mInputCount * sizeof(float))); + mStream.reset(firstBatch); + } + + virtual ~EntropyCalibratorImpl() + { + CHECK(cudaFree(mDeviceInput)); + } + + int getBatchSize() const noexcept + { + return mStream.getBatchSize(); + } + + bool getBatch(void* bindings[], const char* names[], int nbBindings) noexcept + { + if (!mStream.next()) + { + return false; + } + CHECK(cudaMemcpy(mDeviceInput, mStream.getBatch(), mInputCount * sizeof(float), cudaMemcpyHostToDevice)); + ASSERT(!strcmp(names[0], mInputBlobName)); + bindings[0] = mDeviceInput; + return true; + } + + const void* readCalibrationCache(size_t& length) noexcept + { + 
mCalibrationCache.clear(); + std::ifstream input(mCalibrationTableName, std::ios::binary); + input >> std::noskipws; + if (mReadCache && input.good()) + { + std::copy(std::istream_iterator(input), std::istream_iterator(), + std::back_inserter(mCalibrationCache)); + } + length = mCalibrationCache.size(); + return length ? mCalibrationCache.data() : nullptr; + } + + void writeCalibrationCache(const void* cache, size_t length) noexcept + { + std::ofstream output(mCalibrationTableName, std::ios::binary); + output.write(reinterpret_cast(cache), length); + } + +private: + TBatchStream mStream; + size_t mInputCount; + std::string mCalibrationTableName; + const char* mInputBlobName; + bool mReadCache{true}; + void* mDeviceInput{nullptr}; + std::vector mCalibrationCache; +}; + +//! \class Int8EntropyCalibrator2 +//! +//! \brief Implements Entropy calibrator 2. +//! CalibrationAlgoType is kENTROPY_CALIBRATION_2. +//! +template +class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 +{ +public: + Int8EntropyCalibrator2(TBatchStream const& stream, int32_t firstBatch, const char* networkName, + const char* inputBlobName, bool readCache = true) + : mImpl(stream, firstBatch, networkName, inputBlobName, readCache) + { + } + + int getBatchSize() const noexcept override + { + return mImpl.getBatchSize(); + } + + bool getBatch(void* bindings[], const char* names[], int nbBindings) noexcept override + { + return mImpl.getBatch(bindings, names, nbBindings); + } + + const void* readCalibrationCache(size_t& length) noexcept override + { + return mImpl.readCalibrationCache(length); + } + + void writeCalibrationCache(const void* cache, size_t length) noexcept override + { + mImpl.writeCalibrationCache(cache, length); + } + +private: + EntropyCalibratorImpl mImpl; +}; + +#endif // ENTROPY_CALIBRATOR_H diff --git a/src/Detector/tensorrt_onnx/common/ErrorRecorder.h b/src/Detector/tensorrt_onnx/common/ErrorRecorder.h new file mode 100644 index 000000000..bfb857c52 --- 
/dev/null +++ b/src/Detector/tensorrt_onnx/common/ErrorRecorder.h @@ -0,0 +1,138 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ERROR_RECORDER_H +#define ERROR_RECORDER_H +#include "NvInferRuntime.h" +#include "logger.h" +#include +#include +#include +#include +#include + +using nvinfer1::IErrorRecorder; +using nvinfer1::ErrorCode; + +//! +//! A simple implementation of the IErrorRecorder interface for +//! use by samples. This interface also can be used as a reference +//! implementation. +//! The sample Error recorder is based on a vector that pairs the error +//! code and the error string into a single element. It also uses +//! standard mutex's and atomics in order to make sure that the code +//! works in a multi-threaded environment. +//! +class SampleErrorRecorder : public IErrorRecorder +{ + using errorPair = std::pair; + using errorStack = std::vector; + +public: + SampleErrorRecorder() = default; + + ~SampleErrorRecorder() noexcept override {} + int32_t getNbErrors() const noexcept final + { + return mErrorStack.size(); + } + ErrorCode getErrorCode(int32_t errorIdx) const noexcept final + { + return invalidIndexCheck(errorIdx) ? 
ErrorCode::kINVALID_ARGUMENT : (*this)[errorIdx].first; + }; + IErrorRecorder::ErrorDesc getErrorDesc(int32_t errorIdx) const noexcept final + { + return invalidIndexCheck(errorIdx) ? "errorIdx out of range." : (*this)[errorIdx].second.c_str(); + } + // This class can never overflow since we have dynamic resize via std::vector usage. + bool hasOverflowed() const noexcept final + { + return false; + } + + // Empty the errorStack. + void clear() noexcept final + { + try + { + // grab a lock so that there is no addition while clearing. + std::lock_guard guard(mStackLock); + mErrorStack.clear(); + } + catch (const std::exception& e) + { + sample::gLogFatal << "Internal Error: " << e.what() << std::endl; + } + }; + + //! Simple helper function that + bool empty() const noexcept + { + return mErrorStack.empty(); + } + + bool reportError(ErrorCode val, IErrorRecorder::ErrorDesc desc) noexcept final + { + try + { + std::lock_guard guard(mStackLock); + sample::gLogError << "Error[" << static_cast(val) << "]: " << desc << std::endl; + mErrorStack.push_back(errorPair(val, desc)); + } + catch (const std::exception& e) + { + sample::gLogFatal << "Internal Error: " << e.what() << std::endl; + } + // All errors are considered fatal. + return true; + } + + // Atomically increment or decrement the ref counter. + IErrorRecorder::RefCount incRefCount() noexcept final + { + return ++mRefCount; + } + IErrorRecorder::RefCount decRefCount() noexcept final + { + return --mRefCount; + } + +private: + // Simple helper functions. + const errorPair& operator[](size_t index) const noexcept + { + return mErrorStack[index]; + } + + bool invalidIndexCheck(int32_t index) const noexcept + { + // By converting signed to unsigned, we only need a single check since + // negative numbers turn into large positive greater than the size. + size_t sIndex = index; + return sIndex >= mErrorStack.size(); + } + // Mutex to hold when locking mErrorStack. 
+ std::mutex mStackLock; + + // Reference count of the class. Destruction of the class when mRefCount + // is not zero causes undefined behavior. + std::atomic mRefCount{0}; + + // The error stack that holds the errors recorded by TensorRT. + errorStack mErrorStack; +}; // class SampleErrorRecorder +#endif // ERROR_RECORDER_H diff --git a/src/Detector/tensorrt_onnx/common/argsParser.h b/src/Detector/tensorrt_onnx/common/argsParser.h new file mode 100644 index 000000000..1f0b9025c --- /dev/null +++ b/src/Detector/tensorrt_onnx/common/argsParser.h @@ -0,0 +1,162 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef TENSORRT_ARGS_PARSER_H +#define TENSORRT_ARGS_PARSER_H + +#ifdef _MSC_VER +#include "getOptWin.h" +#else +#include +#endif +#include +#include +#include + +namespace samplesCommon +{ + +//! +//! \brief The SampleParams structure groups the basic parameters required by +//! all sample networks. +//! +struct SampleParams +{ + int32_t batchSize{1}; //!< Number of inputs in a batch + int32_t dlaCore{-1}; //!< Specify the DLA core to run network on. + bool int8{false}; //!< Allow runnning the network in Int8 mode. + bool fp16{false}; //!< Allow running the network in FP16 mode. + bool bf16{false}; //!< Allow running the network in BF16 mode. 
+ std::vector dataDirs; //!< Directory paths where sample data files are stored + std::vector inputTensorNames; + std::vector outputTensorNames; + std::string timingCacheFile; //!< Path to timing cache file +}; + +//! +//! \brief The OnnxSampleParams structure groups the additional parameters required by +//! networks that use ONNX +//! +struct OnnxSampleParams : public SampleParams +{ + std::string onnxFileName; //!< Filename of ONNX file of a network +}; + +//! +//! /brief Struct to maintain command-line arguments. +//! +struct Args +{ + bool runInInt8{false}; + bool runInFp16{false}; + bool runInBf16{false}; + bool help{false}; + int32_t useDLACore{-1}; + int32_t batch{1}; + std::vector dataDirs; + std::string saveEngine; + std::string loadEngine; + bool rowOrder{true}; + std::string timingCacheFile; +}; + +//! +//! \brief Populates the Args struct with the provided command-line parameters. +//! +//! \throw invalid_argument if any of the arguments are not valid +//! +//! \return boolean If return value is true, execution can continue, otherwise program should exit +//! 
+inline bool parseArgs(Args& args, int32_t argc, char* argv[]) +{ + while (1) + { + int32_t arg; + static struct option long_options[] + = {{"help", no_argument, 0, 'h'}, {"datadir", required_argument, 0, 'd'}, {"int8", no_argument, 0, 'i'}, + {"fp16", no_argument, 0, 'f'}, {"bf16", no_argument, 0, 'z'}, {"columnOrder", no_argument, 0, 'c'}, + {"saveEngine", required_argument, 0, 's'}, {"loadEngine", required_argument, 0, 'o'}, + {"useDLACore", required_argument, 0, 'u'}, {"batch", required_argument, 0, 'b'}, + {"timingCacheFile", required_argument, 0, 't'}, {nullptr, 0, nullptr, 0}}; + int32_t option_index = 0; + arg = getopt_long(argc, argv, "hd:iu", long_options, &option_index); + if (arg == -1) + { + break; + } + + switch (arg) + { + case 'h': args.help = true; return true; + case 'd': + if (optarg) + { + args.dataDirs.push_back(optarg); + } + else + { + std::cerr << "ERROR: --datadir requires option argument" << std::endl; + return false; + } + break; + case 's': + if (optarg) + { + args.saveEngine = optarg; + } + break; + case 'o': + if (optarg) + { + args.loadEngine = optarg; + } + break; + case 'i': args.runInInt8 = true; break; + case 'f': args.runInFp16 = true; break; + case 'z': args.runInBf16 = true; break; + case 'c': args.rowOrder = false; break; + case 'u': + if (optarg) + { + args.useDLACore = std::stoi(optarg); + } + break; + case 'b': + if (optarg) + { + args.batch = std::stoi(optarg); + } + break; + case 't': + if (optarg) + { + args.timingCacheFile = optarg; + } + else + { + std::cerr << "ERROR: --timingCacheFile requires option argument" << std::endl; + return false; + } + break; + default: return false; + } + } + return true; +} + +} // namespace samplesCommon + +#endif // TENSORRT_ARGS_PARSER_H diff --git a/src/Detector/tensorrt_onnx/common/bfloat16.cpp b/src/Detector/tensorrt_onnx/common/bfloat16.cpp new file mode 100644 index 000000000..8222826ae --- /dev/null +++ b/src/Detector/tensorrt_onnx/common/bfloat16.cpp @@ -0,0 +1,60 @@ +/* + * 
SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "bfloat16.h" +#include + +namespace sample +{ + +BFloat16::operator float() const +{ + static_assert(sizeof(uint32_t) == sizeof(float), ""); + float val{0.F}; + auto bits = static_cast(mRep) << 16; + std::memcpy(&val, &bits, sizeof(uint32_t)); + return val; +} + +BFloat16::BFloat16(float x) +{ + static_assert(sizeof(uint32_t) == sizeof(float), ""); + uint32_t bits{0}; + std::memcpy(&bits, &x, sizeof(float)); + + // FP32 format: 1 sign bit, 8 bit exponent, 23 bit mantissa + // BF16 format: 1 sign bit, 8 bit exponent, 7 bit mantissa + + // Mask for exponent + constexpr uint32_t exponent = 0xFFU << 23; + + // Check if exponent is all 1s (NaN or infinite) + if ((bits & exponent) != exponent) + { + // x is finite - round to even + bits += 0x7FFFU + (bits >> 16 & 1); + } + + mRep = static_cast(bits >> 16); +} + +BFloat16 operator+(BFloat16 x, BFloat16 y) +{ + return BFloat16(static_cast(x) + static_cast(y)); +} + +} // namespace sample diff --git a/src/Detector/tensorrt_onnx/common/bfloat16.h b/src/Detector/tensorrt_onnx/common/bfloat16.h new file mode 100644 index 000000000..0d0ab9222 --- /dev/null +++ b/src/Detector/tensorrt_onnx/common/bfloat16.h @@ -0,0 +1,46 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace sample +{ + +//! Implements "Brain Floating Point": like an IEEE FP32, +//! but the significand is only 7 bits instead of 23 bits. +class BFloat16 +{ +public: + BFloat16() + : mRep(0) + { + } + + // Rounds to even if there is a tie. + BFloat16(float x); + + operator float() const; + +private: + //! Value stored in BFloat16 representation. + uint16_t mRep; +}; +BFloat16 operator+(BFloat16 x, BFloat16 y); + +} // namespace sample diff --git a/src/Detector/tensorrt_onnx/common/buffers.h b/src/Detector/tensorrt_onnx/common/buffers.h new file mode 100644 index 000000000..e58f2f5c1 --- /dev/null +++ b/src/Detector/tensorrt_onnx/common/buffers.h @@ -0,0 +1,456 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef TENSORRT_BUFFERS_H +#define TENSORRT_BUFFERS_H + +#include "NvInfer.h" +#include "common.h" +#include "half.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace samplesCommon +{ + +//! +//! \brief The GenericBuffer class is a templated class for buffers. +//! +//! \details This templated RAII (Resource Acquisition Is Initialization) class handles the allocation, +//! deallocation, querying of buffers on both the device and the host. +//! It can handle data of arbitrary types because it stores byte buffers. +//! The template parameters AllocFunc and FreeFunc are used for the +//! allocation and deallocation of the buffer. +//! AllocFunc must be a functor that takes in (void** ptr, size_t size) +//! and returns bool. ptr is a pointer to where the allocated buffer address should be stored. +//! size is the amount of memory in bytes to allocate. +//! The boolean indicates whether or not the memory allocation was successful. +//! FreeFunc must be a functor that takes in (void* ptr) and returns void. +//! ptr is the allocated buffer address. It must work with nullptr input. +//! +template +class GenericBuffer +{ +public: + //! + //! \brief Construct an empty buffer. + //! + GenericBuffer(nvinfer1::DataType type = nvinfer1::DataType::kFLOAT) + : mSize(0) + , mCapacity(0) + , mType(type) + , mBuffer(nullptr) + { + } + + //! + //! \brief Construct a buffer with the specified allocation size in bytes. + //! 
+ GenericBuffer(size_t size, nvinfer1::DataType type) + : mSize(size) + , mCapacity(size) + , mType(type) + { + if (!allocFn(&mBuffer, this->nbBytes())) + { + throw std::bad_alloc(); + } + } + + GenericBuffer(GenericBuffer&& buf) + : mSize(buf.mSize) + , mCapacity(buf.mCapacity) + , mType(buf.mType) + , mBuffer(buf.mBuffer) + { + buf.mSize = 0; + buf.mCapacity = 0; + buf.mType = nvinfer1::DataType::kFLOAT; + buf.mBuffer = nullptr; + } + + GenericBuffer& operator=(GenericBuffer&& buf) + { + if (this != &buf) + { + freeFn(mBuffer); + mSize = buf.mSize; + mCapacity = buf.mCapacity; + mType = buf.mType; + mBuffer = buf.mBuffer; + // Reset buf. + buf.mSize = 0; + buf.mCapacity = 0; + buf.mBuffer = nullptr; + } + return *this; + } + + //! + //! \brief Returns pointer to underlying array. + //! + void* data() + { + return mBuffer; + } + + //! + //! \brief Returns pointer to underlying array. + //! + const void* data() const + { + return mBuffer; + } + + //! + //! \brief Returns the size (in number of elements) of the buffer. + //! + size_t size() const + { + return mSize; + } + + //! + //! \brief Returns the size (in bytes) of the buffer. + //! + size_t nbBytes() const + { + return this->size() * samplesCommon::getElementSize(mType); + } + + //! + //! \brief Resizes the buffer. This is a no-op if the new size is smaller than or equal to the current capacity. + //! + void resize(size_t newSize) + { + mSize = newSize; + if (mCapacity < newSize) + { + freeFn(mBuffer); + if (!allocFn(&mBuffer, this->nbBytes())) + { + throw std::bad_alloc{}; + } + mCapacity = newSize; + } + } + + //! + //! \brief Overload of resize that accepts Dims + //! 
+ void resize(const nvinfer1::Dims& dims) + { + return this->resize(samplesCommon::volume(dims)); + } + + ~GenericBuffer() + { + freeFn(mBuffer); + } + +private: + size_t mSize{0}, mCapacity{0}; + nvinfer1::DataType mType; + void* mBuffer; + AllocFunc allocFn; + FreeFunc freeFn; +}; + +class DeviceAllocator +{ +public: + bool operator()(void** ptr, size_t size) const + { + return cudaMalloc(ptr, size) == cudaSuccess; + } +}; + +class DeviceFree +{ +public: + void operator()(void* ptr) const + { + cudaFree(ptr); + } +}; + +class HostAllocator +{ +public: + bool operator()(void** ptr, size_t size) const + { + *ptr = malloc(size); + return *ptr != nullptr; + } +}; + +class HostFree +{ +public: + void operator()(void* ptr) const + { + free(ptr); + } +}; + +using DeviceBuffer = GenericBuffer; +using HostBuffer = GenericBuffer; + +//! +//! \brief The ManagedBuffer class groups together a pair of corresponding device and host buffers. +//! +class ManagedBuffer +{ +public: + DeviceBuffer deviceBuffer; + HostBuffer hostBuffer; +}; + +//! +//! \brief The BufferManager class handles host and device buffer allocation and deallocation. +//! +//! \details This RAII class handles host and device buffer allocation and deallocation, +//! memcpy between host and device buffers to aid with inference, +//! and debugging dumps to validate inference. The BufferManager class is meant to be +//! used to simplify buffer management and any interactions between buffers and the engine. +//! +class BufferManager +{ +public: + static const size_t kINVALID_SIZE_VALUE = ~size_t(0); + + //! + //! \brief Create a BufferManager for handling buffer interactions with engine, when the I/O tensor volumes + //! are provided + //! 
+ BufferManager( + std::shared_ptr engine, std::vector const& volumes, int32_t batchSize = 0) + : mEngine(engine) + , mBatchSize(batchSize) + { + // Create host and device buffers + for (int32_t i = 0; i < mEngine->getNbIOTensors(); i++) + { + auto const name = engine->getIOTensorName(i); + mNames[name] = i; + + nvinfer1::DataType type = mEngine->getTensorDataType(name); + + std::unique_ptr manBuf{new ManagedBuffer()}; + manBuf->deviceBuffer = DeviceBuffer(volumes[i], type); + manBuf->hostBuffer = HostBuffer(volumes[i], type); + void* deviceBuffer = manBuf->deviceBuffer.data(); + mDeviceBindings.emplace_back(deviceBuffer); + mManagedBuffers.emplace_back(std::move(manBuf)); + } + } + + //! + //! \brief Create a BufferManager for handling buffer interactions with engine. + //! + BufferManager(std::shared_ptr engine, int32_t const batchSize = 0, + nvinfer1::IExecutionContext const* context = nullptr) + : mEngine(engine) + , mBatchSize(batchSize) + { + // Create host and device buffers + for (int32_t i = 0, e = mEngine->getNbIOTensors(); i < e; i++) + { + auto const name = engine->getIOTensorName(i); + mNames[name] = i; + + auto dims = context ? context->getTensorShape(name) : mEngine->getTensorShape(name); + size_t vol = context || !mBatchSize ? 1 : static_cast(mBatchSize); + nvinfer1::DataType type = mEngine->getTensorDataType(name); + int32_t vecDim = mEngine->getTensorVectorizedDim(name); + if (-1 != vecDim) // i.e., 0 != lgScalarsPerVector + { + int32_t scalarsPerVec = mEngine->getTensorComponentsPerElement(name); + dims.d[vecDim] = divUp(dims.d[vecDim], scalarsPerVec); + vol *= scalarsPerVec; + } + vol *= samplesCommon::volume(dims); + std::unique_ptr manBuf{new ManagedBuffer()}; + manBuf->deviceBuffer = DeviceBuffer(vol, type); + manBuf->hostBuffer = HostBuffer(vol, type); + void* deviceBuffer = manBuf->deviceBuffer.data(); + mDeviceBindings.emplace_back(deviceBuffer); + mManagedBuffers.emplace_back(std::move(manBuf)); + } + } + + //! + //! 
\brief Returns a vector of device buffers that you can use directly as + //! bindings for the execute and enqueue methods of IExecutionContext. + //! + std::vector& getDeviceBindings() + { + return mDeviceBindings; + } + + //! + //! \brief Returns a vector of device buffers. + //! + std::vector const& getDeviceBindings() const + { + return mDeviceBindings; + } + + //! + //! \brief Returns the device buffer corresponding to tensorName. + //! Returns nullptr if no such tensor can be found. + //! + void* getDeviceBuffer(std::string const& tensorName) const + { + return getBuffer(false, tensorName); + } + + //! + //! \brief Returns the host buffer corresponding to tensorName. + //! Returns nullptr if no such tensor can be found. + //! + void* getHostBuffer(std::string const& tensorName) const + { + return getBuffer(true, tensorName); + } + + //! + //! \brief Returns the size of the host and device buffers that correspond to tensorName. + //! Returns kINVALID_SIZE_VALUE if no such tensor can be found. + //! + size_t size(std::string const& tensorName) const + { + auto record = mNames.find(tensorName); + if (record == mNames.end()) + return kINVALID_SIZE_VALUE; + return mManagedBuffers[record->second]->hostBuffer.nbBytes(); + } + + //! + //! \brief Templated print function that dumps buffers of arbitrary type to std::ostream. + //! rowCount parameter controls how many elements are on each line. + //! A rowCount of 1 means that there is only 1 element on each line. + //! 
+ template + void print(std::ostream& os, void* buf, size_t bufSize, size_t rowCount) + { + assert(rowCount != 0); + assert(bufSize % sizeof(T) == 0); + T* typedBuf = static_cast(buf); + size_t numItems = bufSize / sizeof(T); + for (int32_t i = 0; i < static_cast(numItems); i++) + { + // Handle rowCount == 1 case + if (rowCount == 1 && i != static_cast(numItems) - 1) + os << typedBuf[i] << std::endl; + else if (rowCount == 1) + os << typedBuf[i]; + // Handle rowCount > 1 case + else if (i % rowCount == 0) + os << typedBuf[i]; + else if (i % rowCount == rowCount - 1) + os << " " << typedBuf[i] << std::endl; + else + os << " " << typedBuf[i]; + } + } + + //! + //! \brief Copy the contents of input host buffers to input device buffers synchronously. + //! + void copyInputToDevice() + { + memcpyBuffers(true, false, false); + } + + //! + //! \brief Copy the contents of output device buffers to output host buffers synchronously. + //! + void copyOutputToHost() + { + memcpyBuffers(false, true, false); + } + + //! + //! \brief Copy the contents of input host buffers to input device buffers asynchronously. + //! + void copyInputToDeviceAsync(cudaStream_t const& stream = 0) + { + memcpyBuffers(true, false, true, stream); + } + + //! + //! \brief Copy the contents of output device buffers to output host buffers asynchronously. + //! + void copyOutputToHostAsync(cudaStream_t const& stream = 0) + { + memcpyBuffers(false, true, true, stream); + } + + ~BufferManager() = default; + +private: + void* getBuffer(bool const isHost, std::string const& tensorName) const + { + auto record = mNames.find(tensorName); + if (record == mNames.end()) + return nullptr; + return (isHost ? 
mManagedBuffers[record->second]->hostBuffer.data() + : mManagedBuffers[record->second]->deviceBuffer.data()); + } + + bool tenosrIsInput(const std::string& tensorName) const + { + return mEngine->getTensorIOMode(tensorName.c_str()) == nvinfer1::TensorIOMode::kINPUT; + } + + void memcpyBuffers(bool const copyInput, bool const deviceToHost, bool const async, cudaStream_t const& stream = 0) + { + for (auto const& n : mNames) + { + void* dstPtr = deviceToHost ? mManagedBuffers[n.second]->hostBuffer.data() + : mManagedBuffers[n.second]->deviceBuffer.data(); + void const* srcPtr = deviceToHost ? mManagedBuffers[n.second]->deviceBuffer.data() + : mManagedBuffers[n.second]->hostBuffer.data(); + size_t const byteSize = mManagedBuffers[n.second]->hostBuffer.nbBytes(); + const cudaMemcpyKind memcpyType = deviceToHost ? cudaMemcpyDeviceToHost : cudaMemcpyHostToDevice; + if ((copyInput && tenosrIsInput(n.first)) || (!copyInput && !tenosrIsInput(n.first))) + { + if (async) + CHECK(cudaMemcpyAsync(dstPtr, srcPtr, byteSize, memcpyType, stream)); + else + CHECK(cudaMemcpy(dstPtr, srcPtr, byteSize, memcpyType)); + } + } + } + + std::shared_ptr mEngine; //!< The pointer to the engine + int mBatchSize; //!< The batch size for legacy networks, 0 otherwise. + std::vector> mManagedBuffers; //!< The vector of pointers to managed buffers + std::vector mDeviceBindings; //!< The vector of device buffers needed for engine execution + std::unordered_map mNames; //!< The map of tensor name and index pairs +}; + +} // namespace samplesCommon + +#endif // TENSORRT_BUFFERS_H diff --git a/src/Detector/tensorrt_onnx/common/common.h b/src/Detector/tensorrt_onnx/common/common.h new file mode 100644 index 000000000..e29c6a302 --- /dev/null +++ b/src/Detector/tensorrt_onnx/common/common.h @@ -0,0 +1,943 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef TENSORRT_COMMON_H +#define TENSORRT_COMMON_H +#include "NvInfer.h" +#if !TRT_WINML +#include "NvInferPlugin.h" +#endif +#include "logger.h" +#include "safeCommon.h" +#include "timingCache.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef _MSC_VER +// For loadLibrary +// Needed so that the max/min definitions in windows.h do not conflict with std::max/min. 
+#define NOMINMAX +#include +#undef NOMINMAX +#else +#include +#endif + +#ifdef _MSC_VER +#define FN_NAME __FUNCTION__ +#else +#define FN_NAME __func__ +#endif + +#if defined(__aarch64__) || defined(__QNX__) +#define ENABLE_DLA_API 1 +#endif + +#define CHECK_RETURN_W_MSG(status, val, errMsg) \ + do \ + { \ + if (!(status)) \ + { \ + sample::gLogError << errMsg << " Error in " << __FILE__ << ", function " << FN_NAME << "(), line " << __LINE__ \ + << std::endl; \ + return val; \ + } \ + } while (0) + +#undef ASSERT +#define ASSERT(condition) \ + do \ + { \ + if (!(condition)) \ + { \ + sample::gLogError << "Assertion failure: " << #condition << std::endl; \ + exit(EXIT_FAILURE); \ + } \ + } while (0) + + +#define CHECK_RETURN(status, val) CHECK_RETURN_W_MSG(status, val, "") + +#define OBJ_GUARD(A) std::unique_ptr + +template +OBJ_GUARD(T) +makeObjGuard(T_* t) +{ + CHECK(!(std::is_base_of::value || std::is_same::value)); + auto deleter = [](T* t) { delete t; }; + return std::unique_ptr{static_cast(t), deleter}; +} + +constexpr long double operator"" _GiB(long double val) +{ + return val * (1 << 30); +} +constexpr long double operator"" _MiB(long double val) +{ + return val * (1 << 20); +} +constexpr long double operator"" _KiB(long double val) +{ + return val * (1 << 10); +} + +struct SimpleProfiler : public nvinfer1::IProfiler +{ + struct Record + { + float time{0}; + int count{0}; + }; + + void reportLayerTime(const char* layerName, float ms) noexcept override + { + mProfile[layerName].count++; + mProfile[layerName].time += ms; + if (std::find(mLayerNames.begin(), mLayerNames.end(), layerName) == mLayerNames.end()) + { + mLayerNames.push_back(layerName); + } + } + + SimpleProfiler(const char* name, const std::vector& srcProfilers = std::vector()) + : mName(name) + { + for (const auto& srcProfiler : srcProfilers) + { + for (const auto& rec : srcProfiler.mProfile) + { + auto it = mProfile.find(rec.first); + if (it == mProfile.end()) + { + mProfile.insert(rec); + } + 
else + { + it->second.time += rec.second.time; + it->second.count += rec.second.count; + } + } + } + } + + friend std::ostream& operator<<(std::ostream& out, const SimpleProfiler& value) + { + out << "========== " << value.mName << " profile ==========" << std::endl; + float totalTime = 0; + std::string layerNameStr = "TensorRT layer name"; + int maxLayerNameLength = std::max(static_cast(layerNameStr.size()), 70); + for (const auto& elem : value.mProfile) + { + totalTime += elem.second.time; + maxLayerNameLength = std::max(maxLayerNameLength, static_cast(elem.first.size())); + } + + auto old_settings = out.flags(); + auto old_precision = out.precision(); + // Output header + { + out << std::setfill(' ') << std::setw(maxLayerNameLength) << layerNameStr << " "; + out << std::setw(12) << "Runtime, " + << "%" + << " "; + out << std::setw(12) << "Invocations" + << " "; + out << std::setw(12) << "Runtime, ms" << std::endl; + } + for (size_t i = 0; i < value.mLayerNames.size(); i++) + { + const std::string layerName = value.mLayerNames[i]; + auto elem = value.mProfile.at(layerName); + out << std::setw(maxLayerNameLength) << layerName << " "; + out << std::setw(12) << std::fixed << std::setprecision(1) << (elem.time * 100.0F / totalTime) << "%" + << " "; + out << std::setw(12) << elem.count << " "; + out << std::setw(12) << std::fixed << std::setprecision(2) << elem.time << std::endl; + } + out.flags(old_settings); + out.precision(old_precision); + out << "========== " << value.mName << " total runtime = " << totalTime << " ms ==========" << std::endl; + + return out; + } + +private: + std::string mName; + std::vector mLayerNames; + std::map mProfile; +}; + +namespace samplesCommon +{ +using nvinfer1::utils::loadTimingCacheFile; +using nvinfer1::utils::buildTimingCacheFromFile; +using nvinfer1::utils::saveTimingCacheFile; +using nvinfer1::utils::updateTimingCacheFile; +// Swaps endianness of an integral type. 
+template ::value, int>::type = 0> +inline T swapEndianness(const T& value) +{ + uint8_t bytes[sizeof(T)]; + for (int i = 0; i < static_cast(sizeof(T)); ++i) + { + bytes[sizeof(T) - 1 - i] = *(reinterpret_cast(&value) + i); + } + return *reinterpret_cast(bytes); +} + +class HostMemory +{ +public: + HostMemory() = delete; + virtual void* data() const noexcept + { + return mData; + } + virtual std::size_t size() const noexcept + { + return mSize; + } + virtual nvinfer1::DataType type() const noexcept + { + return mType; + } + virtual ~HostMemory() {} + +protected: + HostMemory(std::size_t size, nvinfer1::DataType type) + : mData{nullptr} + , mSize(size) + , mType(type) + { + } + void* mData; + std::size_t mSize; + nvinfer1::DataType mType; +}; + +template +class TypedHostMemory : public HostMemory +{ +public: + explicit TypedHostMemory(std::size_t size) + : HostMemory(size, dataType) + { + mData = new ElemType[size]; + }; + ~TypedHostMemory() noexcept override + { + delete[](ElemType*) mData; + } + ElemType* raw() noexcept + { + return static_cast(data()); + } +}; + +using FloatMemory = TypedHostMemory; +using HalfMemory = TypedHostMemory; +using ByteMemory = TypedHostMemory; + +inline void* safeCudaMalloc(size_t memSize) +{ + void* deviceMem; + CHECK(cudaMalloc(&deviceMem, memSize)); + if (deviceMem == nullptr) + { + std::cerr << "Out of memory" << std::endl; + exit(EXIT_FAILURE); + } + return deviceMem; +} + +inline bool isDebug() +{ + return (std::getenv("TENSORRT_DEBUG") ? 
true : false); +} + +struct InferDeleter +{ + template + void operator()(T* obj) const + { + delete obj; + } +}; + +template +using SampleUniquePtr = std::unique_ptr; + +static auto StreamDeleter = [](cudaStream_t* pStream) { + if (pStream) + { + static_cast(cudaStreamDestroy(*pStream)); + delete pStream; + } +}; + +inline std::unique_ptr makeCudaStream() +{ + std::unique_ptr pStream(new cudaStream_t, StreamDeleter); + if (cudaStreamCreateWithFlags(pStream.get(), cudaStreamNonBlocking) != cudaSuccess) + { + pStream.reset(nullptr); + } + + return pStream; +} + +//! Return vector of indices that puts magnitudes of sequence in descending order. +template +std::vector argMagnitudeSort(Iter begin, Iter end) +{ + std::vector indices(end - begin); + std::iota(indices.begin(), indices.end(), 0); + std::sort(indices.begin(), indices.end(), [&begin](size_t i, size_t j) { return std::abs(begin[j]) < std::abs(begin[i]); }); + return indices; +} + +inline bool readReferenceFile(const std::string& fileName, std::vector& refVector) +{ + std::ifstream infile(fileName); + if (!infile.is_open()) + { + std::cout << "ERROR: readReferenceFile: Attempting to read from a file that is not open." << std::endl; + return false; + } + std::string line; + while (std::getline(infile, line)) + { + if (line.empty()) + continue; + refVector.push_back(line); + } + infile.close(); + return true; +} + +template +std::vector classify( + const std::vector& refVector, const std::vector& output, const size_t topK) +{ + const auto inds = samplesCommon::argMagnitudeSort(output.cbegin(), output.cend()); + std::vector result; + result.reserve(topK); + for (size_t k = 0; k < topK; ++k) + { + result.push_back(refVector[inds[k]]); + } + return result; +} + +// Returns indices of highest K magnitudes in v. 
+template +std::vector topKMagnitudes(const std::vector& v, const size_t k) +{ + std::vector indices = samplesCommon::argMagnitudeSort(v.cbegin(), v.cend()); + indices.resize(k); + return indices; +} + +template +bool readASCIIFile(const std::string& fileName, const size_t size, std::vector& out) +{ + std::ifstream infile(fileName); + if (!infile.is_open()) + { + std::cout << "ERROR readASCIIFile: Attempting to read from a file that is not open." << std::endl; + return false; + } + out.clear(); + out.reserve(size); + out.assign(std::istream_iterator(infile), std::istream_iterator()); + infile.close(); + return true; +} + +template +bool writeASCIIFile(const std::string& fileName, const std::vector& in) +{ + std::ofstream outfile(fileName); + if (!outfile.is_open()) + { + std::cout << "ERROR: writeASCIIFile: Attempting to write to a file that is not open." << std::endl; + return false; + } + for (auto fn : in) + { + outfile << fn << "\n"; + } + outfile.close(); + return true; +} + +inline void print_version() +{ + std::cout << " TensorRT version: " << NV_TENSORRT_MAJOR << "." << NV_TENSORRT_MINOR << "." << NV_TENSORRT_PATCH + << "." << NV_TENSORRT_BUILD << std::endl; +} + +inline std::string getFileType(const std::string& filepath) +{ + return filepath.substr(filepath.find_last_of(".") + 1); +} + +inline std::string toLower(const std::string& inp) +{ + std::string out = inp; + std::transform(out.begin(), out.end(), out.begin(), ::tolower); + return out; +} + +inline float getMaxValue(const float* buffer, int64_t size) +{ + assert(buffer != nullptr); + assert(size > 0); + return *std::max_element(buffer, buffer + size); +} + +// Ensures that every tensor used by a network has a dynamic range set. +// +// All tensors in a network must have a dynamic range specified if a calibrator is not used. +// This function is just a utility to globally fill in missing scales and zero-points for the entire network. 
+// +// If a tensor does not have a dyanamic range set, it is assigned inRange or outRange as follows: +// +// * If the tensor is the input to a layer or output of a pooling node, its dynamic range is derived from inRange. +// * Otherwise its dynamic range is derived from outRange. +// +// The default parameter values are intended to demonstrate, for final layers in the network, +// cases where dynamic ranges are asymmetric. +// +// The default parameter values choosen arbitrarily. Range values should be choosen such that +// we avoid underflow or overflow. Also range value should be non zero to avoid uniform zero scale tensor. +inline void setAllDynamicRanges(nvinfer1::INetworkDefinition* network, float inRange = 2.0F, float outRange = 4.0F) +{ + // Ensure that all layer inputs have a scale. + for (int i = 0; i < network->getNbLayers(); i++) + { + auto layer = network->getLayer(i); + for (int j = 0; j < layer->getNbInputs(); j++) + { + nvinfer1::ITensor* input{layer->getInput(j)}; + // Optional inputs are nullptr here and are from RNN layers. + if (input != nullptr && !input->dynamicRangeIsSet()) + { + ASSERT(input->setDynamicRange(-inRange, inRange)); + } + } + } + + // Ensure that all layer outputs have a scale. + // Tensors that are also inputs to layers are ingored here + // since the previous loop nest assigned scales to them. + for (int i = 0; i < network->getNbLayers(); i++) + { + auto layer = network->getLayer(i); + for (int j = 0; j < layer->getNbOutputs(); j++) + { + nvinfer1::ITensor* output{layer->getOutput(j)}; + // Optional outputs are nullptr here and are from RNN layers. + if (output != nullptr && !output->dynamicRangeIsSet()) + { + // Pooling must have the same input and output scales. 
+ if (layer->getType() == nvinfer1::LayerType::kPOOLING) + { + ASSERT(output->setDynamicRange(-inRange, inRange)); + } + else + { + ASSERT(output->setDynamicRange(-outRange, outRange)); + } + } + } + } +} + +inline void setDummyInt8DynamicRanges(const nvinfer1::IBuilderConfig* c, nvinfer1::INetworkDefinition* n) +{ + // Set dummy per-tensor dynamic range if Int8 mode is requested. + if (c->getFlag(nvinfer1::BuilderFlag::kINT8)) + { + sample::gLogWarning << "Int8 calibrator not provided. Generating dummy per-tensor dynamic range. Int8 accuracy " + "is not guaranteed." + << std::endl; + setAllDynamicRanges(n); + } +} + +inline void enableDLA( + nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config, int useDLACore, bool allowGPUFallback = true) +{ + if (useDLACore >= 0) + { + if (builder->getNbDLACores() == 0) + { + std::cerr << "Trying to use DLA core " << useDLACore << " on a platform that doesn't have any DLA cores" + << std::endl; + assert("Error: use DLA core on a platfrom that doesn't have any DLA cores" && false); + } + if (allowGPUFallback) + { + config->setFlag(nvinfer1::BuilderFlag::kGPU_FALLBACK); + } + if (!config->getFlag(nvinfer1::BuilderFlag::kINT8)) + { + // User has not requested INT8 Mode. + // By default run in FP16 mode. FP32 mode is not permitted. 
+ config->setFlag(nvinfer1::BuilderFlag::kFP16); + } + config->setDefaultDeviceType(nvinfer1::DeviceType::kDLA); + config->setDLACore(useDLACore); + } +} + +inline int32_t parseDLA(int32_t argc, char** argv) +{ + for (int32_t i = 1; i < argc; i++) + { + if (strncmp(argv[i], "--useDLACore=", 13) == 0) + { + return std::stoi(argv[i] + 13); + } + } + return -1; +} + +inline uint32_t getElementSize(nvinfer1::DataType t) noexcept +{ + switch (t) + { +#if (NV_TENSORRT_MAJOR > 8) + case nvinfer1::DataType::kINT64: return 8; +#endif + case nvinfer1::DataType::kINT32: + case nvinfer1::DataType::kFLOAT: return 4; +#if (NV_TENSORRT_MAJOR > 8) + case nvinfer1::DataType::kBF16: +#endif + case nvinfer1::DataType::kHALF: return 2; + case nvinfer1::DataType::kBOOL: + case nvinfer1::DataType::kUINT8: + case nvinfer1::DataType::kINT8: + case nvinfer1::DataType::kFP8: return 1; +#if (NV_TENSORRT_MAJOR > 8) + case nvinfer1::DataType::kINT4: + ASSERT(false && "Element size is not implemented for sub-byte data-types"); +#endif + } + return 0; +} + +inline int64_t volume(nvinfer1::Dims const& dims, int32_t start, int32_t stop) +{ + ASSERT(start >= 0); + ASSERT(start <= stop); + ASSERT(stop <= dims.nbDims); + ASSERT(std::all_of(dims.d + start, dims.d + stop, [](int32_t x) { return x >= 0; })); + return std::accumulate(dims.d + start, dims.d + stop, int64_t{1}, std::multiplies{}); +} + +template +struct PPM +{ + std::string magic, fileName; + int h, w, max; + uint8_t buffer[C * H * W]; +}; + +// New vPPM(variable sized PPM) class with variable dimensions. 
+struct vPPM +{ + std::string magic, fileName; + int h, w, max; + std::vector buffer; +}; + +struct BBox +{ + float x1, y1, x2, y2; +}; + +template +void readPPMFile(const std::string& filename, samplesCommon::PPM& ppm) +{ + ppm.fileName = filename; + std::ifstream infile(filename, std::ifstream::binary); + assert(infile.is_open() && "Attempting to read from a file that is not open."); + infile >> ppm.magic >> ppm.w >> ppm.h >> ppm.max; + infile.seekg(1, infile.cur); + infile.read(reinterpret_cast(ppm.buffer), ppm.w * ppm.h * 3); +} + +inline void readPPMFile(const std::string& filename, vPPM& ppm, std::vector& input_dir) +{ + ppm.fileName = filename; + std::ifstream infile(locateFile(filename, input_dir), std::ifstream::binary); + infile >> ppm.magic >> ppm.w >> ppm.h >> ppm.max; + infile.seekg(1, infile.cur); + + for (int i = 0; i < ppm.w * ppm.h * 3; ++i) + { + ppm.buffer.push_back(0); + } + + infile.read(reinterpret_cast(&ppm.buffer[0]), ppm.w * ppm.h * 3); +} + +template +void writePPMFileWithBBox(const std::string& filename, PPM& ppm, const BBox& bbox) +{ + std::ofstream outfile("./" + filename, std::ofstream::binary); + assert(!outfile.fail()); + outfile << "P6" + << "\n" + << ppm.w << " " << ppm.h << "\n" + << ppm.max << "\n"; + + auto round = [](float x) -> int { return int(std::floor(x + 0.5F)); }; + const int x1 = std::min(std::max(0, round(int(bbox.x1))), W - 1); + const int x2 = std::min(std::max(0, round(int(bbox.x2))), W - 1); + const int y1 = std::min(std::max(0, round(int(bbox.y1))), H - 1); + const int y2 = std::min(std::max(0, round(int(bbox.y2))), H - 1); + + for (int x = x1; x <= x2; ++x) + { + // bbox top border + ppm.buffer[(y1 * ppm.w + x) * 3] = 255; + ppm.buffer[(y1 * ppm.w + x) * 3 + 1] = 0; + ppm.buffer[(y1 * ppm.w + x) * 3 + 2] = 0; + // bbox bottom border + ppm.buffer[(y2 * ppm.w + x) * 3] = 255; + ppm.buffer[(y2 * ppm.w + x) * 3 + 1] = 0; + ppm.buffer[(y2 * ppm.w + x) * 3 + 2] = 0; + } + + for (int y = y1; y <= y2; ++y) + { + // bbox 
left border + ppm.buffer[(y * ppm.w + x1) * 3] = 255; + ppm.buffer[(y * ppm.w + x1) * 3 + 1] = 0; + ppm.buffer[(y * ppm.w + x1) * 3 + 2] = 0; + // bbox right border + ppm.buffer[(y * ppm.w + x2) * 3] = 255; + ppm.buffer[(y * ppm.w + x2) * 3 + 1] = 0; + ppm.buffer[(y * ppm.w + x2) * 3 + 2] = 0; + } + + outfile.write(reinterpret_cast(ppm.buffer), ppm.w * ppm.h * 3); +} + +inline void writePPMFileWithBBox(const std::string& filename, vPPM ppm, std::vector& dets) +{ + std::ofstream outfile("./" + filename, std::ofstream::binary); + assert(!outfile.fail()); + outfile << "P6" + << "\n" + << ppm.w << " " << ppm.h << "\n" + << ppm.max << "\n"; + auto round = [](float x) -> int { return int(std::floor(x + 0.5F)); }; + + for (auto bbox : dets) + { + for (int x = int(bbox.x1); x < int(bbox.x2); ++x) + { + // bbox top border + ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3] = 255; + ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3 + 1] = 0; + ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3 + 2] = 0; + // bbox bottom border + ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3] = 255; + ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3 + 1] = 0; + ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3 + 2] = 0; + } + + for (int y = int(bbox.y1); y < int(bbox.y2); ++y) + { + // bbox left border + ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3] = 255; + ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3 + 1] = 0; + ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3 + 2] = 0; + // bbox right border + ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3] = 255; + ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3 + 1] = 0; + ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3 + 2] = 0; + } + } + + outfile.write(reinterpret_cast(&ppm.buffer[0]), ppm.w * ppm.h * 3); +} + +class TimerBase +{ +public: + virtual void start() {} + virtual void stop() {} + float microseconds() const noexcept + { + return mMs * 1000.F; + } + float milliseconds() const noexcept + { + return mMs; + } + float seconds() const noexcept + { + return mMs / 1000.F; + } + void reset() 
noexcept + { + mMs = 0.F; + } + +protected: + float mMs{0.0F}; +}; + +class GpuTimer : public TimerBase +{ +public: + explicit GpuTimer(cudaStream_t stream) + : mStream(stream) + { + CHECK(cudaEventCreate(&mStart)); + CHECK(cudaEventCreate(&mStop)); + } + ~GpuTimer() + { + CHECK(cudaEventDestroy(mStart)); + CHECK(cudaEventDestroy(mStop)); + } + void start() override + { + CHECK(cudaEventRecord(mStart, mStream)); + } + void stop() override + { + CHECK(cudaEventRecord(mStop, mStream)); + float ms{0.0F}; + CHECK(cudaEventSynchronize(mStop)); + CHECK(cudaEventElapsedTime(&ms, mStart, mStop)); + mMs += ms; + } + +private: + cudaEvent_t mStart, mStop; + cudaStream_t mStream; +}; // class GpuTimer + +template +class CpuTimer : public TimerBase +{ +public: + using clock_type = Clock; + + void start() override + { + mStart = Clock::now(); + } + void stop() override + { + mStop = Clock::now(); + mMs += std::chrono::duration{mStop - mStart}.count(); + } + +private: + std::chrono::time_point mStart, mStop; +}; // class CpuTimer + +using PreciseCpuTimer = CpuTimer; + +inline std::vector splitString(std::string str, char delimiter = ',') +{ + std::vector splitVect; + std::stringstream ss(str); + std::string substr; + + while (ss.good()) + { + getline(ss, substr, delimiter); + splitVect.emplace_back(std::move(substr)); + } + return splitVect; +} + +inline int getC(nvinfer1::Dims const& d) +{ + return d.nbDims >= 3 ? static_cast(d.d[d.nbDims - 3]) : 1; +} + +inline int getH(const nvinfer1::Dims& d) +{ + return d.nbDims >= 2 ? static_cast(d.d[d.nbDims - 2]) : 1; +} + +inline int getW(const nvinfer1::Dims& d) +{ + return d.nbDims >= 1 ? static_cast(d.d[d.nbDims - 1]) : 1; +} + +//! Platform-agnostic wrapper around dynamic libraries. 
+class DynamicLibrary +{ +public: + explicit DynamicLibrary(std::string const& name) + : mLibName{name} + { +#if defined(_WIN32) + mHandle = LoadLibraryA(name.c_str()); +#else // defined(_WIN32) + int32_t flags{RTLD_LAZY}; +#if ENABLE_ASAN + // https://github.com/google/sanitizers/issues/89 + // asan doesn't handle module unloading correctly and there are no plans on doing + // so. In order to get proper stack traces, don't delete the shared library on + // close so that asan can resolve the symbols correctly. + flags |= RTLD_NODELETE; +#endif // ENABLE_ASAN + + mHandle = dlopen(name.c_str(), flags); +#endif // defined(_WIN32) + + if (mHandle == nullptr) + { + std::string errorStr{}; +#if !defined(_WIN32) + errorStr = std::string{" due to "} + std::string{dlerror()}; +#endif + throw std::runtime_error("Unable to open library: " + name + errorStr); + } + } + + DynamicLibrary(DynamicLibrary const&) = delete; + DynamicLibrary(DynamicLibrary const&&) = delete; + + //! + //! Retrieve a function symbol from the loaded library. + //! + //! \return the loaded symbol on success + //! \throw std::invalid_argument if loading the symbol failed. + //! + template + std::function symbolAddress(char const* name) + { + if (mHandle == nullptr) + { + throw std::runtime_error("Handle to library is nullptr."); + } + void* ret; +#if defined(_MSC_VER) + ret = static_cast(GetProcAddress(static_cast(mHandle), name)); +#else + ret = dlsym(mHandle, name); +#endif + if (ret == nullptr) + { + std::string const kERROR_MSG(mLibName + ": error loading symbol: " + std::string(name)); + throw std::invalid_argument(kERROR_MSG); + } + return reinterpret_cast(ret); + } + + ~DynamicLibrary() + { + try + { +#if defined(_WIN32) + ASSERT(static_cast(FreeLibrary(static_cast(mHandle)))); +#else + ASSERT(dlclose(mHandle) == 0); +#endif + } + catch (...) 
+ { + sample::gLogError << "Unable to close library: " << mLibName << std::endl; + } + } + +private: + std::string mLibName{}; //!< Name of the DynamicLibrary + void* mHandle{}; //!< Handle to the DynamicLibrary +}; + +inline std::unique_ptr loadLibrary(std::string const& path) +{ + // make_unique not available until C++14 - we still need to support C++11 builds. + return std::unique_ptr(new DynamicLibrary{path}); +} + +inline int32_t getMaxPersistentCacheSize() +{ + int32_t deviceIndex{}; + CHECK(cudaGetDevice(&deviceIndex)); + + int32_t maxPersistentL2CacheSize{}; +#if CUDART_VERSION >= 11030 && !TRT_WINML + CHECK(cudaDeviceGetAttribute(&maxPersistentL2CacheSize, cudaDevAttrMaxPersistingL2CacheSize, deviceIndex)); +#endif + + return maxPersistentL2CacheSize; +} + +inline bool isDataTypeSupported(nvinfer1::DataType dataType) +{ + auto builder = SampleUniquePtr(createBuilder()); + if (!builder) + { + return false; + } + + if ((dataType == nvinfer1::DataType::kINT8 && !builder->platformHasFastInt8()) + || (dataType == nvinfer1::DataType::kHALF && !builder->platformHasFastFp16())) + { + return false; + } + + return true; +} +} // namespace samplesCommon + +inline std::ostream& operator<<(std::ostream& os, const nvinfer1::Dims& dims) +{ + os << "("; + for (int i = 0; i < dims.nbDims; ++i) + { + os << (i ? ", " : "") << dims.d[i]; + } + return os << ")"; +} + +#endif // TENSORRT_COMMON_H diff --git a/src/Detector/tensorrt_onnx/common/dumpTFWts.py b/src/Detector/tensorrt_onnx/common/dumpTFWts.py new file mode 100644 index 000000000..70770fbd8 --- /dev/null +++ b/src/Detector/tensorrt_onnx/common/dumpTFWts.py @@ -0,0 +1,124 @@ +#!/usr/bin/python +# +# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Script to dump TensorFlow weights in TRT v1 and v2 dump format. +# The V1 format is for TensorRT 4.0. The V2 format is for TensorRT 4.0 and later. + +import sys +import struct +import argparse + +try: + import tensorflow as tf + from tensorflow.python import pywrap_tensorflow +except ImportError as err: + sys.stderr.write("""Error: Failed to import module ({})""".format(err)) + sys.exit() + +parser = argparse.ArgumentParser(description="TensorFlow Weight Dumper") + +parser.add_argument( + "-m", + "--model", + required=True, + help="The checkpoint file basename, example basename(model.ckpt-766908.data-00000-of-00001) -> model.ckpt-766908", +) +parser.add_argument("-o", "--output", required=True, help="The weight file to dump all the weights to.") +parser.add_argument("-1", "--wtsv1", required=False, default=False, type=bool, help="Dump the weights in the wts v1.") + +opt = parser.parse_args() + +if opt.wtsv1: + print("Outputting the trained weights in TensorRT's wts v1 format. This format is documented as:") + print("Line 0: ") + print("Line 1-Num: [buffer name] [buffer type] [buffer size] ") +else: + print("Outputting the trained weights in TensorRT's wts v2 format. This format is documented as:") + print("Line 0: ") + print("Line 1-Num: [buffer name] [buffer type] [(buffer shape{e.g. 
(1, 2, 3)}] ") + +inputbase = opt.model +outputbase = opt.output + + +def float_to_hex(f): + return hex(struct.unpack(" +#include +#include + +namespace nvinfer1 +{ +namespace utils +{ +FileLock::FileLock(ILogger& logger, std::string const& fileName) + : mLogger(logger) + , mFileName(fileName) +{ + std::string lockFileName = mFileName + ".lock"; +#ifdef _MSC_VER + { + std::stringstream ss; + ss << "Trying to set exclusive file lock " << lockFileName << std::endl; + mLogger.log(ILogger::Severity::kVERBOSE, ss.str().c_str()); + } + // MS docs said this is a blocking IO if "FILE_FLAG_OVERLAPPED" is not provided + mHandle = CreateFileA(lockFileName.c_str(), GENERIC_WRITE, 0, NULL, OPEN_ALWAYS, 0, NULL); + if (mHandle == INVALID_HANDLE_VALUE) + { + throw std::runtime_error("Failed to lock " + lockFileName + "!"); + } +#elif defined(__QNX__) + // We once enabled the file lock on QNX, lockf(F_TLOCK) return -1 and the reported error is + // The error generated was 89, which means that the function is not implemented. 
+#else + mHandle = fopen(lockFileName.c_str(), "wb+"); + if (mHandle == nullptr) + { + throw std::runtime_error("Cannot open " + lockFileName + "!"); + } + { + std::stringstream ss; + ss << "Trying to set exclusive file lock " << lockFileName << std::endl; + mLogger.log(ILogger::Severity::kVERBOSE, ss.str().c_str()); + } + mDescriptor = fileno(mHandle); + auto ret = lockf(mDescriptor, F_LOCK, 0); + if (ret != 0) + { + mDescriptor = -1; + fclose(mHandle); + throw std::runtime_error("Failed to lock " + lockFileName + "!"); + } +#endif +} + +FileLock::~FileLock() +{ + std::string lockFileName = mFileName + ".lock"; +#ifdef _MSC_VER + if (mHandle != INVALID_HANDLE_VALUE) + { + CloseHandle(mHandle); + } +#elif defined(__QNX__) + // We once enabled the file lock on QNX, lockf(F_TLOCK) return -1 and the reported error is + // The error generated was 89 + // That means : Function not implemented +#else + if (mDescriptor != -1) + { + auto ret = lockf(mDescriptor, F_ULOCK, 0); + if (mHandle != nullptr) + { + fclose(mHandle); + } + if (ret != 0) + { + std::stringstream ss; + ss << "Failed to unlock " << lockFileName << ", please remove " << lockFileName << ".lock manually!" + << std::endl; + mLogger.log(ILogger::Severity::kVERBOSE, ss.str().c_str()); + } + } +#endif +} +} // namespace utils +} // namespace nvinfer1 diff --git a/src/Detector/tensorrt_onnx/common/fileLock.h b/src/Detector/tensorrt_onnx/common/fileLock.h new file mode 100644 index 000000000..d0f64a5b4 --- /dev/null +++ b/src/Detector/tensorrt_onnx/common/fileLock.h @@ -0,0 +1,86 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef TENSORRT_SAMPLES_COMMON_FILELOCK_H_ +#define TENSORRT_SAMPLES_COMMON_FILELOCK_H_ +#include "NvInfer.h" +#ifdef _MSC_VER +// Needed so that the max/min definitions in windows.h do not conflict with std::max/min. +#define NOMINMAX +#include +#undef NOMINMAX +#else +#include // fileno +#include // lockf +#endif +#include + +namespace nvinfer1 +{ +namespace utils +{ +//! +//! \brief RAII object that locks a the specified file. +//! +//! The FileLock class uses a lock file to specify that the +//! current file is being used by a TensorRT tool or sample +//! so that things like the TimingCache can be updated across +//! processes without having conflicts. +//! +class FileLock +{ +public: + FileLock(nvinfer1::ILogger& logger, std::string const& fileName); + ~FileLock(); + FileLock() = delete; // no default ctor + FileLock(FileLock const&) = delete; // no copy ctor + FileLock& operator=(FileLock const&) = delete; // no copy assignment + FileLock(FileLock&&) = delete; // no move ctor + FileLock& operator=(FileLock&&) = delete; // no move assignment + +private: + //! + //! The logger that emits any error messages that might show up. + //! + nvinfer1::ILogger& mLogger; + + //! + //! The filename that the FileLock is protecting from multiple + //! TensorRT processes from writing to. + //! + std::string const mFileName; + +#ifdef _MSC_VER + //! + //! The file handle on windows for the file lock. + //! + HANDLE mHandle{}; +#else + //! + //! The file handle on linux for the file lock. + //! + FILE* mHandle{}; + //! + //! 
The file descriptor on linux of the file lock. + //! + int32_t mDescriptor{-1}; +#endif +}; // class FileLock +} // namespace utils +} // namespace nvinfer1 + +#endif // TENSORRT_SAMPLES_COMMON_FILELOCK_H_ diff --git a/src/Detector/tensorrt_onnx/common/getOptions.cpp b/src/Detector/tensorrt_onnx/common/getOptions.cpp new file mode 100644 index 000000000..19cd32811 --- /dev/null +++ b/src/Detector/tensorrt_onnx/common/getOptions.cpp @@ -0,0 +1,248 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "getOptions.h" +#include "logger.h" + +#include +#include +#include +#include +#include + +namespace nvinfer1 +{ +namespace utility +{ + +//! Matching for TRTOptions is defined as follows: +//! +//! If A and B both have longName set, A matches B if and only if A.longName == +//! B.longName and (A.shortName == B.shortName if both have short name set). +//! +//! If A only has shortName set and B only has longName set, then A does not +//! match B. It is assumed that when 2 TRTOptions are compared, one of them is +//! the definition of a TRTOption in the input to getOptions. As such, if the +//! definition only has shortName set, it will never be equal to a TRTOption +//! that does not have shortName set (and same for longName). +//! +//! 
If A and B both have shortName set but B does not have longName set, A +//! matches B if and only if A.shortName == B.shortName. +//! +//! If A has neither long or short name set, A matches B if and only if B has +//! neither long or short name set. +bool matches(const TRTOption& a, const TRTOption& b) +{ + if (!a.longName.empty() && !b.longName.empty()) + { + if (a.shortName && b.shortName) + { + return (a.longName == b.longName) && (a.shortName == b.shortName); + } + return a.longName == b.longName; + } + + // If only one of them is not set, this will return false anyway. + return a.shortName == b.shortName; +} + +//! getTRTOptionIndex returns the index of a TRTOption in a vector of +//! TRTOptions, -1 if not found. +int getTRTOptionIndex(const std::vector& options, const TRTOption& opt) +{ + for (size_t i = 0; i < options.size(); ++i) + { + if (matches(opt, options[i])) + { + return i; + } + } + return -1; +} + +//! validateTRTOption will return a string containing an error message if options +//! contain non-numeric characters, or if there are duplicate option names found. +//! Otherwise, returns the empty string. +std::string validateTRTOption( + const std::set& seenShortNames, const std::set& seenLongNames, const TRTOption& opt) +{ + if (opt.shortName != 0) + { + if (!std::isalnum(opt.shortName)) + { + return "Short name '" + std::to_string(opt.shortName) + "' is non-alphanumeric"; + } + + if (seenShortNames.find(opt.shortName) != seenShortNames.end()) + { + return "Short name '" + std::to_string(opt.shortName) + "' is a duplicate"; + } + } + + if (!opt.longName.empty()) + { + for (const char& c : opt.longName) + { + if (!std::isalnum(c) && c != '-' && c != '_') + { + return "Long name '" + opt.longName + "' contains characters that are not '-', '_', or alphanumeric"; + } + } + + if (seenLongNames.find(opt.longName) != seenLongNames.end()) + { + return "Long name '" + opt.longName + "' is a duplicate"; + } + } + return ""; +} + +//! 
validateTRTOptions will return a string containing an error message if any +//! options contain non-numeric characters, or if there are duplicate option +//! names found. Otherwise, returns the empty string. +std::string validateTRTOptions(const std::vector& options) +{ + std::set seenShortNames; + std::set seenLongNames; + for (size_t i = 0; i < options.size(); ++i) + { + const std::string errMsg = validateTRTOption(seenShortNames, seenLongNames, options[i]); + if (!errMsg.empty()) + { + return "Error '" + errMsg + "' at TRTOption " + std::to_string(i); + } + + seenShortNames.insert(options[i].shortName); + seenLongNames.insert(options[i].longName); + } + return ""; +} + +//! parseArgs parses an argument list and returns a TRTParsedArgs with the +//! fields set accordingly. Assumes that options is validated. +//! ErrMsg will be set if: +//! - an argument is null +//! - an argument is empty +//! - an argument does not have option (i.e. "-" and "--") +//! - a short argument has more than 1 character +//! - the last argument in the list requires a value +TRTParsedArgs parseArgs(int argc, const char* const* argv, const std::vector& options) +{ + TRTParsedArgs parsedArgs; + parsedArgs.values.resize(options.size()); + + for (int i = 1; i < argc; ++i) // index of current command-line argument + { + if (argv[i] == nullptr) + { + return TRTParsedArgs{"Null argument at index " + std::to_string(i)}; + } + + const std::string argStr(argv[i]); + if (argStr.empty()) + { + return TRTParsedArgs{"Empty argument at index " + std::to_string(i)}; + } + + // No starting hyphen means it is a positional argument + if (argStr[0] != '-') + { + parsedArgs.positionalArgs.push_back(argStr); + continue; + } + + if (argStr == "-" || argStr == "--") + { + return TRTParsedArgs{"Argument does not specify an option at index " + std::to_string(i)}; + } + + // If only 1 hyphen, char after is the flag. 
+ TRTOption opt{' ', "", false, ""}; + std::string value; + if (argStr[1] != '-') + { + // Must only have 1 char after the hyphen + if (argStr.size() > 2) + { + return TRTParsedArgs{"Short arg contains more than 1 character at index " + std::to_string(i)}; + } + opt.shortName = argStr[1]; + } + else + { + opt.longName = argStr.substr(2); + + // We need to support --foo=bar syntax, so look for '=' + const size_t eqIndex = opt.longName.find('='); + if (eqIndex < opt.longName.size()) + { + value = opt.longName.substr(eqIndex + 1); + opt.longName = opt.longName.substr(0, eqIndex); + } + } + + const int idx = getTRTOptionIndex(options, opt); + if (idx < 0) + { + continue; + } + + if (options[idx].valueRequired) + { + if (!value.empty()) + { + parsedArgs.values[idx].second.push_back(value); + parsedArgs.values[idx].first = parsedArgs.values[idx].second.size(); + continue; + } + + if (i + 1 >= argc) + { + return TRTParsedArgs{"Last argument requires value, but none given"}; + } + + const std::string nextArg(argv[i + 1]); + if (nextArg.size() >= 1 && nextArg[0] == '-') + { + sample::gLogWarning << "Warning: Using '" << nextArg << "' as a value for '" << argStr + << "', Should this be its own flag?" 
<< std::endl; + } + + parsedArgs.values[idx].second.push_back(nextArg); + i += 1; // Next argument already consumed + + parsedArgs.values[idx].first = parsedArgs.values[idx].second.size(); + } + else + { + parsedArgs.values[idx].first += 1; + } + } + return parsedArgs; +} + +TRTParsedArgs getOptions(int argc, const char* const* argv, const std::vector& options) +{ + const std::string errMsg = validateTRTOptions(options); + if (!errMsg.empty()) + { + return TRTParsedArgs{errMsg}; + } + return parseArgs(argc, argv, options); +} +} // namespace utility +} // namespace nvinfer1 diff --git a/src/Detector/tensorrt_onnx/common/getOptions.h b/src/Detector/tensorrt_onnx/common/getOptions.h new file mode 100644 index 000000000..4bbf9e275 --- /dev/null +++ b/src/Detector/tensorrt_onnx/common/getOptions.h @@ -0,0 +1,128 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef TRT_GET_OPTIONS_H +#define TRT_GET_OPTIONS_H + +#include +#include +#include + +namespace nvinfer1 +{ +namespace utility +{ + +//! TRTOption defines a command line option. At least 1 of shortName and longName +//! must be defined. +//! If bool initialization is undefined behavior on your system, valueRequired +//! must also be explicitly defined. +//! helpText is optional. 
+struct TRTOption +{ + char shortName; //!< Option name in short (single hyphen) form (i.e. -a, -b) + std::string longName; //!< Option name in long (double hyphen) form (i.e. --foo, --bar) + bool valueRequired; //!< True if a value is needed for an option (i.e. -N 4, --foo bar) + std::string helpText; //!< Text to show when printing out the command usage +}; + +//! TRTParsedArgs is returned by getOptions after it has parsed a command line +//! argument list (argv). +//! +//! errMsg is a string containing an error message if any errors occurred. If it +//! is empty, no errors occurred. +//! +//! values stores a vector of pairs for each option (ordered by order in the +//! input). Each pair contains an int (the number of occurrences) and a vector +//! of strings (a list of values). The user should know which of these to use, +//! and which options required values. For non-value options, only occurrences is +//! populated. For value-required options, occurrences == # of values. Values do +//! not need to be unique. +//! +//! positionalArgs stores additional arguments that are passed in without an +//! option (these must not start with a hyphen). +struct TRTParsedArgs +{ + std::string errMsg; + std::vector>> values; + std::vector positionalArgs; +}; + +//! Parse the input arguments passed to main() and extract options as well as +//! positional arguments. +//! +//! Options are supposed to be passed to main() with a preceding hyphen '-'. +//! +//! If there is a single preceding hyphen, there should be exactly 1 character +//! after the hyphen, which is interpreted as the option. +//! +//! If there are 2 preceding hyphens, the entire argument (without the hyphens) +//! is interpreted as the option. +//! +//! If the option requires a value, the next argument is used as the value. +//! +//! Positional arguments must not start with a hyphen. +//! +//! If an argument requires a value, the next argument is interpreted as the +//! 
value, even if it is the form of a valid option (i.e. --foo --bar will store +//! "--bar" as a value for option "foo" if "foo" requires a value). +//! We also support --name=value syntax. In this case, 'value' would be used as +//! the value, NOT the next argument. +//! +//! For options: +//! { { 'a', "", false }, +//! { 'b', "", false }, +//! { 0, "cee", false }, +//! { 'd', "", true }, +//! { 'e', "", true }, +//! { 'f', "foo", true } } +//! +//! ./main hello world -a -a --cee -d 12 -f 34 +//! and +//! ./main hello world -a -a --cee -d 12 --foo 34 +//! +//! will result in: +//! +//! TRTParsedArgs { +//! errMsg: "", +//! values: { { 2, {} }, +//! { 0, {} }, +//! { 1, {} }, +//! { 1, {"12"} }, +//! { 0, {} }, +//! { 1, {"34"} } } +//! positionalArgs: {"hello", "world"}, +//! } +//! +//! Non-POSIX behavior: +//! - Does not support "-abcde" as a shorthand for "-a -b -c -d -e". Each +//! option must have its own hyphen prefix. +//! - Does not support -e12 as a shorthand for "-e 12". Values MUST be +//! whitespace-separated from the option it is for. +//! +//! @param[in] argc The number of arguments passed to main (including the +//! file name, which is disregarded) +//! @param[in] argv The arguments passed to main (including the file name, +//! which is disregarded) +//! @param[in] options List of TRTOptions to parse +//! @return TRTParsedArgs. See TRTParsedArgs documentation for descriptions of +//! the fields. 
+TRTParsedArgs getOptions(int argc, const char* const* argv, const std::vector& options); +} // namespace utility +} // namespace nvinfer1 + +#endif // TRT_GET_OPTIONS_H diff --git a/src/Detector/tensorrt_onnx/common/getopt.c b/src/Detector/tensorrt_onnx/common/getopt.c new file mode 100644 index 000000000..c1da08b5b --- /dev/null +++ b/src/Detector/tensorrt_onnx/common/getopt.c @@ -0,0 +1,568 @@ +/* $OpenBSD: getopt_long.c,v 1.23 2007/10/31 12:34:57 chl Exp $ */ +/* $NetBSD: getopt_long.c,v 1.15 2002/01/31 22:43:40 tv Exp $ */ + +/* + * Copyright (c) 2002 Todd C. Miller + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Sponsored in part by the Defense Advanced Research Projects + * Agency (DARPA) and Air Force Research Laboratory, Air Force + * Materiel Command, USAF, under agreement number F39502-99-1-0512. + */ +/*- + * Copyright (c) 2000 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Dieter Baron and Thomas Klausner. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "getoptWin.h" +#include +#include +#include +#include +#include +#include + +#define REPLACE_GETOPT /* use this getopt as the system getopt(3) */ + +#ifdef REPLACE_GETOPT +int opterr = 1; /* if error message should be printed */ +int optind = 1; /* index into parent argv vector */ +int optopt = '?'; /* character checked for validity */ +#undef optreset /* see getopt.h */ +#define optreset __mingw_optreset +int optreset; /* reset getopt */ +char* optarg; /* argument associated with option */ +#endif + +#define PRINT_ERROR ((opterr) && (*options != ':')) + +#define FLAG_PERMUTE 0x01 /* permute non-options to the end of argv */ +#define FLAG_ALLARGS 0x02 /* treat non-options as args to option "-1" */ +#define FLAG_LONGONLY 0x04 /* operate as getopt_long_only */ + +/* return values */ +#define BADCH (int) '?' 
+#define BADARG ((*options == ':') ? (int) ':' : (int) '?') +#define INORDER (int) 1 + +#ifndef __CYGWIN__ +#define __progname __argv[0] +#else +extern char __declspec(dllimport) * __progname; +#endif + +#ifdef __CYGWIN__ +static char EMSG[] = ""; +#else +#define EMSG "" +#endif + +static int getopt_internal(int, char* const*, char const*, const struct option*, int*, int); +static int parse_long_options(char* const*, char const*, const struct option*, int*, int); +static int gcd(int, int); +static void permute_args(int, int, int, char* const*); + +static char* place = EMSG; /* option letter processing */ + +/* XXX: set optreset to 1 rather than these two */ +static int nonopt_start = -1; /* first non option argument (for permute) */ +static int nonopt_end = -1; /* first option after non options (for permute) */ + +/* Error messages */ +static char const recargchar[] = "option requires an argument -- %c"; +static char const recargstring[] = "option requires an argument -- %s"; +static char const ambig[] = "ambiguous option -- %.*s"; +static char const noarg[] = "option doesn't take an argument -- %.*s"; +static char const illoptchar[] = "unknown option -- %c"; +static char const illoptstring[] = "unknown option -- %s"; + +static void _vwarnx(char const* fmt, va_list ap) +{ + (void) fprintf(stderr, "%s: ", __progname); + if (fmt != NULL) + (void) vfprintf(stderr, fmt, ap); + (void) fprintf(stderr, "\n"); +} + +static void warnx(char const* fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + _vwarnx(fmt, ap); + va_end(ap); +} + +/* + * Compute the greatest common divisor of a and b. + */ +static int gcd(int a, int b) +{ + int c; + + c = a % b; + while (c != 0) + { + a = b; + b = c; + c = a % b; + } + + return (b); +} + +/* + * Exchange the block from nonopt_start to nonopt_end with the block + * from nonopt_end to opt_end (keeping the same order of arguments + * in each block). 
 */
/*
 * permute_args --
 *	In-place rotation of nargv: moves the previously skipped block of
 *	non-options [panonopt_start, panonopt_end) past the option block
 *	[panonopt_end, opt_end), keeping relative order inside each block.
 *	Done as gcd(nnonopts, nopts) independent swap cycles, the classic
 *	BSD getopt technique (no temporary array needed).
 */
static void permute_args(int panonopt_start, int panonopt_end, int opt_end, char* const* nargv)
{
    int cstart, cyclelen, i, j, ncycle, nnonopts, nopts, pos;
    char* swap;

    /*
     * compute lengths of blocks and number and size of cycles
     */
    nnonopts = panonopt_end - panonopt_start;
    nopts = opt_end - panonopt_end;
    ncycle = gcd(nnonopts, nopts);
    cyclelen = (opt_end - panonopt_start) / ncycle;

    for (i = 0; i < ncycle; i++)
    {
        cstart = panonopt_end + i;
        pos = cstart;
        for (j = 0; j < cyclelen; j++)
        {
            /* advance around the cycle: option slots step back over the
             * non-option block, non-option slots step forward over the
             * option block */
            if (pos >= panonopt_end)
                pos -= nnonopts;
            else
                pos += nopts;
            swap = nargv[pos];
            /* LINTED const cast */
            ((char**) nargv)[pos] = nargv[cstart];
            /* LINTED const cast */
            ((char**) nargv)[cstart] = swap;
        }
    }
}

/*
 * parse_long_options --
 *	Parse long options in argc/argv argument vector.
 *	On entry `place' (the file-global scan pointer, set by the caller)
 *	points at the option text past its leading dash(es).  On a match,
 *	*idx receives the table index (if idx != NULL) and either 0 (flag
 *	variable set) or the option's `val' is returned; BADCH/BADARG are
 *	returned on errors, with optopt set for the caller's diagnostics.
 * Returns -1 if short_too is set and the option does not match long_options.
 */
static int parse_long_options(
    char* const* nargv, char const* options, const struct option* long_options, int* idx, int short_too)
{
    char *current_argv, *has_equal;
    size_t current_argv_len;
    int i, ambiguous, match;

/* Two partial matches are not really ambiguous when they would behave
 * identically (same has_arg/flag/val). */
#define IDENTICAL_INTERPRETATION(_x, _y) \
    (long_options[(_x)].has_arg == long_options[(_y)].has_arg && long_options[(_x)].flag == long_options[(_y)].flag \
        && long_options[(_x)].val == long_options[(_y)].val)

    current_argv = place;
    match = -1;
    ambiguous = 0;

    /* consume the argv element that holds this long option */
    optind++;

    if ((has_equal = strchr(current_argv, '=')) != NULL)
    {
        /* argument found (--option=arg) */
        current_argv_len = has_equal - current_argv;
        has_equal++;
    }
    else
        current_argv_len = strlen(current_argv);

    for (i = 0; long_options[i].name; i++)
    {
        /* find matching long option */
        if (strncmp(current_argv, long_options[i].name, current_argv_len))
            continue;

        if (strlen(long_options[i].name) == current_argv_len)
        {
            /* exact match */
            match = i;
            ambiguous = 0;
            break;
        }
        /*
         * If this is a known short option, don't allow
         * a partial match of a single character.
         */
        if (short_too && current_argv_len == 1)
            continue;

        if (match == -1) /* partial match */
            match = i;
        else if (!IDENTICAL_INTERPRETATION(i, match))
            ambiguous = 1;
    }
    if (ambiguous)
    {
        /* ambiguous abbreviation */
        if (PRINT_ERROR)
            warnx(ambig, (int) current_argv_len, current_argv);
        optopt = 0;
        return (BADCH);
    }
    if (match != -1)
    { /* option found */
        if (long_options[match].has_arg == no_argument && has_equal)
        {
            /* --option=arg given, but the option takes no argument */
            if (PRINT_ERROR)
                warnx(noarg, (int) current_argv_len, current_argv);
            /*
             * XXX: GNU sets optopt to val regardless of flag
             */
            if (long_options[match].flag == NULL)
                optopt = long_options[match].val;
            else
                optopt = 0;
            return (BADARG);
        }
        if (long_options[match].has_arg == required_argument || long_options[match].has_arg == optional_argument)
        {
            if (has_equal)
                optarg = has_equal;
            else if (long_options[match].has_arg == required_argument)
            {
                /*
                 * optional argument doesn't use next nargv
                 */
                optarg = nargv[optind++];
            }
        }
        if ((long_options[match].has_arg == required_argument) && (optarg == NULL))
        {
            /*
             * Missing argument; leading ':' indicates no error
             * should be generated.
             */
            if (PRINT_ERROR)
                warnx(recargstring, current_argv);
            /*
             * XXX: GNU sets optopt to val regardless of flag
             */
            if (long_options[match].flag == NULL)
                optopt = long_options[match].val;
            else
                optopt = 0;
            /* undo the optind++ above: the missing arg wasn't consumed */
            --optind;
            return (BADARG);
        }
    }
    else
    { /* unknown option */
        if (short_too)
        {
            /* let the caller retry it as a short option */
            --optind;
            return (-1);
        }
        if (PRINT_ERROR)
            warnx(illoptstring, current_argv);
        optopt = 0;
        return (BADCH);
    }
    if (idx)
        *idx = match;
    if (long_options[match].flag)
    {
        /* flag-style option: store val through the flag pointer, return 0 */
        *long_options[match].flag = long_options[match].val;
        return (0);
    }
    else
        return (long_options[match].val);
#undef IDENTICAL_INTERPRETATION
}

/*
 * getopt_internal --
 *	Parse argc/argv argument vector.  Called by user level routines.
 */
/*
 * Shared work-horse behind getopt()/getopt_long()/getopt_long_only().
 * `flags' selects the behaviour: FLAG_PERMUTE (shuffle non-options to the
 * end of argv), FLAG_ALLARGS (options string began with '-': non-options
 * are returned as the argument of option 1/INORDER), FLAG_LONGONLY (a
 * single '-' may introduce a long option).  Relies on the file-global
 * scan state: place, nonopt_start, nonopt_end, optind/optarg/optreset.
 */
static int getopt_internal(
    int nargc, char* const* nargv, char const* options, const struct option* long_options, int* idx, int flags)
{
    char const* oli; /* option letter list index */
    int optchar, short_too;
    static int posixly_correct = -1; /* cached across calls; see re-check below */

    if (options == NULL)
        return (-1);

    /*
     * XXX Some GNU programs (like cvs) set optind to 0 instead of
     * XXX using optreset.  Work around this braindamage.
     */
    if (optind == 0)
        optind = optreset = 1;

    /*
     * Disable GNU extensions if POSIXLY_CORRECT is set or options
     * string begins with a '+'.
     *
     * CV, 2009-12-14: Check POSIXLY_CORRECT anew if optind == 0 or
     * optreset != 0 for GNU compatibility.
     */
    if (posixly_correct == -1 || optreset != 0)
        posixly_correct = (getenv("POSIXLY_CORRECT") != NULL);
    if (*options == '-')
        flags |= FLAG_ALLARGS;
    else if (posixly_correct || *options == '+')
        flags &= ~FLAG_PERMUTE;
    if (*options == '+' || *options == '-')
        options++; /* skip the mode prefix character */

    optarg = NULL;
    if (optreset)
        nonopt_start = nonopt_end = -1;
start:
    if (optreset || !*place)
    { /* update scanning pointer */
        optreset = 0;
        if (optind >= nargc)
        { /* end of argument vector */
            place = EMSG;
            if (nonopt_end != -1)
            {
                /* do permutation, if we have to */
                permute_args(nonopt_start, nonopt_end, optind, nargv);
                optind -= nonopt_end - nonopt_start;
            }
            else if (nonopt_start != -1)
            {
                /*
                 * If we skipped non-options, set optind
                 * to the first of them.
                 */
                optind = nonopt_start;
            }
            nonopt_start = nonopt_end = -1;
            return (-1);
        }
        if (*(place = nargv[optind]) != '-' || (place[1] == '\0' && strchr(options, '-') == NULL))
        {
            place = EMSG; /* found non-option */
            if (flags & FLAG_ALLARGS)
            {
                /*
                 * GNU extension:
                 * return non-option as argument to option 1
                 */
                optarg = nargv[optind++];
                return (INORDER);
            }
            if (!(flags & FLAG_PERMUTE))
            {
                /*
                 * If no permutation wanted, stop parsing
                 * at first non-option.
                 */
                return (-1);
            }
            /* do permutation */
            if (nonopt_start == -1)
                nonopt_start = optind;
            else if (nonopt_end != -1)
            {
                permute_args(nonopt_start, nonopt_end, optind, nargv);
                nonopt_start = optind - (nonopt_end - nonopt_start);
                nonopt_end = -1;
            }
            optind++;
            /* process next argument */
            goto start;
        }
        if (nonopt_start != -1 && nonopt_end == -1)
            nonopt_end = optind;

        /*
         * If we have "-" do nothing, if "--" we are done.
         */
        if (place[1] != '\0' && *++place == '-' && place[1] == '\0')
        {
            optind++;
            place = EMSG;
            /*
             * We found an option (--), so if we skipped
             * non-options, we have to permute.
             */
            if (nonopt_end != -1)
            {
                permute_args(nonopt_start, nonopt_end, optind, nargv);
                optind -= nonopt_end - nonopt_start;
            }
            nonopt_start = nonopt_end = -1;
            return (-1);
        }
    }

    /*
     * Check long options if:
     *  1) we were passed some
     *  2) the arg is not just "-"
     *  3) either the arg starts with -- or we are getopt_long_only()
     */
    if (long_options != NULL && place != nargv[optind] && (*place == '-' || (flags & FLAG_LONGONLY)))
    {
        short_too = 0;
        if (*place == '-')
            place++; /* --foo long option */
        else if (*place != ':' && strchr(options, *place) != NULL)
            short_too = 1; /* could be short option too */

        optchar = parse_long_options(nargv, options, long_options, idx, short_too);
        if (optchar != -1)
        {
            place = EMSG;
            return (optchar);
        }
    }

    if ((optchar = (int) *place++) == (int) ':' || (optchar == (int) '-' && *place != '\0')
        || (oli = strchr(options, optchar)) == NULL)
    {
        /*
         * If the user specified "-" and '-' isn't listed in
         * options, return -1 (non-option) as per POSIX.
         * Otherwise, it is an unknown option character (or ':').
         */
        if (optchar == (int) '-' && *place == '\0')
            return (-1);
        if (!*place)
            ++optind;
        if (PRINT_ERROR)
            warnx(illoptchar, optchar);
        optopt = optchar;
        return (BADCH);
    }
    if (long_options != NULL && optchar == 'W' && oli[1] == ';')
    {
        /* -W long-option  (GNU "W;" extension: -W foo == --foo) */
        if (*place) /* no space */
            /* NOTHING */;
        else if (++optind >= nargc)
        { /* no arg */
            place = EMSG;
            if (PRINT_ERROR)
                warnx(recargchar, optchar);
            optopt = optchar;
            return (BADARG);
        }
        else /* white space */
            place = nargv[optind];
        optchar = parse_long_options(nargv, options, long_options, idx, 0);
        place = EMSG;
        return (optchar);
    }
    if (*++oli != ':')
    { /* doesn't take argument */
        if (!*place)
            ++optind;
    }
    else
    { /* takes (optional) argument */
        optarg = NULL;
        if (*place) /* no white space */
            optarg = place;
        else if (oli[1] != ':')
        { /* arg not optional */
            if (++optind >= nargc)
            { /* no arg */
                place = EMSG;
                if (PRINT_ERROR)
                    warnx(recargchar, optchar);
                optopt = optchar;
                return (BADARG);
            }
            else
                optarg = nargv[optind];
        }
        place = EMSG;
        ++optind;
    }
    /* dump back option letter */
    return (optchar);
}

#ifdef REPLACE_GETOPT
/*
 * getopt --
 *	Parse argc/argv argument vector.
 *
 * [eventually this will replace the BSD getopt]
 */
int getopt(int nargc, char* const* nargv, char const* options)
{

    /*
     * We don't pass FLAG_PERMUTE to getopt_internal() since
     * the BSD getopt(3) (unlike GNU) has never done this.
     *
     * Furthermore, since many privileged programs call getopt()
     * before dropping privileges it makes sense to keep things
     * as simple (and bug-free) as possible.
     */
    return (getopt_internal(nargc, nargv, options, NULL, NULL, 0));
}
#endif /* REPLACE_GETOPT */

/*
 * getopt_long --
 *	Parse argc/argv argument vector.
 */
int getopt_long(int nargc, char* const* nargv, char const* options, const struct option* long_options, int* idx)
{

    /* GNU-style behaviour: non-options are permuted to the end of argv. */
    return (getopt_internal(nargc, nargv, options, long_options, idx, FLAG_PERMUTE));
}

/*
 * getopt_long_only --
 *	Parse argc/argv argument vector.
 */
int getopt_long_only(int nargc, char* const* nargv, char const* options, const struct option* long_options, int* idx)
{

    /* As getopt_long(), but a single '-' may also introduce a long option. */
    return (getopt_internal(nargc, nargv, options, long_options, idx, FLAG_PERMUTE | FLAG_LONGONLY));
}
diff --git a/src/Detector/tensorrt_onnx/common/getoptWin.h b/src/Detector/tensorrt_onnx/common/getoptWin.h
new file mode 100644
index 000000000..a1dc6ffa9
--- /dev/null
+++ b/src/Detector/tensorrt_onnx/common/getoptWin.h
@@ -0,0 +1,124 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __GETOPT_H__
/**
 * DISCLAIMER
 * This file has no copyright assigned and is placed in the Public Domain.
 * This file is a part of the w64 mingw-runtime package.
 *
 * The w64 mingw-runtime package and its code is distributed in the hope that it
 * will be useful but WITHOUT ANY WARRANTY. ALL WARRANTIES, EXPRESSED OR
 * IMPLIED ARE HEREBY DISCLAIMED. This includes but is not limited to
 * warranties of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 */

#define __GETOPT_H__

/* All the headers include this file. */
/* FIXME(review): the included header name was lost in extraction (angle
 * brackets stripped) — the upstream mingw-w64 getopt.h includes
 * <crtdefs.h> here; confirm and restore. */
#include

/* Export/import decoration when building/consuming this as a shared lib;
 * empty for the static/in-tree case. */
#if defined(WINGETOPT_SHARED_LIB)
#if defined(BUILDING_WINGETOPT_DLL)
#define WINGETOPT_API __declspec(dllexport)
#else
#define WINGETOPT_API __declspec(dllimport)
#endif
#else
#define WINGETOPT_API
#endif

#ifdef __cplusplus
extern "C"
{
#endif

    WINGETOPT_API extern int optind; /* index of first non-option in argv */
    WINGETOPT_API extern int optopt; /* single option character, as parsed */
    WINGETOPT_API extern int opterr; /* flag to enable built-in diagnostics... */
    /* (user may set to zero, to suppress) */

    WINGETOPT_API extern char* optarg; /* pointer to argument of current option */

    extern int getopt(int nargc, char* const* nargv, char const* options);

#ifdef _BSD_SOURCE
/*
 * BSD adds the non-standard `optreset' feature, for reinitialisation
 * of `getopt' parsing.  We support this feature, for applications which
 * proclaim their BSD heritage, before including this header; however,
 * to maintain portability, developers are advised to avoid it.
 */
#define optreset __mingw_optreset
    extern int optreset;
#endif
#ifdef __cplusplus
}
#endif
/*
 * POSIX requires the `getopt' API to be specified in `unistd.h';
 * thus, `unistd.h' includes this header.  However, we do not want
 * to expose the `getopt_long' or `getopt_long_only' APIs, when
 * included in this manner.  Thus, close the standard __GETOPT_H__
 * declarations block, and open an additional __GETOPT_LONG_H__
 * specific block, only when *not* __UNISTD_H_SOURCED__, in which
 * to declare the extended API.
 */
#endif /* !defined(__GETOPT_H__) */

#if !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__)
#define __GETOPT_LONG_H__

#ifdef __cplusplus
extern "C"
{
#endif

    struct option /* specification for a long form option... */
    {
        char const* name; /* option name, without leading hyphens */
        int has_arg;      /* does it take an argument? */
        int* flag;        /* where to save its status, or NULL */
        int val;          /* its associated status value */
    };

    enum /* permitted values for its `has_arg' field... */
    {
        no_argument = 0,   /* option never takes an argument */
        required_argument, /* option always requires an argument */
        optional_argument  /* option may take an argument */
    };

    extern int getopt_long(
        int nargc, char* const* nargv, char const* options, const struct option* long_options, int* idx);
    extern int getopt_long_only(
        int nargc, char* const* nargv, char const* options, const struct option* long_options, int* idx);
/*
 * Previous MinGW implementation had...
 */
#ifndef HAVE_DECL_GETOPT
/*
 * ...for the long form API only; keep this for compatibility.
 */
#define HAVE_DECL_GETOPT 1
#endif

#ifdef __cplusplus
}
#endif

#endif /* !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__) */
diff --git a/src/Detector/tensorrt_onnx/common/half.h b/src/Detector/tensorrt_onnx/common/half.h
new file mode 100644
index 000000000..b997e7db6
--- /dev/null
+++ b/src/Detector/tensorrt_onnx/common/half.h
@@ -0,0 +1,4303 @@
// half - IEEE 754-based half-precision floating point library.
//
// Copyright (c) 2012-2017 Christian Rau
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
// permit persons to whom the Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
// Software.
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Version 1.12.0 + +/// \file +/// Main header file for half precision functionality. + +#ifndef HALF_HALF_HPP +#define HALF_HALF_HPP + +/// Combined gcc version number. 
+#define HALF_GNUC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +// check C++11 language features +#if defined(__clang__) // clang +#if __has_feature(cxx_static_assert) && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) +#define HALF_ENABLE_CPP11_STATIC_ASSERT 1 +#endif +#if __has_feature(cxx_constexpr) && !defined(HALF_ENABLE_CPP11_CONSTEXPR) +#define HALF_ENABLE_CPP11_CONSTEXPR 1 +#endif +#if __has_feature(cxx_noexcept) && !defined(HALF_ENABLE_CPP11_NOEXCEPT) +#define HALF_ENABLE_CPP11_NOEXCEPT 1 +#endif +#if __has_feature(cxx_user_literals) && !defined(HALF_ENABLE_CPP11_USER_LITERALS) +#define HALF_ENABLE_CPP11_USER_LITERALS 1 +#endif +#if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L) && !defined(HALF_ENABLE_CPP11_LONG_LONG) +#define HALF_ENABLE_CPP11_LONG_LONG 1 +#endif +/*#elif defined(__INTEL_COMPILER) //Intel C++ + #if __INTEL_COMPILER >= 1100 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) ???????? + #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 + #endif + #if __INTEL_COMPILER >= 1300 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) ???????? + #define HALF_ENABLE_CPP11_CONSTEXPR 1 + #endif + #if __INTEL_COMPILER >= 1300 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) ???????? + #define HALF_ENABLE_CPP11_NOEXCEPT 1 + #endif + #if __INTEL_COMPILER >= 1100 && !defined(HALF_ENABLE_CPP11_LONG_LONG) ???????? 
+ #define HALF_ENABLE_CPP11_LONG_LONG 1 + #endif*/ +#elif defined(__GNUC__) // gcc +#if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L +#if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) +#define HALF_ENABLE_CPP11_STATIC_ASSERT 1 +#endif +#if HALF_GNUC_VERSION >= 406 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) +#define HALF_ENABLE_CPP11_CONSTEXPR 1 +#endif +#if HALF_GNUC_VERSION >= 406 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) +#define HALF_ENABLE_CPP11_NOEXCEPT 1 +#endif +#if HALF_GNUC_VERSION >= 407 && !defined(HALF_ENABLE_CPP11_USER_LITERALS) +#define HALF_ENABLE_CPP11_USER_LITERALS 1 +#endif +#if !defined(HALF_ENABLE_CPP11_LONG_LONG) +#define HALF_ENABLE_CPP11_LONG_LONG 1 +#endif +#endif +#elif defined(_MSC_VER) // Visual C++ +#if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) +#define HALF_ENABLE_CPP11_CONSTEXPR 1 +#endif +#if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) +#define HALF_ENABLE_CPP11_NOEXCEPT 1 +#endif +#if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_USER_LITERALS) +#define HALF_ENABLE_CPP11_USER_LITERALS 1 +#endif +#if _MSC_VER >= 1600 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) +#define HALF_ENABLE_CPP11_STATIC_ASSERT 1 +#endif +#if _MSC_VER >= 1310 && !defined(HALF_ENABLE_CPP11_LONG_LONG) +#define HALF_ENABLE_CPP11_LONG_LONG 1 +#endif +#define HALF_POP_WARNINGS 1 +#pragma warning(push) +#pragma warning(disable : 4099 4127 4146) // struct vs class, constant in if, negative unsigned +#endif + +// check C++11 library features +#include +#if defined(_LIBCPP_VERSION) // libc++ +#if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103 +#ifndef HALF_ENABLE_CPP11_TYPE_TRAITS +#define HALF_ENABLE_CPP11_TYPE_TRAITS 1 +#endif +#ifndef HALF_ENABLE_CPP11_CSTDINT +#define HALF_ENABLE_CPP11_CSTDINT 1 +#endif +#ifndef HALF_ENABLE_CPP11_CMATH +#define HALF_ENABLE_CPP11_CMATH 1 +#endif +#ifndef HALF_ENABLE_CPP11_HASH +#define HALF_ENABLE_CPP11_HASH 1 +#endif +#endif +#elif defined(__GLIBCXX__) 
// libstdc++ +#if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103 +#ifdef __clang__ +#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_TYPE_TRAITS) +#define HALF_ENABLE_CPP11_TYPE_TRAITS 1 +#endif +#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CSTDINT) +#define HALF_ENABLE_CPP11_CSTDINT 1 +#endif +#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CMATH) +#define HALF_ENABLE_CPP11_CMATH 1 +#endif +#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_HASH) +#define HALF_ENABLE_CPP11_HASH 1 +#endif +#else +#if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CSTDINT) +#define HALF_ENABLE_CPP11_CSTDINT 1 +#endif +#if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CMATH) +#define HALF_ENABLE_CPP11_CMATH 1 +#endif +#if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_HASH) +#define HALF_ENABLE_CPP11_HASH 1 +#endif +#endif +#endif +#elif defined(_CPPLIB_VER) // Dinkumware/Visual C++ +#if _CPPLIB_VER >= 520 +#ifndef HALF_ENABLE_CPP11_TYPE_TRAITS +#define HALF_ENABLE_CPP11_TYPE_TRAITS 1 +#endif +#ifndef HALF_ENABLE_CPP11_CSTDINT +#define HALF_ENABLE_CPP11_CSTDINT 1 +#endif +#ifndef HALF_ENABLE_CPP11_HASH +#define HALF_ENABLE_CPP11_HASH 1 +#endif +#endif +#if _CPPLIB_VER >= 610 +#ifndef HALF_ENABLE_CPP11_CMATH +#define HALF_ENABLE_CPP11_CMATH 1 +#endif +#endif +#endif +#undef HALF_GNUC_VERSION + +// support constexpr +#if HALF_ENABLE_CPP11_CONSTEXPR +#define HALF_CONSTEXPR constexpr +#define HALF_CONSTEXPR_CONST constexpr +#else +#define HALF_CONSTEXPR +#define HALF_CONSTEXPR_CONST const +#endif + +// support noexcept +#if HALF_ENABLE_CPP11_NOEXCEPT +#define HALF_NOEXCEPT noexcept +#define HALF_NOTHROW noexcept +#else +#define HALF_NOEXCEPT +#define HALF_NOTHROW throw() +#endif + +#include +#include +#include +#include +#include +#include +#if HALF_ENABLE_CPP11_TYPE_TRAITS +#include +#endif +#if HALF_ENABLE_CPP11_CSTDINT +#include +#endif +#if HALF_ENABLE_CPP11_HASH +#include +#endif + +/// Default rounding 
mode. +/// This specifies the rounding mode used for all conversions between [half](\ref half_float::half)s and `float`s as +/// well as for the half_cast() if not specifying a rounding mode explicitly. It can be redefined (before including +/// half.hpp) to one of the standard rounding modes using their respective constants or the equivalent values of +/// `std::float_round_style`: +/// +/// `std::float_round_style` | value | rounding +/// ---------------------------------|-------|------------------------- +/// `std::round_indeterminate` | -1 | fastest (default) +/// `std::round_toward_zero` | 0 | toward zero +/// `std::round_to_nearest` | 1 | to nearest +/// `std::round_toward_infinity` | 2 | toward positive infinity +/// `std::round_toward_neg_infinity` | 3 | toward negative infinity +/// +/// By default this is set to `-1` (`std::round_indeterminate`), which uses truncation (round toward zero, but with +/// overflows set to infinity) and is the fastest rounding mode possible. It can even be set to +/// `std::numeric_limits::round_style` to synchronize the rounding mode with that of the underlying +/// single-precision implementation. +#ifndef HALF_ROUND_STYLE +#define HALF_ROUND_STYLE 1 // = std::round_to_nearest +#endif + +/// Tie-breaking behaviour for round to nearest. +/// This specifies if ties in round to nearest should be resolved by rounding to the nearest even value. By default this +/// is defined to `0` resulting in the faster but slightly more biased behaviour of rounding away from zero in half-way +/// cases (and thus equal to the round() function), but can be redefined to `1` (before including half.hpp) if more +/// IEEE-conformant behaviour is needed. +#ifndef HALF_ROUND_TIES_TO_EVEN +#define HALF_ROUND_TIES_TO_EVEN 0 // ties away from zero +#endif + +/// Value signaling overflow. 
+/// In correspondence with `HUGE_VAL[F|L]` from `` this symbol expands to a positive value signaling the overflow +/// of an operation, in particular it just evaluates to positive infinity. +#define HUGE_VALH std::numeric_limits::infinity() + +/// Fast half-precision fma function. +/// This symbol is only defined if the fma() function generally executes as fast as, or faster than, a separate +/// half-precision multiplication followed by an addition. Due to the internal single-precision implementation of all +/// arithmetic operations, this is in fact always the case. +#define FP_FAST_FMAH 1 + +#ifndef FP_ILOGB0 +#define FP_ILOGB0 INT_MIN +#endif +#ifndef FP_ILOGBNAN +#define FP_ILOGBNAN INT_MAX +#endif +#ifndef FP_SUBNORMAL +#define FP_SUBNORMAL 0 +#endif +#ifndef FP_ZERO +#define FP_ZERO 1 +#endif +#ifndef FP_NAN +#define FP_NAN 2 +#endif +#ifndef FP_INFINITE +#define FP_INFINITE 3 +#endif +#ifndef FP_NORMAL +#define FP_NORMAL 4 +#endif + +/// Main namespace for half precision functionality. +/// This namespace contains all the functionality provided by the library. +namespace half_float +{ +class half; + +#if HALF_ENABLE_CPP11_USER_LITERALS +/// Library-defined half-precision literals. +/// Import this namespace to enable half-precision floating point literals: +/// ~~~~{.cpp} +/// using namespace half_float::literal; +/// half_float::half = 4.2_h; +/// ~~~~ +namespace literal +{ +half operator"" _h(long double); +} +#endif + +/// \internal +/// \brief Implementation details. +namespace detail +{ +#if HALF_ENABLE_CPP11_TYPE_TRAITS +/// Conditional type. +template +struct conditional : std::conditional +{ +}; + +/// Helper for tag dispatching. +template +struct bool_type : std::integral_constant +{ +}; +using std::false_type; +using std::true_type; + +/// Type traits for floating point types. +template +struct is_float : std::is_floating_point +{ +}; +#else +/// Conditional type. 
+template +struct conditional +{ + typedef T type; +}; +template +struct conditional +{ + typedef F type; +}; + +/// Helper for tag dispatching. +template +struct bool_type +{ +}; +typedef bool_type true_type; +typedef bool_type false_type; + +/// Type traits for floating point types. +template +struct is_float : false_type +{ +}; +template +struct is_float : is_float +{ +}; +template +struct is_float : is_float +{ +}; +template +struct is_float : is_float +{ +}; +template <> +struct is_float : true_type +{ +}; +template <> +struct is_float : true_type +{ +}; +template <> +struct is_float : true_type +{ +}; +#endif + +/// Type traits for floating point bits. +template +struct bits +{ + typedef unsigned char type; +}; +template +struct bits : bits +{ +}; +template +struct bits : bits +{ +}; +template +struct bits : bits +{ +}; + +#if HALF_ENABLE_CPP11_CSTDINT +/// Unsigned integer of (at least) 16 bits width. +typedef std::uint_least16_t uint16; + +/// Unsigned integer of (at least) 32 bits width. +template <> +struct bits +{ + typedef std::uint_least32_t type; +}; + +/// Unsigned integer of (at least) 64 bits width. +template <> +struct bits +{ + typedef std::uint_least64_t type; +}; +#else +/// Unsigned integer of (at least) 16 bits width. +typedef unsigned short uint16; + +/// Unsigned integer of (at least) 32 bits width. +template <> +struct bits : conditional::digits >= 32, unsigned int, unsigned long> +{ +}; + +#if HALF_ENABLE_CPP11_LONG_LONG +/// Unsigned integer of (at least) 64 bits width. +template <> +struct bits : conditional::digits >= 64, unsigned long, unsigned long long> +{ +}; +#else +/// Unsigned integer of (at least) 64 bits width. +template <> +struct bits +{ + typedef unsigned long type; +}; +#endif +#endif + +/// Tag type for binary construction. +struct binary_t +{ +}; + +/// Tag for binary construction. +HALF_CONSTEXPR_CONST binary_t binary = binary_t(); + +/// Temporary half-precision expression. 
+/// This class represents a half-precision expression which just stores a single-precision value internally. +struct expr +{ + /// Conversion constructor. + /// \param f single-precision value to convert + explicit HALF_CONSTEXPR expr(float f) HALF_NOEXCEPT : value_(f) {} + + /// Conversion to single-precision. + /// \return single precision value representing expression value + HALF_CONSTEXPR operator float() const HALF_NOEXCEPT + { + return value_; + } + +private: + /// Internal expression value stored in single-precision. + float value_; +}; + +/// SFINAE helper for generic half-precision functions. +/// This class template has to be specialized for each valid combination of argument types to provide a corresponding +/// `type` member equivalent to \a T. +/// \tparam T type to return +template +struct enable +{ +}; +template +struct enable +{ + typedef T type; +}; +template +struct enable +{ + typedef T type; +}; +template +struct enable +{ + typedef T type; +}; +template +struct enable +{ + typedef T type; +}; +template +struct enable +{ + typedef T type; +}; +template +struct enable +{ + typedef T type; +}; +template +struct enable +{ + typedef T type; +}; +template +struct enable +{ + typedef T type; +}; +template +struct enable +{ + typedef T type; +}; +template +struct enable +{ + typedef T type; +}; +template +struct enable +{ + typedef T type; +}; +template +struct enable +{ + typedef T type; +}; +template +struct enable +{ + typedef T type; +}; +template +struct enable +{ + typedef T type; +}; + +/// Return type for specialized generic 2-argument half-precision functions. +/// This class template has to be specialized for each valid combination of argument types to provide a corresponding +/// `type` member denoting the appropriate return type. 
+/// \tparam T first argument type +/// \tparam U first argument type +template +struct result : enable +{ +}; +template <> +struct result +{ + typedef half type; +}; + +/// \name Classification helpers +/// \{ + +/// Check for infinity. +/// \tparam T argument type (builtin floating point type) +/// \param arg value to query +/// \retval true if infinity +/// \retval false else +template +bool builtin_isinf(T arg) +{ +#if HALF_ENABLE_CPP11_CMATH + return std::isinf(arg); +#elif defined(_MSC_VER) + return !::_finite(static_cast(arg)) && !::_isnan(static_cast(arg)); +#else + return arg == std::numeric_limits::infinity() || arg == -std::numeric_limits::infinity(); +#endif +} + +/// Check for NaN. +/// \tparam T argument type (builtin floating point type) +/// \param arg value to query +/// \retval true if not a number +/// \retval false else +template +bool builtin_isnan(T arg) +{ +#if HALF_ENABLE_CPP11_CMATH + return std::isnan(arg); +#elif defined(_MSC_VER) + return ::_isnan(static_cast(arg)) != 0; +#else + return arg != arg; +#endif +} + +/// Check sign. +/// \tparam T argument type (builtin floating point type) +/// \param arg value to query +/// \retval true if signbit set +/// \retval false else +template +bool builtin_signbit(T arg) +{ +#if HALF_ENABLE_CPP11_CMATH + return std::signbit(arg); +#else + return arg < T() || (arg == T() && T(1) / arg < T()); +#endif +} + +/// \} +/// \name Conversion +/// \{ + +/// Convert IEEE single-precision to half-precision. +/// Credit for this goes to [Jeroen van der Zijp](ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf). +/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding +/// \param value single-precision value +/// \return binary representation of half-precision value +template +uint16 float2half_impl(float value, true_type) +{ + typedef bits::type uint32; + uint32 bits; // = *reinterpret_cast(&value); //violating strict aliasing! 
+ std::memcpy(&bits, &value, sizeof(float)); + /* uint16 hbits = (bits>>16) & 0x8000; + bits &= 0x7FFFFFFF; + int exp = bits >> 23; + if(exp == 255) + return hbits | 0x7C00 | (0x3FF&-static_cast((bits&0x7FFFFF)!=0)); + if(exp > 142) + { + if(R == std::round_toward_infinity) + return hbits | 0x7C00 - (hbits>>15); + if(R == std::round_toward_neg_infinity) + return hbits | 0x7BFF + (hbits>>15); + return hbits | 0x7BFF + (R!=std::round_toward_zero); + } + int g, s; + if(exp > 112) + { + g = (bits>>12) & 1; + s = (bits&0xFFF) != 0; + hbits |= ((exp-112)<<10) | ((bits>>13)&0x3FF); + } + else if(exp > 101) + { + int i = 125 - exp; + bits = (bits&0x7FFFFF) | 0x800000; + g = (bits>>i) & 1; + s = (bits&((1L<> (i+1); + } + else + { + g = 0; + s = bits != 0; + } + if(R == std::round_to_nearest) + #if HALF_ROUND_TIES_TO_EVEN + hbits += g & (s|hbits); + #else + hbits += g; + #endif + else if(R == std::round_toward_infinity) + hbits += ~(hbits>>15) & (s|g); + else if(R == std::round_toward_neg_infinity) + hbits += (hbits>>15) & (g|s); + */ + static const uint16 base_table[512] = {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, + 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 
0x0200, 0x0400, 0x0800, 0x0C00, 0x1000, 0x1400, 0x1800, 0x1C00, 0x2000, + 0x2400, 0x2800, 0x2C00, 0x3000, 0x3400, 0x3800, 0x3C00, 0x4000, 0x4400, 0x4800, 0x4C00, 0x5000, 0x5400, 0x5800, + 0x5C00, 0x6000, 0x6400, 0x6800, 0x6C00, 0x7000, 0x7400, 0x7800, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 
0x8000, 0x8000, + 0x8001, 0x8002, 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400, 0x8800, 0x8C00, 0x9000, + 0x9400, 0x9800, 0x9C00, 0xA000, 0xA400, 0xA800, 0xAC00, 0xB000, 0xB400, 0xB800, 0xBC00, 0xC000, 0xC400, 0xC800, + 0xCC00, 0xD000, 0xD400, 0xD800, 0xDC00, 0xE000, 0xE400, 0xE800, 0xEC00, 0xF000, 0xF400, 0xF800, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00}; + static const unsigned char shift_table[512] = {24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13}; + uint16 hbits = base_table[bits >> 23] + static_cast((bits & 0x7FFFFF) >> shift_table[bits >> 23]); + if (R == std::round_to_nearest) + hbits += (((bits & 0x7FFFFF) >> (shift_table[bits >> 23] - 1)) | (((bits >> 23) & 0xFF) == 102)) + & ((hbits & 0x7C00) != 0x7C00) +#if HALF_ROUND_TIES_TO_EVEN + & (((((static_cast(1) << (shift_table[bits >> 23] - 1)) - 1) & bits) != 0) | hbits) +#endif + ; + else if (R == std::round_toward_zero) + hbits -= ((hbits & 0x7FFF) == 0x7C00) & ~shift_table[bits >> 23]; + else if (R == std::round_toward_infinity) + hbits += ((((bits & 0x7FFFFF & ((static_cast(1) << 
(shift_table[bits >> 23])) - 1)) != 0) + | (((bits >> 23) <= 102) & ((bits >> 23) != 0))) + & (hbits < 0x7C00)) + - ((hbits == 0xFC00) & ((bits >> 23) != 511)); + else if (R == std::round_toward_neg_infinity) + hbits += ((((bits & 0x7FFFFF & ((static_cast(1) << (shift_table[bits >> 23])) - 1)) != 0) + | (((bits >> 23) <= 358) & ((bits >> 23) != 256))) + & (hbits < 0xFC00) & (hbits >> 15)) + - ((hbits == 0x7C00) & ((bits >> 23) != 255)); + return hbits; +} + +/// Convert IEEE double-precision to half-precision. +/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding +/// \param value double-precision value +/// \return binary representation of half-precision value +template +uint16 float2half_impl(double value, true_type) +{ + typedef bits::type uint32; + typedef bits::type uint64; + uint64 bits; // = *reinterpret_cast(&value); //violating strict aliasing! + std::memcpy(&bits, &value, sizeof(double)); + uint32 hi = bits >> 32, lo = bits & 0xFFFFFFFF; + uint16 hbits = (hi >> 16) & 0x8000; + hi &= 0x7FFFFFFF; + int exp = hi >> 20; + if (exp == 2047) + return hbits | 0x7C00 | (0x3FF & -static_cast((bits & 0xFFFFFFFFFFFFF) != 0)); + if (exp > 1038) + { + if (R == std::round_toward_infinity) + return hbits | 0x7C00 - (hbits >> 15); + if (R == std::round_toward_neg_infinity) + return hbits | 0x7BFF + (hbits >> 15); + return hbits | 0x7BFF + (R != std::round_toward_zero); + } + int g, s = lo != 0; + if (exp > 1008) + { + g = (hi >> 9) & 1; + s |= (hi & 0x1FF) != 0; + hbits |= ((exp - 1008) << 10) | ((hi >> 10) & 0x3FF); + } + else if (exp > 997) + { + int i = 1018 - exp; + hi = (hi & 0xFFFFF) | 0x100000; + g = (hi >> i) & 1; + s |= (hi & ((1L << i) - 1)) != 0; + hbits |= hi >> (i + 1); + } + else + { + g = 0; + s |= hi != 0; + } + if (R == std::round_to_nearest) +#if HALF_ROUND_TIES_TO_EVEN + hbits += g & (s | hbits); +#else + hbits += g; +#endif + else if (R == std::round_toward_infinity) + hbits += ~(hbits >> 15) & (s | g); + else if (R == 
std::round_toward_neg_infinity) + hbits += (hbits >> 15) & (g | s); + return hbits; +} + +/// Convert non-IEEE floating point to half-precision. +/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding +/// \tparam T source type (builtin floating point type) +/// \param value floating point value +/// \return binary representation of half-precision value +template +uint16 float2half_impl(T value, ...) +{ + uint16 hbits = static_cast(builtin_signbit(value)) << 15; + if (value == T()) + return hbits; + if (builtin_isnan(value)) + return hbits | 0x7FFF; + if (builtin_isinf(value)) + return hbits | 0x7C00; + int exp; + std::frexp(value, &exp); + if (exp > 16) + { + if (R == std::round_toward_infinity) + return hbits | (0x7C00 - (hbits >> 15)); + else if (R == std::round_toward_neg_infinity) + return hbits | (0x7BFF + (hbits >> 15)); + return hbits | (0x7BFF + (R != std::round_toward_zero)); + } + if (exp < -13) + value = std::ldexp(value, 24); + else + { + value = std::ldexp(value, 11 - exp); + hbits |= ((exp + 13) << 10); + } + T ival, frac = std::modf(value, &ival); + hbits += static_cast(std::abs(static_cast(ival))); + if (R == std::round_to_nearest) + { + frac = std::abs(frac); +#if HALF_ROUND_TIES_TO_EVEN + hbits += (frac > T(0.5)) | ((frac == T(0.5)) & hbits); +#else + hbits += frac >= T(0.5); +#endif + } + else if (R == std::round_toward_infinity) + hbits += frac > T(); + else if (R == std::round_toward_neg_infinity) + hbits += frac < T(); + return hbits; +} + +/// Convert floating point to half-precision. 
+/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding +/// \tparam T source type (builtin floating point type) +/// \param value floating point value +/// \return binary representation of half-precision value +template +uint16 float2half(T value) +{ + return float2half_impl( + value, bool_type < std::numeric_limits::is_iec559 && sizeof(typename bits::type) == sizeof(T) > ()); +} + +/// Convert integer to half-precision floating point. +/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding +/// \tparam S `true` if value negative, `false` else +/// \tparam T type to convert (builtin integer type) +/// \param value non-negative integral value +/// \return binary representation of half-precision value +template +uint16 int2half_impl(T value) +{ +#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS + static_assert(std::is_integral::value, "int to half conversion only supports builtin integer types"); +#endif + if (S) + value = -value; + uint16 bits = S << 15; + if (value > 0xFFFF) + { + if (R == std::round_toward_infinity) + bits |= 0x7C00 - S; + else if (R == std::round_toward_neg_infinity) + bits |= 0x7BFF + S; + else + bits |= 0x7BFF + (R != std::round_toward_zero); + } + else if (value) + { + uint32_t m = value, exp = 24; + for (; m < 0x400; m <<= 1, --exp) + ; + for (; m > 0x7FF; m >>= 1, ++exp) + ; + bits |= (exp << 10) + m; + if (exp > 24) + { + if (R == std::round_to_nearest) + bits += (value >> (exp - 25)) & 1 +#if HALF_ROUND_TIES_TO_EVEN + & (((((1 << (exp - 25)) - 1) & value) != 0) | bits) +#endif + ; + else if (R == std::round_toward_infinity) + bits += ((value & ((1 << (exp - 24)) - 1)) != 0) & !S; + else if (R == std::round_toward_neg_infinity) + bits += ((value & ((1 << (exp - 24)) - 1)) != 0) & S; + } + } + return bits; +} + +/// Convert integer to half-precision floating point. 
+/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding +/// \tparam T type to convert (builtin integer type) +/// \param value integral value +/// \return binary representation of half-precision value +template +uint16 int2half(T value) +{ + return (value < 0) ? int2half_impl(value) : int2half_impl(value); +} + +/// Convert half-precision to IEEE single-precision. +/// Credit for this goes to [Jeroen van der Zijp](ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf). +/// \param value binary representation of half-precision value +/// \return single-precision value +inline float half2float_impl(uint16 value, float, true_type) +{ + typedef bits::type uint32; + /* uint32 bits = static_cast(value&0x8000) << 16; + int abs = value & 0x7FFF; + if(abs) + { + bits |= 0x38000000 << static_cast(abs>=0x7C00); + for(; abs<0x400; abs<<=1,bits-=0x800000) ; + bits += static_cast(abs) << 13; + } + */ + static const uint32 mantissa_table[2048] = {0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34A00000, + 0x34C00000, 0x34E00000, 0x35000000, 0x35100000, 0x35200000, 0x35300000, 0x35400000, 0x35500000, 0x35600000, + 0x35700000, 0x35800000, 0x35880000, 0x35900000, 0x35980000, 0x35A00000, 0x35A80000, 0x35B00000, 0x35B80000, + 0x35C00000, 0x35C80000, 0x35D00000, 0x35D80000, 0x35E00000, 0x35E80000, 0x35F00000, 0x35F80000, 0x36000000, + 0x36040000, 0x36080000, 0x360C0000, 0x36100000, 0x36140000, 0x36180000, 0x361C0000, 0x36200000, 0x36240000, + 0x36280000, 0x362C0000, 0x36300000, 0x36340000, 0x36380000, 0x363C0000, 0x36400000, 0x36440000, 0x36480000, + 0x364C0000, 0x36500000, 0x36540000, 0x36580000, 0x365C0000, 0x36600000, 0x36640000, 0x36680000, 0x366C0000, + 0x36700000, 0x36740000, 0x36780000, 0x367C0000, 0x36800000, 0x36820000, 0x36840000, 0x36860000, 0x36880000, + 0x368A0000, 0x368C0000, 0x368E0000, 0x36900000, 0x36920000, 0x36940000, 0x36960000, 0x36980000, 0x369A0000, + 0x369C0000, 0x369E0000, 0x36A00000, 0x36A20000, 0x36A40000, 
0x36A60000, 0x36A80000, 0x36AA0000, 0x36AC0000, + 0x36AE0000, 0x36B00000, 0x36B20000, 0x36B40000, 0x36B60000, 0x36B80000, 0x36BA0000, 0x36BC0000, 0x36BE0000, + 0x36C00000, 0x36C20000, 0x36C40000, 0x36C60000, 0x36C80000, 0x36CA0000, 0x36CC0000, 0x36CE0000, 0x36D00000, + 0x36D20000, 0x36D40000, 0x36D60000, 0x36D80000, 0x36DA0000, 0x36DC0000, 0x36DE0000, 0x36E00000, 0x36E20000, + 0x36E40000, 0x36E60000, 0x36E80000, 0x36EA0000, 0x36EC0000, 0x36EE0000, 0x36F00000, 0x36F20000, 0x36F40000, + 0x36F60000, 0x36F80000, 0x36FA0000, 0x36FC0000, 0x36FE0000, 0x37000000, 0x37010000, 0x37020000, 0x37030000, + 0x37040000, 0x37050000, 0x37060000, 0x37070000, 0x37080000, 0x37090000, 0x370A0000, 0x370B0000, 0x370C0000, + 0x370D0000, 0x370E0000, 0x370F0000, 0x37100000, 0x37110000, 0x37120000, 0x37130000, 0x37140000, 0x37150000, + 0x37160000, 0x37170000, 0x37180000, 0x37190000, 0x371A0000, 0x371B0000, 0x371C0000, 0x371D0000, 0x371E0000, + 0x371F0000, 0x37200000, 0x37210000, 0x37220000, 0x37230000, 0x37240000, 0x37250000, 0x37260000, 0x37270000, + 0x37280000, 0x37290000, 0x372A0000, 0x372B0000, 0x372C0000, 0x372D0000, 0x372E0000, 0x372F0000, 0x37300000, + 0x37310000, 0x37320000, 0x37330000, 0x37340000, 0x37350000, 0x37360000, 0x37370000, 0x37380000, 0x37390000, + 0x373A0000, 0x373B0000, 0x373C0000, 0x373D0000, 0x373E0000, 0x373F0000, 0x37400000, 0x37410000, 0x37420000, + 0x37430000, 0x37440000, 0x37450000, 0x37460000, 0x37470000, 0x37480000, 0x37490000, 0x374A0000, 0x374B0000, + 0x374C0000, 0x374D0000, 0x374E0000, 0x374F0000, 0x37500000, 0x37510000, 0x37520000, 0x37530000, 0x37540000, + 0x37550000, 0x37560000, 0x37570000, 0x37580000, 0x37590000, 0x375A0000, 0x375B0000, 0x375C0000, 0x375D0000, + 0x375E0000, 0x375F0000, 0x37600000, 0x37610000, 0x37620000, 0x37630000, 0x37640000, 0x37650000, 0x37660000, + 0x37670000, 0x37680000, 0x37690000, 0x376A0000, 0x376B0000, 0x376C0000, 0x376D0000, 0x376E0000, 0x376F0000, + 0x37700000, 0x37710000, 0x37720000, 0x37730000, 0x37740000, 0x37750000, 
0x37760000, 0x37770000, 0x37780000, + 0x37790000, 0x377A0000, 0x377B0000, 0x377C0000, 0x377D0000, 0x377E0000, 0x377F0000, 0x37800000, 0x37808000, + 0x37810000, 0x37818000, 0x37820000, 0x37828000, 0x37830000, 0x37838000, 0x37840000, 0x37848000, 0x37850000, + 0x37858000, 0x37860000, 0x37868000, 0x37870000, 0x37878000, 0x37880000, 0x37888000, 0x37890000, 0x37898000, + 0x378A0000, 0x378A8000, 0x378B0000, 0x378B8000, 0x378C0000, 0x378C8000, 0x378D0000, 0x378D8000, 0x378E0000, + 0x378E8000, 0x378F0000, 0x378F8000, 0x37900000, 0x37908000, 0x37910000, 0x37918000, 0x37920000, 0x37928000, + 0x37930000, 0x37938000, 0x37940000, 0x37948000, 0x37950000, 0x37958000, 0x37960000, 0x37968000, 0x37970000, + 0x37978000, 0x37980000, 0x37988000, 0x37990000, 0x37998000, 0x379A0000, 0x379A8000, 0x379B0000, 0x379B8000, + 0x379C0000, 0x379C8000, 0x379D0000, 0x379D8000, 0x379E0000, 0x379E8000, 0x379F0000, 0x379F8000, 0x37A00000, + 0x37A08000, 0x37A10000, 0x37A18000, 0x37A20000, 0x37A28000, 0x37A30000, 0x37A38000, 0x37A40000, 0x37A48000, + 0x37A50000, 0x37A58000, 0x37A60000, 0x37A68000, 0x37A70000, 0x37A78000, 0x37A80000, 0x37A88000, 0x37A90000, + 0x37A98000, 0x37AA0000, 0x37AA8000, 0x37AB0000, 0x37AB8000, 0x37AC0000, 0x37AC8000, 0x37AD0000, 0x37AD8000, + 0x37AE0000, 0x37AE8000, 0x37AF0000, 0x37AF8000, 0x37B00000, 0x37B08000, 0x37B10000, 0x37B18000, 0x37B20000, + 0x37B28000, 0x37B30000, 0x37B38000, 0x37B40000, 0x37B48000, 0x37B50000, 0x37B58000, 0x37B60000, 0x37B68000, + 0x37B70000, 0x37B78000, 0x37B80000, 0x37B88000, 0x37B90000, 0x37B98000, 0x37BA0000, 0x37BA8000, 0x37BB0000, + 0x37BB8000, 0x37BC0000, 0x37BC8000, 0x37BD0000, 0x37BD8000, 0x37BE0000, 0x37BE8000, 0x37BF0000, 0x37BF8000, + 0x37C00000, 0x37C08000, 0x37C10000, 0x37C18000, 0x37C20000, 0x37C28000, 0x37C30000, 0x37C38000, 0x37C40000, + 0x37C48000, 0x37C50000, 0x37C58000, 0x37C60000, 0x37C68000, 0x37C70000, 0x37C78000, 0x37C80000, 0x37C88000, + 0x37C90000, 0x37C98000, 0x37CA0000, 0x37CA8000, 0x37CB0000, 0x37CB8000, 0x37CC0000, 
0x37CC8000, 0x37CD0000, + 0x37CD8000, 0x37CE0000, 0x37CE8000, 0x37CF0000, 0x37CF8000, 0x37D00000, 0x37D08000, 0x37D10000, 0x37D18000, + 0x37D20000, 0x37D28000, 0x37D30000, 0x37D38000, 0x37D40000, 0x37D48000, 0x37D50000, 0x37D58000, 0x37D60000, + 0x37D68000, 0x37D70000, 0x37D78000, 0x37D80000, 0x37D88000, 0x37D90000, 0x37D98000, 0x37DA0000, 0x37DA8000, + 0x37DB0000, 0x37DB8000, 0x37DC0000, 0x37DC8000, 0x37DD0000, 0x37DD8000, 0x37DE0000, 0x37DE8000, 0x37DF0000, + 0x37DF8000, 0x37E00000, 0x37E08000, 0x37E10000, 0x37E18000, 0x37E20000, 0x37E28000, 0x37E30000, 0x37E38000, + 0x37E40000, 0x37E48000, 0x37E50000, 0x37E58000, 0x37E60000, 0x37E68000, 0x37E70000, 0x37E78000, 0x37E80000, + 0x37E88000, 0x37E90000, 0x37E98000, 0x37EA0000, 0x37EA8000, 0x37EB0000, 0x37EB8000, 0x37EC0000, 0x37EC8000, + 0x37ED0000, 0x37ED8000, 0x37EE0000, 0x37EE8000, 0x37EF0000, 0x37EF8000, 0x37F00000, 0x37F08000, 0x37F10000, + 0x37F18000, 0x37F20000, 0x37F28000, 0x37F30000, 0x37F38000, 0x37F40000, 0x37F48000, 0x37F50000, 0x37F58000, + 0x37F60000, 0x37F68000, 0x37F70000, 0x37F78000, 0x37F80000, 0x37F88000, 0x37F90000, 0x37F98000, 0x37FA0000, + 0x37FA8000, 0x37FB0000, 0x37FB8000, 0x37FC0000, 0x37FC8000, 0x37FD0000, 0x37FD8000, 0x37FE0000, 0x37FE8000, + 0x37FF0000, 0x37FF8000, 0x38000000, 0x38004000, 0x38008000, 0x3800C000, 0x38010000, 0x38014000, 0x38018000, + 0x3801C000, 0x38020000, 0x38024000, 0x38028000, 0x3802C000, 0x38030000, 0x38034000, 0x38038000, 0x3803C000, + 0x38040000, 0x38044000, 0x38048000, 0x3804C000, 0x38050000, 0x38054000, 0x38058000, 0x3805C000, 0x38060000, + 0x38064000, 0x38068000, 0x3806C000, 0x38070000, 0x38074000, 0x38078000, 0x3807C000, 0x38080000, 0x38084000, + 0x38088000, 0x3808C000, 0x38090000, 0x38094000, 0x38098000, 0x3809C000, 0x380A0000, 0x380A4000, 0x380A8000, + 0x380AC000, 0x380B0000, 0x380B4000, 0x380B8000, 0x380BC000, 0x380C0000, 0x380C4000, 0x380C8000, 0x380CC000, + 0x380D0000, 0x380D4000, 0x380D8000, 0x380DC000, 0x380E0000, 0x380E4000, 0x380E8000, 0x380EC000, 
0x380F0000, + 0x380F4000, 0x380F8000, 0x380FC000, 0x38100000, 0x38104000, 0x38108000, 0x3810C000, 0x38110000, 0x38114000, + 0x38118000, 0x3811C000, 0x38120000, 0x38124000, 0x38128000, 0x3812C000, 0x38130000, 0x38134000, 0x38138000, + 0x3813C000, 0x38140000, 0x38144000, 0x38148000, 0x3814C000, 0x38150000, 0x38154000, 0x38158000, 0x3815C000, + 0x38160000, 0x38164000, 0x38168000, 0x3816C000, 0x38170000, 0x38174000, 0x38178000, 0x3817C000, 0x38180000, + 0x38184000, 0x38188000, 0x3818C000, 0x38190000, 0x38194000, 0x38198000, 0x3819C000, 0x381A0000, 0x381A4000, + 0x381A8000, 0x381AC000, 0x381B0000, 0x381B4000, 0x381B8000, 0x381BC000, 0x381C0000, 0x381C4000, 0x381C8000, + 0x381CC000, 0x381D0000, 0x381D4000, 0x381D8000, 0x381DC000, 0x381E0000, 0x381E4000, 0x381E8000, 0x381EC000, + 0x381F0000, 0x381F4000, 0x381F8000, 0x381FC000, 0x38200000, 0x38204000, 0x38208000, 0x3820C000, 0x38210000, + 0x38214000, 0x38218000, 0x3821C000, 0x38220000, 0x38224000, 0x38228000, 0x3822C000, 0x38230000, 0x38234000, + 0x38238000, 0x3823C000, 0x38240000, 0x38244000, 0x38248000, 0x3824C000, 0x38250000, 0x38254000, 0x38258000, + 0x3825C000, 0x38260000, 0x38264000, 0x38268000, 0x3826C000, 0x38270000, 0x38274000, 0x38278000, 0x3827C000, + 0x38280000, 0x38284000, 0x38288000, 0x3828C000, 0x38290000, 0x38294000, 0x38298000, 0x3829C000, 0x382A0000, + 0x382A4000, 0x382A8000, 0x382AC000, 0x382B0000, 0x382B4000, 0x382B8000, 0x382BC000, 0x382C0000, 0x382C4000, + 0x382C8000, 0x382CC000, 0x382D0000, 0x382D4000, 0x382D8000, 0x382DC000, 0x382E0000, 0x382E4000, 0x382E8000, + 0x382EC000, 0x382F0000, 0x382F4000, 0x382F8000, 0x382FC000, 0x38300000, 0x38304000, 0x38308000, 0x3830C000, + 0x38310000, 0x38314000, 0x38318000, 0x3831C000, 0x38320000, 0x38324000, 0x38328000, 0x3832C000, 0x38330000, + 0x38334000, 0x38338000, 0x3833C000, 0x38340000, 0x38344000, 0x38348000, 0x3834C000, 0x38350000, 0x38354000, + 0x38358000, 0x3835C000, 0x38360000, 0x38364000, 0x38368000, 0x3836C000, 0x38370000, 0x38374000, 0x38378000, + 
0x3837C000, 0x38380000, 0x38384000, 0x38388000, 0x3838C000, 0x38390000, 0x38394000, 0x38398000, 0x3839C000, + 0x383A0000, 0x383A4000, 0x383A8000, 0x383AC000, 0x383B0000, 0x383B4000, 0x383B8000, 0x383BC000, 0x383C0000, + 0x383C4000, 0x383C8000, 0x383CC000, 0x383D0000, 0x383D4000, 0x383D8000, 0x383DC000, 0x383E0000, 0x383E4000, + 0x383E8000, 0x383EC000, 0x383F0000, 0x383F4000, 0x383F8000, 0x383FC000, 0x38400000, 0x38404000, 0x38408000, + 0x3840C000, 0x38410000, 0x38414000, 0x38418000, 0x3841C000, 0x38420000, 0x38424000, 0x38428000, 0x3842C000, + 0x38430000, 0x38434000, 0x38438000, 0x3843C000, 0x38440000, 0x38444000, 0x38448000, 0x3844C000, 0x38450000, + 0x38454000, 0x38458000, 0x3845C000, 0x38460000, 0x38464000, 0x38468000, 0x3846C000, 0x38470000, 0x38474000, + 0x38478000, 0x3847C000, 0x38480000, 0x38484000, 0x38488000, 0x3848C000, 0x38490000, 0x38494000, 0x38498000, + 0x3849C000, 0x384A0000, 0x384A4000, 0x384A8000, 0x384AC000, 0x384B0000, 0x384B4000, 0x384B8000, 0x384BC000, + 0x384C0000, 0x384C4000, 0x384C8000, 0x384CC000, 0x384D0000, 0x384D4000, 0x384D8000, 0x384DC000, 0x384E0000, + 0x384E4000, 0x384E8000, 0x384EC000, 0x384F0000, 0x384F4000, 0x384F8000, 0x384FC000, 0x38500000, 0x38504000, + 0x38508000, 0x3850C000, 0x38510000, 0x38514000, 0x38518000, 0x3851C000, 0x38520000, 0x38524000, 0x38528000, + 0x3852C000, 0x38530000, 0x38534000, 0x38538000, 0x3853C000, 0x38540000, 0x38544000, 0x38548000, 0x3854C000, + 0x38550000, 0x38554000, 0x38558000, 0x3855C000, 0x38560000, 0x38564000, 0x38568000, 0x3856C000, 0x38570000, + 0x38574000, 0x38578000, 0x3857C000, 0x38580000, 0x38584000, 0x38588000, 0x3858C000, 0x38590000, 0x38594000, + 0x38598000, 0x3859C000, 0x385A0000, 0x385A4000, 0x385A8000, 0x385AC000, 0x385B0000, 0x385B4000, 0x385B8000, + 0x385BC000, 0x385C0000, 0x385C4000, 0x385C8000, 0x385CC000, 0x385D0000, 0x385D4000, 0x385D8000, 0x385DC000, + 0x385E0000, 0x385E4000, 0x385E8000, 0x385EC000, 0x385F0000, 0x385F4000, 0x385F8000, 0x385FC000, 0x38600000, + 0x38604000, 
0x38608000, 0x3860C000, 0x38610000, 0x38614000, 0x38618000, 0x3861C000, 0x38620000, 0x38624000, + 0x38628000, 0x3862C000, 0x38630000, 0x38634000, 0x38638000, 0x3863C000, 0x38640000, 0x38644000, 0x38648000, + 0x3864C000, 0x38650000, 0x38654000, 0x38658000, 0x3865C000, 0x38660000, 0x38664000, 0x38668000, 0x3866C000, + 0x38670000, 0x38674000, 0x38678000, 0x3867C000, 0x38680000, 0x38684000, 0x38688000, 0x3868C000, 0x38690000, + 0x38694000, 0x38698000, 0x3869C000, 0x386A0000, 0x386A4000, 0x386A8000, 0x386AC000, 0x386B0000, 0x386B4000, + 0x386B8000, 0x386BC000, 0x386C0000, 0x386C4000, 0x386C8000, 0x386CC000, 0x386D0000, 0x386D4000, 0x386D8000, + 0x386DC000, 0x386E0000, 0x386E4000, 0x386E8000, 0x386EC000, 0x386F0000, 0x386F4000, 0x386F8000, 0x386FC000, + 0x38700000, 0x38704000, 0x38708000, 0x3870C000, 0x38710000, 0x38714000, 0x38718000, 0x3871C000, 0x38720000, + 0x38724000, 0x38728000, 0x3872C000, 0x38730000, 0x38734000, 0x38738000, 0x3873C000, 0x38740000, 0x38744000, + 0x38748000, 0x3874C000, 0x38750000, 0x38754000, 0x38758000, 0x3875C000, 0x38760000, 0x38764000, 0x38768000, + 0x3876C000, 0x38770000, 0x38774000, 0x38778000, 0x3877C000, 0x38780000, 0x38784000, 0x38788000, 0x3878C000, + 0x38790000, 0x38794000, 0x38798000, 0x3879C000, 0x387A0000, 0x387A4000, 0x387A8000, 0x387AC000, 0x387B0000, + 0x387B4000, 0x387B8000, 0x387BC000, 0x387C0000, 0x387C4000, 0x387C8000, 0x387CC000, 0x387D0000, 0x387D4000, + 0x387D8000, 0x387DC000, 0x387E0000, 0x387E4000, 0x387E8000, 0x387EC000, 0x387F0000, 0x387F4000, 0x387F8000, + 0x387FC000, 0x38000000, 0x38002000, 0x38004000, 0x38006000, 0x38008000, 0x3800A000, 0x3800C000, 0x3800E000, + 0x38010000, 0x38012000, 0x38014000, 0x38016000, 0x38018000, 0x3801A000, 0x3801C000, 0x3801E000, 0x38020000, + 0x38022000, 0x38024000, 0x38026000, 0x38028000, 0x3802A000, 0x3802C000, 0x3802E000, 0x38030000, 0x38032000, + 0x38034000, 0x38036000, 0x38038000, 0x3803A000, 0x3803C000, 0x3803E000, 0x38040000, 0x38042000, 0x38044000, + 0x38046000, 0x38048000, 
0x3804A000, 0x3804C000, 0x3804E000, 0x38050000, 0x38052000, 0x38054000, 0x38056000, + 0x38058000, 0x3805A000, 0x3805C000, 0x3805E000, 0x38060000, 0x38062000, 0x38064000, 0x38066000, 0x38068000, + 0x3806A000, 0x3806C000, 0x3806E000, 0x38070000, 0x38072000, 0x38074000, 0x38076000, 0x38078000, 0x3807A000, + 0x3807C000, 0x3807E000, 0x38080000, 0x38082000, 0x38084000, 0x38086000, 0x38088000, 0x3808A000, 0x3808C000, + 0x3808E000, 0x38090000, 0x38092000, 0x38094000, 0x38096000, 0x38098000, 0x3809A000, 0x3809C000, 0x3809E000, + 0x380A0000, 0x380A2000, 0x380A4000, 0x380A6000, 0x380A8000, 0x380AA000, 0x380AC000, 0x380AE000, 0x380B0000, + 0x380B2000, 0x380B4000, 0x380B6000, 0x380B8000, 0x380BA000, 0x380BC000, 0x380BE000, 0x380C0000, 0x380C2000, + 0x380C4000, 0x380C6000, 0x380C8000, 0x380CA000, 0x380CC000, 0x380CE000, 0x380D0000, 0x380D2000, 0x380D4000, + 0x380D6000, 0x380D8000, 0x380DA000, 0x380DC000, 0x380DE000, 0x380E0000, 0x380E2000, 0x380E4000, 0x380E6000, + 0x380E8000, 0x380EA000, 0x380EC000, 0x380EE000, 0x380F0000, 0x380F2000, 0x380F4000, 0x380F6000, 0x380F8000, + 0x380FA000, 0x380FC000, 0x380FE000, 0x38100000, 0x38102000, 0x38104000, 0x38106000, 0x38108000, 0x3810A000, + 0x3810C000, 0x3810E000, 0x38110000, 0x38112000, 0x38114000, 0x38116000, 0x38118000, 0x3811A000, 0x3811C000, + 0x3811E000, 0x38120000, 0x38122000, 0x38124000, 0x38126000, 0x38128000, 0x3812A000, 0x3812C000, 0x3812E000, + 0x38130000, 0x38132000, 0x38134000, 0x38136000, 0x38138000, 0x3813A000, 0x3813C000, 0x3813E000, 0x38140000, + 0x38142000, 0x38144000, 0x38146000, 0x38148000, 0x3814A000, 0x3814C000, 0x3814E000, 0x38150000, 0x38152000, + 0x38154000, 0x38156000, 0x38158000, 0x3815A000, 0x3815C000, 0x3815E000, 0x38160000, 0x38162000, 0x38164000, + 0x38166000, 0x38168000, 0x3816A000, 0x3816C000, 0x3816E000, 0x38170000, 0x38172000, 0x38174000, 0x38176000, + 0x38178000, 0x3817A000, 0x3817C000, 0x3817E000, 0x38180000, 0x38182000, 0x38184000, 0x38186000, 0x38188000, + 0x3818A000, 0x3818C000, 0x3818E000, 
0x38190000, 0x38192000, 0x38194000, 0x38196000, 0x38198000, 0x3819A000, + 0x3819C000, 0x3819E000, 0x381A0000, 0x381A2000, 0x381A4000, 0x381A6000, 0x381A8000, 0x381AA000, 0x381AC000, + 0x381AE000, 0x381B0000, 0x381B2000, 0x381B4000, 0x381B6000, 0x381B8000, 0x381BA000, 0x381BC000, 0x381BE000, + 0x381C0000, 0x381C2000, 0x381C4000, 0x381C6000, 0x381C8000, 0x381CA000, 0x381CC000, 0x381CE000, 0x381D0000, + 0x381D2000, 0x381D4000, 0x381D6000, 0x381D8000, 0x381DA000, 0x381DC000, 0x381DE000, 0x381E0000, 0x381E2000, + 0x381E4000, 0x381E6000, 0x381E8000, 0x381EA000, 0x381EC000, 0x381EE000, 0x381F0000, 0x381F2000, 0x381F4000, + 0x381F6000, 0x381F8000, 0x381FA000, 0x381FC000, 0x381FE000, 0x38200000, 0x38202000, 0x38204000, 0x38206000, + 0x38208000, 0x3820A000, 0x3820C000, 0x3820E000, 0x38210000, 0x38212000, 0x38214000, 0x38216000, 0x38218000, + 0x3821A000, 0x3821C000, 0x3821E000, 0x38220000, 0x38222000, 0x38224000, 0x38226000, 0x38228000, 0x3822A000, + 0x3822C000, 0x3822E000, 0x38230000, 0x38232000, 0x38234000, 0x38236000, 0x38238000, 0x3823A000, 0x3823C000, + 0x3823E000, 0x38240000, 0x38242000, 0x38244000, 0x38246000, 0x38248000, 0x3824A000, 0x3824C000, 0x3824E000, + 0x38250000, 0x38252000, 0x38254000, 0x38256000, 0x38258000, 0x3825A000, 0x3825C000, 0x3825E000, 0x38260000, + 0x38262000, 0x38264000, 0x38266000, 0x38268000, 0x3826A000, 0x3826C000, 0x3826E000, 0x38270000, 0x38272000, + 0x38274000, 0x38276000, 0x38278000, 0x3827A000, 0x3827C000, 0x3827E000, 0x38280000, 0x38282000, 0x38284000, + 0x38286000, 0x38288000, 0x3828A000, 0x3828C000, 0x3828E000, 0x38290000, 0x38292000, 0x38294000, 0x38296000, + 0x38298000, 0x3829A000, 0x3829C000, 0x3829E000, 0x382A0000, 0x382A2000, 0x382A4000, 0x382A6000, 0x382A8000, + 0x382AA000, 0x382AC000, 0x382AE000, 0x382B0000, 0x382B2000, 0x382B4000, 0x382B6000, 0x382B8000, 0x382BA000, + 0x382BC000, 0x382BE000, 0x382C0000, 0x382C2000, 0x382C4000, 0x382C6000, 0x382C8000, 0x382CA000, 0x382CC000, + 0x382CE000, 0x382D0000, 0x382D2000, 0x382D4000, 
0x382D6000, 0x382D8000, 0x382DA000, 0x382DC000, 0x382DE000, + 0x382E0000, 0x382E2000, 0x382E4000, 0x382E6000, 0x382E8000, 0x382EA000, 0x382EC000, 0x382EE000, 0x382F0000, + 0x382F2000, 0x382F4000, 0x382F6000, 0x382F8000, 0x382FA000, 0x382FC000, 0x382FE000, 0x38300000, 0x38302000, + 0x38304000, 0x38306000, 0x38308000, 0x3830A000, 0x3830C000, 0x3830E000, 0x38310000, 0x38312000, 0x38314000, + 0x38316000, 0x38318000, 0x3831A000, 0x3831C000, 0x3831E000, 0x38320000, 0x38322000, 0x38324000, 0x38326000, + 0x38328000, 0x3832A000, 0x3832C000, 0x3832E000, 0x38330000, 0x38332000, 0x38334000, 0x38336000, 0x38338000, + 0x3833A000, 0x3833C000, 0x3833E000, 0x38340000, 0x38342000, 0x38344000, 0x38346000, 0x38348000, 0x3834A000, + 0x3834C000, 0x3834E000, 0x38350000, 0x38352000, 0x38354000, 0x38356000, 0x38358000, 0x3835A000, 0x3835C000, + 0x3835E000, 0x38360000, 0x38362000, 0x38364000, 0x38366000, 0x38368000, 0x3836A000, 0x3836C000, 0x3836E000, + 0x38370000, 0x38372000, 0x38374000, 0x38376000, 0x38378000, 0x3837A000, 0x3837C000, 0x3837E000, 0x38380000, + 0x38382000, 0x38384000, 0x38386000, 0x38388000, 0x3838A000, 0x3838C000, 0x3838E000, 0x38390000, 0x38392000, + 0x38394000, 0x38396000, 0x38398000, 0x3839A000, 0x3839C000, 0x3839E000, 0x383A0000, 0x383A2000, 0x383A4000, + 0x383A6000, 0x383A8000, 0x383AA000, 0x383AC000, 0x383AE000, 0x383B0000, 0x383B2000, 0x383B4000, 0x383B6000, + 0x383B8000, 0x383BA000, 0x383BC000, 0x383BE000, 0x383C0000, 0x383C2000, 0x383C4000, 0x383C6000, 0x383C8000, + 0x383CA000, 0x383CC000, 0x383CE000, 0x383D0000, 0x383D2000, 0x383D4000, 0x383D6000, 0x383D8000, 0x383DA000, + 0x383DC000, 0x383DE000, 0x383E0000, 0x383E2000, 0x383E4000, 0x383E6000, 0x383E8000, 0x383EA000, 0x383EC000, + 0x383EE000, 0x383F0000, 0x383F2000, 0x383F4000, 0x383F6000, 0x383F8000, 0x383FA000, 0x383FC000, 0x383FE000, + 0x38400000, 0x38402000, 0x38404000, 0x38406000, 0x38408000, 0x3840A000, 0x3840C000, 0x3840E000, 0x38410000, + 0x38412000, 0x38414000, 0x38416000, 0x38418000, 0x3841A000, 
0x3841C000, 0x3841E000, 0x38420000, 0x38422000, + 0x38424000, 0x38426000, 0x38428000, 0x3842A000, 0x3842C000, 0x3842E000, 0x38430000, 0x38432000, 0x38434000, + 0x38436000, 0x38438000, 0x3843A000, 0x3843C000, 0x3843E000, 0x38440000, 0x38442000, 0x38444000, 0x38446000, + 0x38448000, 0x3844A000, 0x3844C000, 0x3844E000, 0x38450000, 0x38452000, 0x38454000, 0x38456000, 0x38458000, + 0x3845A000, 0x3845C000, 0x3845E000, 0x38460000, 0x38462000, 0x38464000, 0x38466000, 0x38468000, 0x3846A000, + 0x3846C000, 0x3846E000, 0x38470000, 0x38472000, 0x38474000, 0x38476000, 0x38478000, 0x3847A000, 0x3847C000, + 0x3847E000, 0x38480000, 0x38482000, 0x38484000, 0x38486000, 0x38488000, 0x3848A000, 0x3848C000, 0x3848E000, + 0x38490000, 0x38492000, 0x38494000, 0x38496000, 0x38498000, 0x3849A000, 0x3849C000, 0x3849E000, 0x384A0000, + 0x384A2000, 0x384A4000, 0x384A6000, 0x384A8000, 0x384AA000, 0x384AC000, 0x384AE000, 0x384B0000, 0x384B2000, + 0x384B4000, 0x384B6000, 0x384B8000, 0x384BA000, 0x384BC000, 0x384BE000, 0x384C0000, 0x384C2000, 0x384C4000, + 0x384C6000, 0x384C8000, 0x384CA000, 0x384CC000, 0x384CE000, 0x384D0000, 0x384D2000, 0x384D4000, 0x384D6000, + 0x384D8000, 0x384DA000, 0x384DC000, 0x384DE000, 0x384E0000, 0x384E2000, 0x384E4000, 0x384E6000, 0x384E8000, + 0x384EA000, 0x384EC000, 0x384EE000, 0x384F0000, 0x384F2000, 0x384F4000, 0x384F6000, 0x384F8000, 0x384FA000, + 0x384FC000, 0x384FE000, 0x38500000, 0x38502000, 0x38504000, 0x38506000, 0x38508000, 0x3850A000, 0x3850C000, + 0x3850E000, 0x38510000, 0x38512000, 0x38514000, 0x38516000, 0x38518000, 0x3851A000, 0x3851C000, 0x3851E000, + 0x38520000, 0x38522000, 0x38524000, 0x38526000, 0x38528000, 0x3852A000, 0x3852C000, 0x3852E000, 0x38530000, + 0x38532000, 0x38534000, 0x38536000, 0x38538000, 0x3853A000, 0x3853C000, 0x3853E000, 0x38540000, 0x38542000, + 0x38544000, 0x38546000, 0x38548000, 0x3854A000, 0x3854C000, 0x3854E000, 0x38550000, 0x38552000, 0x38554000, + 0x38556000, 0x38558000, 0x3855A000, 0x3855C000, 0x3855E000, 0x38560000, 
0x38562000, 0x38564000, 0x38566000, + 0x38568000, 0x3856A000, 0x3856C000, 0x3856E000, 0x38570000, 0x38572000, 0x38574000, 0x38576000, 0x38578000, + 0x3857A000, 0x3857C000, 0x3857E000, 0x38580000, 0x38582000, 0x38584000, 0x38586000, 0x38588000, 0x3858A000, + 0x3858C000, 0x3858E000, 0x38590000, 0x38592000, 0x38594000, 0x38596000, 0x38598000, 0x3859A000, 0x3859C000, + 0x3859E000, 0x385A0000, 0x385A2000, 0x385A4000, 0x385A6000, 0x385A8000, 0x385AA000, 0x385AC000, 0x385AE000, + 0x385B0000, 0x385B2000, 0x385B4000, 0x385B6000, 0x385B8000, 0x385BA000, 0x385BC000, 0x385BE000, 0x385C0000, + 0x385C2000, 0x385C4000, 0x385C6000, 0x385C8000, 0x385CA000, 0x385CC000, 0x385CE000, 0x385D0000, 0x385D2000, + 0x385D4000, 0x385D6000, 0x385D8000, 0x385DA000, 0x385DC000, 0x385DE000, 0x385E0000, 0x385E2000, 0x385E4000, + 0x385E6000, 0x385E8000, 0x385EA000, 0x385EC000, 0x385EE000, 0x385F0000, 0x385F2000, 0x385F4000, 0x385F6000, + 0x385F8000, 0x385FA000, 0x385FC000, 0x385FE000, 0x38600000, 0x38602000, 0x38604000, 0x38606000, 0x38608000, + 0x3860A000, 0x3860C000, 0x3860E000, 0x38610000, 0x38612000, 0x38614000, 0x38616000, 0x38618000, 0x3861A000, + 0x3861C000, 0x3861E000, 0x38620000, 0x38622000, 0x38624000, 0x38626000, 0x38628000, 0x3862A000, 0x3862C000, + 0x3862E000, 0x38630000, 0x38632000, 0x38634000, 0x38636000, 0x38638000, 0x3863A000, 0x3863C000, 0x3863E000, + 0x38640000, 0x38642000, 0x38644000, 0x38646000, 0x38648000, 0x3864A000, 0x3864C000, 0x3864E000, 0x38650000, + 0x38652000, 0x38654000, 0x38656000, 0x38658000, 0x3865A000, 0x3865C000, 0x3865E000, 0x38660000, 0x38662000, + 0x38664000, 0x38666000, 0x38668000, 0x3866A000, 0x3866C000, 0x3866E000, 0x38670000, 0x38672000, 0x38674000, + 0x38676000, 0x38678000, 0x3867A000, 0x3867C000, 0x3867E000, 0x38680000, 0x38682000, 0x38684000, 0x38686000, + 0x38688000, 0x3868A000, 0x3868C000, 0x3868E000, 0x38690000, 0x38692000, 0x38694000, 0x38696000, 0x38698000, + 0x3869A000, 0x3869C000, 0x3869E000, 0x386A0000, 0x386A2000, 0x386A4000, 0x386A6000, 
0x386A8000, 0x386AA000, + 0x386AC000, 0x386AE000, 0x386B0000, 0x386B2000, 0x386B4000, 0x386B6000, 0x386B8000, 0x386BA000, 0x386BC000, + 0x386BE000, 0x386C0000, 0x386C2000, 0x386C4000, 0x386C6000, 0x386C8000, 0x386CA000, 0x386CC000, 0x386CE000, + 0x386D0000, 0x386D2000, 0x386D4000, 0x386D6000, 0x386D8000, 0x386DA000, 0x386DC000, 0x386DE000, 0x386E0000, + 0x386E2000, 0x386E4000, 0x386E6000, 0x386E8000, 0x386EA000, 0x386EC000, 0x386EE000, 0x386F0000, 0x386F2000, + 0x386F4000, 0x386F6000, 0x386F8000, 0x386FA000, 0x386FC000, 0x386FE000, 0x38700000, 0x38702000, 0x38704000, + 0x38706000, 0x38708000, 0x3870A000, 0x3870C000, 0x3870E000, 0x38710000, 0x38712000, 0x38714000, 0x38716000, + 0x38718000, 0x3871A000, 0x3871C000, 0x3871E000, 0x38720000, 0x38722000, 0x38724000, 0x38726000, 0x38728000, + 0x3872A000, 0x3872C000, 0x3872E000, 0x38730000, 0x38732000, 0x38734000, 0x38736000, 0x38738000, 0x3873A000, + 0x3873C000, 0x3873E000, 0x38740000, 0x38742000, 0x38744000, 0x38746000, 0x38748000, 0x3874A000, 0x3874C000, + 0x3874E000, 0x38750000, 0x38752000, 0x38754000, 0x38756000, 0x38758000, 0x3875A000, 0x3875C000, 0x3875E000, + 0x38760000, 0x38762000, 0x38764000, 0x38766000, 0x38768000, 0x3876A000, 0x3876C000, 0x3876E000, 0x38770000, + 0x38772000, 0x38774000, 0x38776000, 0x38778000, 0x3877A000, 0x3877C000, 0x3877E000, 0x38780000, 0x38782000, + 0x38784000, 0x38786000, 0x38788000, 0x3878A000, 0x3878C000, 0x3878E000, 0x38790000, 0x38792000, 0x38794000, + 0x38796000, 0x38798000, 0x3879A000, 0x3879C000, 0x3879E000, 0x387A0000, 0x387A2000, 0x387A4000, 0x387A6000, + 0x387A8000, 0x387AA000, 0x387AC000, 0x387AE000, 0x387B0000, 0x387B2000, 0x387B4000, 0x387B6000, 0x387B8000, + 0x387BA000, 0x387BC000, 0x387BE000, 0x387C0000, 0x387C2000, 0x387C4000, 0x387C6000, 0x387C8000, 0x387CA000, + 0x387CC000, 0x387CE000, 0x387D0000, 0x387D2000, 0x387D4000, 0x387D6000, 0x387D8000, 0x387DA000, 0x387DC000, + 0x387DE000, 0x387E0000, 0x387E2000, 0x387E4000, 0x387E6000, 0x387E8000, 0x387EA000, 0x387EC000, 
0x387EE000, + 0x387F0000, 0x387F2000, 0x387F4000, 0x387F6000, 0x387F8000, 0x387FA000, 0x387FC000, 0x387FE000}; + static const uint32 exponent_table[64] = {0x00000000, 0x00800000, 0x01000000, 0x01800000, 0x02000000, 0x02800000, + 0x03000000, 0x03800000, 0x04000000, 0x04800000, 0x05000000, 0x05800000, 0x06000000, 0x06800000, 0x07000000, + 0x07800000, 0x08000000, 0x08800000, 0x09000000, 0x09800000, 0x0A000000, 0x0A800000, 0x0B000000, 0x0B800000, + 0x0C000000, 0x0C800000, 0x0D000000, 0x0D800000, 0x0E000000, 0x0E800000, 0x0F000000, 0x47800000, 0x80000000, + 0x80800000, 0x81000000, 0x81800000, 0x82000000, 0x82800000, 0x83000000, 0x83800000, 0x84000000, 0x84800000, + 0x85000000, 0x85800000, 0x86000000, 0x86800000, 0x87000000, 0x87800000, 0x88000000, 0x88800000, 0x89000000, + 0x89800000, 0x8A000000, 0x8A800000, 0x8B000000, 0x8B800000, 0x8C000000, 0x8C800000, 0x8D000000, 0x8D800000, + 0x8E000000, 0x8E800000, 0x8F000000, 0xC7800000}; + static const unsigned short offset_table[64] = {0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, + 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, + 1024, 1024, 0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, + 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024}; + uint32 bits = mantissa_table[offset_table[value >> 10] + (value & 0x3FF)] + exponent_table[value >> 10]; + // return *reinterpret_cast(&bits); //violating strict aliasing! + float out; + std::memcpy(&out, &bits, sizeof(float)); + return out; +} + +/// Convert half-precision to IEEE double-precision. 
+/// \param value binary representation of half-precision value +/// \return double-precision value +inline double half2float_impl(uint16 value, double, true_type) +{ + typedef bits::type uint32; + typedef bits::type uint64; + uint32 hi = static_cast(value & 0x8000) << 16; + int abs = value & 0x7FFF; + if (abs) + { + hi |= 0x3F000000 << static_cast(abs >= 0x7C00); + for (; abs < 0x400; abs <<= 1, hi -= 0x100000) + ; + hi += static_cast(abs) << 10; + } + uint64 bits = static_cast(hi) << 32; + // return *reinterpret_cast(&bits); //violating strict aliasing! + double out; + std::memcpy(&out, &bits, sizeof(double)); + return out; +} + +/// Convert half-precision to non-IEEE floating point. +/// \tparam T type to convert to (builtin integer type) +/// \param value binary representation of half-precision value +/// \return floating point value +template +T half2float_impl(uint16 value, T, ...) +{ + T out; + int abs = value & 0x7FFF; + if (abs > 0x7C00) + out = std::numeric_limits::has_quiet_NaN ? std::numeric_limits::quiet_NaN() : T(); + else if (abs == 0x7C00) + out = std::numeric_limits::has_infinity ? std::numeric_limits::infinity() : std::numeric_limits::max(); + else if (abs > 0x3FF) + out = std::ldexp(static_cast((abs & 0x3FF) | 0x400), (abs >> 10) - 25); + else + out = std::ldexp(static_cast(abs), -24); + return (value & 0x8000) ? -out : out; +} + +/// Convert half-precision to floating point. +/// \tparam T type to convert to (builtin integer type) +/// \param value binary representation of half-precision value +/// \return floating point value +template +T half2float(uint16 value) +{ + return half2float_impl( + value, T(), bool_type < std::numeric_limits::is_iec559 && sizeof(typename bits::type) == sizeof(T) > ()); +} + +/// Convert half-precision floating point to integer. 
+/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding +/// \tparam E `true` for round to even, `false` for round away from zero +/// \tparam T type to convert to (buitlin integer type with at least 16 bits precision, excluding any implicit sign +/// bits) \param value binary representation of half-precision value \return integral value +template +T half2int_impl(uint16 value) +{ +#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS + static_assert(std::is_integral::value, "half to int conversion only supports builtin integer types"); +#endif + uint32_t e = value & 0x7FFF; + if (e >= 0x7C00) + return (value & 0x8000) ? std::numeric_limits::min() : std::numeric_limits::max(); + if (e < 0x3800) + { + if (R == std::round_toward_infinity) + return T(~(value >> 15) & (e != 0)); + else if (R == std::round_toward_neg_infinity) + return -T(value > 0x8000); + return T(); + } + uint32_t m = (value & 0x3FF) | 0x400; + e >>= 10; + if (e < 25) + { + if (R == std::round_to_nearest) + m += (1 << (24 - e)) - (~(m >> (25 - e)) & E); + else if (R == std::round_toward_infinity) + m += ((value >> 15) - 1) & ((1 << (25 - e)) - 1U); + else if (R == std::round_toward_neg_infinity) + m += -(value >> 15) & ((1 << (25 - e)) - 1U); + m >>= 25 - e; + } + else + m <<= e - 25; + return (value & 0x8000) ? -static_cast(m) : static_cast(m); +} + +/// Convert half-precision floating point to integer. +/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding +/// \tparam T type to convert to (buitlin integer type with at least 16 bits precision, excluding any implicit sign +/// bits) \param value binary representation of half-precision value \return integral value +template +T half2int(uint16 value) +{ + return half2int_impl(value); +} + +/// Convert half-precision floating point to integer using round-to-nearest-away-from-zero. 
+/// \tparam T type to convert to (buitlin integer type with at least 16 bits precision, excluding any implicit sign +/// bits) \param value binary representation of half-precision value \return integral value +template +T half2int_up(uint16 value) +{ + return half2int_impl(value); +} + +/// Round half-precision number to nearest integer value. +/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding +/// \tparam E `true` for round to even, `false` for round away from zero +/// \param value binary representation of half-precision value +/// \return half-precision bits for nearest integral value +template +uint16 round_half_impl(uint16 value) +{ + uint32_t e = value & 0x7FFF; + uint16 result = value; + if (e < 0x3C00) + { + result &= 0x8000; + if (R == std::round_to_nearest) + result |= 0x3C00U & -(e >= (0x3800 + E)); + else if (R == std::round_toward_infinity) + result |= 0x3C00U & -(~(value >> 15) & (e != 0)); + else if (R == std::round_toward_neg_infinity) + result |= 0x3C00U & -(value > 0x8000); + } + else if (e < 0x6400) + { + e = 25 - (e >> 10); + uint32_t mask = (1 << e) - 1; + if (R == std::round_to_nearest) + result += (1 << (e - 1)) - (~(result >> e) & E); + else if (R == std::round_toward_infinity) + result += mask & ((value >> 15) - 1); + else if (R == std::round_toward_neg_infinity) + result += mask & -(value >> 15); + result &= ~mask; + } + return result; +} + +/// Round half-precision number to nearest integer value. +/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding +/// \param value binary representation of half-precision value +/// \return half-precision bits for nearest integral value +template +uint16 round_half(uint16 value) +{ + return round_half_impl(value); +} + +/// Round half-precision number to nearest integer value using round-to-nearest-away-from-zero. 
+/// \param value binary representation of half-precision value +/// \return half-precision bits for nearest integral value +inline uint16 round_half_up(uint16 value) +{ + return round_half_impl(value); +} +/// \} + +struct functions; +template +struct unary_specialized; +template +struct binary_specialized; +template +struct half_caster; +} // namespace detail + +/// Half-precision floating point type. +/// This class implements an IEEE-conformant half-precision floating point type with the usual arithmetic operators and +/// conversions. It is implicitly convertible to single-precision floating point, which makes artihmetic expressions and +/// functions with mixed-type operands to be of the most precise operand type. Additionally all arithmetic operations +/// (and many mathematical functions) are carried out in single-precision internally. All conversions from single- to +/// half-precision are done using the library's default rounding mode, but temporary results inside chained arithmetic +/// expressions are kept in single-precision as long as possible (while of course still maintaining a strong +/// half-precision type). +/// +/// According to the C++98/03 definition, the half type is not a POD type. But according to C++11's less strict and +/// extended definitions it is both a standard layout type and a trivially copyable type (even if not a POD type), which +/// means it can be standard-conformantly copied using raw binary copies. But in this context some more words about the +/// actual size of the type. Although the half is representing an IEEE 16-bit type, it does not neccessarily have to be +/// of exactly 16-bits size. But on any reasonable implementation the actual binary representation of this type will +/// most probably not ivolve any additional "magic" or padding beyond the simple binary representation of the underlying +/// 16-bit IEEE number, even if not strictly guaranteed by the standard. 
But even then it only has an actual size of 16 +/// bits if your C++ implementation supports an unsigned integer type of exactly 16 bits width. But this should be the +/// case on nearly any reasonable platform. +/// +/// So if your C++ implementation is not totally exotic or imposes special alignment requirements, it is a reasonable +/// assumption that the data of a half is just comprised of the 2 bytes of the underlying IEEE representation. +class half +{ + friend struct detail::functions; + friend struct detail::unary_specialized; + friend struct detail::binary_specialized; + template + friend struct detail::half_caster; + friend class std::numeric_limits; +#if HALF_ENABLE_CPP11_HASH + friend struct std::hash; +#endif +#if HALF_ENABLE_CPP11_USER_LITERALS + friend half literal::operator"" _h(long double); +#endif + +public: + /// Default constructor. + /// This initializes the half to 0. Although this does not match the builtin types' default-initialization semantics + /// and may be less efficient than no initialization, it is needed to provide proper value-initialization semantics. + HALF_CONSTEXPR half() HALF_NOEXCEPT : data_() {} + + /// Copy constructor. + /// \tparam T type of concrete half expression + /// \param rhs half expression to copy from + half(detail::expr rhs) + : data_(detail::float2half(static_cast(rhs))) + { + } + + /// Conversion constructor. + /// \param rhs float to convert + explicit half(float rhs) + : data_(detail::float2half(rhs)) + { + } + + /// Conversion to single-precision. + /// \return single precision value representing expression value + operator float() const + { + return detail::half2float(data_); + } + + /// Assignment operator. + /// \tparam T type of concrete half expression + /// \param rhs half expression to copy from + /// \return reference to this half + half& operator=(detail::expr rhs) + { + return *this = static_cast(rhs); + } + + /// Arithmetic assignment. 
+ /// \tparam T type of concrete half expression + /// \param rhs half expression to add + /// \return reference to this half + template + typename detail::enable::type operator+=(T rhs) + { + return *this += static_cast(rhs); + } + + /// Arithmetic assignment. + /// \tparam T type of concrete half expression + /// \param rhs half expression to subtract + /// \return reference to this half + template + typename detail::enable::type operator-=(T rhs) + { + return *this -= static_cast(rhs); + } + + /// Arithmetic assignment. + /// \tparam T type of concrete half expression + /// \param rhs half expression to multiply with + /// \return reference to this half + template + typename detail::enable::type operator*=(T rhs) + { + return *this *= static_cast(rhs); + } + + /// Arithmetic assignment. + /// \tparam T type of concrete half expression + /// \param rhs half expression to divide by + /// \return reference to this half + template + typename detail::enable::type operator/=(T rhs) + { + return *this /= static_cast(rhs); + } + + /// Assignment operator. + /// \param rhs single-precision value to copy from + /// \return reference to this half + half& operator=(float rhs) + { + data_ = detail::float2half(rhs); + return *this; + } + + /// Arithmetic assignment. + /// \param rhs single-precision value to add + /// \return reference to this half + half& operator+=(float rhs) + { + data_ = detail::float2half(detail::half2float(data_) + rhs); + return *this; + } + + /// Arithmetic assignment. + /// \param rhs single-precision value to subtract + /// \return reference to this half + half& operator-=(float rhs) + { + data_ = detail::float2half(detail::half2float(data_) - rhs); + return *this; + } + + /// Arithmetic assignment. + /// \param rhs single-precision value to multiply with + /// \return reference to this half + half& operator*=(float rhs) + { + data_ = detail::float2half(detail::half2float(data_) * rhs); + return *this; + } + + /// Arithmetic assignment. 
+ /// \param rhs single-precision value to divide by + /// \return reference to this half + half& operator/=(float rhs) + { + data_ = detail::float2half(detail::half2float(data_) / rhs); + return *this; + } + + /// Prefix increment. + /// \return incremented half value + half& operator++() + { + return *this += 1.0F; + } + + /// Prefix decrement. + /// \return decremented half value + half& operator--() + { + return *this -= 1.0F; + } + + /// Postfix increment. + /// \return non-incremented half value + half operator++(int) + { + half out(*this); + ++*this; + return out; + } + + /// Postfix decrement. + /// \return non-decremented half value + half operator--(int) + { + half out(*this); + --*this; + return out; + } + +private: + /// Rounding mode to use + static const std::float_round_style round_style = (std::float_round_style)(HALF_ROUND_STYLE); + + /// Constructor. + /// \param bits binary representation to set half to + HALF_CONSTEXPR half(detail::binary_t, detail::uint16 bits) HALF_NOEXCEPT : data_(bits) {} + + /// Internal binary representation + detail::uint16 data_; +}; + +#if HALF_ENABLE_CPP11_USER_LITERALS +namespace literal +{ +/// Half literal. +/// While this returns an actual half-precision value, half literals can unfortunately not be constant expressions due +/// to rather involved conversions. +/// \param value literal value +/// \return half with given value (if representable) +inline half operator"" _h(long double value) +{ + return half(detail::binary, detail::float2half(value)); +} +} // namespace literal +#endif + +namespace detail +{ +/// Wrapper implementing unspecialized half-precision functions. +struct functions +{ + /// Addition implementation. + /// \param x first operand + /// \param y second operand + /// \return Half-precision sum stored in single-precision + static expr plus(float x, float y) + { + return expr(x + y); + } + + /// Subtraction implementation. 
+ /// \param x first operand + /// \param y second operand + /// \return Half-precision difference stored in single-precision + static expr minus(float x, float y) + { + return expr(x - y); + } + + /// Multiplication implementation. + /// \param x first operand + /// \param y second operand + /// \return Half-precision product stored in single-precision + static expr multiplies(float x, float y) + { + return expr(x * y); + } + + /// Division implementation. + /// \param x first operand + /// \param y second operand + /// \return Half-precision quotient stored in single-precision + static expr divides(float x, float y) + { + return expr(x / y); + } + + /// Output implementation. + /// \param out stream to write to + /// \param arg value to write + /// \return reference to stream + template + static std::basic_ostream& write(std::basic_ostream& out, float arg) + { + return out << arg; + } + + /// Input implementation. + /// \param in stream to read from + /// \param arg half to read into + /// \return reference to stream + template + static std::basic_istream& read(std::basic_istream& in, half& arg) + { + float f; + if (in >> f) + arg = f; + return in; + } + + /// Modulo implementation. + /// \param x first operand + /// \param y second operand + /// \return Half-precision division remainder stored in single-precision + static expr fmod(float x, float y) + { + return expr(std::fmod(x, y)); + } + + /// Remainder implementation. 
+ /// \param x first operand + /// \param y second operand + /// \return Half-precision division remainder stored in single-precision + static expr remainder(float x, float y) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::remainder(x, y)); +#else + if (builtin_isnan(x) || builtin_isnan(y)) + return expr(std::numeric_limits::quiet_NaN()); + float ax = std::fabs(x), ay = std::fabs(y); + if (ax >= 65536.0f || ay < std::ldexp(1.0f, -24)) + return expr(std::numeric_limits::quiet_NaN()); + if (ay >= 65536.0f) + return expr(x); + if (ax == ay) + return expr(builtin_signbit(x) ? -0.0f : 0.0f); + ax = std::fmod(ax, ay + ay); + float y2 = 0.5f * ay; + if (ax > y2) + { + ax -= ay; + if (ax >= y2) + ax -= ay; + } + return expr(builtin_signbit(x) ? -ax : ax); +#endif + } + + /// Remainder implementation. + /// \param x first operand + /// \param y second operand + /// \param quo address to store quotient bits at + /// \return Half-precision division remainder stored in single-precision + static expr remquo(float x, float y, int* quo) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::remquo(x, y, quo)); +#else + if (builtin_isnan(x) || builtin_isnan(y)) + return expr(std::numeric_limits::quiet_NaN()); + bool sign = builtin_signbit(x), qsign = static_cast(sign ^ builtin_signbit(y)); + float ax = std::fabs(x), ay = std::fabs(y); + if (ax >= 65536.0f || ay < std::ldexp(1.0f, -24)) + return expr(std::numeric_limits::quiet_NaN()); + if (ay >= 65536.0f) + return expr(x); + if (ax == ay) + return *quo = qsign ? -1 : 1, expr(sign ? -0.0f : 0.0f); + ax = std::fmod(ax, 8.0f * ay); + int cquo = 0; + if (ax >= 4.0f * ay) + { + ax -= 4.0f * ay; + cquo += 4; + } + if (ax >= 2.0f * ay) + { + ax -= 2.0f * ay; + cquo += 2; + } + float y2 = 0.5f * ay; + if (ax > y2) + { + ax -= ay; + ++cquo; + if (ax >= y2) + { + ax -= ay; + ++cquo; + } + } + return *quo = qsign ? -cquo : cquo, expr(sign ? -ax : ax); +#endif + } + + /// Positive difference implementation. 
+ /// \param x first operand + /// \param y second operand + /// \return Positive difference stored in single-precision + static expr fdim(float x, float y) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::fdim(x, y)); +#else + return expr((x <= y) ? 0.0f : (x - y)); +#endif + } + + /// Fused multiply-add implementation. + /// \param x first operand + /// \param y second operand + /// \param z third operand + /// \return \a x * \a y + \a z stored in single-precision + static expr fma(float x, float y, float z) + { +#if HALF_ENABLE_CPP11_CMATH && defined(FP_FAST_FMAF) + return expr(std::fma(x, y, z)); +#else + return expr(x * y + z); +#endif + } + + /// Get NaN. + /// \return Half-precision quiet NaN + static half nanh() + { + return half(binary, 0x7FFF); + } + + /// Exponential implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr exp(float arg) + { + return expr(std::exp(arg)); + } + + /// Exponential implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr expm1(float arg) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::expm1(arg)); +#else + return expr(static_cast(std::exp(static_cast(arg)) - 1.0)); +#endif + } + + /// Binary exponential implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr exp2(float arg) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::exp2(arg)); +#else + return expr(static_cast(std::exp(arg * 0.69314718055994530941723212145818))); +#endif + } + + /// Logarithm implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr log(float arg) + { + return expr(std::log(arg)); + } + + /// Common logarithm implementation. 
+ /// \param arg function argument + /// \return function value stored in single-preicision + static expr log10(float arg) + { + return expr(std::log10(arg)); + } + + /// Logarithm implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr log1p(float arg) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::log1p(arg)); +#else + return expr(static_cast(std::log(1.0 + arg))); +#endif + } + + /// Binary logarithm implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr log2(float arg) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::log2(arg)); +#else + return expr(static_cast(std::log(static_cast(arg)) * 1.4426950408889634073599246810019)); +#endif + } + + /// Square root implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr sqrt(float arg) + { + return expr(std::sqrt(arg)); + } + + /// Cubic root implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr cbrt(float arg) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::cbrt(arg)); +#else + if (builtin_isnan(arg) || builtin_isinf(arg)) + return expr(arg); + return expr(builtin_signbit(arg) ? -static_cast(std::pow(-static_cast(arg), 1.0 / 3.0)) + : static_cast(std::pow(static_cast(arg), 1.0 / 3.0))); +#endif + } + + /// Hypotenuse implementation. + /// \param x first argument + /// \param y second argument + /// \return function value stored in single-preicision + static expr hypot(float x, float y) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::hypot(x, y)); +#else + return expr((builtin_isinf(x) || builtin_isinf(y)) + ? std::numeric_limits::infinity() + : static_cast(std::sqrt(static_cast(x) * x + static_cast(y) * y))); +#endif + } + + /// Power implementation. 
+ /// \param base value to exponentiate + /// \param exp power to expontiate to + /// \return function value stored in single-preicision + static expr pow(float base, float exp) + { + return expr(std::pow(base, exp)); + } + + /// Sine implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr sin(float arg) + { + return expr(std::sin(arg)); + } + + /// Cosine implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr cos(float arg) + { + return expr(std::cos(arg)); + } + + /// Tan implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr tan(float arg) + { + return expr(std::tan(arg)); + } + + /// Arc sine implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr asin(float arg) + { + return expr(std::asin(arg)); + } + + /// Arc cosine implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr acos(float arg) + { + return expr(std::acos(arg)); + } + + /// Arc tangent implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr atan(float arg) + { + return expr(std::atan(arg)); + } + + /// Arc tangent implementation. + /// \param x first argument + /// \param y second argument + /// \return function value stored in single-preicision + static expr atan2(float x, float y) + { + return expr(std::atan2(x, y)); + } + + /// Hyperbolic sine implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr sinh(float arg) + { + return expr(std::sinh(arg)); + } + + /// Hyperbolic cosine implementation. 
+ /// \param arg function argument + /// \return function value stored in single-preicision + static expr cosh(float arg) + { + return expr(std::cosh(arg)); + } + + /// Hyperbolic tangent implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr tanh(float arg) + { + return expr(std::tanh(arg)); + } + + /// Hyperbolic area sine implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr asinh(float arg) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::asinh(arg)); +#else + return expr((arg == -std::numeric_limits::infinity()) + ? arg + : static_cast(std::log(arg + std::sqrt(arg * arg + 1.0)))); +#endif + } + + /// Hyperbolic area cosine implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr acosh(float arg) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::acosh(arg)); +#else + return expr((arg < -1.0f) ? std::numeric_limits::quiet_NaN() + : static_cast(std::log(arg + std::sqrt(arg * arg - 1.0)))); +#endif + } + + /// Hyperbolic area tangent implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr atanh(float arg) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::atanh(arg)); +#else + return expr(static_cast(0.5 * std::log((1.0 + arg) / (1.0 - arg)))); +#endif + } + + /// Error function implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr erf(float arg) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::erf(arg)); +#else + return expr(static_cast(erf(static_cast(arg)))); +#endif + } + + /// Complementary implementation. 
+ /// \param arg function argument + /// \return function value stored in single-preicision + static expr erfc(float arg) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::erfc(arg)); +#else + return expr(static_cast(1.0 - erf(static_cast(arg)))); +#endif + } + + /// Gamma logarithm implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr lgamma(float arg) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::lgamma(arg)); +#else + if (builtin_isinf(arg)) + return expr(std::numeric_limits::infinity()); + if (arg < 0.0f) + { + float i, f = std::modf(-arg, &i); + if (f == 0.0f) + return expr(std::numeric_limits::infinity()); + return expr(static_cast(1.1447298858494001741434273513531 + - std::log(std::abs(std::sin(3.1415926535897932384626433832795 * f))) - lgamma(1.0 - arg))); + } + return expr(static_cast(lgamma(static_cast(arg)))); +#endif + } + + /// Gamma implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr tgamma(float arg) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::tgamma(arg)); +#else + if (arg == 0.0f) + return builtin_signbit(arg) ? expr(-std::numeric_limits::infinity()) + : expr(std::numeric_limits::infinity()); + if (arg < 0.0f) + { + float i, f = std::modf(-arg, &i); + if (f == 0.0f) + return expr(std::numeric_limits::quiet_NaN()); + double value = 3.1415926535897932384626433832795 + / (std::sin(3.1415926535897932384626433832795 * f) * std::exp(lgamma(1.0 - arg))); + return expr(static_cast((std::fmod(i, 2.0f) == 0.0f) ? -value : value)); + } + if (builtin_isinf(arg)) + return expr(arg); + return expr(static_cast(std::exp(lgamma(static_cast(arg))))); +#endif + } + + /// Floor implementation. + /// \param arg value to round + /// \return rounded value + static half floor(half arg) + { + return half(binary, round_half(arg.data_)); + } + + /// Ceiling implementation. 
+ /// \param arg value to round + /// \return rounded value + static half ceil(half arg) + { + return half(binary, round_half(arg.data_)); + } + + /// Truncation implementation. + /// \param arg value to round + /// \return rounded value + static half trunc(half arg) + { + return half(binary, round_half(arg.data_)); + } + + /// Nearest integer implementation. + /// \param arg value to round + /// \return rounded value + static half round(half arg) + { + return half(binary, round_half_up(arg.data_)); + } + + /// Nearest integer implementation. + /// \param arg value to round + /// \return rounded value + static long lround(half arg) + { + return detail::half2int_up(arg.data_); + } + + /// Nearest integer implementation. + /// \param arg value to round + /// \return rounded value + static half rint(half arg) + { + return half(binary, round_half(arg.data_)); + } + + /// Nearest integer implementation. + /// \param arg value to round + /// \return rounded value + static long lrint(half arg) + { + return detail::half2int(arg.data_); + } + +#if HALF_ENABLE_CPP11_LONG_LONG + /// Nearest integer implementation. + /// \param arg value to round + /// \return rounded value + static long long llround(half arg) + { + return detail::half2int_up(arg.data_); + } + + /// Nearest integer implementation. + /// \param arg value to round + /// \return rounded value + static long long llrint(half arg) + { + return detail::half2int(arg.data_); + } +#endif + + /// Decompression implementation. + /// \param arg number to decompress + /// \param exp address to store exponent at + /// \return normalized significant + static half frexp(half arg, int* exp) + { + int m = arg.data_ & 0x7FFF, e = -14; + if (m >= 0x7C00 || !m) + return *exp = 0, arg; + for (; m < 0x400; m <<= 1, --e) + ; + return *exp = e + (m >> 10), half(binary, (arg.data_ & 0x8000) | 0x3800 | (m & 0x3FF)); + } + + /// Decompression implementation. 
+ /// \param arg number to decompress + /// \param iptr address to store integer part at + /// \return fractional part + static half modf(half arg, half* iptr) + { + uint32_t e = arg.data_ & 0x7FFF; + if (e >= 0x6400) + return *iptr = arg, half(binary, arg.data_ & (0x8000U | -(e > 0x7C00))); + if (e < 0x3C00) + return iptr->data_ = arg.data_ & 0x8000, arg; + e >>= 10; + uint32_t mask = (1 << (25 - e)) - 1, m = arg.data_ & mask; + iptr->data_ = arg.data_ & ~mask; + if (!m) + return half(binary, arg.data_ & 0x8000); + for (; m < 0x400; m <<= 1, --e) + ; + return half(binary, static_cast((arg.data_ & 0x8000) | (e << 10) | (m & 0x3FF))); + } + + /// Scaling implementation. + /// \param arg number to scale + /// \param exp power of two to scale by + /// \return scaled number + static half scalbln(half arg, long exp) + { + uint32_t m = arg.data_ & 0x7FFF; + if (m >= 0x7C00 || !m) + return arg; + for (; m < 0x400; m <<= 1, --exp) + ; + exp += m >> 10; + uint16 value = arg.data_ & 0x8000; + if (exp > 30) + { + if (half::round_style == std::round_toward_zero) + value |= 0x7BFF; + else if (half::round_style == std::round_toward_infinity) + value |= 0x7C00 - (value >> 15); + else if (half::round_style == std::round_toward_neg_infinity) + value |= 0x7BFF + (value >> 15); + else + value |= 0x7C00; + } + else if (exp > 0) + value |= (exp << 10) | (m & 0x3FF); + else if (exp > -11) + { + m = (m & 0x3FF) | 0x400; + if (half::round_style == std::round_to_nearest) + { + m += 1 << -exp; +#if HALF_ROUND_TIES_TO_EVEN + m -= (m >> (1 - exp)) & 1; +#endif + } + else if (half::round_style == std::round_toward_infinity) + m += ((value >> 15) - 1) & ((1 << (1 - exp)) - 1U); + else if (half::round_style == std::round_toward_neg_infinity) + m += -(value >> 15) & ((1 << (1 - exp)) - 1U); + value |= m >> (1 - exp); + } + else if (half::round_style == std::round_toward_infinity) + value -= (value >> 15) - 1; + else if (half::round_style == std::round_toward_neg_infinity) + value += value >> 15; 
+ return half(binary, value);
+ }
+
+ /// Exponent implementation.
+ /// \param arg number to query
+ /// \return floating point exponent
+ static int ilogb(half arg)
+ {
+ int abs = arg.data_ & 0x7FFF;
+ if (!abs)
+ return FP_ILOGB0;
+ if (abs < 0x7C00)
+ {
+ int exp = (abs >> 10) - 15;
+ if (abs < 0x400)
+ for (; abs < 0x200; abs <<= 1, --exp)
+ ;
+ return exp;
+ }
+ if (abs > 0x7C00)
+ return FP_ILOGBNAN;
+ return INT_MAX;
+ }
+
+ /// Exponent implementation.
+ /// \param arg number to query
+ /// \return floating point exponent
+ static half logb(half arg)
+ {
+ int abs = arg.data_ & 0x7FFF;
+ if (!abs)
+ return half(binary, 0xFC00);
+ if (abs < 0x7C00)
+ {
+ int exp = (abs >> 10) - 15;
+ if (abs < 0x400)
+ for (; abs < 0x200; abs <<= 1, --exp)
+ ;
+ uint16 bits = (exp < 0) << 15;
+ if (exp)
+ {
+ uint32_t m = std::abs(exp) << 6, e = 18;
+ for (; m < 0x400; m <<= 1, --e)
+ ;
+ bits |= (e << 10) + m;
+ }
+ return half(binary, bits);
+ }
+ if (abs > 0x7C00)
+ return arg;
+ return half(binary, 0x7C00);
+ }
+
+ /// Enumeration implementation.
+ /// \param from number to increase/decrease
+ /// \param to direction to enumerate into
+ /// \return next representable number
+ static half nextafter(half from, half to)
+ {
+ uint16 fabs = from.data_ & 0x7FFF, tabs = to.data_ & 0x7FFF;
+ if (fabs > 0x7C00)
+ return from;
+ if (tabs > 0x7C00 || from.data_ == to.data_ || !(fabs | tabs))
+ return to;
+ if (!fabs)
+ return half(binary, (to.data_ & 0x8000) + 1);
+ bool lt = ((fabs == from.data_) ? static_cast<int>(fabs) : -static_cast<int>(fabs))
+ < ((tabs == to.data_) ? static_cast<int>(tabs) : -static_cast<int>(tabs));
+ return half(binary, from.data_ + (((from.data_ >> 15) ^ static_cast<uint16>(lt)) << 1) - 1);
+ }
+
+ /// Enumeration implementation.
+ /// \param from number to increase/decrease
+ /// \param to direction to enumerate into
+ /// \return next representable number
+ static half nexttoward(half from, long double to)
+ {
+ if (isnan(from))
+ return from;
+ long double lfrom = static_cast<long double>(from);
+ if (builtin_isnan(to) || lfrom == to)
+ return half(static_cast<float>(to));
+ if (!(from.data_ & 0x7FFF))
+ return half(binary, (static_cast<uint16>(builtin_signbit(to)) << 15) + 1);
+ return half(binary, from.data_ + (((from.data_ >> 15) ^ static_cast<uint16>(lfrom < to)) << 1) - 1);
+ }
+
+ /// Sign implementation
+ /// \param x first operand
+ /// \param y second operand
+ /// \return composed value
+ static half copysign(half x, half y)
+ {
+ return half(binary, x.data_ ^ ((x.data_ ^ y.data_) & 0x8000));
+ }
+
+ /// Classification implementation.
+ /// \param arg value to classify
+ /// \retval true if infinite number
+ /// \retval false else
+ static int fpclassify(half arg)
+ {
+ uint32_t abs = arg.data_ & 0x7FFF;
+ return abs
+ ? ((abs > 0x3FF) ? ((abs >= 0x7C00) ? ((abs > 0x7C00) ? FP_NAN : FP_INFINITE) : FP_NORMAL) : FP_SUBNORMAL)
+ : FP_ZERO;
+ }
+
+ /// Classification implementation.
+ /// \param arg value to classify
+ /// \retval true if finite number
+ /// \retval false else
+ static bool isfinite(half arg)
+ {
+ return (arg.data_ & 0x7C00) != 0x7C00;
+ }
+
+ /// Classification implementation.
+ /// \param arg value to classify
+ /// \retval true if infinite number
+ /// \retval false else
+ static bool isinf(half arg)
+ {
+ return (arg.data_ & 0x7FFF) == 0x7C00;
+ }
+
+ /// Classification implementation.
+ /// \param arg value to classify
+ /// \retval true if not a number
+ /// \retval false else
+ static bool isnan(half arg)
+ {
+ return (arg.data_ & 0x7FFF) > 0x7C00;
+ }
+
+ /// Classification implementation.
+ /// \param arg value to classify + /// \retval true if normal number + /// \retval false else + static bool isnormal(half arg) + { + return ((arg.data_ & 0x7C00) != 0) & ((arg.data_ & 0x7C00) != 0x7C00); + } + + /// Sign bit implementation. + /// \param arg value to check + /// \retval true if signed + /// \retval false if unsigned + static bool signbit(half arg) + { + return (arg.data_ & 0x8000) != 0; + } + + /// Comparison implementation. + /// \param x first operand + /// \param y second operand + /// \retval true if operands equal + /// \retval false else + static bool isequal(half x, half y) + { + return (x.data_ == y.data_ || !((x.data_ | y.data_) & 0x7FFF)) && !isnan(x); + } + + /// Comparison implementation. + /// \param x first operand + /// \param y second operand + /// \retval true if operands not equal + /// \retval false else + static bool isnotequal(half x, half y) + { + return (x.data_ != y.data_ && ((x.data_ | y.data_) & 0x7FFF)) || isnan(x); + } + + /// Comparison implementation. + /// \param x first operand + /// \param y second operand + /// \retval true if \a x > \a y + /// \retval false else + static bool isgreater(half x, half y) + { + int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; + return xabs <= 0x7C00 && yabs <= 0x7C00 + && (((xabs == x.data_) ? xabs : -xabs) > ((yabs == y.data_) ? yabs : -yabs)); + } + + /// Comparison implementation. + /// \param x first operand + /// \param y second operand + /// \retval true if \a x >= \a y + /// \retval false else + static bool isgreaterequal(half x, half y) + { + int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; + return xabs <= 0x7C00 && yabs <= 0x7C00 + && (((xabs == x.data_) ? xabs : -xabs) >= ((yabs == y.data_) ? yabs : -yabs)); + } + + /// Comparison implementation. 
+ /// \param x first operand + /// \param y second operand + /// \retval true if \a x < \a y + /// \retval false else + static bool isless(half x, half y) + { + int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; + return xabs <= 0x7C00 && yabs <= 0x7C00 + && (((xabs == x.data_) ? xabs : -xabs) < ((yabs == y.data_) ? yabs : -yabs)); + } + + /// Comparison implementation. + /// \param x first operand + /// \param y second operand + /// \retval true if \a x <= \a y + /// \retval false else + static bool islessequal(half x, half y) + { + int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; + return xabs <= 0x7C00 && yabs <= 0x7C00 + && (((xabs == x.data_) ? xabs : -xabs) <= ((yabs == y.data_) ? yabs : -yabs)); + } + + /// Comparison implementation. + /// \param x first operand + /// \param y second operand + /// \retval true if either \a x > \a y nor \a x < \a y + /// \retval false else + static bool islessgreater(half x, half y) + { + int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; + if (xabs > 0x7C00 || yabs > 0x7C00) + return false; + int a = (xabs == x.data_) ? xabs : -xabs, b = (yabs == y.data_) ? yabs : -yabs; + return a < b || a > b; + } + + /// Comparison implementation. + /// \param x first operand + /// \param y second operand + /// \retval true if operand unordered + /// \retval false else + static bool isunordered(half x, half y) + { + return isnan(x) || isnan(y); + } + +private: + static double erf(double arg) + { + if (builtin_isinf(arg)) + return (arg < 0.0) ? -1.0 : 1.0; + double x2 = arg * arg, ax2 = 0.147 * x2, + value = std::sqrt(1.0 - std::exp(-x2 * (1.2732395447351626861510701069801 + ax2) / (1.0 + ax2))); + return builtin_signbit(arg) ? 
+ -value : value;
+ }
+
+ static double lgamma(double arg)
+ {
+ double v = 1.0;
+ for (; arg < 8.0; ++arg)
+ v *= arg;
+ double w = 1.0 / (arg * arg);
+ return (((((((-0.02955065359477124183006535947712 * w + 0.00641025641025641025641025641026) * w +
+ -0.00191752691752691752691752691753)
+ * w +
+ 8.4175084175084175084175084175084e-4)
+ * w +
+ -5.952380952380952380952380952381e-4)
+ * w +
+ 7.9365079365079365079365079365079e-4)
+ * w +
+ -0.00277777777777777777777777777778)
+ * w +
+ 0.08333333333333333333333333333333)
+ / arg +
+ 0.91893853320467274178032973640562 - std::log(v) - arg + (arg - 0.5) * std::log(arg);
+ }
+ };
+
+ /// Wrapper for unary half-precision functions needing specialization for individual argument types.
+ /// \tparam T argument type
+ template <typename T>
+ struct unary_specialized
+ {
+ /// Negation implementation.
+ /// \param arg value to negate
+ /// \return negated value
+ static HALF_CONSTEXPR half negate(half arg)
+ {
+ return half(binary, arg.data_ ^ 0x8000);
+ }
+
+ /// Absolute value implementation.
+ /// \param arg function argument
+ /// \return absolute value
+ static half fabs(half arg)
+ {
+ return half(binary, arg.data_ & 0x7FFF);
+ }
+ };
+ template <>
+ struct unary_specialized<expr>
+ {
+ static HALF_CONSTEXPR expr negate(float arg)
+ {
+ return expr(-arg);
+ }
+ static expr fabs(float arg)
+ {
+ return expr(std::fabs(arg));
+ }
+ };
+
+ /// Wrapper for binary half-precision functions needing specialization for individual argument types.
+ /// \tparam T first argument type
+ /// \tparam U second argument type
+ template <typename T, typename U>
+ struct binary_specialized
+ {
+ /// Minimum implementation.
+ /// \param x first operand
+ /// \param y second operand
+ /// \return minimum value
+ static expr fmin(float x, float y)
+ {
+#if HALF_ENABLE_CPP11_CMATH
+ return expr(std::fmin(x, y));
+#else
+ if (builtin_isnan(x))
+ return expr(y);
+ if (builtin_isnan(y))
+ return expr(x);
+ return expr(std::min(x, y));
+#endif
+ }
+
+ /// Maximum implementation.
+ /// \param x first operand
+ /// \param y second operand
+ /// \return maximum value
+ static expr fmax(float x, float y)
+ {
+#if HALF_ENABLE_CPP11_CMATH
+ return expr(std::fmax(x, y));
+#else
+ if (builtin_isnan(x))
+ return expr(y);
+ if (builtin_isnan(y))
+ return expr(x);
+ return expr(std::max(x, y));
+#endif
+ }
+ };
+ template <>
+ struct binary_specialized<half, half>
+ {
+ static half fmin(half x, half y)
+ {
+ int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF;
+ if (xabs > 0x7C00)
+ return y;
+ if (yabs > 0x7C00)
+ return x;
+ return (((xabs == x.data_) ? xabs : -xabs) > ((yabs == y.data_) ? yabs : -yabs)) ? y : x;
+ }
+ static half fmax(half x, half y)
+ {
+ int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF;
+ if (xabs > 0x7C00)
+ return y;
+ if (yabs > 0x7C00)
+ return x;
+ return (((xabs == x.data_) ? xabs : -xabs) < ((yabs == y.data_) ? yabs : -yabs)) ? y : x;
+ }
+ };
+
+ /// Helper class for half casts.
+ /// This class template has to be specialized for all valid cast argument to define an appropriate static `cast` member
+ /// function and a corresponding `type` member denoting its return type.
+ /// \tparam T destination type
+ /// \tparam U source type
+ /// \tparam R rounding mode to use
+ template <typename T, typename U, std::float_round_style R = (std::float_round_style)HALF_ROUND_STYLE>
+ struct half_caster
+ {
+ };
+ template <typename U, std::float_round_style R>
+ struct half_caster<half, U, R>
+ {
+#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS
+ static_assert(std::is_arithmetic<U>::value, "half_cast from non-arithmetic type unsupported");
+#endif
+
+ static half cast(U arg)
+ {
+ return cast_impl(arg, is_float<U>());
+ };
+
+private:
+ static half cast_impl(U arg, true_type)
+ {
+ return half(binary, float2half<R>(arg));
+ }
+ static half cast_impl(U arg, false_type)
+ {
+ return half(binary, int2half<R>(arg));
+ }
+ };
+ template <typename T, std::float_round_style R>
+ struct half_caster<T, half, R>
+ {
+#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS
+ static_assert(std::is_arithmetic<T>::value, "half_cast to non-arithmetic type unsupported");
+#endif
+
+ static T cast(half arg)
+ {
+ return cast_impl(arg, is_float<T>());
+ }
+
+private:
+ static T cast_impl(half arg, true_type)
+ {
+ return half2float<T>(arg.data_);
+ }
+ static T cast_impl(half arg, false_type)
+ {
+ return half2int<R, T>(arg.data_);
+ }
+ };
+ template <typename T, std::float_round_style R>
+ struct half_caster<T, expr, R>
+ {
+#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS
+ static_assert(std::is_arithmetic<T>::value, "half_cast to non-arithmetic type unsupported");
+#endif
+
+ static T cast(expr arg)
+ {
+ return cast_impl(arg, is_float<T>());
+ }
+
+private:
+ static T cast_impl(float arg, true_type)
+ {
+ return static_cast<T>(arg);
+ }
+ static T cast_impl(half arg, false_type)
+ {
+ return half2int<R, T>(arg.data_);
+ }
+ };
+ template <std::float_round_style R>
+ struct half_caster<half, half, R>
+ {
+ static half cast(half arg)
+ {
+ return arg;
+ }
+ };
+ template <std::float_round_style R>
+ struct half_caster<half, expr, R> : half_caster<half, half, R>
+ {
+ };
+
+ /// \name Comparison operators
+ /// \{
+
+ /// Comparison for equality.
+ /// \param x first operand
+ /// \param y second operand
+ /// \retval true if operands equal
+ /// \retval false else
+ template <typename T, typename U>
+ typename enable<bool, T, U>::type operator==(T x, U y)
+ {
+ return functions::isequal(x, y);
+ }
+
+ /// Comparison for inequality.
+ /// \param x first operand
+ /// \param y second operand
+ /// \retval true if operands not equal
+ /// \retval false else
+ template <typename T, typename U>
+ typename enable<bool, T, U>::type operator!=(T x, U y)
+ {
+ return functions::isnotequal(x, y);
+ }
+
+ /// Comparison for less than.
+ /// \param x first operand
+ /// \param y second operand
+ /// \retval true if \a x less than \a y
+ /// \retval false else
+ template <typename T, typename U>
+ typename enable<bool, T, U>::type operator<(T x, U y)
+ {
+ return functions::isless(x, y);
+ }
+
+ /// Comparison for greater than.
+ /// \param x first operand
+ /// \param y second operand
+ /// \retval true if \a x greater than \a y
+ /// \retval false else
+ template <typename T, typename U>
+ typename enable<bool, T, U>::type operator>(T x, U y)
+ {
+ return functions::isgreater(x, y);
+ }
+
+ /// Comparison for less equal.
+ /// \param x first operand
+ /// \param y second operand
+ /// \retval true if \a x less equal \a y
+ /// \retval false else
+ template <typename T, typename U>
+ typename enable<bool, T, U>::type operator<=(T x, U y)
+ {
+ return functions::islessequal(x, y);
+ }
+
+ /// Comparison for greater equal.
+ /// \param x first operand
+ /// \param y second operand
+ /// \retval true if \a x greater equal \a y
+ /// \retval false else
+ template <typename T, typename U>
+ typename enable<bool, T, U>::type operator>=(T x, U y)
+ {
+ return functions::isgreaterequal(x, y);
+ }
+
+ /// \}
+ /// \name Arithmetic operators
+ /// \{
+
+ /// Add halfs.
+ /// \param x left operand
+ /// \param y right operand
+ /// \return sum of half expressions
+ template <typename T, typename U>
+ typename enable<expr, T, U>::type operator+(T x, U y)
+ {
+ return functions::plus(x, y);
+ }
+
+ /// Subtract halfs.
+ /// \param x left operand
+ /// \param y right operand
+ /// \return difference of half expressions
+ template <typename T, typename U>
+ typename enable<expr, T, U>::type operator-(T x, U y)
+ {
+ return functions::minus(x, y);
+ }
+
+ /// Multiply halfs.
+ /// \param x left operand
+ /// \param y right operand
+ /// \return product of half expressions
+ template <typename T, typename U>
+ typename enable<expr, T, U>::type operator*(T x, U y)
+ {
+ return functions::multiplies(x, y);
+ }
+
+ /// Divide halfs.
+ /// \param x left operand
+ /// \param y right operand
+ /// \return quotient of half expressions
+ template <typename T, typename U>
+ typename enable<expr, T, U>::type operator/(T x, U y)
+ {
+ return functions::divides(x, y);
+ }
+
+ /// Identity.
+ /// \param arg operand
+ /// \return unchanged operand
+ template <typename T>
+ HALF_CONSTEXPR typename enable<T, T>::type operator+(T arg)
+ {
+ return arg;
+ }
+
+ /// Negation.
+ /// \param arg operand
+ /// \return negated operand
+ template <typename T>
+ HALF_CONSTEXPR typename enable<T, T>::type operator-(T arg)
+ {
+ return unary_specialized<T>::negate(arg);
+ }
+
+ /// \}
+ /// \name Input and output
+ /// \{
+
+ /// Output operator.
+ /// \param out output stream to write into
+ /// \param arg half expression to write
+ /// \return reference to output stream
+ template <typename T, typename charT, typename traits>
+ typename enable<std::basic_ostream<charT, traits>&, T>::type operator<<(std::basic_ostream<charT, traits>& out, T arg)
+ {
+ return functions::write(out, arg);
+ }
+
+ /// Input operator.
+ /// \param in input stream to read from
+ /// \param arg half to read into
+ /// \return reference to input stream
+ template <typename charT, typename traits>
+ std::basic_istream<charT, traits>& operator>>(std::basic_istream<charT, traits>& in, half& arg)
+ {
+ return functions::read(in, arg);
+ }
+
+ /// \}
+ /// \name Basic mathematical operations
+ /// \{
+
+ /// Absolute value.
+ /// \param arg operand
+ /// \return absolute value of \a arg
+ // template<typename T> typename enable<T, T>::type abs(T arg) { return unary_specialized<T>::fabs(arg); }
+ inline half abs(half arg)
+ {
+ return unary_specialized<half>::fabs(arg);
+ }
+ inline expr abs(expr arg)
+ {
+ return unary_specialized<expr>::fabs(arg);
+ }
+
+ /// Absolute value.
+ /// \param arg operand
+ /// \return absolute value of \a arg
+ // template<typename T> typename enable<T, T>::type fabs(T arg) { return unary_specialized<T>::fabs(arg); }
+ inline half fabs(half arg)
+ {
+ return unary_specialized<half>::fabs(arg);
+ }
+ inline expr fabs(expr arg)
+ {
+ return unary_specialized<expr>::fabs(arg);
+ }
+
+ /// Remainder of division.
+ /// \param x first operand
+ /// \param y second operand
+ /// \return remainder of floating point division.
+// template typename enable::type fmod(T x, U y) { return functions::fmod(x, y); } +inline expr fmod(half x, half y) +{ + return functions::fmod(x, y); +} +inline expr fmod(half x, expr y) +{ + return functions::fmod(x, y); +} +inline expr fmod(expr x, half y) +{ + return functions::fmod(x, y); +} +inline expr fmod(expr x, expr y) +{ + return functions::fmod(x, y); +} + +/// Remainder of division. +/// \param x first operand +/// \param y second operand +/// \return remainder of floating point division. +// template typename enable::type remainder(T x, U y) { return +// functions::remainder(x, y); } +inline expr remainder(half x, half y) +{ + return functions::remainder(x, y); +} +inline expr remainder(half x, expr y) +{ + return functions::remainder(x, y); +} +inline expr remainder(expr x, half y) +{ + return functions::remainder(x, y); +} +inline expr remainder(expr x, expr y) +{ + return functions::remainder(x, y); +} + +/// Remainder of division. +/// \param x first operand +/// \param y second operand +/// \param quo address to store some bits of quotient at +/// \return remainder of floating point division. +// template typename enable::type remquo(T x, U y, int *quo) { return +// functions::remquo(x, y, quo); } +inline expr remquo(half x, half y, int* quo) +{ + return functions::remquo(x, y, quo); +} +inline expr remquo(half x, expr y, int* quo) +{ + return functions::remquo(x, y, quo); +} +inline expr remquo(expr x, half y, int* quo) +{ + return functions::remquo(x, y, quo); +} +inline expr remquo(expr x, expr y, int* quo) +{ + return functions::remquo(x, y, quo); +} + +/// Fused multiply add. +/// \param x first operand +/// \param y second operand +/// \param z third operand +/// \return ( \a x * \a y ) + \a z rounded as one operation. 
+ // template<typename T, typename U, typename V> typename enable<expr, T, U, V>::type fma(T x, U y, V z) { return
+ // functions::fma(x, y, z); }
+ inline expr fma(half x, half y, half z)
+ {
+ return functions::fma(x, y, z);
+ }
+ inline expr fma(half x, half y, expr z)
+ {
+ return functions::fma(x, y, z);
+ }
+ inline expr fma(half x, expr y, half z)
+ {
+ return functions::fma(x, y, z);
+ }
+ inline expr fma(half x, expr y, expr z)
+ {
+ return functions::fma(x, y, z);
+ }
+ inline expr fma(expr x, half y, half z)
+ {
+ return functions::fma(x, y, z);
+ }
+ inline expr fma(expr x, half y, expr z)
+ {
+ return functions::fma(x, y, z);
+ }
+ inline expr fma(expr x, expr y, half z)
+ {
+ return functions::fma(x, y, z);
+ }
+ inline expr fma(expr x, expr y, expr z)
+ {
+ return functions::fma(x, y, z);
+ }
+
+ /// Maximum of half expressions.
+ /// \param x first operand
+ /// \param y second operand
+ /// \return maximum of operands
+ // template<typename T, typename U> typename result<T, U>::type fmax(T x, U y) { return
+ // binary_specialized<T, U>::fmax(x, y); }
+ inline half fmax(half x, half y)
+ {
+ return binary_specialized<half, half>::fmax(x, y);
+ }
+ inline expr fmax(half x, expr y)
+ {
+ return binary_specialized<half, expr>::fmax(x, y);
+ }
+ inline expr fmax(expr x, half y)
+ {
+ return binary_specialized<expr, half>::fmax(x, y);
+ }
+ inline expr fmax(expr x, expr y)
+ {
+ return binary_specialized<expr, expr>::fmax(x, y);
+ }
+
+ /// Minimum of half expressions.
+ /// \param x first operand
+ /// \param y second operand
+ /// \return minimum of operands
+ // template<typename T, typename U> typename result<T, U>::type fmin(T x, U y) { return
+ // binary_specialized<T, U>::fmin(x, y); }
+ inline half fmin(half x, half y)
+ {
+ return binary_specialized<half, half>::fmin(x, y);
+ }
+ inline expr fmin(half x, expr y)
+ {
+ return binary_specialized<half, expr>::fmin(x, y);
+ }
+ inline expr fmin(expr x, half y)
+ {
+ return binary_specialized<expr, half>::fmin(x, y);
+ }
+ inline expr fmin(expr x, expr y)
+ {
+ return binary_specialized<expr, expr>::fmin(x, y);
+ }
+
+ /// Positive difference.
+/// \param x first operand +/// \param y second operand +/// \return \a x - \a y or 0 if difference negative +// template typename enable::type fdim(T x, U y) { return functions::fdim(x, y); } +inline expr fdim(half x, half y) +{ + return functions::fdim(x, y); +} +inline expr fdim(half x, expr y) +{ + return functions::fdim(x, y); +} +inline expr fdim(expr x, half y) +{ + return functions::fdim(x, y); +} +inline expr fdim(expr x, expr y) +{ + return functions::fdim(x, y); +} + +/// Get NaN value. +/// \return quiet NaN +inline half nanh(const char*) +{ + return functions::nanh(); +} + +/// \} +/// \name Exponential functions +/// \{ + +/// Exponential function. +/// \param arg function argument +/// \return e raised to \a arg +// template typename enable::type exp(T arg) { return functions::exp(arg); } +inline expr exp(half arg) +{ + return functions::exp(arg); +} +inline expr exp(expr arg) +{ + return functions::exp(arg); +} + +/// Exponential minus one. +/// \param arg function argument +/// \return e raised to \a arg subtracted by 1 +// template typename enable::type expm1(T arg) { return functions::expm1(arg); } +inline expr expm1(half arg) +{ + return functions::expm1(arg); +} +inline expr expm1(expr arg) +{ + return functions::expm1(arg); +} + +/// Binary exponential. +/// \param arg function argument +/// \return 2 raised to \a arg +// template typename enable::type exp2(T arg) { return functions::exp2(arg); } +inline expr exp2(half arg) +{ + return functions::exp2(arg); +} +inline expr exp2(expr arg) +{ + return functions::exp2(arg); +} + +/// Natural logorithm. +/// \param arg function argument +/// \return logarithm of \a arg to base e +// template typename enable::type log(T arg) { return functions::log(arg); } +inline expr log(half arg) +{ + return functions::log(arg); +} +inline expr log(expr arg) +{ + return functions::log(arg); +} + +/// Common logorithm. 
+/// \param arg function argument +/// \return logarithm of \a arg to base 10 +// template typename enable::type log10(T arg) { return functions::log10(arg); } +inline expr log10(half arg) +{ + return functions::log10(arg); +} +inline expr log10(expr arg) +{ + return functions::log10(arg); +} + +/// Natural logorithm. +/// \param arg function argument +/// \return logarithm of \a arg plus 1 to base e +// template typename enable::type log1p(T arg) { return functions::log1p(arg); } +inline expr log1p(half arg) +{ + return functions::log1p(arg); +} +inline expr log1p(expr arg) +{ + return functions::log1p(arg); +} + +/// Binary logorithm. +/// \param arg function argument +/// \return logarithm of \a arg to base 2 +// template typename enable::type log2(T arg) { return functions::log2(arg); } +inline expr log2(half arg) +{ + return functions::log2(arg); +} +inline expr log2(expr arg) +{ + return functions::log2(arg); +} + +/// \} +/// \name Power functions +/// \{ + +/// Square root. +/// \param arg function argument +/// \return square root of \a arg +// template typename enable::type sqrt(T arg) { return functions::sqrt(arg); } +inline expr sqrt(half arg) +{ + return functions::sqrt(arg); +} +inline expr sqrt(expr arg) +{ + return functions::sqrt(arg); +} + +/// Cubic root. +/// \param arg function argument +/// \return cubic root of \a arg +// template typename enable::type cbrt(T arg) { return functions::cbrt(arg); } +inline expr cbrt(half arg) +{ + return functions::cbrt(arg); +} +inline expr cbrt(expr arg) +{ + return functions::cbrt(arg); +} + +/// Hypotenuse function. 
+/// \param x first argument +/// \param y second argument +/// \return square root of sum of squares without internal over- or underflows +// template typename enable::type hypot(T x, U y) { return functions::hypot(x, y); +//} +inline expr hypot(half x, half y) +{ + return functions::hypot(x, y); +} +inline expr hypot(half x, expr y) +{ + return functions::hypot(x, y); +} +inline expr hypot(expr x, half y) +{ + return functions::hypot(x, y); +} +inline expr hypot(expr x, expr y) +{ + return functions::hypot(x, y); +} + +/// Power function. +/// \param base first argument +/// \param exp second argument +/// \return \a base raised to \a exp +// template typename enable::type pow(T base, U exp) { return functions::pow(base, +// exp); } +inline expr pow(half base, half exp) +{ + return functions::pow(base, exp); +} +inline expr pow(half base, expr exp) +{ + return functions::pow(base, exp); +} +inline expr pow(expr base, half exp) +{ + return functions::pow(base, exp); +} +inline expr pow(expr base, expr exp) +{ + return functions::pow(base, exp); +} + +/// \} +/// \name Trigonometric functions +/// \{ + +/// Sine function. +/// \param arg function argument +/// \return sine value of \a arg +// template typename enable::type sin(T arg) { return functions::sin(arg); } +inline expr sin(half arg) +{ + return functions::sin(arg); +} +inline expr sin(expr arg) +{ + return functions::sin(arg); +} + +/// Cosine function. +/// \param arg function argument +/// \return cosine value of \a arg +// template typename enable::type cos(T arg) { return functions::cos(arg); } +inline expr cos(half arg) +{ + return functions::cos(arg); +} +inline expr cos(expr arg) +{ + return functions::cos(arg); +} + +/// Tangent function. 
+/// \param arg function argument +/// \return tangent value of \a arg +// template typename enable::type tan(T arg) { return functions::tan(arg); } +inline expr tan(half arg) +{ + return functions::tan(arg); +} +inline expr tan(expr arg) +{ + return functions::tan(arg); +} + +/// Arc sine. +/// \param arg function argument +/// \return arc sine value of \a arg +// template typename enable::type asin(T arg) { return functions::asin(arg); } +inline expr asin(half arg) +{ + return functions::asin(arg); +} +inline expr asin(expr arg) +{ + return functions::asin(arg); +} + +/// Arc cosine function. +/// \param arg function argument +/// \return arc cosine value of \a arg +// template typename enable::type acos(T arg) { return functions::acos(arg); } +inline expr acos(half arg) +{ + return functions::acos(arg); +} +inline expr acos(expr arg) +{ + return functions::acos(arg); +} + +/// Arc tangent function. +/// \param arg function argument +/// \return arc tangent value of \a arg +// template typename enable::type atan(T arg) { return functions::atan(arg); } +inline expr atan(half arg) +{ + return functions::atan(arg); +} +inline expr atan(expr arg) +{ + return functions::atan(arg); +} + +/// Arc tangent function. +/// \param x first argument +/// \param y second argument +/// \return arc tangent value +// template typename enable::type atan2(T x, U y) { return functions::atan2(x, y); +//} +inline expr atan2(half x, half y) +{ + return functions::atan2(x, y); +} +inline expr atan2(half x, expr y) +{ + return functions::atan2(x, y); +} +inline expr atan2(expr x, half y) +{ + return functions::atan2(x, y); +} +inline expr atan2(expr x, expr y) +{ + return functions::atan2(x, y); +} + +/// \} +/// \name Hyperbolic functions +/// \{ + +/// Hyperbolic sine. 
+/// \param arg function argument +/// \return hyperbolic sine value of \a arg +// template typename enable::type sinh(T arg) { return functions::sinh(arg); } +inline expr sinh(half arg) +{ + return functions::sinh(arg); +} +inline expr sinh(expr arg) +{ + return functions::sinh(arg); +} + +/// Hyperbolic cosine. +/// \param arg function argument +/// \return hyperbolic cosine value of \a arg +// template typename enable::type cosh(T arg) { return functions::cosh(arg); } +inline expr cosh(half arg) +{ + return functions::cosh(arg); +} +inline expr cosh(expr arg) +{ + return functions::cosh(arg); +} + +/// Hyperbolic tangent. +/// \param arg function argument +/// \return hyperbolic tangent value of \a arg +// template typename enable::type tanh(T arg) { return functions::tanh(arg); } +inline expr tanh(half arg) +{ + return functions::tanh(arg); +} +inline expr tanh(expr arg) +{ + return functions::tanh(arg); +} + +/// Hyperbolic area sine. +/// \param arg function argument +/// \return area sine value of \a arg +// template typename enable::type asinh(T arg) { return functions::asinh(arg); } +inline expr asinh(half arg) +{ + return functions::asinh(arg); +} +inline expr asinh(expr arg) +{ + return functions::asinh(arg); +} + +/// Hyperbolic area cosine. +/// \param arg function argument +/// \return area cosine value of \a arg +// template typename enable::type acosh(T arg) { return functions::acosh(arg); } +inline expr acosh(half arg) +{ + return functions::acosh(arg); +} +inline expr acosh(expr arg) +{ + return functions::acosh(arg); +} + +/// Hyperbolic area tangent. +/// \param arg function argument +/// \return area tangent value of \a arg +// template typename enable::type atanh(T arg) { return functions::atanh(arg); } +inline expr atanh(half arg) +{ + return functions::atanh(arg); +} +inline expr atanh(expr arg) +{ + return functions::atanh(arg); +} + +/// \} +/// \name Error and gamma functions +/// \{ + +/// Error function. 
+/// \param arg function argument +/// \return error function value of \a arg +// template typename enable::type erf(T arg) { return functions::erf(arg); } +inline expr erf(half arg) +{ + return functions::erf(arg); +} +inline expr erf(expr arg) +{ + return functions::erf(arg); +} + +/// Complementary error function. +/// \param arg function argument +/// \return 1 minus error function value of \a arg +// template typename enable::type erfc(T arg) { return functions::erfc(arg); } +inline expr erfc(half arg) +{ + return functions::erfc(arg); +} +inline expr erfc(expr arg) +{ + return functions::erfc(arg); +} + +/// Natural logarithm of gamma function. +/// \param arg function argument +/// \return natural logarith of gamma function for \a arg +// template typename enable::type lgamma(T arg) { return functions::lgamma(arg); } +inline expr lgamma(half arg) +{ + return functions::lgamma(arg); +} +inline expr lgamma(expr arg) +{ + return functions::lgamma(arg); +} + +/// Gamma function. +/// \param arg function argument +/// \return gamma function value of \a arg +// template typename enable::type tgamma(T arg) { return functions::tgamma(arg); } +inline expr tgamma(half arg) +{ + return functions::tgamma(arg); +} +inline expr tgamma(expr arg) +{ + return functions::tgamma(arg); +} + +/// \} +/// \name Rounding +/// \{ + +/// Nearest integer not less than half value. +/// \param arg half to round +/// \return nearest integer not less than \a arg +// template typename enable::type ceil(T arg) { return functions::ceil(arg); } +inline half ceil(half arg) +{ + return functions::ceil(arg); +} +inline half ceil(expr arg) +{ + return functions::ceil(arg); +} + +/// Nearest integer not greater than half value. 
+/// \param arg half to round +/// \return nearest integer not greater than \a arg +// template typename enable::type floor(T arg) { return functions::floor(arg); } +inline half floor(half arg) +{ + return functions::floor(arg); +} +inline half floor(expr arg) +{ + return functions::floor(arg); +} + +/// Nearest integer not greater in magnitude than half value. +/// \param arg half to round +/// \return nearest integer not greater in magnitude than \a arg +// template typename enable::type trunc(T arg) { return functions::trunc(arg); } +inline half trunc(half arg) +{ + return functions::trunc(arg); +} +inline half trunc(expr arg) +{ + return functions::trunc(arg); +} + +/// Nearest integer. +/// \param arg half to round +/// \return nearest integer, rounded away from zero in half-way cases +// template typename enable::type round(T arg) { return functions::round(arg); } +inline half round(half arg) +{ + return functions::round(arg); +} +inline half round(expr arg) +{ + return functions::round(arg); +} + +/// Nearest integer. +/// \param arg half to round +/// \return nearest integer, rounded away from zero in half-way cases +// template typename enable::type lround(T arg) { return functions::lround(arg); } +inline long lround(half arg) +{ + return functions::lround(arg); +} +inline long lround(expr arg) +{ + return functions::lround(arg); +} + +/// Nearest integer using half's internal rounding mode. +/// \param arg half expression to round +/// \return nearest integer using default rounding mode +// template typename enable::type nearbyint(T arg) { return functions::nearbyint(arg); } +inline half nearbyint(half arg) +{ + return functions::rint(arg); +} +inline half nearbyint(expr arg) +{ + return functions::rint(arg); +} + +/// Nearest integer using half's internal rounding mode. 
+/// \param arg half expression to round +/// \return nearest integer using default rounding mode +// template typename enable::type rint(T arg) { return functions::rint(arg); } +inline half rint(half arg) +{ + return functions::rint(arg); +} +inline half rint(expr arg) +{ + return functions::rint(arg); +} + +/// Nearest integer using half's internal rounding mode. +/// \param arg half expression to round +/// \return nearest integer using default rounding mode +// template typename enable::type lrint(T arg) { return functions::lrint(arg); } +inline long lrint(half arg) +{ + return functions::lrint(arg); +} +inline long lrint(expr arg) +{ + return functions::lrint(arg); +} +#if HALF_ENABLE_CPP11_LONG_LONG +/// Nearest integer. +/// \param arg half to round +/// \return nearest integer, rounded away from zero in half-way cases +// template typename enable::type llround(T arg) { return functions::llround(arg); } +inline long long llround(half arg) +{ + return functions::llround(arg); +} +inline long long llround(expr arg) +{ + return functions::llround(arg); +} + +/// Nearest integer using half's internal rounding mode. +/// \param arg half expression to round +/// \return nearest integer using default rounding mode +// template typename enable::type llrint(T arg) { return functions::llrint(arg); } +inline long long llrint(half arg) +{ + return functions::llrint(arg); +} +inline long long llrint(expr arg) +{ + return functions::llrint(arg); +} +#endif + +/// \} +/// \name Floating point manipulation +/// \{ + +/// Decompress floating point number. +/// \param arg number to decompress +/// \param exp address to store exponent at +/// \return significant in range [0.5, 1) +// template typename enable::type frexp(T arg, int *exp) { return functions::frexp(arg, exp); } +inline half frexp(half arg, int* exp) +{ + return functions::frexp(arg, exp); +} +inline half frexp(expr arg, int* exp) +{ + return functions::frexp(arg, exp); +} + +/// Multiply by power of two. 
+/// \param arg number to modify +/// \param exp power of two to multiply with +/// \return \a arg multplied by 2 raised to \a exp +// template typename enable::type ldexp(T arg, int exp) { return functions::scalbln(arg, exp); +//} +inline half ldexp(half arg, int exp) +{ + return functions::scalbln(arg, exp); +} +inline half ldexp(expr arg, int exp) +{ + return functions::scalbln(arg, exp); +} + +/// Extract integer and fractional parts. +/// \param arg number to decompress +/// \param iptr address to store integer part at +/// \return fractional part +// template typename enable::type modf(T arg, half *iptr) { return functions::modf(arg, iptr); +//} +inline half modf(half arg, half* iptr) +{ + return functions::modf(arg, iptr); +} +inline half modf(expr arg, half* iptr) +{ + return functions::modf(arg, iptr); +} + +/// Multiply by power of two. +/// \param arg number to modify +/// \param exp power of two to multiply with +/// \return \a arg multplied by 2 raised to \a exp +// template typename enable::type scalbn(T arg, int exp) { return functions::scalbln(arg, exp); +//} +inline half scalbn(half arg, int exp) +{ + return functions::scalbln(arg, exp); +} +inline half scalbn(expr arg, int exp) +{ + return functions::scalbln(arg, exp); +} + +/// Multiply by power of two. +/// \param arg number to modify +/// \param exp power of two to multiply with +/// \return \a arg multplied by 2 raised to \a exp +// template typename enable::type scalbln(T arg, long exp) { return functions::scalbln(arg, +// exp); +//} +inline half scalbln(half arg, long exp) +{ + return functions::scalbln(arg, exp); +} +inline half scalbln(expr arg, long exp) +{ + return functions::scalbln(arg, exp); +} + +/// Extract exponent. 
+/// \param arg number to query +/// \return floating point exponent +/// \retval FP_ILOGB0 for zero +/// \retval FP_ILOGBNAN for NaN +/// \retval MAX_INT for infinity +// template typename enable::type ilogb(T arg) { return functions::ilogb(arg); } +inline int ilogb(half arg) +{ + return functions::ilogb(arg); +} +inline int ilogb(expr arg) +{ + return functions::ilogb(arg); +} + +/// Extract exponent. +/// \param arg number to query +/// \return floating point exponent +// template typename enable::type logb(T arg) { return functions::logb(arg); } +inline half logb(half arg) +{ + return functions::logb(arg); +} +inline half logb(expr arg) +{ + return functions::logb(arg); +} + +/// Next representable value. +/// \param from value to compute next representable value for +/// \param to direction towards which to compute next value +/// \return next representable value after \a from in direction towards \a to +// template typename enable::type nextafter(T from, U to) { return +// functions::nextafter(from, to); } +inline half nextafter(half from, half to) +{ + return functions::nextafter(from, to); +} +inline half nextafter(half from, expr to) +{ + return functions::nextafter(from, to); +} +inline half nextafter(expr from, half to) +{ + return functions::nextafter(from, to); +} +inline half nextafter(expr from, expr to) +{ + return functions::nextafter(from, to); +} + +/// Next representable value. +/// \param from value to compute next representable value for +/// \param to direction towards which to compute next value +/// \return next representable value after \a from in direction towards \a to +// template typename enable::type nexttoward(T from, long double to) { return +// functions::nexttoward(from, to); } +inline half nexttoward(half from, long double to) +{ + return functions::nexttoward(from, to); +} +inline half nexttoward(expr from, long double to) +{ + return functions::nexttoward(from, to); +} + +/// Take sign. 
+/// \param x value to change sign for +/// \param y value to take sign from +/// \return value equal to \a x in magnitude and to \a y in sign +// template typename enable::type copysign(T x, U y) { return +// functions::copysign(x, y); } +inline half copysign(half x, half y) +{ + return functions::copysign(x, y); +} +inline half copysign(half x, expr y) +{ + return functions::copysign(x, y); +} +inline half copysign(expr x, half y) +{ + return functions::copysign(x, y); +} +inline half copysign(expr x, expr y) +{ + return functions::copysign(x, y); +} + +/// \} +/// \name Floating point classification +/// \{ + +/// Classify floating point value. +/// \param arg number to classify +/// \retval FP_ZERO for positive and negative zero +/// \retval FP_SUBNORMAL for subnormal numbers +/// \retval FP_INFINITY for positive and negative infinity +/// \retval FP_NAN for NaNs +/// \retval FP_NORMAL for all other (normal) values +// template typename enable::type fpclassify(T arg) { return functions::fpclassify(arg); } +inline int fpclassify(half arg) +{ + return functions::fpclassify(arg); +} +inline int fpclassify(expr arg) +{ + return functions::fpclassify(arg); +} + +/// Check if finite number. +/// \param arg number to check +/// \retval true if neither infinity nor NaN +/// \retval false else +// template typename enable::type isfinite(T arg) { return functions::isfinite(arg); } +inline bool isfinite(half arg) +{ + return functions::isfinite(arg); +} +inline bool isfinite(expr arg) +{ + return functions::isfinite(arg); +} + +/// Check for infinity. +/// \param arg number to check +/// \retval true for positive or negative infinity +/// \retval false else +// template typename enable::type isinf(T arg) { return functions::isinf(arg); } +inline bool isinf(half arg) +{ + return functions::isinf(arg); +} +inline bool isinf(expr arg) +{ + return functions::isinf(arg); +} + +/// Check for NaN. 
+/// \param arg number to check +/// \retval true for NaNs +/// \retval false else +// template typename enable::type isnan(T arg) { return functions::isnan(arg); } +inline bool isnan(half arg) +{ + return functions::isnan(arg); +} +inline bool isnan(expr arg) +{ + return functions::isnan(arg); +} + +/// Check if normal number. +/// \param arg number to check +/// \retval true if normal number +/// \retval false if either subnormal, zero, infinity or NaN +// template typename enable::type isnormal(T arg) { return functions::isnormal(arg); } +inline bool isnormal(half arg) +{ + return functions::isnormal(arg); +} +inline bool isnormal(expr arg) +{ + return functions::isnormal(arg); +} + +/// Check sign. +/// \param arg number to check +/// \retval true for negative number +/// \retval false for positive number +// template typename enable::type signbit(T arg) { return functions::signbit(arg); } +inline bool signbit(half arg) +{ + return functions::signbit(arg); +} +inline bool signbit(expr arg) +{ + return functions::signbit(arg); +} + +/// \} +/// \name Comparison +/// \{ + +/// Comparison for greater than. +/// \param x first operand +/// \param y second operand +/// \retval true if \a x greater than \a y +/// \retval false else +// template typename enable::type isgreater(T x, U y) { return +// functions::isgreater(x, y); } +inline bool isgreater(half x, half y) +{ + return functions::isgreater(x, y); +} +inline bool isgreater(half x, expr y) +{ + return functions::isgreater(x, y); +} +inline bool isgreater(expr x, half y) +{ + return functions::isgreater(x, y); +} +inline bool isgreater(expr x, expr y) +{ + return functions::isgreater(x, y); +} + +/// Comparison for greater equal. 
+/// \param x first operand +/// \param y second operand +/// \retval true if \a x greater equal \a y +/// \retval false else +// template typename enable::type isgreaterequal(T x, U y) { return +// functions::isgreaterequal(x, y); } +inline bool isgreaterequal(half x, half y) +{ + return functions::isgreaterequal(x, y); +} +inline bool isgreaterequal(half x, expr y) +{ + return functions::isgreaterequal(x, y); +} +inline bool isgreaterequal(expr x, half y) +{ + return functions::isgreaterequal(x, y); +} +inline bool isgreaterequal(expr x, expr y) +{ + return functions::isgreaterequal(x, y); +} + +/// Comparison for less than. +/// \param x first operand +/// \param y second operand +/// \retval true if \a x less than \a y +/// \retval false else +// template typename enable::type isless(T x, U y) { return functions::isless(x, +// y); +//} +inline bool isless(half x, half y) +{ + return functions::isless(x, y); +} +inline bool isless(half x, expr y) +{ + return functions::isless(x, y); +} +inline bool isless(expr x, half y) +{ + return functions::isless(x, y); +} +inline bool isless(expr x, expr y) +{ + return functions::isless(x, y); +} + +/// Comparison for less equal. +/// \param x first operand +/// \param y second operand +/// \retval true if \a x less equal \a y +/// \retval false else +// template typename enable::type islessequal(T x, U y) { return +// functions::islessequal(x, y); } +inline bool islessequal(half x, half y) +{ + return functions::islessequal(x, y); +} +inline bool islessequal(half x, expr y) +{ + return functions::islessequal(x, y); +} +inline bool islessequal(expr x, half y) +{ + return functions::islessequal(x, y); +} +inline bool islessequal(expr x, expr y) +{ + return functions::islessequal(x, y); +} + +/// Comarison for less or greater. 
+/// \param x first operand +/// \param y second operand +/// \retval true if either less or greater +/// \retval false else +// template typename enable::type islessgreater(T x, U y) { return +// functions::islessgreater(x, y); } +inline bool islessgreater(half x, half y) +{ + return functions::islessgreater(x, y); +} +inline bool islessgreater(half x, expr y) +{ + return functions::islessgreater(x, y); +} +inline bool islessgreater(expr x, half y) +{ + return functions::islessgreater(x, y); +} +inline bool islessgreater(expr x, expr y) +{ + return functions::islessgreater(x, y); +} + +/// Check if unordered. +/// \param x first operand +/// \param y second operand +/// \retval true if unordered (one or two NaN operands) +/// \retval false else +// template typename enable::type isunordered(T x, U y) { return +// functions::isunordered(x, y); } +inline bool isunordered(half x, half y) +{ + return functions::isunordered(x, y); +} +inline bool isunordered(half x, expr y) +{ + return functions::isunordered(x, y); +} +inline bool isunordered(expr x, half y) +{ + return functions::isunordered(x, y); +} +inline bool isunordered(expr x, expr y) +{ + return functions::isunordered(x, y); +} + +/// \name Casting +/// \{ + +/// Cast to or from half-precision floating point number. +/// This casts between [half](\ref half_float::half) and any built-in arithmetic type. The values are converted +/// directly using the given rounding mode, without any roundtrip over `float` that a `static_cast` would otherwise do. +/// It uses the default rounding mode. +/// +/// Using this cast with neither of the two types being a [half](\ref half_float::half) or with any of the two types +/// not being a built-in arithmetic type (apart from [half](\ref half_float::half), of course) results in a compiler +/// error and casting between [half](\ref half_float::half)s is just a no-op. 
+/// \tparam T destination type (half or built-in arithmetic type) +/// \tparam U source type (half or built-in arithmetic type) +/// \param arg value to cast +/// \return \a arg converted to destination type +template +T half_cast(U arg) +{ + return half_caster::cast(arg); +} + +/// Cast to or from half-precision floating point number. +/// This casts between [half](\ref half_float::half) and any built-in arithmetic type. The values are converted +/// directly using the given rounding mode, without any roundtrip over `float` that a `static_cast` would otherwise do. +/// +/// Using this cast with neither of the two types being a [half](\ref half_float::half) or with any of the two types +/// not being a built-in arithmetic type (apart from [half](\ref half_float::half), of course) results in a compiler +/// error and casting between [half](\ref half_float::half)s is just a no-op. +/// \tparam T destination type (half or built-in arithmetic type) +/// \tparam R rounding mode to use. +/// \tparam U source type (half or built-in arithmetic type) +/// \param arg value to cast +/// \return \a arg converted to destination type +template +T half_cast(U arg) +{ + return half_caster::cast(arg); +} +/// \} +} // namespace detail + +using detail::operator==; +using detail::operator!=; +using detail::operator<; +using detail::operator>; +using detail::operator<=; +using detail::operator>=; +using detail::operator+; +using detail::operator-; +using detail::operator*; +using detail::operator/; +using detail::operator<<; +using detail::operator>>; + +using detail::abs; +using detail::acos; +using detail::acosh; +using detail::asin; +using detail::asinh; +using detail::atan; +using detail::atan2; +using detail::atanh; +using detail::cbrt; +using detail::ceil; +using detail::cos; +using detail::cosh; +using detail::erf; +using detail::erfc; +using detail::exp; +using detail::exp2; +using detail::expm1; +using detail::fabs; +using detail::fdim; +using detail::floor; +using 
detail::fma; +using detail::fmax; +using detail::fmin; +using detail::fmod; +using detail::hypot; +using detail::lgamma; +using detail::log; +using detail::log10; +using detail::log1p; +using detail::log2; +using detail::lrint; +using detail::lround; +using detail::nanh; +using detail::nearbyint; +using detail::pow; +using detail::remainder; +using detail::remquo; +using detail::rint; +using detail::round; +using detail::sin; +using detail::sinh; +using detail::sqrt; +using detail::tan; +using detail::tanh; +using detail::tgamma; +using detail::trunc; +#if HALF_ENABLE_CPP11_LONG_LONG +using detail::llrint; +using detail::llround; +#endif +using detail::copysign; +using detail::fpclassify; +using detail::frexp; +using detail::ilogb; +using detail::isfinite; +using detail::isgreater; +using detail::isgreaterequal; +using detail::isinf; +using detail::isless; +using detail::islessequal; +using detail::islessgreater; +using detail::isnan; +using detail::isnormal; +using detail::isunordered; +using detail::ldexp; +using detail::logb; +using detail::modf; +using detail::nextafter; +using detail::nexttoward; +using detail::scalbln; +using detail::scalbn; +using detail::signbit; + +using detail::half_cast; +} // namespace half_float + +/// Extensions to the C++ standard library. +namespace std +{ +/// Numeric limits for half-precision floats. +/// Because of the underlying single-precision implementation of many operations, it inherits some properties from +/// `std::numeric_limits`. +template <> +class numeric_limits : public numeric_limits +{ +public: + /// Supports signed values. + static HALF_CONSTEXPR_CONST bool is_signed = true; + + /// Is not exact. + static HALF_CONSTEXPR_CONST bool is_exact = false; + + /// Doesn't provide modulo arithmetic. + static HALF_CONSTEXPR_CONST bool is_modulo = false; + + /// IEEE conformant. + static HALF_CONSTEXPR_CONST bool is_iec559 = true; + + /// Supports infinity. 
+ static HALF_CONSTEXPR_CONST bool has_infinity = true; + + /// Supports quiet NaNs. + static HALF_CONSTEXPR_CONST bool has_quiet_NaN = true; + + /// Supports subnormal values. + static HALF_CONSTEXPR_CONST float_denorm_style has_denorm = denorm_present; + + /// Rounding mode. + /// Due to the mix of internal single-precision computations (using the rounding mode of the underlying + /// single-precision implementation) with the rounding mode of the single-to-half conversions, the actual rounding + /// mode might be `std::round_indeterminate` if the default half-precision rounding mode doesn't match the + /// single-precision rounding mode. + static HALF_CONSTEXPR_CONST float_round_style round_style + = (std::numeric_limits::round_style == half_float::half::round_style) ? half_float::half::round_style + : round_indeterminate; + + /// Significant digits. + static HALF_CONSTEXPR_CONST int digits = 11; + + /// Significant decimal digits. + static HALF_CONSTEXPR_CONST int digits10 = 3; + + /// Required decimal digits to represent all possible values. + static HALF_CONSTEXPR_CONST int max_digits10 = 5; + + /// Number base. + static HALF_CONSTEXPR_CONST int radix = 2; + + /// One more than smallest exponent. + static HALF_CONSTEXPR_CONST int min_exponent = -13; + + /// Smallest normalized representable power of 10. + static HALF_CONSTEXPR_CONST int min_exponent10 = -4; + + /// One more than largest exponent + static HALF_CONSTEXPR_CONST int max_exponent = 16; + + /// Largest finitely representable power of 10. + static HALF_CONSTEXPR_CONST int max_exponent10 = 4; + + /// Smallest positive normal value. + static HALF_CONSTEXPR half_float::half min() HALF_NOTHROW + { + return half_float::half(half_float::detail::binary, 0x0400); + } + + /// Smallest finite value. + static HALF_CONSTEXPR half_float::half lowest() HALF_NOTHROW + { + return half_float::half(half_float::detail::binary, 0xFBFF); + } + + /// Largest finite value. 
+ static HALF_CONSTEXPR half_float::half max() HALF_NOTHROW + { + return half_float::half(half_float::detail::binary, 0x7BFF); + } + + /// Difference between one and next representable value. + static HALF_CONSTEXPR half_float::half epsilon() HALF_NOTHROW + { + return half_float::half(half_float::detail::binary, 0x1400); + } + + /// Maximum rounding error. + static HALF_CONSTEXPR half_float::half round_error() HALF_NOTHROW + { + return half_float::half(half_float::detail::binary, (round_style == std::round_to_nearest) ? 0x3800 : 0x3C00); + } + + /// Positive infinity. + static HALF_CONSTEXPR half_float::half infinity() HALF_NOTHROW + { + return half_float::half(half_float::detail::binary, 0x7C00); + } + + /// Quiet NaN. + static HALF_CONSTEXPR half_float::half quiet_NaN() HALF_NOTHROW + { + return half_float::half(half_float::detail::binary, 0x7FFF); + } + + /// Signalling NaN. + static HALF_CONSTEXPR half_float::half signaling_NaN() HALF_NOTHROW + { + return half_float::half(half_float::detail::binary, 0x7DFF); + } + + /// Smallest positive subnormal value. + static HALF_CONSTEXPR half_float::half denorm_min() HALF_NOTHROW + { + return half_float::half(half_float::detail::binary, 0x0001); + } +}; + +#if HALF_ENABLE_CPP11_HASH +/// Hash function for half-precision floats. +/// This is only defined if C++11 `std::hash` is supported and enabled. +template <> +struct hash //: unary_function +{ + /// Type of function argument. + typedef half_float::half argument_type; + + /// Function return type. + typedef size_t result_type; + + /// Compute hash function. 
+ /// \param arg half to hash + /// \return hash value + result_type operator()(argument_type arg) const + { + return hash()(static_cast(arg.data_) & -(arg.data_ != 0x8000)); + } +}; +#endif +} // namespace std + +#undef HALF_CONSTEXPR +#undef HALF_CONSTEXPR_CONST +#undef HALF_NOEXCEPT +#undef HALF_NOTHROW +#ifdef HALF_POP_WARNINGS +#pragma warning(pop) +#undef HALF_POP_WARNINGS +#endif + +#endif diff --git a/src/Detector/tensorrt_onnx/common/logger.cpp b/src/Detector/tensorrt_onnx/common/logger.cpp new file mode 100644 index 000000000..909ec0bbd --- /dev/null +++ b/src/Detector/tensorrt_onnx/common/logger.cpp @@ -0,0 +1,41 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "logger.h" +#include "ErrorRecorder.h" +#include "logging.h" +using namespace nvinfer1; +SampleErrorRecorder gRecorder; +namespace sample +{ +Logger gLogger{Logger::Severity::kINFO}; +LogStreamConsumer gLogVerbose{LOG_VERBOSE(gLogger)}; +LogStreamConsumer gLogInfo{LOG_INFO(gLogger)}; +LogStreamConsumer gLogWarning{LOG_WARN(gLogger)}; +LogStreamConsumer gLogError{LOG_ERROR(gLogger)}; +LogStreamConsumer gLogFatal{LOG_FATAL(gLogger)}; + +void setReportableSeverity(Logger::Severity severity) +{ + gLogger.setReportableSeverity(severity); + gLogVerbose.setReportableSeverity(severity); + gLogInfo.setReportableSeverity(severity); + gLogWarning.setReportableSeverity(severity); + gLogError.setReportableSeverity(severity); + gLogFatal.setReportableSeverity(severity); +} +} // namespace sample diff --git a/src/Detector/tensorrt_onnx/common/logger.h b/src/Detector/tensorrt_onnx/common/logger.h new file mode 100644 index 000000000..8205e4572 --- /dev/null +++ b/src/Detector/tensorrt_onnx/common/logger.h @@ -0,0 +1,37 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef LOGGER_H +#define LOGGER_H + +#include "logging.h" + +class SampleErrorRecorder; +extern SampleErrorRecorder gRecorder; +namespace sample +{ +extern Logger gLogger; +extern LogStreamConsumer gLogVerbose; +extern LogStreamConsumer gLogInfo; +extern LogStreamConsumer gLogWarning; +extern LogStreamConsumer gLogError; +extern LogStreamConsumer gLogFatal; + +void setReportableSeverity(Logger::Severity severity); +} // namespace sample + +#endif // LOGGER_H diff --git a/src/Detector/tensorrt_yolo/logging.h b/src/Detector/tensorrt_onnx/common/logging.h similarity index 75% rename from src/Detector/tensorrt_yolo/logging.h rename to src/Detector/tensorrt_onnx/common/logging.h index 602b69fb5..69273a5ee 100644 --- a/src/Detector/tensorrt_yolo/logging.h +++ b/src/Detector/tensorrt_onnx/common/logging.h @@ -1,11 +1,12 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -17,15 +18,20 @@ #ifndef TENSORRT_LOGGING_H #define TENSORRT_LOGGING_H -#include "NvInferRuntimeCommon.h" +#include "NvInferRuntime.h" +#include "sampleOptions.h" #include #include #include #include +#include #include #include #include +namespace sample +{ + using Severity = nvinfer1::ILogger::Severity; class LogStreamConsumerBuffer : public std::stringbuf @@ -38,12 +44,18 @@ class LogStreamConsumerBuffer : public std::stringbuf { } - LogStreamConsumerBuffer(LogStreamConsumerBuffer&& other) + LogStreamConsumerBuffer(LogStreamConsumerBuffer&& other) noexcept : mOutput(other.mOutput) + , mPrefix(other.mPrefix) + , mShouldLog(other.mShouldLog) { } + LogStreamConsumerBuffer(const LogStreamConsumerBuffer& other) = delete; + LogStreamConsumerBuffer() = delete; + LogStreamConsumerBuffer& operator=(const LogStreamConsumerBuffer&) = delete; + LogStreamConsumerBuffer& operator=(LogStreamConsumerBuffer&&) = delete; - ~LogStreamConsumerBuffer() + ~LogStreamConsumerBuffer() override { // std::streambuf::pbase() gives a pointer to the beginning of the buffered part of the output sequence // std::streambuf::pptr() gives a pointer to the current position of the output sequence @@ -55,10 +67,12 @@ class LogStreamConsumerBuffer : public std::stringbuf } } - // synchronizes the stream buffer and returns 0 on success - // synchronizing the stream buffer consists of inserting the buffer contents into the stream, - // resetting the buffer and flushing the stream - virtual int sync() + //! + //! synchronizes the stream buffer and returns 0 on success + //! synchronizing the stream buffer consists of inserting the buffer contents into the stream, + //! resetting the buffer and flushing the stream + //! 
+ int32_t sync() override { putOutput(); return 0; @@ -71,21 +85,21 @@ class LogStreamConsumerBuffer : public std::stringbuf // prepend timestamp std::time_t timestamp = std::time(nullptr); tm* tm_local = std::localtime(×tamp); - std::cout << "["; - std::cout << std::setw(2) << std::setfill('0') << 1 + tm_local->tm_mon << "/"; - std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_mday << "/"; - std::cout << std::setw(4) << std::setfill('0') << 1900 + tm_local->tm_year << "-"; - std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_hour << ":"; - std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_min << ":"; - std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_sec << "] "; + mOutput << "["; + mOutput << std::setw(2) << std::setfill('0') << 1 + tm_local->tm_mon << "/"; + mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_mday << "/"; + mOutput << std::setw(4) << std::setfill('0') << 1900 + tm_local->tm_year << "-"; + mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_hour << ":"; + mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_min << ":"; + mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_sec << "] "; // std::stringbuf::str() gets the string contents of the buffer // insert the buffer contents pre-appended by the appropriate prefix into the stream mOutput << mPrefix << str(); - // set the buffer to empty - str(""); - // flush the stream - mOutput.flush(); } + // set the buffer to empty + str(""); + // flush the stream + mOutput.flush(); } void setShouldLog(bool shouldLog) @@ -96,8 +110,8 @@ class LogStreamConsumerBuffer : public std::stringbuf private: std::ostream& mOutput; std::string mPrefix; - bool mShouldLog; -}; + bool mShouldLog{}; +}; // class LogStreamConsumerBuffer //! //! \class LogStreamConsumerBase @@ -112,8 +126,9 @@ class LogStreamConsumerBase } protected: + std::mutex mLogMutex; LogStreamConsumerBuffer mBuffer; -}; +}; // class LogStreamConsumerBase //! //! 
\class LogStreamConsumer @@ -127,9 +142,11 @@ class LogStreamConsumerBase class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream { public: + //! //! \brief Creates a LogStreamConsumer which logs messages with level severity. //! Reportable severity determines if the messages are severe enough to be logged. - LogStreamConsumer(Severity reportableSeverity, Severity severity) + //! + LogStreamConsumer(nvinfer1::ILogger::Severity reportableSeverity, nvinfer1::ILogger::Severity severity) : LogStreamConsumerBase(severityOstream(severity), severityPrefix(severity), severity <= reportableSeverity) , std::ostream(&mBuffer) // links the stream buffer with the stream , mShouldLog(severity <= reportableSeverity) @@ -137,13 +154,18 @@ class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream { } - LogStreamConsumer(LogStreamConsumer&& other) + LogStreamConsumer(LogStreamConsumer&& other) noexcept : LogStreamConsumerBase(severityOstream(other.mSeverity), severityPrefix(other.mSeverity), other.mShouldLog) , std::ostream(&mBuffer) // links the stream buffer with the stream , mShouldLog(other.mShouldLog) , mSeverity(other.mSeverity) { } + LogStreamConsumer(const LogStreamConsumer& other) = delete; + LogStreamConsumer() = delete; + ~LogStreamConsumer() override = default; + LogStreamConsumer& operator=(const LogStreamConsumer&) = delete; + LogStreamConsumer& operator=(LogStreamConsumer&&) = delete; void setReportableSeverity(Severity reportableSeverity) { @@ -151,6 +173,16 @@ class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream mBuffer.setShouldLog(mShouldLog); } + std::mutex& getMutex() + { + return mLogMutex; + } + + bool getShouldLog() const + { + return mShouldLog; + } + private: static std::ostream& severityOstream(Severity severity) { @@ -172,8 +204,49 @@ class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream bool mShouldLog; Severity mSeverity; -}; +}; // class LogStreamConsumer + 
+template +LogStreamConsumer& operator<<(LogStreamConsumer& logger, const T& obj) +{ + if (logger.getShouldLog()) + { + std::lock_guard guard(logger.getMutex()); + auto& os = static_cast(logger); + os << obj; + } + return logger; +} + +//! +//! Special handling std::endl +//! +inline LogStreamConsumer& operator<<(LogStreamConsumer& logger, std::ostream& (*f)(std::ostream&) ) +{ + if (logger.getShouldLog()) + { + std::lock_guard guard(logger.getMutex()); + auto& os = static_cast(logger); + os << f; + } + return logger; +} + +inline LogStreamConsumer& operator<<(LogStreamConsumer& logger, const nvinfer1::Dims& dims) +{ + if (logger.getShouldLog()) + { + std::lock_guard guard(logger.getMutex()); + auto& os = static_cast(logger); + for (int32_t i = 0; i < dims.nbDims; ++i) + { + os << (i ? "x" : "") << dims.d[i]; + } + } + return logger; +} +//! //! \class Logger //! //! \brief Class which manages logging of TensorRT tools and samples @@ -197,11 +270,11 @@ class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream //! In the future (once all samples are updated to use Logger::getTRTLogger() to access the ILogger) we can refactor the //! class to eliminate the inheritance and instead make the nvinfer1::ILogger implementation a member of the Logger //! object. - +//! class Logger : public nvinfer1::ILogger { public: - Logger(Severity severity = Severity::kWARNING) + explicit Logger(Severity severity = Severity::kWARNING) : mReportableSeverity(severity) { } @@ -219,13 +292,13 @@ class Logger : public nvinfer1::ILogger }; //! - //! \brief Forward-compatible method for retrieving the nvinfer::ILogger associated with this Logger + //! \brief Forward-compatible method for retrieving the nvinfer1::ILogger associated with this Logger //! \return The nvinfer1::ILogger associated with this Logger //! //! TODO Once all samples are updated to use this method to register the logger with TensorRT, //! we can eliminate the inheritance of Logger from ILogger //! 
- nvinfer1::ILogger& getTRTLogger() + nvinfer1::ILogger& getTRTLogger() noexcept { return *this; } @@ -236,7 +309,7 @@ class Logger : public nvinfer1::ILogger //! Note samples should not be calling this function directly; it will eventually go away once we eliminate the //! inheritance from nvinfer1::ILogger //! - void log(Severity severity, const char* msg) override + void log(Severity severity, const char* msg) noexcept override { LogStreamConsumer(mReportableSeverity, severity) << "[TRT] " << std::string(msg) << std::endl; } @@ -246,7 +319,7 @@ class Logger : public nvinfer1::ILogger //! //! \param severity The logger will only emit messages that have severity of this level or higher. //! - void setReportableSeverity(Severity severity) + void setReportableSeverity(Severity severity) noexcept { mReportableSeverity = severity; } @@ -281,7 +354,7 @@ class Logger : public nvinfer1::ILogger //! //! \brief Define a test for logging //! - //! \param[in] name The name of the test. This should be a string starting with + //! \param[in] name The name of the test. This should be a string starting with //! "TensorRT" and containing dot-separated strings containing //! the characters [A-Za-z0-9_]. //! For example, "TensorRT.sample_googlenet" @@ -303,10 +376,14 @@ class Logger : public nvinfer1::ILogger //! \param[in] argv The array of command-line arguments (given as C strings) //! //! \return a TestAtom that can be used in Logger::reportTest{Start,End}(). - static TestAtom defineTest(const std::string& name, int argc, char const* const* argv) + //! + static TestAtom defineTest(const std::string& name, int32_t argc, char const* const* argv) { + // Append TensorRT version as info + const std::string vname = name + " [TensorRT v" + std::to_string(NV_TENSORRT_VERSION) + "] [b" + + std::to_string(NV_TENSORRT_BUILD) + "]"; auto cmdline = genCmdlineString(argc, argv); - return defineTest(name, cmdline); + return defineTest(vname, cmdline); } //! 
@@ -332,32 +409,32 @@ class Logger : public nvinfer1::ILogger //! \param[in] result The result of the test. Should be one of TestResult::kPASSED, //! TestResult::kFAILED, TestResult::kWAIVED //! - static void reportTestEnd(const TestAtom& testAtom, TestResult result) + static void reportTestEnd(TestAtom const& testAtom, TestResult result) { assert(result != TestResult::kRUNNING); assert(testAtom.mStarted); reportTestResult(testAtom, result); } - static int reportPass(const TestAtom& testAtom) + static int32_t reportPass(TestAtom const& testAtom) { reportTestEnd(testAtom, TestResult::kPASSED); return EXIT_SUCCESS; } - static int reportFail(const TestAtom& testAtom) + static int32_t reportFail(TestAtom const& testAtom) { reportTestEnd(testAtom, TestResult::kFAILED); return EXIT_FAILURE; } - static int reportWaive(const TestAtom& testAtom) + static int32_t reportWaive(TestAtom const& testAtom) { reportTestEnd(testAtom, TestResult::kWAIVED); return EXIT_SUCCESS; } - static int reportTest(const TestAtom& testAtom, bool pass) + static int32_t reportTest(TestAtom const& testAtom, bool pass) { return pass ? reportPass(testAtom) : reportFail(testAtom); } @@ -410,7 +487,7 @@ class Logger : public nvinfer1::ILogger //! //! \brief method that implements logging test results //! - static void reportTestResult(const TestAtom& testAtom, TestResult result) + static void reportTestResult(TestAtom const& testAtom, TestResult result) { severityOstream(Severity::kINFO) << "&&&& " << testResultString(result) << " " << testAtom.mName << " # " << testAtom.mCmdline << std::endl; @@ -419,24 +496,25 @@ class Logger : public nvinfer1::ILogger //! //! \brief generate a command line string from the given (argc, argv) values //! 
- static std::string genCmdlineString(int argc, char const* const* argv) + static std::string genCmdlineString(int32_t argc, char const* const* argv) { std::stringstream ss; - for (int i = 0; i < argc; i++) + for (int32_t i = 0; i < argc; i++) { if (i > 0) + { ss << " "; + } ss << argv[i]; } return ss.str(); } Severity mReportableSeverity; -}; +}; // class Logger namespace { - //! //! \brief produces a LogStreamConsumer object that can be used to log messages of severity kVERBOSE //! @@ -487,7 +565,7 @@ inline LogStreamConsumer LOG_ERROR(const Logger& logger) //! //! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINTERNAL_ERROR -// ("fatal" severity) +//! ("fatal" severity) //! //! Example usage: //! @@ -497,7 +575,6 @@ inline LogStreamConsumer LOG_FATAL(const Logger& logger) { return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINTERNAL_ERROR); } - } // anonymous namespace - +} // namespace sample #endif // TENSORRT_LOGGING_H diff --git a/src/Detector/tensorrt_onnx/common/parserOnnxConfig.h b/src/Detector/tensorrt_onnx/common/parserOnnxConfig.h new file mode 100644 index 000000000..67ee6c717 --- /dev/null +++ b/src/Detector/tensorrt_onnx/common/parserOnnxConfig.h @@ -0,0 +1,145 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef PARSER_ONNX_CONFIG_H +#define PARSER_ONNX_CONFIG_H + +#include +#include +#include + +#include "NvInfer.h" +#include "NvOnnxConfig.h" +#include "NvOnnxParser.h" + +#define ONNX_DEBUG 1 + +/** + * \class ParserOnnxConfig + * \brief Configuration Manager Class Concrete Implementation + * + * \note: + * + */ + +class ParserOnnxConfig : public nvonnxparser::IOnnxConfig +{ + +protected: + std::string mModelFilename{}; + std::string mTextFilename{}; + std::string mFullTextFilename{}; + nvinfer1::DataType mModelDtype; + nvonnxparser::IOnnxConfig::Verbosity mVerbosity; + bool mPrintLayercInfo; + +public: + ParserOnnxConfig() + : mModelDtype(nvinfer1::DataType::kFLOAT) + , mVerbosity(static_cast(nvinfer1::ILogger::Severity::kWARNING)) + , mPrintLayercInfo(false) + { +#ifdef ONNX_DEBUG + if (isDebug()) + { + std::cout << " ParserOnnxConfig::ctor(): " << this << "\t" << std::endl; + } +#endif + } + + ~ParserOnnxConfig() override + { +#ifdef ONNX_DEBUG + if (isDebug()) + { + std::cout << "ParserOnnxConfig::dtor(): " << this << std::endl; + } +#endif + } + +public: + void setModelDtype(const nvinfer1::DataType modelDtype) noexcept override + { + mModelDtype = modelDtype; + } + + nvinfer1::DataType getModelDtype() const noexcept override + { + return mModelDtype; + } + + const char* getModelFileName() const noexcept override + { + return mModelFilename.c_str(); + } + void setModelFileName(const char* onnxFilename) noexcept override + { + mModelFilename = std::string(onnxFilename); + } + nvonnxparser::IOnnxConfig::Verbosity getVerbosityLevel() const noexcept override + { + return mVerbosity; + } + void addVerbosity() noexcept override + { + ++mVerbosity; + } + void reduceVerbosity() noexcept override + { + --mVerbosity; + } + void setVerbosityLevel(nvonnxparser::IOnnxConfig::Verbosity verbosity) noexcept override + { + mVerbosity = verbosity; + } + + const char* getTextFileName() const noexcept override + { + return mTextFilename.c_str(); + } + void 
setTextFileName(const char* textFilename) noexcept override + { + mTextFilename = std::string(textFilename); + } + const char* getFullTextFileName() const noexcept override + { + return mFullTextFilename.c_str(); + } + void setFullTextFileName(const char* fullTextFilename) noexcept override + { + mFullTextFilename = std::string(fullTextFilename); + } + bool getPrintLayerInfo() const noexcept override + { + return mPrintLayercInfo; + } + void setPrintLayerInfo(bool src) noexcept override + { + mPrintLayercInfo = src; + } //!< get the boolean variable corresponding to the Layer Info, see getPrintLayerInfo() + + virtual bool isDebug() const noexcept + { +#if ONNX_DEBUG + return (std::getenv("ONNX_DEBUG") ? true : false); +#else + return false; +#endif + } +}; // class ParserOnnxConfig + +#endif diff --git a/src/Detector/tensorrt_onnx/common/safeCommon.h b/src/Detector/tensorrt_onnx/common/safeCommon.h new file mode 100644 index 000000000..2814c44e0 --- /dev/null +++ b/src/Detector/tensorrt_onnx/common/safeCommon.h @@ -0,0 +1,384 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef TENSORRT_SAFE_COMMON_H +#define TENSORRT_SAFE_COMMON_H + +#include "cuda_runtime.h" +#include "sampleEntrypoints.h" +#include +#include +#include +#include +#include +#include +#include +#include + +// For safeLoadLibrary +#ifdef _MSC_VER +// Needed so that the max/min definitions in windows.h do not conflict with std::max/min. +#define NOMINMAX +#include +#undef NOMINMAX +#else +#include +#endif +#if IS_QNX_SAFE +#include +#include +#endif // IS_QNX_SAFE + +#undef CHECK +#define CHECK(status) \ + do \ + { \ + auto ret = (status); \ + if (ret != 0) \ + { \ + std::cerr << "Cuda failure: " << ret << std::endl; \ + exit(EXIT_FAILURE); \ + } \ + } while (0) + +#undef SAFE_ASSERT +#define SAFE_ASSERT(condition) \ + do \ + { \ + if (!(condition)) \ + { \ + std::cerr << "Assertion failure: " << #condition << std::endl; \ + exit(EXIT_FAILURE); \ + } \ + } while (0) + +//! Locate path to file, given its filename or filepath suffix and possible dirs it might lie in. +//! Function will also walk back MAX_DEPTH dirs from CWD to check for such a file path. 
+inline std::string locateFile( + const std::string& filepathSuffix, const std::vector& directories, bool reportError = true) +{ + const int MAX_DEPTH{10}; + bool found{false}; + std::string filepath; + + for (auto& dir : directories) + { + if (!dir.empty() && dir.back() != '/') + { +#ifdef _MSC_VER + filepath = dir + "\\" + filepathSuffix; +#else + filepath = dir + "/" + filepathSuffix; +#endif + } + else + { + filepath = dir + filepathSuffix; + } + + for (int i = 0; i < MAX_DEPTH && !found; i++) + { + const std::ifstream checkFile(filepath); + found = checkFile.is_open(); + if (found) + { + break; + } + + filepath = "../" + filepath; // Try again in parent dir + } + + if (found) + { + break; + } + + filepath.clear(); + } + + // Could not find the file + if (filepath.empty()) + { + const std::string dirList = std::accumulate(directories.begin() + 1, directories.end(), directories.front(), + [](const std::string& a, const std::string& b) { return a + "\n\t" + b; }); + std::cout << "Could not find " << filepathSuffix << " in data directories:\n\t" << dirList << std::endl; + + if (reportError) + { + std::cout << "&&&& FAILED" << std::endl; + exit(EXIT_FAILURE); + } + } + + return filepath; +} + +inline void readPGMFile(const std::string& fileName, uint8_t* buffer, int32_t inH, int32_t inW) +{ + std::ifstream infile(fileName, std::ifstream::binary); + SAFE_ASSERT(infile.is_open() && "Attempting to read from a file that is not open."); + std::string magic, w, h, max; + infile >> magic >> w >> h >> max; + infile.seekg(1, infile.cur); + infile.read(reinterpret_cast(buffer), inH * inW); +} + +namespace samplesCommon +{ +template +inline std::shared_ptr infer_object(T* obj) +{ + if (!obj) + { + throw std::runtime_error("Failed to create object"); + } + return std::shared_ptr(obj); +} + +inline uint32_t elementSize(nvinfer1::DataType t) +{ + switch (t) + { +#if (NV_TENSORRT_MAJOR > 8) + case nvinfer1::DataType::kINT64: return 8; +#endif + case nvinfer1::DataType::kINT32: + 
case nvinfer1::DataType::kFLOAT: return 4; + case nvinfer1::DataType::kHALF: +#if (NV_TENSORRT_MAJOR > 8) + case nvinfer1::DataType::kBF16: return 2; +#endif + case nvinfer1::DataType::kINT8: + case nvinfer1::DataType::kUINT8: + case nvinfer1::DataType::kBOOL: + case nvinfer1::DataType::kFP8: return 1; +#if (NV_TENSORRT_MAJOR > 8) + case nvinfer1::DataType::kINT4: + SAFE_ASSERT(false && "Element size is not implemented for sub-byte data-types"); +#endif + } + return 0; +} + +template +inline A divUp(A x, B n) +{ + return (x + n - 1) / n; +} + +inline int64_t volume(nvinfer1::Dims const& d) +{ + return std::accumulate(d.d, d.d + d.nbDims, int64_t{1}, std::multiplies{}); +} + +//! Return m rounded up to nearest multiple of n +template +inline T1 roundUp(T1 m, T2 n) +{ + static_assert(std::is_integral::value && std::is_integral::value, "arguments must be integers"); + static_assert(std::is_signed::value == std::is_signed::value, "mixed signedness not allowed"); + static_assert(sizeof(T1) >= sizeof(T2), "first type must be as least as wide as second type"); + return ((m + n - 1) / n) * n; +} + +//! comps is the number of components in a vector. Ignored if vecDim < 0. 
+inline int64_t volume(nvinfer1::Dims dims, int32_t vecDim, int32_t comps, int32_t batch) +{ + if (vecDim >= 0) + { + dims.d[vecDim] = roundUp(dims.d[vecDim], comps); + } + return samplesCommon::volume(dims) * std::max(batch, 1); +} + +inline int32_t getSMVersion() +{ +#if 0 + // Use default value for 4090 + int32_t major{8}; + int32_t minor{9}; +#else + int32_t major{}; + int32_t minor{}; + int32_t deviceIndex{}; + CHECK(cudaGetDevice(&deviceIndex)); + CHECK(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, deviceIndex)); + CHECK(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, deviceIndex)); +#endif + return ((major << 8) | minor); +} + +inline bool isSMSafe() +{ + const int32_t smVersion = getSMVersion(); + return smVersion == 0x0700 || smVersion == 0x0705 || smVersion == 0x0800 || smVersion == 0x0806 + || smVersion == 0x0807; +} + +inline int32_t calculateSoftmax(float* const prob, int32_t const numDigits) +{ + SAFE_ASSERT(prob != nullptr); + SAFE_ASSERT(numDigits == 10); + float sum{0.0F}; + std::transform(prob, prob + numDigits, prob, [&sum](float v) -> float { + sum += exp(v); + return exp(v); + }); + + SAFE_ASSERT(sum != 0.0F); + std::transform(prob, prob + numDigits, prob, [sum](float v) -> float { return v / sum; }); + int32_t idx = std::max_element(prob, prob + numDigits) - prob; + return idx; +} + +//! +//! \class TrtCudaGraphSafe +//! \brief Managed CUDA graph +//! 
+class TrtCudaGraphSafe +{ +public: + explicit TrtCudaGraphSafe() = default; + + TrtCudaGraphSafe(const TrtCudaGraphSafe&) = delete; + + TrtCudaGraphSafe& operator=(const TrtCudaGraphSafe&) = delete; + + TrtCudaGraphSafe(TrtCudaGraphSafe&&) = delete; + + TrtCudaGraphSafe& operator=(TrtCudaGraphSafe&&) = delete; + + ~TrtCudaGraphSafe() + { + if (mGraphExec) + { + cudaGraphExecDestroy(mGraphExec); + } + } + + void beginCapture(cudaStream_t& stream) + { + // cudaStreamCaptureModeGlobal is the only allowed mode in SAFE CUDA + CHECK(cudaStreamBeginCapture(stream, cudaStreamCaptureModeGlobal)); + } + + bool launch(cudaStream_t& stream) + { + return cudaGraphLaunch(mGraphExec, stream) == cudaSuccess; + } + + void endCapture(cudaStream_t& stream) + { + CHECK(cudaStreamEndCapture(stream, &mGraph)); + CHECK(cudaGraphInstantiate(&mGraphExec, mGraph, nullptr, nullptr, 0)); + CHECK(cudaGraphDestroy(mGraph)); + } + + void endCaptureOnError(cudaStream_t& stream) + { + // There are two possibilities why stream capture would fail: + // (1) stream is in cudaErrorStreamCaptureInvalidated state. + // (2) TRT reports a failure. + // In case (1), the returning mGraph should be nullptr. + // In case (2), the returning mGraph is not nullptr, but it should not be used. + const auto ret = cudaStreamEndCapture(stream, &mGraph); + if (ret == cudaErrorStreamCaptureInvalidated) + { + SAFE_ASSERT(mGraph == nullptr); + } + else + { + SAFE_ASSERT(ret == cudaSuccess); + SAFE_ASSERT(mGraph != nullptr); + CHECK(cudaGraphDestroy(mGraph)); + mGraph = nullptr; + } + // Clean up any CUDA error. + cudaGetLastError(); + sample::gLogError << "The CUDA graph capture on the stream has failed." 
<< std::endl; + } + +private: + cudaGraph_t mGraph{}; + cudaGraphExec_t mGraphExec{}; +}; + +inline void safeLoadLibrary(const std::string& path) +{ +#ifdef _MSC_VER + void* handle = LoadLibraryA(path.c_str()); +#else + int32_t flags{RTLD_LAZY}; + void* handle = dlopen(path.c_str(), flags); +#endif + if (handle == nullptr) + { +#ifdef _MSC_VER + sample::gLogError << "Could not load plugin library: " << path << std::endl; +#else + sample::gLogError << "Could not load plugin library: " << path << ", due to: " << dlerror() << std::endl; +#endif + } +} + +inline std::vector safeSplitString(std::string str, char delimiter = ',') +{ + std::vector splitVect; + std::stringstream ss(str); + std::string substr; + + while (ss.good()) + { + getline(ss, substr, delimiter); + splitVect.emplace_back(std::move(substr)); + } + return splitVect; +} + +} // namespace samplesCommon + +namespace safetyCompliance +{ +inline void initSafeCuda() +{ + // According to CUDA initialization in NVIDIA CUDA SAFETY API REFERENCE FOR DRIVE OS + // We will need to do the following in order + // 1. Initialize the calling thread with CUDA specific information (Call any CUDA RT API identified as init) + // 2. Query/Configure and choose the desired CUDA device + // 3. CUDA context initialization. 
(Call cudaDeviceGetLimit or cuCtxCreate) + size_t stackSizeLimit = 0; + int32_t deviceIndex = 0; + CHECK(cudaGetDevice(&deviceIndex)); + CHECK(cudaDeviceGetLimit(&stackSizeLimit, cudaLimitStackSize)); +#if IS_QNX_SAFE + CHECK(cudaSafeExSelectAPIMode(cudaSafeExAPIModeAsilB)); +#endif // IS_QNX_SAFE +} + +inline void setPromgrAbility() +{ +#if IS_QNX_SAFE + // Comply with DEEPLRN_RES_117 on QNX-safe by dropping PROCMGR_AID_MEM_PHYS ability and locking out any further + // changes + procmgr_ability( + 0, PROCMGR_ADN_NONROOT | PROCMGR_AOP_DENY | PROCMGR_AOP_LOCK | PROCMGR_AID_MEM_PHYS, PROCMGR_AID_EOL); +#endif // IS_QNX_SAFE +} + +} // namespace safetyCompliance + +#endif // TENSORRT_SAFE_COMMON_H diff --git a/src/Detector/tensorrt_onnx/common/sampleConfig.h b/src/Detector/tensorrt_onnx/common/sampleConfig.h new file mode 100644 index 000000000..801a268a4 --- /dev/null +++ b/src/Detector/tensorrt_onnx/common/sampleConfig.h @@ -0,0 +1,331 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef SampleConfig_H +#define SampleConfig_H + +#include +#include +#include + +#include "NvInfer.h" +#include "NvOnnxConfig.h" +class SampleConfig : public nvonnxparser::IOnnxConfig +{ +public: + enum class InputDataFormat : int + { + kASCII = 0, + kPPM = 1 + }; + +private: + std::string mModelFilename; + std::string mEngineFilename; + std::string mTextFilename; + std::string mFullTextFilename; + std::string mImageFilename; + std::string mReferenceFilename; + std::string mOutputFilename; + std::string mCalibrationFilename; + std::string mTimingCacheFilename; + int64_t mLabel{-1}; + int64_t mMaxBatchSize{32}; + int64_t mCalibBatchSize{0}; + int64_t mMaxNCalibBatch{0}; + int64_t mFirstCalibBatch{0}; + int64_t mUseDLACore{-1}; + nvinfer1::DataType mModelDtype{nvinfer1::DataType::kFLOAT}; + bool mTF32{true}; + Verbosity mVerbosity{static_cast(nvinfer1::ILogger::Severity::kWARNING)}; + bool mPrintLayercInfo{false}; + bool mDebugBuilder{false}; + InputDataFormat mInputDataFormat{InputDataFormat::kASCII}; + uint64_t mTopK{0}; + float mFailurePercentage{-1.0F}; + float mTolerance{0.0F}; + float mAbsTolerance{1e-5F}; + +public: + SampleConfig() + { +#ifdef ONNX_DEBUG + if (isDebug()) + { + std::cout << " SampleConfig::ctor(): " << this << "\t" << std::endl; + } +#endif + } + + ~SampleConfig() override + { +#ifdef ONNX_DEBUG + if (isDebug()) + { + std::cout << "SampleConfig::dtor(): " << this << std::endl; + } +#endif + } + +public: + void setModelDtype(const nvinfer1::DataType mdt) noexcept override + { + mModelDtype = mdt; + } + + nvinfer1::DataType getModelDtype() const noexcept override + { + return mModelDtype; + } + + bool getTF32() const noexcept + { + return mTF32; + } + + void setTF32(bool enabled) noexcept + { + mTF32 = enabled; + } + + const char* getModelFileName() const noexcept override + { + return mModelFilename.c_str(); + } + + void setModelFileName(const char* onnxFilename) noexcept override + { + mModelFilename = std::string(onnxFilename); + } + 
Verbosity getVerbosityLevel() const noexcept override + { + return mVerbosity; + } + void addVerbosity() noexcept override + { + ++mVerbosity; + } + void reduceVerbosity() noexcept override + { + --mVerbosity; + } + void setVerbosityLevel(Verbosity v) noexcept override + { + mVerbosity = v; + } + const char* getEngineFileName() const noexcept + { + return mEngineFilename.c_str(); + } + void setEngineFileName(const char* engineFilename) noexcept + { + mEngineFilename = std::string(engineFilename); + } + const char* getTextFileName() const noexcept override + { + return mTextFilename.c_str(); + } + void setTextFileName(const char* textFilename) noexcept override + { + mTextFilename = std::string(textFilename); + } + const char* getFullTextFileName() const noexcept override + { + return mFullTextFilename.c_str(); + } + void setFullTextFileName(const char* fullTextFilename) noexcept override + { + mFullTextFilename = std::string(fullTextFilename); + } + void setLabel(int64_t label) noexcept + { + mLabel = label; + } //!< set the Label + + int64_t getLabel() const noexcept + { + return mLabel; + } //!< get the Label + + bool getPrintLayerInfo() const noexcept override + { + return mPrintLayercInfo; + } + + void setPrintLayerInfo(bool b) noexcept override + { + mPrintLayercInfo = b; + } //!< get the boolean variable corresponding to the Layer Info, see getPrintLayerInfo() + + void setMaxBatchSize(int64_t maxBatchSize) noexcept + { + mMaxBatchSize = maxBatchSize; + } //!< set the Max Batch Size + int64_t getMaxBatchSize() const noexcept + { + return mMaxBatchSize; + } //!< get the Max Batch Size + + void setCalibBatchSize(int64_t CalibBatchSize) noexcept + { + mCalibBatchSize = CalibBatchSize; + } //!< set the calibration batch size + int64_t getCalibBatchSize() const noexcept + { + return mCalibBatchSize; + } //!< get calibration batch size + + void setMaxNCalibBatch(int64_t MaxNCalibBatch) noexcept + { + mMaxNCalibBatch = MaxNCalibBatch; + } //!< set Max Number of 
Calibration Batches + int64_t getMaxNCalibBatch() const noexcept + { + return mMaxNCalibBatch; + } //!< get the Max Number of Calibration Batches + + void setFirstCalibBatch(int64_t FirstCalibBatch) noexcept + { + mFirstCalibBatch = FirstCalibBatch; + } //!< set the first calibration batch + int64_t getFirstCalibBatch() const noexcept + { + return mFirstCalibBatch; + } //!< get the first calibration batch + + void setUseDLACore(int64_t UseDLACore) noexcept + { + mUseDLACore = UseDLACore; + } //!< set the DLA core to use + int64_t getUseDLACore() const noexcept + { + return mUseDLACore; + } //!< get the DLA core to use + + void setDebugBuilder() noexcept + { + mDebugBuilder = true; + } //!< enable the Debug info, while building the engine. + bool getDebugBuilder() const noexcept + { + return mDebugBuilder; + } //!< get the boolean variable, corresponding to the debug builder + + const char* getImageFileName() const noexcept //!< set Image file name (PPM or ASCII) + { + return mImageFilename.c_str(); + } + void setImageFileName(const char* imageFilename) noexcept //!< get the Image file name + { + mImageFilename = std::string(imageFilename); + } + const char* getReferenceFileName() const noexcept + { + return mReferenceFilename.c_str(); + } + void setReferenceFileName(const char* referenceFilename) noexcept //!< set reference file name + { + mReferenceFilename = std::string(referenceFilename); + } + + void setInputDataFormat(InputDataFormat idt) noexcept + { + mInputDataFormat = idt; + } //!< specifies expected data format of the image file (PPM or ASCII) + InputDataFormat getInputDataFormat() const noexcept + { + return mInputDataFormat; + } //!< returns the expected data format of the image file. 
+ + const char* getOutputFileName() const noexcept //!< specifies the file to save the results + { + return mOutputFilename.c_str(); + } + void setOutputFileName(const char* outputFilename) noexcept //!< get the output file name + { + mOutputFilename = std::string(outputFilename); + } + + const char* getCalibrationFileName() const noexcept + { + return mCalibrationFilename.c_str(); + } //!< specifies the file containing the list of image files for int8 calibration + void setCalibrationFileName(const char* calibrationFilename) noexcept //!< get the int 8 calibration list file name + { + mCalibrationFilename = std::string(calibrationFilename); + } + + uint64_t getTopK() const noexcept + { + return mTopK; + } + void setTopK(uint64_t topK) noexcept + { + mTopK = topK; + } //!< If this options is specified, return the K top probabilities. + + float getFailurePercentage() const noexcept + { + return mFailurePercentage; + } + + void setFailurePercentage(float f) noexcept + { + mFailurePercentage = f; + } + + float getAbsoluteTolerance() const noexcept + { + return mAbsTolerance; + } + + void setAbsoluteTolerance(float a) noexcept + { + mAbsTolerance = a; + } + + float getTolerance() const noexcept + { + return mTolerance; + } + + void setTolerance(float t) noexcept + { + mTolerance = t; + } + + const char* getTimingCacheFilename() const noexcept + { + return mTimingCacheFilename.c_str(); + } + + void setTimingCacheFileName(const char* timingCacheFilename) noexcept + { + mTimingCacheFilename = std::string(timingCacheFilename); + } + + bool isDebug() const noexcept + { +#if ONNX_DEBUG + return (std::getenv("ONNX_DEBUG") ? 
true : false); +#else + return false; +#endif + } +}; // class SampleConfig + +#endif diff --git a/src/Detector/tensorrt_onnx/common/sampleDevice.cpp b/src/Detector/tensorrt_onnx/common/sampleDevice.cpp new file mode 100644 index 000000000..7964aeb5d --- /dev/null +++ b/src/Detector/tensorrt_onnx/common/sampleDevice.cpp @@ -0,0 +1,133 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "sampleDevice.h" + +#include + +namespace sample +{ + +void cudaCheck(cudaError_t ret, std::ostream& err) +{ + if (ret != cudaSuccess) + { + err << "Cuda failure: " << cudaGetErrorString(ret) << std::endl; + exit(EXIT_FAILURE); + } +} + +// Construct GPU UUID string in the same format as nvidia-smi does. 
+std::string getUuidString(cudaUUID_t uuid) +{ + constexpr int32_t kUUID_SIZE = sizeof(cudaUUID_t); + static_assert(kUUID_SIZE == 16, "Unexpected size for cudaUUID_t!"); + + std::ostringstream ss; + std::vector const splits = {0, 4, 6, 8, 10, kUUID_SIZE}; + + ss << "GPU" << std::hex << std::setfill('0'); + for (int32_t splitIdx = 0; splitIdx < static_cast(splits.size()) - 1; ++splitIdx) + { + ss << "-"; + for (int32_t byteIdx = splits[splitIdx]; byteIdx < splits[splitIdx + 1]; ++byteIdx) + { + ss << std::setw(2) << +static_cast(uuid.bytes[byteIdx]); + } + } + return ss.str(); +} + +void setCudaDevice(int32_t device, std::ostream& os) +{ +#if !TRT_WINML + os << "=== Device Information ===" << std::endl; + + // Get the number of visible GPUs. + int32_t nbDevices{-1}; + cudaCheck(cudaGetDeviceCount(&nbDevices)); + + if (nbDevices <= 0) + { + os << "Cannot find any available devices (GPUs)!" << std::endl; + exit(EXIT_FAILURE); + } + + // Print out the GPU name and PCIe bus ID of each GPU. + os << "Available Devices: " << std::endl; + cudaDeviceProp properties; + for (int32_t deviceIdx = 0; deviceIdx < nbDevices; ++deviceIdx) + { + cudaDeviceProp tempProperties; + cudaCheck(cudaGetDeviceProperties(&tempProperties, deviceIdx)); + + // clang-format off + os << " Device " << deviceIdx << ": \"" << tempProperties.name << "\" UUID: " + << getUuidString(tempProperties.uuid) << std::endl; + // clang-format on + + // Record the properties of the desired GPU. + if (deviceIdx == device) + { + properties = tempProperties; + } + } + + // Exit with error if the requested device ID does not exist. + if (device < 0 || device >= nbDevices) + { + os << "Cannot find device ID " << device << "!" << std::endl; + exit(EXIT_FAILURE); + } + + // Set to the corresponding GPU. 
+ cudaCheck(cudaSetDevice(device)); + + // clang-format off + os << "Selected Device: " << properties.name << std::endl; + os << "Selected Device ID: " << device << std::endl; + os << "Selected Device UUID: " << getUuidString(properties.uuid) << std::endl; + os << "Compute Capability: " << properties.major << "." << properties.minor << std::endl; + os << "SMs: " << properties.multiProcessorCount << std::endl; + os << "Device Global Memory: " << (properties.totalGlobalMem >> 20) << " MiB" << std::endl; + os << "Shared Memory per SM: " << (properties.sharedMemPerMultiprocessor >> 10) << " KiB" << std::endl; + os << "Memory Bus Width: " << properties.memoryBusWidth << " bits" + << " (ECC " << (properties.ECCEnabled != 0 ? "enabled" : "disabled") << ")" << std::endl; + os << "Application Compute Clock Rate: " << properties.clockRate / 1000000.0F << " GHz" << std::endl; + os << "Application Memory Clock Rate: " << properties.memoryClockRate / 1000000.0F << " GHz" << std::endl; + os << std::endl; + os << "Note: The application clock rates do not reflect the actual clock rates that the GPU is " + << "currently running at." << std::endl; + // clang-format on +#endif +} + +int32_t getCudaDriverVersion() +{ + int32_t version{-1}; + cudaCheck(cudaDriverGetVersion(&version)); + return version; +} + +int32_t getCudaRuntimeVersion() +{ + int32_t version{-1}; + cudaCheck(cudaRuntimeGetVersion(&version)); + return version; +} + +} // namespace sample diff --git a/src/Detector/tensorrt_onnx/common/sampleDevice.h b/src/Detector/tensorrt_onnx/common/sampleDevice.h new file mode 100644 index 000000000..d28f02ed5 --- /dev/null +++ b/src/Detector/tensorrt_onnx/common/sampleDevice.h @@ -0,0 +1,554 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef TRT_SAMPLE_DEVICE_H +#define TRT_SAMPLE_DEVICE_H + +#include +#include +#include +#include +#include + +#include "sampleUtils.h" + +namespace sample +{ + +//! Check if the CUDA return status shows any error. If so, exit the program immediately. +void cudaCheck(cudaError_t ret, std::ostream& err = std::cerr); + +class TrtCudaEvent; + +namespace +{ + +void cudaSleep(void* sleep) +{ + std::this_thread::sleep_for(std::chrono::duration(*static_cast(sleep))); +} + +} // namespace + +//! +//! \class TrtCudaStream +//! \brief Managed CUDA stream +//! +class TrtCudaStream +{ +public: + TrtCudaStream() + { + cudaCheck(cudaStreamCreate(&mStream)); + } + + TrtCudaStream(const TrtCudaStream&) = delete; + + TrtCudaStream& operator=(const TrtCudaStream&) = delete; + + TrtCudaStream(TrtCudaStream&&) = delete; + + TrtCudaStream& operator=(TrtCudaStream&&) = delete; + + ~TrtCudaStream() + { + cudaCheck(cudaStreamDestroy(mStream)); + } + + cudaStream_t get() const + { + return mStream; + } + + void synchronize() + { + cudaCheck(cudaStreamSynchronize(mStream)); + } + + void wait(TrtCudaEvent& event); + + void sleep(float* ms) + { + cudaCheck(cudaLaunchHostFunc(mStream, cudaSleep, ms)); + } + +private: + cudaStream_t mStream{}; +}; + +//! +//! \class TrtCudaEvent +//! \brief Managed CUDA event +//! +class TrtCudaEvent +{ +public: + explicit TrtCudaEvent(bool blocking = true) + { + const uint32_t flags = blocking ? 
cudaEventBlockingSync : cudaEventDefault; + cudaCheck(cudaEventCreateWithFlags(&mEvent, flags)); + } + + TrtCudaEvent(const TrtCudaEvent&) = delete; + + TrtCudaEvent& operator=(const TrtCudaEvent&) = delete; + + TrtCudaEvent(TrtCudaEvent&&) = delete; + + TrtCudaEvent& operator=(TrtCudaEvent&&) = delete; + + ~TrtCudaEvent() + { + cudaCheck(cudaEventDestroy(mEvent)); + } + + cudaEvent_t get() const + { + return mEvent; + } + + void record(const TrtCudaStream& stream) + { + cudaCheck(cudaEventRecord(mEvent, stream.get())); + } + + void synchronize() + { + cudaCheck(cudaEventSynchronize(mEvent)); + } + + // Returns time elapsed time in milliseconds + float operator-(const TrtCudaEvent& e) const + { + float time{0}; + cudaCheck(cudaEventElapsedTime(&time, e.get(), get())); + return time; + } + +private: + cudaEvent_t mEvent{}; +}; + +inline void TrtCudaStream::wait(TrtCudaEvent& event) +{ + cudaCheck(cudaStreamWaitEvent(mStream, event.get(), 0)); +} + +//! +//! \class TrtCudaGraph +//! \brief Managed CUDA graph +//! 
class TrtCudaGraph
{
public:
    explicit TrtCudaGraph() = default;

    TrtCudaGraph(const TrtCudaGraph&) = delete;

    TrtCudaGraph& operator=(const TrtCudaGraph&) = delete;

    TrtCudaGraph(TrtCudaGraph&&) = delete;

    TrtCudaGraph& operator=(TrtCudaGraph&&) = delete;

    ~TrtCudaGraph()
    {
        // Only the executable graph is owned at rest: endCapture() destroys the
        // intermediate cudaGraph_t as soon as it has been instantiated, and
        // endCaptureOnError() nulls it out, so mGraph needs no cleanup here.
        if (mGraphExec)
        {
            cudaGraphExecDestroy(mGraphExec);
        }
    }

    //! Start capturing work submitted to \p stream into a CUDA graph.
    void beginCapture(TrtCudaStream& stream)
    {
        cudaCheck(cudaStreamBeginCapture(stream.get(), cudaStreamCaptureModeThreadLocal));
    }

    //! Launch the previously instantiated graph on \p stream.
    //! Returns a bool instead of aborting via cudaCheck so callers can fall
    //! back to regular kernel enqueue when the launch fails.
    bool launch(TrtCudaStream& stream)
    {
        return cudaGraphLaunch(mGraphExec, stream.get()) == cudaSuccess;
    }

    //! Finish a successful capture on \p stream: instantiate the executable
    //! graph, then immediately release the intermediate cudaGraph_t handle.
    void endCapture(TrtCudaStream& stream)
    {
        cudaCheck(cudaStreamEndCapture(stream.get(), &mGraph));
        cudaCheck(cudaGraphInstantiate(&mGraphExec, mGraph, nullptr, nullptr, 0));
        cudaCheck(cudaGraphDestroy(mGraph));
    }

    //! Finish a capture that is known to have failed, leaving this object and
    //! the global CUDA error state clean so execution can continue un-graphed.
    void endCaptureOnError(TrtCudaStream& stream)
    {
        // There are two possibilities why stream capture would fail:
        // (1) stream is in cudaErrorStreamCaptureInvalidated state.
        // (2) TRT reports a failure.
        // In case (1), the returning mGraph should be nullptr.
        // In case (2), the returning mGraph is not nullptr, but it should not be used.
        const auto ret = cudaStreamEndCapture(stream.get(), &mGraph);
        if (ret == cudaErrorStreamCaptureInvalidated)
        {
            assert(mGraph == nullptr);
        }
        else
        {
            assert(ret == cudaSuccess);
            assert(mGraph != nullptr);
            cudaCheck(cudaGraphDestroy(mGraph));
            mGraph = nullptr;
        }
        // Clean up any CUDA error.
        cudaGetLastError();
        sample::gLogWarning << "The CUDA graph capture on the stream has failed." << std::endl;
    }

private:
    cudaGraph_t mGraph{};
    cudaGraphExec_t mGraphExec{};
};

//!
//! \class TrtCudaBuffer
//! \brief Managed buffer for host and device
//!
+template +class TrtCudaBuffer +{ +public: + TrtCudaBuffer() = default; + + TrtCudaBuffer(const TrtCudaBuffer&) = delete; + + TrtCudaBuffer& operator=(const TrtCudaBuffer&) = delete; + + TrtCudaBuffer(TrtCudaBuffer&& rhs) + { + reset(rhs.mPtr, rhs.mSize); + rhs.mPtr = nullptr; + rhs.mSize = 0; + } + + TrtCudaBuffer& operator=(TrtCudaBuffer&& rhs) + { + if (this != &rhs) + { + reset(rhs.mPtr, rhs.mSize); + rhs.mPtr = nullptr; + rhs.mSize = 0; + } + return *this; + } + + ~TrtCudaBuffer() + { + reset(); + } + + TrtCudaBuffer(size_t size) + { + A()(&mPtr, size); + mSize = size; + } + + void allocate(size_t size) + { + reset(); + A()(&mPtr, size); + mSize = size; + } + + void reset(void* ptr = nullptr, size_t size = 0) + { + if (mPtr) + { + D()(mPtr); + } + mPtr = ptr; + mSize = size; + } + + void* get() const + { + return mPtr; + } + + size_t getSize() const + { + return mSize; + } + +private: + void* mPtr{nullptr}; + size_t mSize{0}; +}; + +struct DeviceAllocator +{ + void operator()(void** ptr, size_t size) + { + cudaCheck(cudaMalloc(ptr, size)); + } +}; + +struct DeviceDeallocator +{ + void operator()(void* ptr) + { + cudaCheck(cudaFree(ptr)); + } +}; + +struct ManagedAllocator +{ + void operator()(void** ptr, size_t size) + { + cudaCheck(cudaMallocManaged(ptr, size)); + } +}; + +struct HostAllocator +{ + void operator()(void** ptr, size_t size) + { + cudaCheck(cudaMallocHost(ptr, size)); + } +}; + +struct HostDeallocator +{ + void operator()(void* ptr) + { + cudaCheck(cudaFreeHost(ptr)); + } +}; + +using TrtDeviceBuffer = TrtCudaBuffer; +using TrtManagedBuffer = TrtCudaBuffer; + +using TrtHostBuffer = TrtCudaBuffer; + +//! +//! \class MirroredBuffer +//! \brief Coupled host and device buffers +//! +class IMirroredBuffer +{ +public: + //! + //! Allocate memory for the mirrored buffer give the size + //! of the allocation. + //! + virtual void allocate(size_t size) = 0; + + //! + //! Get the pointer to the device side buffer. + //! + //! 
\return pointer to device memory or nullptr if uninitialized.
    //!
    virtual void* getDeviceBuffer() const = 0;

    //!
    //! Get the pointer to the host side buffer.
    //!
    //! \return pointer to host memory or nullptr if uninitialized.
    //!
    virtual void* getHostBuffer() const = 0;

    //!
    //! Copy the memory from host to device.
    //!
    virtual void hostToDevice(TrtCudaStream& stream) = 0;

    //!
    //! Copy the memory from device to host.
    //!
    virtual void deviceToHost(TrtCudaStream& stream) = 0;

    //!
    //! Interface to get the size of the memory
    //!
    //! \return the size of memory allocated.
    //!
    virtual size_t getSize() const = 0;

    //!
    //! Virtual destructor declaration
    //!
    virtual ~IMirroredBuffer() = default;

}; // class IMirroredBuffer

//!
//! Class to have a separate memory buffer for discrete device and host allocations.
//!
class DiscreteMirroredBuffer : public IMirroredBuffer
{
public:
    //! Allocate matching host and device buffers of \p size bytes each.
    void allocate(size_t size) override
    {
        mSize = size;
        mHostBuffer.allocate(size);
        mDeviceBuffer.allocate(size);
    }

    void* getDeviceBuffer() const override
    {
        return mDeviceBuffer.get();
    }

    void* getHostBuffer() const override
    {
        return mHostBuffer.get();
    }

    //! Async copy host -> device on \p stream; the caller must synchronize the
    //! stream before reusing the host buffer.
    void hostToDevice(TrtCudaStream& stream) override
    {
        cudaCheck(cudaMemcpyAsync(mDeviceBuffer.get(), mHostBuffer.get(), mSize, cudaMemcpyHostToDevice, stream.get()));
    }

    //! Async copy device -> host on \p stream; the caller must synchronize the
    //! stream before reading the host buffer.
    void deviceToHost(TrtCudaStream& stream) override
    {
        cudaCheck(cudaMemcpyAsync(mHostBuffer.get(), mDeviceBuffer.get(), mSize, cudaMemcpyDeviceToHost, stream.get()));
    }

    size_t getSize() const override
    {
        return mSize;
    }

private:
    size_t mSize{0};
    TrtHostBuffer mHostBuffer;
    TrtDeviceBuffer mDeviceBuffer;
}; // class DiscreteMirroredBuffer

//!
//! Class to have a unified memory buffer for embedded devices.
//!
+class UnifiedMirroredBuffer : public IMirroredBuffer +{ +public: + void allocate(size_t size) override + { + mSize = size; + mBuffer.allocate(size); + } + + void* getDeviceBuffer() const override + { + return mBuffer.get(); + } + + void* getHostBuffer() const override + { + return mBuffer.get(); + } + + void hostToDevice(TrtCudaStream& stream) override + { + // Does nothing since we are using unified memory. + } + + void deviceToHost(TrtCudaStream& stream) override + { + // Does nothing since we are using unified memory. + } + + size_t getSize() const override + { + return mSize; + } + +private: + size_t mSize{0}; + TrtManagedBuffer mBuffer; +}; // class UnifiedMirroredBuffer + +//! +//! Class to allocate memory for outputs with data-dependent shapes. The sizes of those are unknown so pre-allocation is +//! not possible. +//! +class OutputAllocator : public nvinfer1::IOutputAllocator +{ +public: + OutputAllocator(IMirroredBuffer* buffer) + : mBuffer(buffer) + { + } + + void* reallocateOutput( + char const* tensorName, void* currentMemory, uint64_t size, uint64_t alignment) noexcept override + { + // Some memory allocators return nullptr when allocating zero bytes, but TensorRT requires a non-null ptr + // even for empty tensors, so allocate a dummy byte. + size = std::max(size, static_cast(1)); + if (size > mSize) + { + mBuffer->allocate(roundUp(size, alignment)); + mSize = size; + } + return mBuffer->getDeviceBuffer(); + } + + //! 
IMirroredBuffer does not implement Async allocation, hence this is just a wrap around +#if (NV_TENSORRT_MAJOR > 8) + void* reallocateOutputAsync(char const* tensorName, void* currentMemory, uint64_t size, uint64_t alignment, + cudaStream_t /*stream*/) noexcept override + { + return reallocateOutput(tensorName, currentMemory, size, alignment); + } +#else + void* reallocateOutputAsync(char const* tensorName, void* currentMemory, uint64_t size, uint64_t alignment, + cudaStream_t /*stream*/) noexcept + { + return reallocateOutput(tensorName, currentMemory, size, alignment); + } +#endif + + void notifyShape(char const* tensorName, nvinfer1::Dims const& dims) noexcept override + { + mFinalDims = dims; + } + + IMirroredBuffer* getBuffer() + { + return mBuffer.get(); + } + + nvinfer1::Dims getFinalDims() + { + return mFinalDims; + } + + ~OutputAllocator() override {} + +private: + std::unique_ptr mBuffer; + uint64_t mSize{}; + nvinfer1::Dims mFinalDims; +}; + +//! Set the GPU to run the inference on. +void setCudaDevice(int32_t device, std::ostream& os); + +//! Get the CUDA version of the current CUDA driver. +int32_t getCudaDriverVersion(); + +//! Get the CUDA version of the current CUDA runtime. +int32_t getCudaRuntimeVersion(); + +} // namespace sample + +#endif // TRT_SAMPLE_DEVICE_H diff --git a/src/Detector/tensorrt_onnx/common/sampleEngines.cpp b/src/Detector/tensorrt_onnx/common/sampleEngines.cpp new file mode 100644 index 000000000..dcdfdf2d1 --- /dev/null +++ b/src/Detector/tensorrt_onnx/common/sampleEngines.cpp @@ -0,0 +1,1739 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "NvInfer.h" +#include "NvOnnxParser.h" + +#include "ErrorRecorder.h" +#include "common.h" +#include "half.h" +#include "logger.h" +#include "sampleDevice.h" +#include "sampleEngines.h" +#include "sampleOptions.h" +#include "sampleUtils.h" + +using namespace nvinfer1; + +namespace sample +{ + +namespace +{ + +std::map readScalesFromCalibrationCache(std::string const& calibrationFile) +{ + std::map tensorScales; + std::ifstream cache{calibrationFile}; + if (!cache.is_open()) + { + sample::gLogError << "[TRT] Can not open provided calibration cache file" << std::endl; + return tensorScales; + } + std::string line; + while (std::getline(cache, line)) + { + auto colonPos = line.find_last_of(':'); + if (colonPos != std::string::npos) + { + // Scales should be stored in calibration cache as 32-bit floating numbers encoded as 32-bit integers + int32_t scalesAsInt = std::stoi(line.substr(colonPos + 2, 8), nullptr, 16); + auto const tensorName = line.substr(0, colonPos); + tensorScales[tensorName] = *reinterpret_cast(&scalesAsInt); + } + } + cache.close(); + return tensorScales; +} +} // namespace + +nvinfer1::ICudaEngine* LazilyDeserializedEngine::get() +{ + SMP_RETVAL_IF_FALSE( + !mIsSafe, "Safe mode is enabled, but trying to get standard engine!", nullptr, sample::gLogError); + + if (mEngine == nullptr) + { +#if (NV_TENSORRT_MAJOR > 8) + SMP_RETVAL_IF_FALSE(getFileReader().isOpen() || !getBlob().empty(), "Engine is empty. 
Nothing to deserialize!", + nullptr, sample::gLogError); +#endif + using time_point = std::chrono::time_point; + using duration = std::chrono::duration; + time_point const deserializeStartTime{std::chrono::high_resolution_clock::now()}; + + if (mLeanDLLPath.empty()) + { + mRuntime.reset(createRuntime()); + } + else + { + mParentRuntime.reset(createRuntime()); + ASSERT(mParentRuntime.get() != nullptr); + + mRuntime.reset(mParentRuntime->loadRuntime(mLeanDLLPath.c_str())); + } + ASSERT(mRuntime.get() != nullptr); + + if (mVersionCompatible) + { + // Application needs to opt into allowing deserialization of engines with embedded lean runtime. + mRuntime->setEngineHostCodeAllowed(true); + } + + if (!mTempdir.empty()) + { + mRuntime->setTemporaryDirectory(mTempdir.c_str()); + } + + mRuntime->setTempfileControlFlags(mTempfileControls); + + SMP_RETVAL_IF_FALSE(mRuntime != nullptr, "runtime creation failed", nullptr, sample::gLogError); + if (mDLACore != -1) + { + mRuntime->setDLACore(mDLACore); + } + mRuntime->setErrorRecorder(&gRecorder); +#if !TRT_WINML + for (auto const& pluginPath : mDynamicPlugins) + { + mRuntime->getPluginRegistry().loadLibrary(pluginPath.c_str()); + } +#endif + +#if (NV_TENSORRT_MAJOR > 8) + if (getFileReader().isOpen()) + { + mEngine.reset(mRuntime->deserializeCudaEngine(getFileReader())); + } + else + { + auto const& engineBlob = getBlob(); + mEngine.reset(mRuntime->deserializeCudaEngine(engineBlob.data, engineBlob.size)); + } +#else + auto const& engineBlob = getBlob(); + mEngine.reset(mRuntime->deserializeCudaEngine(engineBlob.data, engineBlob.size)); + std::cerr << "getFileReader is not implemented! 
Use TensorRT 10.x and higher" << std::endl; +#endif + SMP_RETVAL_IF_FALSE(mEngine != nullptr, "Engine deserialization failed", nullptr, sample::gLogError); + + time_point const deserializeEndTime{std::chrono::high_resolution_clock::now()}; + sample::gLogInfo << "Engine deserialized in " << duration(deserializeEndTime - deserializeStartTime).count() + << " sec." << std::endl; + } + + return mEngine.get(); +} + +nvinfer1::ICudaEngine* LazilyDeserializedEngine::release() +{ + return mEngine.release(); +} + +void setTensorScalesFromCalibration(nvinfer1::INetworkDefinition& network, std::vector const& inputFormats, + std::vector const& outputFormats, std::string const& calibrationFile) +{ + auto const tensorScales = readScalesFromCalibrationCache(calibrationFile); + bool const broadcastInputFormats = broadcastIOFormats(inputFormats, network.getNbInputs()); + for (int32_t i = 0, n = network.getNbInputs(); i < n; ++i) + { + int32_t formatIdx = broadcastInputFormats ? 0 : i; + if (!inputFormats.empty() && inputFormats[formatIdx].first == DataType::kINT8) + { + auto* input = network.getInput(i); + auto const calibScale = tensorScales.at(input->getName()); + input->setDynamicRange(-127 * calibScale, 127 * calibScale); + } + } + bool const broadcastOutputFormats = broadcastIOFormats(outputFormats, network.getNbOutputs()); + for (int32_t i = 0, n = network.getNbOutputs(); i < n; ++i) + { + int32_t formatIdx = broadcastOutputFormats ? 0 : i; + if (!outputFormats.empty() && outputFormats[formatIdx].first == DataType::kINT8) + { + auto* output = network.getOutput(i); + auto const calibScale = tensorScales.at(output->getName()); + output->setDynamicRange(-127 * calibScale, 127 * calibScale); + } + } +} + +//! +//! \brief Generate a network definition for a given model +//! +//! \param[in] model Model options for this network +//! \param[in,out] network Network storing the parsed results +//! \param[in,out] err Error stream +//! 
\param[out] vcPluginLibrariesUsed If not nullptr, will be populated with paths to VC plugin libraries required by +//! the parsed network. +//! +//! \return Parser The parser used to initialize the network and that holds the weights for the network, or an invalid +//! parser (the returned parser converts to false if tested) +//! +//! Constant input dimensions in the model must not be changed in the corresponding +//! network definition, because its correctness may rely on the constants. +//! +//! \see Parser::operator bool() +//! +Parser modelToNetwork(ModelOptions const& model, BuildOptions const& build, nvinfer1::INetworkDefinition& network, + std::ostream& err, std::vector* vcPluginLibrariesUsed) +{ + sample::gLogInfo << "Start parsing network model." << std::endl; + auto const tBegin = std::chrono::high_resolution_clock::now(); + + Parser parser; + switch (model.baseModel.format) + { + case ModelFormat::kONNX: + { + using namespace nvonnxparser; + parser.onnxParser.reset(createONNXParser(network)); + ASSERT(parser.onnxParser != nullptr); +#if !TRT_WINML + // kNATIVE_INSTANCENORM is ON by default in the parser and must be cleared to use the plugin implementation. 
+ if (build.pluginInstanceNorm) + { + parser.onnxParser->clearFlag(OnnxParserFlag::kNATIVE_INSTANCENORM); + } +#endif + if (!parser.onnxParser->parseFromFile( + model.baseModel.model.c_str(), static_cast(sample::gLogger.getReportableSeverity()))) + { + err << "Failed to parse onnx file" << std::endl; + parser.onnxParser.reset(); + } +#if !TRT_WINML + if (vcPluginLibrariesUsed && parser.onnxParser.get()) + { + int64_t nbPluginLibs; + char const* const* pluginLibArray = parser.onnxParser->getUsedVCPluginLibraries(nbPluginLibs); + if (nbPluginLibs >= 0) + { + vcPluginLibrariesUsed->reserve(nbPluginLibs); + for (int64_t i = 0; i < nbPluginLibs; ++i) + { + sample::gLogInfo << "Using VC plugin library " << pluginLibArray[i] << std::endl; + vcPluginLibrariesUsed->emplace_back(std::string{pluginLibArray[i]}); + } + } + else + { + sample::gLogWarning << "Failure to query VC plugin libraries required by parsed ONNX network" + << std::endl; + } + } +#endif + break; + } + case ModelFormat::kANY: break; + } + + auto const tEnd = std::chrono::high_resolution_clock::now(); + float const parseTime = std::chrono::duration(tEnd - tBegin).count(); + + sample::gLogInfo << "Finished parsing network model. 
Parse time: " << parseTime << std::endl; + return parser; +} + +namespace +{ + +class RndInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator2 +{ +public: + RndInt8Calibrator(int32_t batches, std::vector& elemCount, std::string const& cacheFile, + nvinfer1::INetworkDefinition const& network, std::ostream& err); + + ~RndInt8Calibrator() override + { + for (auto& elem : mInputDeviceBuffers) + { + cudaCheck(cudaFree(elem.second), mErr); + } + } + + bool getBatch(void* bindings[], char const* names[], int32_t nbBindings) noexcept override; + + int32_t getBatchSize() const noexcept override + { + return 1; + } + + const void* readCalibrationCache(size_t& length) noexcept override; + + void writeCalibrationCache(void const*, size_t) noexcept override {} + +private: + int32_t mBatches{}; + int32_t mCurrentBatch{}; + std::string mCacheFile; + std::map mInputDeviceBuffers; + std::vector mCalibrationCache; + std::ostream& mErr; +}; + +RndInt8Calibrator::RndInt8Calibrator(int32_t batches, std::vector& elemCount, std::string const& cacheFile, + INetworkDefinition const& network, std::ostream& err) + : mBatches(batches) + , mCurrentBatch(0) + , mCacheFile(cacheFile) + , mErr(err) +{ + std::ifstream tryCache(cacheFile, std::ios::binary); + if (tryCache.good()) + { + return; + } + + std::default_random_engine generator; + std::uniform_real_distribution distribution(-1.0F, 1.0F); + auto gen = [&generator, &distribution]() { return distribution(generator); }; + + for (int32_t i = 0; i < network.getNbInputs(); i++) + { + auto* input = network.getInput(i); + std::vector rnd_data(elemCount[i]); + std::generate_n(rnd_data.begin(), elemCount[i], gen); + + void* data; + cudaCheck(cudaMalloc(&data, elemCount[i] * sizeof(float)), mErr); + cudaCheck(cudaMemcpy(data, rnd_data.data(), elemCount[i] * sizeof(float), cudaMemcpyHostToDevice), mErr); + + mInputDeviceBuffers.insert(std::make_pair(input->getName(), data)); + } +} + +bool RndInt8Calibrator::getBatch(void* bindings[], char const* 
names[], int32_t nbBindings) noexcept +{ + if (mCurrentBatch >= mBatches) + { + return false; + } + + for (int32_t i = 0; i < nbBindings; ++i) + { + bindings[i] = mInputDeviceBuffers[names[i]]; + } + + ++mCurrentBatch; + + return true; +} + +const void* RndInt8Calibrator::readCalibrationCache(size_t& length) noexcept +{ + mCalibrationCache.clear(); + std::ifstream input(mCacheFile, std::ios::binary); + input >> std::noskipws; + if (input.good()) + { + std::copy( + std::istream_iterator(input), std::istream_iterator(), std::back_inserter(mCalibrationCache)); + } + + length = mCalibrationCache.size(); + return !mCalibrationCache.empty() ? mCalibrationCache.data() : nullptr; +} + +bool setTensorDynamicRange(INetworkDefinition const& network, float inRange = 2.0F, float outRange = 4.0F) +{ + // Ensure that all layer inputs have a dynamic range. + for (int32_t l = 0; l < network.getNbLayers(); l++) + { + auto* layer = network.getLayer(l); + for (int32_t i = 0; i < layer->getNbInputs(); i++) + { + ITensor* input{layer->getInput(i)}; + // Optional inputs are nullptr here and are from RNN layers. + if (input && !input->dynamicRangeIsSet()) + { + // Concat should propagate dynamic range from outputs to inputs to avoid + // Re-quantization during the concatenation + auto dynRange = (layer->getType() == LayerType::kCONCATENATION) ? outRange : inRange; + if (!input->setDynamicRange(-dynRange, dynRange)) + { + return false; + } + } + } + for (int32_t o = 0; o < layer->getNbOutputs(); o++) + { + ITensor* output{layer->getOutput(o)}; + // Optional outputs are nullptr here and are from RNN layers. + if (output && !output->dynamicRangeIsSet()) + { + // Pooling must have the same input and output dynamic range. 
+ if (layer->getType() == LayerType::kPOOLING) + { + if (!output->setDynamicRange(-inRange, inRange)) + { + return false; + } + } + else + { + if (!output->setDynamicRange(-outRange, outRange)) + { + return false; + } + } + } + } + } + return true; +} + +bool isNonActivationType(nvinfer1::DataType const type) +{ + return type == nvinfer1::DataType::kINT32 +#if (NV_TENSORRT_MAJOR > 8) + || type == nvinfer1::DataType::kINT64 +#endif + || type == nvinfer1::DataType::kBOOL + || type == nvinfer1::DataType::kUINT8; +} + +void setLayerPrecisions(INetworkDefinition& network, LayerPrecisions const& layerPrecisions) +{ + bool hasLayerPrecisionSkipped{false}; + for (int32_t layerIdx = 0; layerIdx < network.getNbLayers(); ++layerIdx) + { + auto* layer = network.getLayer(layerIdx); + auto const layerName = layer->getName(); + auto exactMatch = layerPrecisions.find(layerName); + auto plausibleMatch = findPlausible(layerPrecisions, layerName); + if (exactMatch != layerPrecisions.end()) + { + sample::gLogInfo << "Set layer " << layerName << " to precision " << exactMatch->second << std::endl; + layer->setPrecision(exactMatch->second); + } + else if (plausibleMatch != layerPrecisions.end()) + { + if (isNonActivationType(layer->getPrecision())) + { + hasLayerPrecisionSkipped = true; + sample::gLogVerbose << "Skipped setting precision for layer " << layerName << " because the " + << " default layer precision is of non-activation type." << std::endl; + continue; + } + if (layer->getType() == nvinfer1::LayerType::kCONSTANT + && (isNonActivationType(static_cast(layer)->getWeights().type))) + { + hasLayerPrecisionSkipped = true; + sample::gLogVerbose << "Skipped setting precision for layer " << layerName << " because this " + << "constant layer has weights of non-activation type." 
<< std::endl; + continue; + } + if (layer->getNbInputs() >= 1 && layer->getInput(0)->isShapeTensor()) + { + hasLayerPrecisionSkipped = true; + sample::gLogVerbose << "Skipped setting precision for layer " << layerName << " because this layer " + << "operates on a shape tensor." << std::endl; + continue; + } + if (layer->getNbInputs() >= 1 && isNonActivationType(layer->getInput(0)->getType()) + && layer->getNbOutputs() >= 1 && isNonActivationType(layer->getOutput(0)->getType())) + { + hasLayerPrecisionSkipped = true; + sample::gLogVerbose << "Skipped setting precision for layer " << layerName << " because this " + << "layer has input and output of non-activation type." << std::endl; + continue; + } + // All heuristics passed. Set the layer precision. + sample::gLogInfo << "Set layer " << layerName << " to precision " << plausibleMatch->second << std::endl; + layer->setPrecision(plausibleMatch->second); + } + } + + if (hasLayerPrecisionSkipped) + { + sample::gLogInfo << "Skipped setting precisions for some layers. Check verbose logs for more details." + << std::endl; + } +} + +void setLayerOutputTypes(INetworkDefinition& network, LayerOutputTypes const& layerOutputTypes) +{ + bool const hasGlobalOutputType{layerOutputTypes.find("*") != layerOutputTypes.end()}; + auto const globalOutputType = hasGlobalOutputType ? 
layerOutputTypes.at("*").at(0) : nvinfer1::DataType::kFLOAT; + bool hasLayerOutputTypeSkipped{false}; + for (int32_t layerIdx = 0; layerIdx < network.getNbLayers(); ++layerIdx) + { + auto* layer = network.getLayer(layerIdx); + auto const layerName = layer->getName(); + auto const nbOutputs = layer->getNbOutputs(); + auto exactMatch = layerOutputTypes.find(layerName); + auto plausibleMatch = findPlausible(layerOutputTypes, layerName); + if (exactMatch != layerOutputTypes.end()) + { + auto const& outputTypes = exactMatch->second; + bool const isBroadcast = (outputTypes.size() == 1); + if (!isBroadcast && static_cast(outputTypes.size()) != nbOutputs) + { + sample::gLogError << "Layer " << layerName << " has " << nbOutputs << " outputs but " + << outputTypes.size() << " output types are given in --layerOutputTypes flag." + << std::endl; + throw std::invalid_argument("Invalid --layerOutputTypes flag."); + } + for (int32_t outputIdx = 0; outputIdx < nbOutputs; ++outputIdx) + { + auto const outputType = outputTypes.at(isBroadcast ? 0 : outputIdx); + sample::gLogInfo << "Set output " << outputIdx << " of layer " << layerName << " to type " << outputType + << std::endl; + layer->setOutputType(outputIdx, outputType); + } + } + else if (plausibleMatch != layerOutputTypes.end()) + { + auto const& outputTypes = plausibleMatch->second; + bool const isBroadcast = (outputTypes.size() == 1); + + // We should not set the layer output types if its default precision is INT32 or Bool. + if (layer->getPrecision() == nvinfer1::DataType::kINT32 + || layer->getPrecision() == nvinfer1::DataType::kBOOL) + { + hasLayerOutputTypeSkipped = true; + sample::gLogVerbose << "Skipped setting output types for layer " << layerName << " because the " + << " default layer precision is INT32 or Bool." << std::endl; + continue; + } + // We should not set the constant layer output types if its weights are in INT32. 
+ if (layer->getType() == nvinfer1::LayerType::kCONSTANT + && static_cast(layer)->getWeights().type == nvinfer1::DataType::kINT32) + { + hasLayerOutputTypeSkipped = true; + sample::gLogVerbose << "Skipped setting output types for layer " << layerName << " because this " + << "constant layer has INT32 weights." << std::endl; + continue; + } + for (int32_t outputIdx = 0; outputIdx < nbOutputs; ++outputIdx) + { + // We should not set the output type if the output is a shape tensor. + if (layer->getOutput(0)->isShapeTensor()) + { + hasLayerOutputTypeSkipped = true; + sample::gLogVerbose << "Skipped setting output type for output " << outputIdx << " of layer " + << layerName << " because it is a shape tensor." << std::endl; + continue; + } + + auto const outputType = outputTypes.at(isBroadcast ? 0 : outputIdx); + sample::gLogInfo << "Set output " << outputIdx << " of layer " << layerName << " to type " << outputType + << std::endl; + layer->setOutputType(outputIdx, globalOutputType); + } + } + } + + if (hasLayerOutputTypeSkipped) + { + sample::gLogInfo << "Skipped setting output types for some layers. Check verbose logs for more details." 
+ << std::endl; + } +} + +void setLayerDeviceTypes( + INetworkDefinition const& network, IBuilderConfig& config, LayerDeviceTypes const& layerDeviceTypes) +{ + for (int32_t layerIdx = 0; layerIdx < network.getNbLayers(); ++layerIdx) + { + auto* layer = network.getLayer(layerIdx); + auto const layerName = layer->getName(); + auto match = findPlausible(layerDeviceTypes, layerName); + if (match != layerDeviceTypes.end()) + { + DeviceType const deviceType = match->second; + sample::gLogInfo << "Set layer " << layerName << " to device type " << (int)deviceType << std::endl; + config.setDeviceType(layer, deviceType); + } + } +} + +void markDebugTensors(INetworkDefinition& network, StringSet const& debugTensors) +{ +#if (NV_TENSORRT_MAJOR > 8) + for (int64_t inputIndex = 0; inputIndex < network.getNbInputs(); ++inputIndex) + { + auto* t = network.getInput(inputIndex); + auto const tensorName = t->getName(); + if (debugTensors.count(tensorName) > 0) + { + network.markDebug(*t); + } + } + for (int64_t layerIndex = 0; layerIndex < network.getNbLayers(); ++layerIndex) + { + auto* layer = network.getLayer(layerIndex); + for (int64_t outputIndex = 0; outputIndex < layer->getNbOutputs(); ++outputIndex) + { + auto* t = layer->getOutput(outputIndex); + auto const tensorName = t->getName(); + if (debugTensors.count(tensorName) > 0) + { + network.markDebug(*t); + } + } + } +#else + std::cerr << "Can not markDebugTensors. Use TensorRT 10.x or higher" << std::endl; +#endif +} + +void setMemoryPoolLimits(IBuilderConfig& config, BuildOptions const& build) +{ + auto const roundToBytes = [](double const size, bool fromMB = true) { + return static_cast(size * (fromMB ? 
1.0_MiB : 1.0_KiB)); + }; + if (build.workspace >= 0) + { + config.setMemoryPoolLimit(MemoryPoolType::kWORKSPACE, roundToBytes(build.workspace)); + } + if (build.dlaSRAM >= 0) + { + size_t const sizeInBytes = roundToBytes(build.dlaSRAM); + size_t sizeInPowerOf2{1}; + // Using 2^30 bytes as a loose upper bound to prevent the possibility of overflows and infinite loops. + while (sizeInPowerOf2 < 31 && (static_cast(1) << sizeInPowerOf2) <= sizeInBytes) + { + ++sizeInPowerOf2; + } + --sizeInPowerOf2; + if (sizeInPowerOf2 == 30) + { + sample::gLogWarning + << "User-specified DLA managed SRAM size is too large and has been clipped to 2^30 bytes. " + << "Please make sure that this is the intended managed SRAM size." << std::endl; + } + config.setMemoryPoolLimit(MemoryPoolType::kDLA_MANAGED_SRAM, static_cast(1) << sizeInPowerOf2); + } + if (build.dlaLocalDRAM >= 0) + { + config.setMemoryPoolLimit(MemoryPoolType::kDLA_LOCAL_DRAM, roundToBytes(build.dlaLocalDRAM)); + } + if (build.dlaGlobalDRAM >= 0) + { + config.setMemoryPoolLimit(MemoryPoolType::kDLA_GLOBAL_DRAM, roundToBytes(build.dlaGlobalDRAM)); + } +#if (NV_TENSORRT_MAJOR > 8) + if (build.tacticSharedMem >= 0) + { + config.setMemoryPoolLimit(MemoryPoolType::kTACTIC_SHARED_MEMORY, roundToBytes(build.tacticSharedMem, false)); + } +#endif +} + +void setPreviewFeatures(IBuilderConfig& config, BuildOptions const& build) +{ + auto const setFlag = [&](PreviewFeature feat) { + int32_t featVal = static_cast(feat); + if (build.previewFeatures.find(featVal) != build.previewFeatures.end()) + { + config.setPreviewFeature(feat, build.previewFeatures.at(featVal)); + } + }; +#if (NV_TENSORRT_MAJOR > 8) + setFlag(PreviewFeature::kALIASED_PLUGIN_IO_10_03); +#endif +} + +} // namespace + +bool setupNetworkAndConfig(BuildOptions const& build, SystemOptions const& sys, IBuilder& builder, + INetworkDefinition& network, IBuilderConfig& config, std::unique_ptr& calibrator, + std::ostream& err, std::vector>& sparseWeights) +{ + std::vector 
profiles{}; + profiles.resize(build.optProfiles.size()); + for (auto& profile : profiles) + { + profile = builder.createOptimizationProfile(); + } + + bool hasDynamicShapes{false}; + + bool broadcastInputFormats = broadcastIOFormats(build.inputFormats, network.getNbInputs()); + + // Check if the provided input tensor names match the input tensors of the engine. + // Throw an error if the provided input tensor names cannot be found because it implies a potential typo. + for (auto const& shapes : build.optProfiles) + { + for (auto const& shape : shapes) + { + bool tensorNameFound{false}; + for (int32_t i = 0; i < network.getNbInputs(); ++i) + { + if (matchStringWithOneWildcard(shape.first, network.getInput(i)->getName())) + { + tensorNameFound = true; + break; + } + } + if (!tensorNameFound) + { + sample::gLogError << "Cannot find input tensor with name \"" << shape.first << "\" in the network " + << "inputs! Please make sure the input tensor names are correct." << std::endl; + return false; + } + } + } + + for (uint32_t i = 0, n = network.getNbInputs(); i < n; i++) + { + // Set formats and data types of inputs + auto* input = network.getInput(i); + if (!build.inputFormats.empty()) + { + int32_t inputFormatIndex = broadcastInputFormats ? 
0 : i; + input->setType(build.inputFormats[inputFormatIndex].first); + input->setAllowedFormats(build.inputFormats[inputFormatIndex].second); + } + + auto const dims = input->getDimensions(); + auto const isScalar = dims.nbDims == 0; + auto const isDynamicInput = std::any_of(dims.d, dims.d + dims.nbDims, [](int32_t dim) { return dim == -1; }) + || input->isShapeTensor(); + if (isDynamicInput) + { + hasDynamicShapes = true; + for (size_t i = 0; i < build.optProfiles.size(); i++) + { + auto const& optShapes = build.optProfiles[i]; + auto profile = profiles[i]; + auto const tensorName = input->getName(); + auto shape = findPlausible(optShapes, tensorName); + ShapeRange shapes{}; + + // If no shape is provided, set dynamic dimensions to 1. + if (shape == optShapes.end()) + { + constexpr int32_t kDEFAULT_DIMENSION{1}; + std::vector staticDims; + if (input->isShapeTensor()) + { + if (isScalar) + { + staticDims.push_back(1); + } + else + { + staticDims.resize(dims.d[0]); + std::fill(staticDims.begin(), staticDims.end(), kDEFAULT_DIMENSION); + } + } + else + { + staticDims.resize(dims.nbDims); + std::transform(dims.d, dims.d + dims.nbDims, staticDims.begin(), + [&](int dimension) { return dimension > 0 ? dimension : kDEFAULT_DIMENSION; }); + } + sample::gLogWarning << "Dynamic dimensions required for input: " << tensorName + << ", but no shapes were provided. 
Automatically overriding shape to: " + << staticDims << std::endl; + std::fill(shapes.begin(), shapes.end(), staticDims); + } + else + { + shapes = shape->second; + } + + std::vector profileDims{}; + if (input->isShapeTensor()) + { + profileDims = shapes[static_cast(OptProfileSelector::kMIN)]; + SMP_RETVAL_IF_FALSE(profile->setShapeValues(tensorName, OptProfileSelector::kMIN, + profileDims.data(), static_cast(profileDims.size())), + "Error in set shape values MIN", false, err); + profileDims = shapes[static_cast(OptProfileSelector::kOPT)]; + SMP_RETVAL_IF_FALSE(profile->setShapeValues(tensorName, OptProfileSelector::kOPT, + profileDims.data(), static_cast(profileDims.size())), + "Error in set shape values OPT", false, err); + profileDims = shapes[static_cast(OptProfileSelector::kMAX)]; + SMP_RETVAL_IF_FALSE(profile->setShapeValues(tensorName, OptProfileSelector::kMAX, + profileDims.data(), static_cast(profileDims.size())), + "Error in set shape values MAX", false, err); + sample::gLogInfo << "Set input shape tensor " << tensorName << " for optimization profile " << i + << " to:" + << " MIN=" << shapes[static_cast(OptProfileSelector::kMIN)] + << " OPT=" << shapes[static_cast(OptProfileSelector::kOPT)] + << " MAX=" << shapes[static_cast(OptProfileSelector::kMAX)] << std::endl; + } + else + { + profileDims = shapes[static_cast(OptProfileSelector::kMIN)]; + SMP_RETVAL_IF_FALSE( + profile->setDimensions(tensorName, OptProfileSelector::kMIN, toDims(profileDims)), + "Error in set dimensions to profile MIN", false, err); + profileDims = shapes[static_cast(OptProfileSelector::kOPT)]; + SMP_RETVAL_IF_FALSE( + profile->setDimensions(tensorName, OptProfileSelector::kOPT, toDims(profileDims)), + "Error in set dimensions to profile OPT", false, err); + profileDims = shapes[static_cast(OptProfileSelector::kMAX)]; + SMP_RETVAL_IF_FALSE( + profile->setDimensions(tensorName, OptProfileSelector::kMAX, toDims(profileDims)), + "Error in set dimensions to profile MAX", false, err); + 
sample::gLogInfo << "Set shape of input tensor " << tensorName << " for optimization profile " << i + << " to:" + << " MIN=" << shapes[static_cast(OptProfileSelector::kMIN)] + << " OPT=" << shapes[static_cast(OptProfileSelector::kOPT)] + << " MAX=" << shapes[static_cast(OptProfileSelector::kMAX)] << std::endl; + } + } + } + } + + for (uint32_t i = 0, n = network.getNbOutputs(); i < n; i++) + { + auto* output = network.getOutput(i); + auto const dims = output->getDimensions(); + // A shape tensor output with known static dimensions may have dynamic shape values inside it. + auto const isDynamicOutput = std::any_of(dims.d, dims.d + dims.nbDims, [](int32_t dim) { return dim == -1; }) + || output->isShapeTensor(); + if (isDynamicOutput) + { + hasDynamicShapes = true; + } + } + + if (!hasDynamicShapes && !build.optProfiles[0].empty()) + { + sample::gLogError << "Static model does not take explicit shapes since the shape of inference tensors will be " + "determined by the model itself" + << std::endl; + return false; + } + + if (hasDynamicShapes) + { + for (auto profile : profiles) + { + SMP_RETVAL_IF_FALSE(profile->isValid(), "Required optimization profile is invalid", false, err); + SMP_RETVAL_IF_FALSE( + config.addOptimizationProfile(profile) != -1, "Error in add optimization profile", false, err); + } + } + + bool broadcastOutputFormats = broadcastIOFormats(build.outputFormats, network.getNbOutputs(), false); + + for (uint32_t i = 0, n = network.getNbOutputs(); i < n; i++) + { + // Set formats and data types of outputs + auto* output = network.getOutput(i); + if (!build.outputFormats.empty()) + { + int32_t outputFormatIndex = broadcastOutputFormats ? 
0 : i; + output->setType(build.outputFormats[outputFormatIndex].first); + output->setAllowedFormats(build.outputFormats[outputFormatIndex].second); + } + } + + setMemoryPoolLimits(config, build); + + setPreviewFeatures(config, build); + + if (build.builderOptimizationLevel != defaultBuilderOptimizationLevel) + { + config.setBuilderOptimizationLevel(build.builderOptimizationLevel); + } + + if (build.maxTactics != defaultMaxTactics) + { +#if (NV_TENSORRT_MAJOR < 8) + config.setMaxNbTactics(build.maxTactics); +#else + config.setTacticSources(build.maxTactics); +#endif + } + + if (build.timingCacheMode == TimingCacheMode::kDISABLE) + { + config.setFlag(BuilderFlag::kDISABLE_TIMING_CACHE); + } +#if (NV_TENSORRT_MAJOR > 8) + if (build.disableCompilationCache) + { + config.setFlag(BuilderFlag::kDISABLE_COMPILATION_CACHE); + } + + if (build.errorOnTimingCacheMiss) + { + config.setFlag(BuilderFlag::kERROR_ON_TIMING_CACHE_MISS); + } +#endif + if (!build.tf32) + { + config.clearFlag(BuilderFlag::kTF32); + } + + if (build.refittable) + { + config.setFlag(BuilderFlag::kREFIT); + } +#if (NV_TENSORRT_MAJOR > 8) + if (build.stripWeights) + { + // The kREFIT_IDENTICAL is enabled by default when kSTRIP_PLAN is on. 
+ config.setFlag(BuilderFlag::kSTRIP_PLAN); + } +#endif + if (build.versionCompatible) + { + config.setFlag(BuilderFlag::kVERSION_COMPATIBLE); + } +#if !TRT_WINML + std::vector pluginPaths; + for (auto const& pluginPath : sys.setPluginsToSerialize) + { + sample::gLogVerbose << "Setting plugin to serialize: " << pluginPath << std::endl; + pluginPaths.push_back(pluginPath.c_str()); + } + if (!pluginPaths.empty()) + { + config.setPluginsToSerialize(pluginPaths.data(), pluginPaths.size()); + } +#endif + if (build.excludeLeanRuntime) + { + config.setFlag(BuilderFlag::kEXCLUDE_LEAN_RUNTIME); + } + + if (build.sparsity != SparsityFlag::kDISABLE) + { + config.setFlag(BuilderFlag::kSPARSE_WEIGHTS); + if (build.sparsity == SparsityFlag::kFORCE) + { + sparsify(network, sparseWeights); + } + } + + config.setProfilingVerbosity(build.profilingVerbosity); + config.setAvgTimingIterations(build.avgTiming); + + if (build.fp16) + { + config.setFlag(BuilderFlag::kFP16); + } + if (build.int8) + { + config.setFlag(BuilderFlag::kINT8); + } +#if (NV_TENSORRT_MAJOR > 8) + if (build.bf16) + { + config.setFlag(BuilderFlag::kBF16); + } +#endif + + SMP_RETVAL_IF_FALSE(!(build.int8 && build.fp8), "FP8 and INT8 precisions have been specified", false, err); + + if (build.fp8) + { + config.setFlag(BuilderFlag::kFP8); + } +#if (NV_TENSORRT_MAJOR > 8) + if (build.int4) + { + config.setFlag(BuilderFlag::kINT4); + } +#endif + if (build.int8 && !build.fp16) + { + sample::gLogInfo + << "FP32 and INT8 precisions have been specified - more performance might be enabled by additionally " + "specifying --fp16 or --best" + << std::endl; + } + + auto isInt8 = [](const IOFormat& format) { return format.first == DataType::kINT8; }; + auto int8IO = std::count_if(build.inputFormats.begin(), build.inputFormats.end(), isInt8) + + std::count_if(build.outputFormats.begin(), build.outputFormats.end(), isInt8); + + auto hasQDQLayers = [](INetworkDefinition& network) { + // Determine if our network has QDQ layers. 
+ auto const nbLayers = network.getNbLayers(); + for (int32_t i = 0; i < nbLayers; i++) + { + auto const& layer = network.getLayer(i); + if (layer->getType() == LayerType::kQUANTIZE || layer->getType() == LayerType::kDEQUANTIZE) + { + return true; + } + } + return false; + }; + + if (!hasQDQLayers(network) && (build.int8 || int8IO) && build.calibration.empty()) + { + // Explicitly set int8 scales if no calibrator is provided and if I/O tensors use int8, + // because auto calibration does not support this case. + SMP_RETVAL_IF_FALSE(setTensorDynamicRange(network), "Error in set tensor dynamic range.", false, err); + } + else if (build.int8) + { + if (!hasQDQLayers(network) && int8IO) + { + try + { + // Set dynamic ranges of int8 inputs / outputs to match scales loaded from calibration cache + // TODO http://nvbugs/3262234 Change the network validation so that this workaround can be removed + setTensorScalesFromCalibration(network, build.inputFormats, build.outputFormats, build.calibration); + } + catch (std::exception&) + { + sample::gLogError + << "Int8IO was specified but impossible to read tensor scales from provided calibration cache file" + << std::endl; + return false; + } + } + IOptimizationProfile* profileCalib{nullptr}; + if (!build.shapesCalib.empty()) + { + profileCalib = builder.createOptimizationProfile(); + for (uint32_t i = 0, n = network.getNbInputs(); i < n; i++) + { + auto* input = network.getInput(i); + Dims profileDims{}; + auto const tensorName = input->getName(); + auto shape = findPlausible(build.shapesCalib, tensorName); + + if (shape == build.shapesCalib.end()) + { + std::ostringstream msg; + msg << "Calibration profile for tensor " << tensorName << " cannot be found!"; + throw std::invalid_argument(msg.str()); + } + + auto shapesCalib = shape->second; + profileDims = toDims(shapesCalib[static_cast(OptProfileSelector::kOPT)]); + // Here we check only kMIN as all profileDims are the same. 
+ SMP_RETVAL_IF_FALSE(profileCalib->setDimensions(tensorName, OptProfileSelector::kMIN, profileDims), + "Error in set dimensions to calibration profile OPT", false, err); + profileCalib->setDimensions(tensorName, OptProfileSelector::kOPT, profileDims); + profileCalib->setDimensions(tensorName, OptProfileSelector::kMAX, profileDims); + sample::gLogInfo << "Set calibration profile for input tensor " << tensorName << " to " << profileDims + << std::endl; + } + SMP_RETVAL_IF_FALSE(profileCalib->isValid(), "Calibration profile is invalid", false, err); + SMP_RETVAL_IF_FALSE( + config.setCalibrationProfile(profileCalib), "Error in set calibration profile", false, err); + } + + std::vector elemCount{}; + for (int i = 0; i < network.getNbInputs(); i++) + { + auto* input = network.getInput(i); + auto const dims = input->getDimensions(); + auto const isDynamicInput + = std::any_of(dims.d, dims.d + dims.nbDims, [](int32_t dim) { return dim == -1; }); + + if (profileCalib) + { + elemCount.push_back(volume(profileCalib->getDimensions(input->getName(), OptProfileSelector::kOPT))); + } + else if (!profiles.empty() && isDynamicInput) + { + elemCount.push_back( + volume(profiles[build.calibProfile]->getDimensions(input->getName(), OptProfileSelector::kOPT))); + } + else + { + elemCount.push_back(volume(input->getDimensions())); + } + } + + calibrator.reset(new RndInt8Calibrator(1, elemCount, build.calibration, network, err)); + config.setInt8Calibrator(calibrator.get()); + } + + if (build.directIO) + { + config.setFlag(BuilderFlag::kDIRECT_IO); + } + + switch (build.precisionConstraints) + { + case PrecisionConstraints::kNONE: + // It's the default for TensorRT. 
+ break; + case PrecisionConstraints::kOBEY: config.setFlag(BuilderFlag::kOBEY_PRECISION_CONSTRAINTS); break; + case PrecisionConstraints::kPREFER: config.setFlag(BuilderFlag::kPREFER_PRECISION_CONSTRAINTS); break; + } + + if (!build.layerPrecisions.empty() && build.precisionConstraints != PrecisionConstraints::kNONE) + { + setLayerPrecisions(network, build.layerPrecisions); + } + + if (!build.layerOutputTypes.empty() && build.precisionConstraints != PrecisionConstraints::kNONE) + { + setLayerOutputTypes(network, build.layerOutputTypes); + } + + if (!build.layerDeviceTypes.empty()) + { + setLayerDeviceTypes(network, config, build.layerDeviceTypes); + } + + if (!build.debugTensors.empty()) + { + markDebugTensors(network, build.debugTensors); + } + + if (build.safe && sys.DLACore == -1) + { + config.setEngineCapability(EngineCapability::kSAFETY); + } + + if (build.restricted) + { + config.setFlag(BuilderFlag::kSAFETY_SCOPE); + } + + if (sys.DLACore != -1) + { + if (sys.DLACore < builder.getNbDLACores()) + { + config.setDefaultDeviceType(DeviceType::kDLA); + config.setDLACore(sys.DLACore); + config.setFlag(BuilderFlag::kPREFER_PRECISION_CONSTRAINTS); + if (build.buildDLAStandalone) + { + config.setEngineCapability(EngineCapability::kDLA_STANDALONE); + } + if (build.allowGPUFallback) + { + config.setFlag(BuilderFlag::kGPU_FALLBACK); + } + else + { + // Reformatting runs on GPU, so avoid I/O reformatting. 
+ config.setFlag(BuilderFlag::kDIRECT_IO); + } + if (!build.int8) + { + config.setFlag(BuilderFlag::kFP16); + } + } + else + { + err << "Cannot create DLA engine, " << sys.DLACore << " not available" << std::endl; + return false; + } + } + + if (build.enabledTactics || build.disabledTactics) + { + TacticSources tacticSources = config.getTacticSources(); + tacticSources |= build.enabledTactics; + tacticSources &= ~build.disabledTactics; + config.setTacticSources(tacticSources); + } + + config.setHardwareCompatibilityLevel(build.hardwareCompatibilityLevel); +#if (NV_TENSORRT_MAJOR > 8) + config.setRuntimePlatform(build.runtimePlatform); +#endif + + if (build.maxAuxStreams != defaultMaxAuxStreams) + { + config.setMaxAuxStreams(build.maxAuxStreams); + } + + if (build.allowWeightStreaming) + { +#if (NV_TENSORRT_MAJOR > 8) + config.setFlag(BuilderFlag::kWEIGHT_STREAMING); +#else + std::cerr << "BuilderFlag::kWEIGHT_STREAMING not allowed in TensorRT with version less than 10.x" << std::endl; +#endif + } + + return true; +} + +//! +//! \brief Create a serialized engine for a network defintion +//! +//! \return Whether the engine creation succeeds or fails. +//! +bool networkToSerializedEngine( + BuildOptions const& build, SystemOptions const& sys, IBuilder& builder, BuildEnvironment& env, std::ostream& err) +{ + std::unique_ptr config{builder.createBuilderConfig()}; + std::unique_ptr calibrator; + std::vector> sparseWeights; + SMP_RETVAL_IF_FALSE(config != nullptr, "Config creation failed", false, err); + SMP_RETVAL_IF_FALSE( + setupNetworkAndConfig(build, sys, builder, *env.network, *config, calibrator, err, sparseWeights), + "Network And Config setup failed", false, err); + + std::unique_ptr timingCache{}; + // Try to load cache from file. 
Create a fresh cache if the file doesn't exist + if (build.timingCacheMode == TimingCacheMode::kGLOBAL) + { + timingCache + = samplesCommon::buildTimingCacheFromFile(gLogger.getTRTLogger(), *config, build.timingCacheFile, err); + } + + // CUDA stream used for profiling by the builder. + auto profileStream = samplesCommon::makeCudaStream(); + SMP_RETVAL_IF_FALSE(profileStream != nullptr, "Cuda stream creation failed", false, err); + config->setProfileStream(*profileStream); + + auto const tBegin = std::chrono::high_resolution_clock::now(); + std::unique_ptr serializedEngine{builder.buildSerializedNetwork(*env.network, *config)}; + SMP_RETVAL_IF_FALSE(serializedEngine != nullptr, "Engine could not be created from network", false, err); + auto const tEnd = std::chrono::high_resolution_clock::now(); + float const buildTime = std::chrono::duration(tEnd - tBegin).count(); + sample::gLogInfo << "Engine built in " << buildTime << " sec." << std::endl; + sample::gLogInfo << "Created engine with size: " << (serializedEngine->size() / 1.0_MiB) << " MiB" << std::endl; + + env.engine.setBlob(serializedEngine); + + if (build.timingCacheMode == TimingCacheMode::kGLOBAL) + { + auto timingCache = config->getTimingCache(); + samplesCommon::updateTimingCacheFile(gLogger.getTRTLogger(), build.timingCacheFile, timingCache, builder); + } + + return true; +} + +//! +//! \brief Parse a given model, create a network and an engine. +//! +bool modelToBuildEnv( + ModelOptions const& model, BuildOptions const& build, SystemOptions& sys, BuildEnvironment& env, std::ostream& err) +{ + env.builder.reset(createBuilder()); + SMP_RETVAL_IF_FALSE(env.builder != nullptr, "Builder creation failed", false, err); + env.builder->setErrorRecorder(&gRecorder); +#if (NV_TENSORRT_MAJOR > 8) + auto networkFlags = (build.stronglyTyped) + ? 
1U << static_cast(nvinfer1::NetworkDefinitionCreationFlag::kSTRONGLY_TYPED) + : 0U; +#else + auto networkFlags = 0U; +#endif +#if !TRT_WINML + for (auto const& pluginPath : sys.dynamicPlugins) + { + env.builder->getPluginRegistry().loadLibrary(pluginPath.c_str()); + } +#endif + env.network.reset(env.builder->createNetworkV2(networkFlags)); + + std::vector vcPluginLibrariesUsed; + SMP_RETVAL_IF_FALSE(env.network != nullptr, "Network creation failed", false, err); + env.parser + = modelToNetwork(model, build, *env.network, err, build.versionCompatible ? &vcPluginLibrariesUsed : nullptr); + SMP_RETVAL_IF_FALSE(env.parser.operator bool(), "Parsing model failed", false, err); + +#if !TRT_WINML + if (build.versionCompatible && !sys.ignoreParsedPluginLibs && !vcPluginLibrariesUsed.empty()) + { + sample::gLogInfo << "The following plugin libraries were identified by the parser as required for a " + "version-compatible engine:" + << std::endl; + for (auto const& lib : vcPluginLibrariesUsed) + { + sample::gLogInfo << " " << lib << std::endl; + } + if (!build.excludeLeanRuntime) + { + sample::gLogInfo << "These libraries will be added to --setPluginsToSerialize since --excludeLeanRuntime " + "was not specified." + << std::endl; + std::copy(vcPluginLibrariesUsed.begin(), vcPluginLibrariesUsed.end(), + std::back_inserter(sys.setPluginsToSerialize)); + } + sample::gLogInfo << "These libraries will be added to --dynamicPlugins for use at inference time." << std::endl; + std::copy(vcPluginLibrariesUsed.begin(), vcPluginLibrariesUsed.end(), std::back_inserter(sys.dynamicPlugins)); + + // Implicitly-added plugins from ONNX parser should be loaded into plugin registry as well. + for (auto const& pluginPath : vcPluginLibrariesUsed) + { + env.builder->getPluginRegistry().loadLibrary(pluginPath.c_str()); + } + + sample::gLogInfo << "Use --ignoreParsedPluginLibs to disable this behavior." 
<< std::endl; + } +#endif + + SMP_RETVAL_IF_FALSE( + networkToSerializedEngine(build, sys, *env.builder, env, err), "Building engine failed", false, err); + return true; +} + +namespace +{ +std::pair, std::vector> getLayerWeightsRolePair(IRefitter& refitter) +{ + // Get number of refittable items. + auto const nbAll = refitter.getAll(0, nullptr, nullptr); + std::vector layerNames(nbAll); + // Allocate buffers for the items and get them. + std::vector weightsRoles(nbAll); + refitter.getAll(nbAll, layerNames.data(), weightsRoles.data()); + std::vector layerNameStrs(nbAll); + std::transform(layerNames.begin(), layerNames.end(), layerNameStrs.begin(), [](char const* name) { + if (name == nullptr) + { + return std::string{}; + } + return std::string{name}; + }); + return {layerNameStrs, weightsRoles}; +} + +std::pair, std::vector> getMissingLayerWeightsRolePair(IRefitter& refitter) +{ + // Get number of refittable items. + auto const nbMissing = refitter.getMissing(0, nullptr, nullptr); + std::vector layerNames(nbMissing); + // Allocate buffers for the items and get them. 
+ std::vector weightsRoles(nbMissing); + refitter.getMissing(nbMissing, layerNames.data(), weightsRoles.data()); + std::vector layerNameStrs(nbMissing); + std::transform(layerNames.begin(), layerNames.end(), layerNameStrs.begin(), [](char const* name) { + if (name == nullptr) + { + return std::string{}; + } + return std::string{name}; + }); + return {layerNameStrs, weightsRoles}; +} +} // namespace + +bool loadStreamingEngineToBuildEnv(std::string const& filepath, BuildEnvironment& env, std::ostream& err) +{ +#if (NV_TENSORRT_MAJOR > 8) + auto& reader = env.engine.getFileReader(); + SMP_RETVAL_IF_FALSE(reader.open(filepath), "", false, err << "Error opening engine file: " << filepath); +#else + SMP_RETVAL_IF_FALSE(false, "", false, err << "Error opening engine file: " << filepath); +#endif + return true; +} + +bool loadEngineToBuildEnv(std::string const& filepath, BuildEnvironment& env, std::ostream& err) +{ + auto const tBegin = std::chrono::high_resolution_clock::now(); + std::ifstream engineFile(filepath, std::ios::binary); + SMP_RETVAL_IF_FALSE(engineFile.good(), "", false, err << "Error opening engine file: " << filepath); + engineFile.seekg(0, std::ifstream::end); + int64_t fsize = engineFile.tellg(); + engineFile.seekg(0, std::ifstream::beg); + + std::vector engineBlob(fsize); + engineFile.read(reinterpret_cast(engineBlob.data()), fsize); + SMP_RETVAL_IF_FALSE(engineFile.good(), "", false, err << "Error loading engine file: " << filepath); + auto const tEnd = std::chrono::high_resolution_clock::now(); + float const loadTime = std::chrono::duration(tEnd - tBegin).count(); + sample::gLogInfo << "Engine loaded in " << loadTime << " sec." 
<< std::endl; + sample::gLogInfo << "Loaded engine with size: " << (fsize / 1.0_MiB) << " MiB" << std::endl; + + env.engine.setBlob(std::move(engineBlob)); + + return true; +} + +bool printPlanVersion(BuildEnvironment& env, std::ostream& err) +{ + constexpr int64_t kPLAN_SIZE{28}; + std::vector data(kPLAN_SIZE); + auto blob = data.data(); + +#if (NV_TENSORRT_MAJOR > 8) + auto& reader = env.engine.getFileReader(); + if (reader.isOpen()) + { + SMP_RETVAL_IF_FALSE(reader.read(data.data(), kPLAN_SIZE) == kPLAN_SIZE, "Failed to read plan file", false, err); + } + else +#endif + { + SMP_RETVAL_IF_FALSE(env.engine.getBlob().data != nullptr, "Plan file is empty", false, err); + SMP_RETVAL_IF_FALSE(env.engine.getBlob().size >= 28, "Plan file is incorrect", false, err); + blob = static_cast(env.engine.getBlob().data); + } + auto blob32 = reinterpret_cast(blob); + + //! Correct TensorRT plan file starts with this tag + constexpr uint32_t kPLAN_FILE_TAG{0x74727466U}; + SMP_RETVAL_IF_FALSE(blob32[0] == kPLAN_FILE_TAG, "Failed to verify a plan tag.", false, err); + switch (blob32[1]) + { + case 0U: + { + // Blob index to store the plan version may depend on the serialization version. + sample::gLogInfo << "Plan was created with TensorRT version " << static_cast(blob[24]) + << "." << static_cast(blob[25]) << "." << static_cast(blob[26]) + << "." << static_cast(blob[27]) << std::endl; + return true; + } + } + sample::gLogError << "Serialization version is not supported." << std::endl; + return false; +} + +void dumpRefittable(nvinfer1::ICudaEngine& engine) +{ + std::unique_ptr refitter{createRefitter(engine)}; + if (refitter == nullptr) + { + sample::gLogError << "Failed to create a refitter." 
<< std::endl; + return; + } + + auto const& layerWeightsRolePair = getLayerWeightsRolePair(*refitter); + auto const& layerNames = layerWeightsRolePair.first; + auto const& weightsRoles = layerWeightsRolePair.second; + auto const nbAll = layerWeightsRolePair.first.size(); + for (size_t i = 0; i < nbAll; ++i) + { + sample::gLogInfo << layerNames[i] << " " << weightsRoles[i] << std::endl; + } +} + +ICudaEngine* loadEngine(std::string const& engine, int32_t DLACore, std::ostream& err) +{ + BuildEnvironment env(/* isSafe */ false, /* versionCompatible */ false, DLACore, "", getTempfileControlDefaults()); + return loadEngineToBuildEnv(engine, env, err) ? env.engine.release() : nullptr; +} + +bool saveEngine(const ICudaEngine& engine, std::string const& fileName, std::ostream& err) +{ + std::ofstream engineFile(fileName, std::ios::binary); + if (!engineFile) + { + err << "Cannot open engine file: " << fileName << std::endl; + return false; + } + + std::unique_ptr serializedEngine{engine.serialize()}; + if (serializedEngine == nullptr) + { + err << "Engine serialization failed" << std::endl; + return false; + } + + engineFile.write(static_cast(serializedEngine->data()), serializedEngine->size()); + return !engineFile.fail(); +} + +bool getEngineBuildEnv( + const ModelOptions& model, BuildOptions const& build, SystemOptions& sys, BuildEnvironment& env, std::ostream& err) +{ + bool createEngineSuccess{false}; + + if (build.load) + { + if (build.safe) + { + createEngineSuccess = loadEngineToBuildEnv(build.engine, env, err); + } + else + { + createEngineSuccess = loadStreamingEngineToBuildEnv(build.engine, env, err); + } + } + else + { + createEngineSuccess = modelToBuildEnv(model, build, sys, env, err); + } + + SMP_RETVAL_IF_FALSE(createEngineSuccess, "Failed to create engine from model or file.", false, err); + + if (build.getPlanVersionOnly && build.load) + { + SMP_RETVAL_IF_FALSE(printPlanVersion(env, err), "Failed to get plan file version.", false, err); + return true; + 
} + + if (build.save) + { + std::ofstream engineFile(build.engine, std::ios::binary); + auto& engineBlob = env.engine.getBlob(); + engineFile.write(static_cast(engineBlob.data), engineBlob.size); + SMP_RETVAL_IF_FALSE(!engineFile.fail(), "Saving engine to file failed.", false, err); + engineFile.flush(); + engineFile.close(); + if (!build.safe) + { + env.engine.releaseBlob(); + SMP_RETVAL_IF_FALSE(loadStreamingEngineToBuildEnv(build.engine, env, err), "Reading engine file failed.", false, err); + } + } + + return true; +} + +// There is not a getWeightsName API, so we need to use WeightsRole. +std::vector> getAllRefitWeightsForLayer(const ILayer& l) +{ + switch (l.getType()) + { + case LayerType::kCONSTANT: + { + auto const& layer = static_cast(l); + auto const weights = layer.getWeights(); + switch (weights.type) + { + case DataType::kFLOAT: + case DataType::kHALF: +#if (NV_TENSORRT_MAJOR > 8) + case DataType::kBF16: +#endif + case DataType::kINT8: + case DataType::kINT32: +#if (NV_TENSORRT_MAJOR > 8) + case DataType::kINT64: +#endif + return {std::make_pair(WeightsRole::kCONSTANT, weights)}; + case DataType::kBOOL: + case DataType::kUINT8: + case DataType::kFP8: +#if (NV_TENSORRT_MAJOR > 8) + case DataType::kINT4: +#endif + // Refit not supported for these types. 
+ break; + } + break; + } + case LayerType::kCONVOLUTION: + { + auto const& layer = static_cast(l); + return {std::make_pair(WeightsRole::kKERNEL, layer.getKernelWeights()), + std::make_pair(WeightsRole::kBIAS, layer.getBiasWeights())}; + } + case LayerType::kDECONVOLUTION: + { + auto const& layer = static_cast(l); + return {std::make_pair(WeightsRole::kKERNEL, layer.getKernelWeights()), + std::make_pair(WeightsRole::kBIAS, layer.getBiasWeights())}; + } + case LayerType::kSCALE: + { + auto const& layer = static_cast(l); + return {std::make_pair(WeightsRole::kSCALE, layer.getScale()), + std::make_pair(WeightsRole::kSHIFT, layer.getShift())}; + } + case LayerType::kACTIVATION: + case LayerType::kASSERTION: + case LayerType::kCAST: + case LayerType::kCONCATENATION: + case LayerType::kCONDITION: + case LayerType::kCONDITIONAL_INPUT: + case LayerType::kCONDITIONAL_OUTPUT: + case LayerType::kDEQUANTIZE: + case LayerType::kEINSUM: + case LayerType::kELEMENTWISE: + case LayerType::kFILL: + case LayerType::kGATHER: + case LayerType::kGRID_SAMPLE: + case LayerType::kIDENTITY: + case LayerType::kITERATOR: + case LayerType::kLOOP_OUTPUT: + case LayerType::kLRN: + case LayerType::kMATRIX_MULTIPLY: + case LayerType::kNMS: + case LayerType::kNON_ZERO: + case LayerType::kNORMALIZATION: + case LayerType::kONE_HOT: + case LayerType::kPADDING: + case LayerType::kPARAMETRIC_RELU: + case LayerType::kPLUGIN: + case LayerType::kPLUGIN_V2: +#if (NV_TENSORRT_MAJOR > 8) + case LayerType::kPLUGIN_V3: +#endif + case LayerType::kPOOLING: + case LayerType::kQUANTIZE: + case LayerType::kRAGGED_SOFTMAX: + case LayerType::kRECURRENCE: + case LayerType::kREDUCE: + case LayerType::kRESIZE: + case LayerType::kREVERSE_SEQUENCE: + case LayerType::kSCATTER: + case LayerType::kSELECT: + case LayerType::kSHAPE: + case LayerType::kSHUFFLE: + case LayerType::kSLICE: + case LayerType::kSOFTMAX: + case LayerType::kTOPK: + case LayerType::kTRIP_LIMIT: + case LayerType::kUNARY: return {}; + } + return {}; +} + 
+bool timeRefit(INetworkDefinition const& network, nvinfer1::ICudaEngine& engine, bool multiThreading) +{ + using time_point = std::chrono::time_point; + using durationMs = std::chrono::duration; + + auto const nbLayers = network.getNbLayers(); + std::unique_ptr refitter{createRefitter(engine)}; + // Set max threads that can be used by refitter. + if (multiThreading && !refitter->setMaxThreads(10)) + { + sample::gLogError << "Failed to set max threads to refitter." << std::endl; + return false; + } + auto const& layerWeightsRolePair = getLayerWeightsRolePair(*refitter); + // We use std::string instead of char const* since we can have copies of layer names. + std::set> layerRoleSet; + + auto const& layerNames = layerWeightsRolePair.first; + auto const& weightsRoles = layerWeightsRolePair.second; + + std::transform(layerNames.begin(), layerNames.end(), weightsRoles.begin(), + std::inserter(layerRoleSet, layerRoleSet.begin()), + [](std::string const& layerName, WeightsRole const role) { return std::make_pair(layerName, role); }); + + auto const isRefittable = [&layerRoleSet](char const* layerName, WeightsRole const role) { + return layerRoleSet.find(std::make_pair(layerName, role)) != layerRoleSet.end(); + }; + + auto const setWeights = [&] { + for (int32_t i = 0; i < nbLayers; i++) + { + auto const layer = network.getLayer(i); + auto const roleWeightsVec = getAllRefitWeightsForLayer(*layer); + for (auto const& roleWeights : roleWeightsVec) + { + if (isRefittable(layer->getName(), roleWeights.first)) + { + bool const success = refitter->setWeights(layer->getName(), roleWeights.first, roleWeights.second); + if (!success) + { + return false; + } + } + } + } + return true; + }; + + auto const reportMissingWeights = [&] { + auto const& missingPair = getMissingLayerWeightsRolePair(*refitter); + auto const& layerNames = missingPair.first; + auto const& weightsRoles = missingPair.second; + for (size_t i = 0; i < layerNames.size(); ++i) + { + sample::gLogError << "Missing (" 
<< layerNames[i] << ", " << weightsRoles[i] << ") for refitting." + << std::endl; + } + return layerNames.empty(); + }; +#if (NV_TENSORRT_MAJOR > 8) + // Skip weights validation since we are confident that the new weights are similar to the weights used to build engine. + refitter->setWeightsValidation(false); +#endif + // Warm up and report missing weights + // We only need to set weights for the first time and that can be reused in later refitting process. + bool const success = setWeights() && reportMissingWeights() && refitter->refitCudaEngine(); + if (!success) + { + return false; + } + + time_point const refitStartTime{std::chrono::steady_clock::now()}; + constexpr int32_t kLOOP = 10; +#if (NV_TENSORRT_MAJOR > 8) + TrtCudaStream stream; + { + for (int32_t l = 0; l < kLOOP; l++) + { + if (!refitter->refitCudaEngineAsync(stream.get())) + { + return false; + } + } + } + stream.synchronize(); +#endif + time_point const refitEndTime{std::chrono::steady_clock::now()}; + + sample::gLogInfo << "Engine refitted" + << " in " << durationMs(refitEndTime - refitStartTime).count() / kLOOP << " ms." << std::endl; + return true; +} + +namespace +{ +void* initSafeRuntime() +{ + void* handle{nullptr}; + // libsafe_executor.so will be renamed to libnvinfer_safe.so when TRTS-9421 completes. + // Currently libsafe_executor_debug.so for samplesCommon::isDebug() is not ready. 
+#define TRTS_9421_COMPLETED 0 +#if TRTS_9421_COMPLETED +#if !defined(_WIN32) + std::string const dllName{"libsafe_executor.so"}; +#if SANITIZER_BUILD + handle = dlopen(dllName.c_str(), RTLD_LAZY | RTLD_NODELETE); +#else + // RTLD_GLOBAL is used for symbol resolution of subsequently loaded plugin libraries + handle = dlopen(dllName.c_str(), RTLD_LAZY | RTLD_GLOBAL); +#endif +#endif +#endif // TRTS_9421_COMPLETED + return handle; +} + +#if !defined(_WIN32) +struct DllDeleter +{ + void operator()(void* handle) + { + if (handle != nullptr) + { + dlclose(handle); + } + } +}; +const std::unique_ptr safeRuntimeLibrary{initSafeRuntime()}; +#endif +} // namespace + +bool hasSafeRuntime() +{ + bool ret{false}; +#if !defined(_WIN32) + ret = (safeRuntimeLibrary != nullptr); +#endif + return ret; +} + +} // namespace sample diff --git a/src/Detector/tensorrt_onnx/common/sampleEngines.h b/src/Detector/tensorrt_onnx/common/sampleEngines.h new file mode 100644 index 000000000..e6f532acc --- /dev/null +++ b/src/Detector/tensorrt_onnx/common/sampleEngines.h @@ -0,0 +1,326 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef TRT_SAMPLE_ENGINES_H +#define TRT_SAMPLE_ENGINES_H + +#include "NvInfer.h" +#include "NvOnnxParser.h" +#include "sampleOptions.h" +#include "sampleUtils.h" +#include "streamReader.h" +#include +#include + +namespace sample +{ + +struct Parser +{ + std::unique_ptr onnxParser; + + operator bool() const + { + return onnxParser != nullptr; + } +}; + +//! +//! \brief Helper struct to faciliate engine serialization and deserialization. It does not own the underlying memory. +//! +struct EngineBlob +{ + EngineBlob(void* engineData, size_t engineSize) + : data(engineData) + , size(engineSize) + { + } + void* data{}; + size_t size{}; + bool empty() const + { + return size == 0; + } +}; + +//! +//! \brief A helper class to hold a serialized engine (std or safe) and only deserialize it when being accessed. +//! +class LazilyDeserializedEngine +{ +public: + //! + //! \brief Delete default constructor to make sure isSafe and DLACore are always set. + //! + LazilyDeserializedEngine() = delete; + + //! + //! \brief Constructor of LazilyDeserializedEngine. + //! + LazilyDeserializedEngine(bool isSafe, bool versionCompatible, int32_t DLACore, std::string const& tempdir, + nvinfer1::TempfileControlFlags tempfileControls, std::string const& leanDLLPath) + : mIsSafe(isSafe) + , mVersionCompatible(versionCompatible) + , mDLACore(DLACore) + , mTempdir(tempdir) + , mTempfileControls(tempfileControls) + , mLeanDLLPath(leanDLLPath) + { +#if (NV_TENSORRT_MAJOR > 8) + mFileReader = std::make_unique(); +#endif + } + + //! + //! \brief Move from another LazilyDeserializedEngine. + //! + LazilyDeserializedEngine(LazilyDeserializedEngine&& other) = default; + + //! + //! \brief Delete copy constructor. + //! + LazilyDeserializedEngine(LazilyDeserializedEngine const& other) = delete; + + //! + //! \brief Get the pointer to the ICudaEngine. Triggers deserialization if not already done so. + //! + nvinfer1::ICudaEngine* get(); + + //! + //! 
\brief Get the pointer to the ICudaEngine and release the ownership. + //! + nvinfer1::ICudaEngine* release(); + + //! + //! \brief Get the underlying blob storing serialized engine. + //! + EngineBlob const getBlob() const + { +#if (NV_TENSORRT_MAJOR > 8) + ASSERT((!mFileReader || !mFileReader->isOpen()) + && "Attempting to access the glob when there is an open file reader!"); +#endif + if (!mEngineBlob.empty()) + { + return EngineBlob{const_cast(static_cast(mEngineBlob.data())), mEngineBlob.size()}; + } + if (mEngineBlobHostMemory.get() != nullptr && mEngineBlobHostMemory->size() > 0) + { + return EngineBlob{mEngineBlobHostMemory->data(), mEngineBlobHostMemory->size()}; + } + ASSERT(false && "Attempting to access an empty engine!"); + return EngineBlob{nullptr, 0}; + } + + //! + //! \brief Set the underlying blob storing the serialized engine without duplicating IHostMemory. + //! + void setBlob(std::unique_ptr& data) + { + ASSERT(data.get() && data->size() > 0); + mEngineBlobHostMemory = std::move(data); + mEngine.reset(); + } + + //! + //! \brief Set the underlying blob storing the serialized engine without duplicating vector memory. + //! + void setBlob(std::vector&& engineBlob) + { + mEngineBlob = std::move(engineBlob); + mEngine.reset(); + } + + //! + //! \brief Release the underlying blob without deleting the deserialized engine. + //! + void releaseBlob() + { + mEngineBlob.clear(); + mEngineBlobHostMemory.reset(); + } +#if (NV_TENSORRT_MAJOR > 8) + //! + //! \brief Get the file stream reader used for deserialization + //! + samplesCommon::FileStreamReader& getFileReader() + { + ASSERT(mFileReader); + return *mFileReader; + } +#endif + //! + //! \brief Get if safe mode is enabled. + //! 
+ bool isSafe() + { + return mIsSafe; + } + + void setDynamicPlugins(std::vector const& dynamicPlugins) + { + mDynamicPlugins = dynamicPlugins; + } + +private: + bool mIsSafe{false}; + bool mVersionCompatible{false}; + int32_t mDLACore{-1}; + std::vector mEngineBlob; +#if (NV_TENSORRT_MAJOR > 8) + std::unique_ptr mFileReader; +#endif + // Directly use the host memory of a serialized engine instead of duplicating the engine in CPU memory. + std::unique_ptr mEngineBlobHostMemory; + + std::string mTempdir{}; + nvinfer1::TempfileControlFlags mTempfileControls{getTempfileControlDefaults()}; + std::string mLeanDLLPath{}; + std::vector mDynamicPlugins; + + //! \name Owned TensorRT objects + //! Per TensorRT object lifetime requirements as outlined in the developer guide, + //! the runtime must remain live while any engines created by the runtime are live. + //! DO NOT ADJUST the declaration order here: runtime -> (engine). + //! Destruction occurs in reverse declaration order: (engine) -> runtime. + //!@{ + + //! The runtime used to track parent of mRuntime if one exists. + //! Needed to load mRuntime if lean.so is supplied through file system path. + std::unique_ptr mParentRuntime{}; + + //! The runtime that is used to deserialize the engine. + std::unique_ptr mRuntime{}; + + //! If mIsSafe is false, this points to the deserialized std engine + std::unique_ptr mEngine{}; + + //!@} +}; + +struct BuildEnvironment +{ + BuildEnvironment() = delete; + BuildEnvironment(BuildEnvironment const& other) = delete; + BuildEnvironment(BuildEnvironment&& other) = delete; + BuildEnvironment(bool isSafe, bool versionCompatible, int32_t DLACore, std::string const& tempdir, + nvinfer1::TempfileControlFlags tempfileControls, std::string const& leanDLLPath = "") + : engine(isSafe, versionCompatible, DLACore, tempdir, tempfileControls, leanDLLPath) + { + } + + //! \name Owned TensorRT objects + //! Per TensorRT object lifetime requirements as outlined in the developer guide, + //! 
factory objects must remain live while the objects created by those factories + //! are live (with the exception of builder -> engine). + //! DO NOT ADJUST the declaration order here: builder -> network -> parser. + //! Destruction occurs in reverse declaration order: parser -> network -> builder. + //!@{ + + //! The builder used to build the engine. + std::unique_ptr builder; + + //! The network used by the builder. + std::unique_ptr network; + + //! The parser used to specify the network. + Parser parser; + + //! The engine. + LazilyDeserializedEngine engine; + //!@} +}; + +//! +//! \brief Set up network and config +//! +//! \return boolean Return true if network and config were successfully set +//! +bool setupNetworkAndConfig(const BuildOptions& build, const SystemOptions& sys, nvinfer1::IBuilder& builder, + nvinfer1::INetworkDefinition& network, nvinfer1::IBuilderConfig& config, std::ostream& err, + std::vector>& sparseWeights); + +//! +//! \brief Log refittable layers and weights of a refittable engine +//! +void dumpRefittable(nvinfer1::ICudaEngine& engine); + +//! +//! \brief Load a serialized engine +//! +//! \return Pointer to the engine loaded or nullptr if the operation failed +//! +nvinfer1::ICudaEngine* loadEngine(std::string const& engine, int32_t DLACore, std::ostream& err); + +//! +//! \brief Save an engine into a file +//! +//! \return boolean Return true if the engine was successfully saved +//! +bool saveEngine(nvinfer1::ICudaEngine const& engine, std::string const& fileName, std::ostream& err); + +//! +//! \brief Create an engine from model or serialized file, and optionally save engine +//! +//! \return Pointer to the engine created or nullptr if the creation failed +//! +bool getEngineBuildEnv( + ModelOptions const& model, BuildOptions const& build, SystemOptions& sys, BuildEnvironment& env, std::ostream& err); + +//! +//! \brief Create a serialized network +//! +//! \return Pointer to a host memory for a serialized network +//! 
+nvinfer1::IHostMemory* networkToSerialized(const BuildOptions& build, const SystemOptions& sys, + nvinfer1::IBuilder& builder, nvinfer1::INetworkDefinition& network, std::ostream& err); + +//! +//! \brief Tranfer model to a serialized network +//! +//! \return Pointer to a host memory for a serialized network +//! +nvinfer1::IHostMemory* modelToSerialized( + const ModelOptions& model, const BuildOptions& build, const SystemOptions& sys, std::ostream& err); + +//! +//! \brief Serialize network and save it into a file +//! +//! \return boolean Return true if the network was successfully serialized and saved +//! +bool serializeAndSave( + const ModelOptions& model, const BuildOptions& build, const SystemOptions& sys, std::ostream& err); + +bool timeRefit(const nvinfer1::INetworkDefinition& network, nvinfer1::ICudaEngine& engine, bool multiThreading); + +//! +//! \brief Set tensor scales from a calibration table +//! +void setTensorScalesFromCalibration(nvinfer1::INetworkDefinition& network, std::vector const& inputFormats, + std::vector const& outputFormats, std::string const& calibrationFile); + +//! +//! \brief Check if safe runtime is loaded. +//! +bool hasSafeRuntime(); + +bool loadStreamingEngineToBuildEnv(std::string const& engine, BuildEnvironment& env, std::ostream& err); + +bool loadEngineToBuildEnv(std::string const& engine, BuildEnvironment& env, std::ostream& err); +} // namespace sample + +#endif // TRT_SAMPLE_ENGINES_H diff --git a/src/Detector/tensorrt_onnx/common/sampleEntrypoints.h b/src/Detector/tensorrt_onnx/common/sampleEntrypoints.h new file mode 100644 index 000000000..cc8bf1b9b --- /dev/null +++ b/src/Detector/tensorrt_onnx/common/sampleEntrypoints.h @@ -0,0 +1,101 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef TRT_SAMPLE_ENTRYPOINTS_H +#define TRT_SAMPLE_ENTRYPOINTS_H + +//! \file sampleEntrypoints.h +//! +//! Declares and conditionally defines entrypoints needed to create base TensorRT objects, depending +//! on whether the given sample uses TRT at link time or dynamically. Since common code is built once +//! and shared across all samples (both link-time and dynamic TRT), it does not define these entrypoints, +//! so each sample must define them individually. +//! +//! Samples that use TRT at link time can define DEFINE_TRT_ENTRYPOINTS before including this header to +//! pick up the definitions here. 
+ +#include "NvInfer.h" +#include "NvOnnxParser.h" +#include "logger.h" + +extern nvinfer1::IBuilder* createBuilder(); +extern nvinfer1::IRuntime* createRuntime(); +extern nvinfer1::IRefitter* createRefitter(nvinfer1::ICudaEngine& engine); + +extern nvonnxparser::IParser* createONNXParser(nvinfer1::INetworkDefinition& network); + +#if !defined(DEFINE_TRT_ENTRYPOINTS) +#define DEFINE_TRT_ENTRYPOINTS 0 +#endif + +// Allow opting out of individual entrypoints that are unused by the sample +#if !defined(DEFINE_TRT_BUILDER_ENTRYPOINT) +#define DEFINE_TRT_BUILDER_ENTRYPOINT 1 +#endif +#if !defined(DEFINE_TRT_RUNTIME_ENTRYPOINT) +#define DEFINE_TRT_RUNTIME_ENTRYPOINT 1 +#endif +#if !defined(DEFINE_TRT_REFITTER_ENTRYPOINT) +#define DEFINE_TRT_REFITTER_ENTRYPOINT 1 +#endif +#if !defined(DEFINE_TRT_ONNX_PARSER_ENTRYPOINT) +#define DEFINE_TRT_ONNX_PARSER_ENTRYPOINT 1 +#endif +#if !defined(DEFINE_TRT_LEGACY_PARSER_ENTRYPOINT) +#define DEFINE_TRT_LEGACY_PARSER_ENTRYPOINT 1 +#endif + +#if DEFINE_TRT_ENTRYPOINTS +nvinfer1::IBuilder* createBuilder() +{ +#if DEFINE_TRT_BUILDER_ENTRYPOINT + return nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()); +#else + return {}; +#endif +} + +nvinfer1::IRuntime* createRuntime() +{ +#if DEFINE_TRT_RUNTIME_ENTRYPOINT + return nvinfer1::createInferRuntime(sample::gLogger.getTRTLogger()); +#else + return {}; +#endif +} + +nvinfer1::IRefitter* createRefitter(nvinfer1::ICudaEngine& engine) +{ +#if DEFINE_TRT_REFITTER_ENTRYPOINT + return nvinfer1::createInferRefitter(engine, sample::gLogger.getTRTLogger()); +#else + return {}; +#endif +} + +nvonnxparser::IParser* createONNXParser(nvinfer1::INetworkDefinition& network) +{ +#if DEFINE_TRT_ONNX_PARSER_ENTRYPOINT + return nvonnxparser::createParser(network, sample::gLogger.getTRTLogger()); +#else + return {}; +#endif +} + +#endif // DEFINE_TRT_ENTRYPOINTS + +#endif // TRT_SAMPLE_ENTRYPOINTS_H diff --git a/src/Detector/tensorrt_onnx/common/sampleInference.cpp 
b/src/Detector/tensorrt_onnx/common/sampleInference.cpp new file mode 100644 index 000000000..b131ca326 --- /dev/null +++ b/src/Detector/tensorrt_onnx/common/sampleInference.cpp @@ -0,0 +1,1667 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(__QNX__) +#include +#include +#endif + +#include "NvInfer.h" + +#include "ErrorRecorder.h" +#include "bfloat16.h" +#include "logger.h" +#include "sampleDevice.h" +#include "sampleEngines.h" +#include "sampleInference.h" +#include "sampleOptions.h" +#include "sampleReporting.h" +#include "sampleUtils.h" +#include +using namespace nvinfer1; +namespace sample +{ + +template +bool validateTensorNames(TMapType const& map, TEngineType const* engine, int32_t const endBindingIndex) +{ + // Check if the provided input tensor names match the input tensors of the engine. + // Throw an error if the provided input tensor names cannot be found because it implies a potential typo. 
+ for (auto const& item : map) + { + bool tensorNameFound{false}; + for (int32_t b = 0; b < endBindingIndex; ++b) + { + auto const tensorName = engine->getIOTensorName(b); + auto const tensorIOMode = engine->getTensorIOMode(tensorName); + if (tensorIOMode == nvinfer1::TensorIOMode::kINPUT && matchStringWithOneWildcard(item.first, tensorName)) + { + tensorNameFound = true; + break; + } + } + if (!tensorNameFound) + { + sample::gLogError << "Cannot find input tensor with name \"" << item.first << "\" in the engine bindings! " + << "Please make sure the input tensor names are correct." << std::endl; + return false; + } + } + return true; +} + +template +class FillBindingClosure +{ +private: + using InputsMap = std::unordered_map; + using BindingsVector = std::vector>; + + TEngineType const* mEngine; + nvinfer1::IExecutionContext const* mContext; + InputsMap const& inputs; + BindingsVector& bindings; + int32_t batch; + int32_t endBindingIndex; + int32_t profileIndex; + + void fillOneBinding(TensorInfo const& tensorInfo) + { + auto const name = tensorInfo.name; + auto const* bindingInOutStr = tensorInfo.isInput ? "Input" : "Output"; + for (auto& binding : bindings) + { + auto const input = findPlausible(inputs, name); + if (tensorInfo.isInput && input != inputs.end()) + { + sample::gLogInfo << "Using values loaded from " << input->second << " for input " << name << std::endl; + binding->addBinding(tensorInfo, input->second); + } + else + { + if (tensorInfo.isInput) + { + sample::gLogInfo << "Using random values for input " << name << std::endl; + } + binding->addBinding(tensorInfo); + } + if (tensorInfo.isDynamic) + { + sample::gLogInfo << bindingInOutStr << " binding for " << name + << " is dynamic and will be created during execution using OutputAllocator." + << std::endl; + } + else + { + sample::gLogInfo << bindingInOutStr << " binding for " << name << " with dimensions " << tensorInfo.dims + << " is created." 
<< std::endl; + } + } + } + + bool fillAllBindings(int32_t batch, int32_t endBindingIndex) + { + if (!validateTensorNames(inputs, mEngine, endBindingIndex)) + { + sample::gLogError << "Invalid tensor names found in --loadInputs flag." << std::endl; + return false; + } + for (int32_t b = 0; b < endBindingIndex; b++) + { + TensorInfo tensorInfo; + tensorInfo.bindingIndex = b; + getTensorInfo(tensorInfo); + tensorInfo.updateVolume(batch); + fillOneBinding(tensorInfo); + } + return true; + } + + void getTensorInfo(TensorInfo& tensorInfo); + +public: + FillBindingClosure(TEngineType const* _engine, nvinfer1::IExecutionContext const* _context, + InputsMap const& _inputs, BindingsVector& _bindings, int32_t _batch, int32_t _endBindingIndex, + int32_t _profileIndex) + : mEngine(_engine) + , mContext(_context) + , inputs(_inputs) + , bindings(_bindings) + , batch(_batch) + , endBindingIndex(_endBindingIndex) + , profileIndex(_profileIndex) + { + } + + bool operator()() + { + return fillAllBindings(batch, endBindingIndex); + } +}; + +template <> +void FillBindingClosure::getTensorInfo(TensorInfo& tensorInfo) +{ + auto const b = tensorInfo.bindingIndex; + auto const name = mEngine->getIOTensorName(b); + tensorInfo.name = name; + tensorInfo.dims = mContext->getTensorShape(name); + tensorInfo.isDynamic = std::any_of( + tensorInfo.dims.d, tensorInfo.dims.d + tensorInfo.dims.nbDims, [](int32_t dim) { return dim == -1; }); + tensorInfo.comps = mEngine->getTensorComponentsPerElement(name, profileIndex); + tensorInfo.strides = mContext->getTensorStrides(name); + tensorInfo.vectorDimIndex = mEngine->getTensorVectorizedDim(name, profileIndex); + tensorInfo.isInput = mEngine->getTensorIOMode(name) == TensorIOMode::kINPUT; + tensorInfo.dataType = mEngine->getTensorDataType(name); +} + +namespace +{ +bool allocateContextMemory(InferenceEnvironment& iEnv, InferenceOptions const& inference) +{ + auto* engine = iEnv.engine.get(); + iEnv.deviceMemory.resize(inference.infStreams); + // Delay 
context memory allocation until input shapes are specified because runtime allocation would require actual + // input shapes. + for (int32_t i = 0; i < inference.infStreams; ++i) + { + auto const& ec = iEnv.contexts.at(i); + if (inference.memoryAllocationStrategy == MemoryAllocationStrategy::kSTATIC) + { + sample::gLogInfo << "Created execution context with device memory size: " + << (engine->getDeviceMemorySize() / 1.0_MiB) << " MiB" << std::endl; + } + else + { +#if (NV_TENSORRT_MAJOR > 8) + size_t sizeToAlloc{0}; + const char* allocReason{nullptr}; + if (inference.memoryAllocationStrategy == MemoryAllocationStrategy::kPROFILE) + { + auto const p = inference.optProfileIndex; + sizeToAlloc = engine->getDeviceMemorySizeForProfile(p); + allocReason = "current profile"; + } + else if (inference.memoryAllocationStrategy == MemoryAllocationStrategy::kRUNTIME) + { + sizeToAlloc = ec->updateDeviceMemorySizeForShapes(); + allocReason = "current input shapes"; + } + else + { + sample::gLogError << "Unrecognizable memory allocation strategy." 
<< std::endl; + return false; + } + iEnv.deviceMemory.at(i) = TrtDeviceBuffer(sizeToAlloc); + ec->setDeviceMemoryV2(iEnv.deviceMemory.at(i).get(), iEnv.deviceMemory.at(i).getSize()); + sample::gLogInfo << "Maximum device memory size across all profiles: " + << (engine->getDeviceMemorySizeV2() / 1.0_MiB) << " MiB" << std::endl; + sample::gLogInfo << "Only allocated device memory enough for " << allocReason << ": " + << (sizeToAlloc / 1.0_MiB) << " MiB" << std::endl; +#else + std::cerr << "setDeviceMemoryV2 worked only in TensorRT 10.x and higher" << std::endl; +#endif + } + } + return true; +} +} // namespace + +bool setUpInference(InferenceEnvironment& iEnv, InferenceOptions const& inference, SystemOptions const& system) +{ +#if TRT_WINML + int32_t const isIntegrated{}; +#else + int32_t device{}; + cudaCheck(cudaGetDevice(&device)); + + cudaDeviceProp properties; + cudaCheck(cudaGetDeviceProperties(&properties, device)); + int32_t const isIntegrated{properties.integrated}; +#endif + // Use managed memory on integrated devices when transfers are skipped + // and when it is explicitly requested on the commandline. + bool useManagedMemory{(inference.skipTransfers && isIntegrated) || inference.useManaged}; + SMP_RETVAL_IF_FALSE(!iEnv.safe, "Safe inference is not supported!", false, sample::gLogError); + + using FillStdBindings = FillBindingClosure; + + auto* engine = iEnv.engine.get(); + SMP_RETVAL_IF_FALSE(engine != nullptr, "Got invalid engine!", false, sample::gLogError); + + // Release serialized blob to save memory space. + iEnv.engine.releaseBlob(); + +#if (NV_TENSORRT_MAJOR > 8) + // Setup weight streaming if enabled + if (engine->getStreamableWeightsSize() > 0) + { + auto const& budget = inference.weightStreamingBudget; + int64_t wsBudget = budget.bytes; + if (budget.percent != 100.0) + { + double const percent = budget.percent; + ASSERT(percent < 100.0); + auto const max = engine->getStreamableWeightsSize(); + wsBudget = (max >= 0) ? 
(percent / 100) * (max) : WeightStreamingBudget::kDISABLE; + } + + if (wsBudget == WeightStreamingBudget::kDISABLE) + { + wsBudget = engine->getStreamableWeightsSize(); + } + else if (wsBudget == WeightStreamingBudget::kAUTOMATIC) + { + wsBudget = engine->getWeightStreamingAutomaticBudget(); + } + ASSERT(wsBudget >= 0); + bool success = engine->setWeightStreamingBudgetV2(wsBudget); + SMP_RETVAL_IF_FALSE(success, "Failed to set weight streaming limit!", false, sample::gLogError); + switch (wsBudget) + { + case WeightStreamingBudget::kDISABLE: + { + sample::gLogInfo << "Weight streaming has been disabled at runtime." << std::endl; + break; + } + + case WeightStreamingBudget::kAUTOMATIC: + { + sample::gLogInfo << "The weight streaming budget will automatically be chosen by TensorRT." << std::endl; + break; + } + default: + { + sample::gLogInfo << "Weight streaming is enabled with a device memory limit of " << wsBudget << " bytes." + << std::endl; + break; + } + } + } +#endif + + int32_t const nbOptProfiles = engine->getNbOptimizationProfiles(); + + if (inference.optProfileIndex >= nbOptProfiles) + { + sample::gLogError << "Selected profile index " << inference.optProfileIndex + << " exceeds the number of profiles that the engine holds. " << std::endl; + return false; + } + + if (nbOptProfiles > 1 && !inference.setOptProfile) + { + sample::gLogWarning << nbOptProfiles + << " profiles detected but not set. Running with profile 0. Please use " + "--dumpOptimizationProfile to see all available profiles." + << std::endl; + } + + cudaStream_t setOptProfileStream; + CHECK(cudaStreamCreate(&setOptProfileStream)); + + for (int32_t s = 0; s < inference.infStreams; ++s) + { + IExecutionContext* ec{nullptr}; + if (inference.memoryAllocationStrategy == MemoryAllocationStrategy::kSTATIC) + { + // Let TRT pre-allocate and manage the memory. 
+ ec = engine->createExecutionContext(); + } +#if (NV_TENSORRT_MAJOR > 8) + else + { + // Allocate based on the current profile or runtime shapes. + ec = engine->createExecutionContext(ExecutionContextAllocationStrategy::kUSER_MANAGED); + } +#endif + if (ec == nullptr) + { + sample::gLogError << "Unable to create execution context for stream " << s << "." << std::endl; + return false; + } + ec->setNvtxVerbosity(inference.nvtxVerbosity); + +#if !TRT_WINML + int32_t const persistentCacheLimit + = samplesCommon::getMaxPersistentCacheSize() * inference.persistentCacheRatio; + sample::gLogInfo << "Setting persistentCacheLimit to " << persistentCacheLimit << " bytes." << std::endl; + ec->setPersistentCacheLimit(persistentCacheLimit); +#endif + + auto setProfile = ec->setOptimizationProfileAsync(inference.optProfileIndex, setOptProfileStream); + CHECK(cudaStreamSynchronize(setOptProfileStream)); + + if (!setProfile) + { + sample::gLogError << "Set optimization profile failed. " << std::endl; + if (inference.infStreams > 1) + { + sample::gLogError + << "Please ensure that the engine is built with preview feature profileSharing0806 enabled. " + << std::endl; + } + return false; + } + + iEnv.contexts.emplace_back(ec); + iEnv.bindings.emplace_back(new Bindings(useManagedMemory)); + } + + CHECK(cudaStreamDestroy(setOptProfileStream)); + + if (iEnv.profiler) + { + iEnv.contexts.front()->setProfiler(iEnv.profiler.get()); + // Always run reportToProfiler() after enqueue launch + iEnv.contexts.front()->setEnqueueEmitsProfile(false); + } + + int32_t const endBindingIndex = engine->getNbIOTensors(); + + // Make sure that the tensor names provided in command-line args actually exist in any of the engine bindings + // to avoid silent typos. + if (!validateTensorNames(inference.shapes, engine, endBindingIndex)) + { + sample::gLogError << "Invalid tensor names found in --shapes flag." 
<< std::endl; + return false; + } + + for (int32_t b = 0; b < endBindingIndex; ++b) + { + auto const& name = engine->getIOTensorName(b); + auto const& mode = engine->getTensorIOMode(name); + if (mode == TensorIOMode::kINPUT) + { + Dims const dims = iEnv.contexts.front()->getTensorShape(name); + bool isShapeInferenceIO{false}; + isShapeInferenceIO = engine->isShapeInferenceIO(name); + bool const hasRuntimeDim = std::any_of(dims.d, dims.d + dims.nbDims, [](int32_t dim) { return dim == -1; }); + auto const shape = findPlausible(inference.shapes, name); + if (hasRuntimeDim || isShapeInferenceIO) + { + // Set shapeData to either dimensions of the input (if it has a dynamic shape) + // or set to values of the input (if it is an input shape tensor). + std::vector shapeData; + + if (shape == inference.shapes.end()) + { + // No information provided. Use default value for missing data. + constexpr int32_t kDEFAULT_VALUE = 1; + if (isShapeInferenceIO) + { + // Set shape tensor to all ones. + shapeData.assign(volume(dims, 0, dims.nbDims), kDEFAULT_VALUE); + sample::gLogWarning << "Values missing for input shape tensor: " << name + << "Automatically setting values to: " << shapeData << std::endl; + } + else + { + // Use default value for unspecified runtime dimensions. + shapeData.resize(dims.nbDims); + std::transform(dims.d, dims.d + dims.nbDims, shapeData.begin(), + [&](int32_t dimension) { return dimension >= 0 ? dimension : kDEFAULT_VALUE; }); + sample::gLogWarning << "Shape missing for input with dynamic shape: " << name + << "Automatically setting shape to: " << shapeData << std::endl; + } + } + else if (inference.inputs.count(shape->first) && isShapeInferenceIO) + { + // Load shape tensor from file. 
+ int64_t const size = volume(dims, 0, dims.nbDims); + shapeData.resize(size); + auto const& filename = inference.inputs.at(shape->first); + auto dst = reinterpret_cast(shapeData.data()); + loadFromFile(filename, dst, size * sizeof(decltype(shapeData)::value_type)); + } + else + { + shapeData = shape->second; + } + + int32_t* shapeTensorData{nullptr}; + if (isShapeInferenceIO) + { + // Save the data in iEnv, in a way that it's address does not change + // before enqueueV3 is called. + iEnv.inputShapeTensorValues.emplace_back(shapeData); + shapeTensorData = iEnv.inputShapeTensorValues.back().data(); + } + + for (auto& c : iEnv.contexts) + { + if (isShapeInferenceIO) + { + sample::gLogInfo << "Set input shape tensor " << name << " to: " << shapeData << std::endl; + if (!c->setTensorAddress(name, shapeTensorData)) + { + return false; + } + } + else + { + sample::gLogInfo << "Set shape of input tensor " << name << " to: " << shapeData + << std::endl; + if (!c->setInputShape(name, toDims(shapeData))) + { + return false; + } + } + } + } + else if (nbOptProfiles && shape != inference.shapes.end()) + { + // Check if the provided shape matches the static dimensions in the engine. + for (auto& c : iEnv.contexts) + { + if (!c->setInputShape(name, toDims(shape->second))) + { + sample::gLogError << "The engine was built with static shapes for input tensor " << name + << " but the provided shapes do not match the static shapes!" << std::endl; + return false; + } + } + } + } + } +#if (NV_TENSORRT_MAJOR > 8) + // Create Debug Listener and turn on debug states if client requested dumping debug tensors. + if (!inference.debugTensorFileNames.empty()) + { + iEnv.listener.reset(new DebugTensorWriter(inference.debugTensorFileNames)); + iEnv.contexts.front()->setDebugListener(iEnv.listener.get()); + for (auto const& s : inference.debugTensorFileNames) + { + iEnv.contexts.front()->setTensorDebugState(s.first.c_str(), true); + } + } +#else + std::cerr << "Can not setDebugListener. 
Use TensorRT 10.x or higher" << std::endl; +#endif + + if (!allocateContextMemory(iEnv, inference)) + { + return false; + } + + auto const* context = iEnv.contexts.front().get(); + return FillStdBindings( + engine, context, inference.inputs, iEnv.bindings, 1, endBindingIndex, inference.optProfileIndex)(); +} + +TaskInferenceEnvironment::TaskInferenceEnvironment( + std::string engineFile, InferenceOptions inference, int32_t deviceId, int32_t DLACore, int32_t bs) + : iOptions(inference) + , device(deviceId) + , batch(bs) +{ + BuildEnvironment bEnv(/* isSafe */ false, /* versionCompatible */ false, DLACore, "", getTempfileControlDefaults()); + loadEngineToBuildEnv(engineFile, bEnv, sample::gLogError); + std::unique_ptr tmp(new InferenceEnvironment(bEnv)); + iEnv = std::move(tmp); + + cudaCheck(cudaSetDevice(device)); + SystemOptions system{}; + system.device = device; + system.DLACore = DLACore; + if (!setUpInference(*iEnv, iOptions, system)) + { + sample::gLogError << "Inference set up failed" << std::endl; + } +} +namespace +{ + +#if defined(__QNX__) +using TimePoint = double; +#else +using TimePoint = std::chrono::time_point; +#endif + +TimePoint getCurrentTime() +{ +#if defined(__QNX__) + uint64_t const currentCycles = ClockCycles(); + uint64_t const cyclesPerSecond = SYSPAGE_ENTRY(qtime)->cycles_per_sec; + // Return current timestamp in ms. + return static_cast(currentCycles) * 1000. / cyclesPerSecond; +#else + return std::chrono::high_resolution_clock::now(); +#endif +} + +//! +//! \struct SyncStruct +//! \brief Threads synchronization structure +//! +struct SyncStruct +{ + std::mutex mutex; + TrtCudaStream mainStream; + TrtCudaEvent gpuStart{cudaEventBlockingSync}; + TimePoint cpuStart{}; + float sleep{}; +}; + +struct Enqueue +{ + explicit Enqueue(nvinfer1::IExecutionContext& context) + : mContext(context) + { + } + + nvinfer1::IExecutionContext& mContext; +}; + +//! +//! \class EnqueueExplicit +//! \brief Functor to enqueue inference with explict batch +//! 
+class EnqueueExplicit : private Enqueue +{ + +public: + explicit EnqueueExplicit(nvinfer1::IExecutionContext& context, Bindings const& bindings) + : Enqueue(context) + , mBindings(bindings) + { + ASSERT(mBindings.setTensorAddresses(mContext)); + } + + bool operator()(TrtCudaStream& stream) const + { + try + { + bool const result = mContext.enqueueV3(stream.get()); + // Collecting layer timing info from current profile index of execution context, except under capturing + // mode. + if (!isStreamCapturing(stream) && mContext.getProfiler() && !mContext.getEnqueueEmitsProfile() + && !mContext.reportToProfiler()) + { + gLogWarning << "Failed to collect layer timing info from previous enqueueV3()" << std::endl; + } + return result; + } + catch (const std::exception&) + { + return false; + } + return false; + } + +private: + // Helper function to check if a stream is in capturing mode. + bool isStreamCapturing(TrtCudaStream& stream) const + { + cudaStreamCaptureStatus status{cudaStreamCaptureStatusNone}; + cudaCheck(cudaStreamIsCapturing(stream.get(), &status)); + return status != cudaStreamCaptureStatusNone; + } + + Bindings const& mBindings; +}; + +//! +//! \class EnqueueGraph +//! \brief Functor to enqueue inference from CUDA Graph +//! +class EnqueueGraph +{ + +public: + explicit EnqueueGraph(nvinfer1::IExecutionContext& context, TrtCudaGraph& graph) + : mGraph(graph) + , mContext(context) + { + } + + bool operator()(TrtCudaStream& stream) const + { + if (mGraph.launch(stream)) + { + // Collecting layer timing info from current profile index of execution context + if (mContext.getProfiler() && !mContext.getEnqueueEmitsProfile() && !mContext.reportToProfiler()) + { + gLogWarning << "Failed to collect layer timing info from previous CUDA graph launch" << std::endl; + } + return true; + } + return false; + } + + TrtCudaGraph& mGraph; + nvinfer1::IExecutionContext& mContext; +}; + +//! +//! \class EnqueueGraphSafe +//! 
\brief Functor to enqueue inference from CUDA Graph +//! +class EnqueueGraphSafe +{ + +public: + explicit EnqueueGraphSafe(TrtCudaGraph& graph) + : mGraph(graph) + { + } + + bool operator()(TrtCudaStream& stream) const + { + return mGraph.launch(stream); + } + + TrtCudaGraph& mGraph; +}; + +using EnqueueFunction = std::function; + +enum class StreamType : int32_t +{ + kINPUT = 0, + kCOMPUTE = 1, + kOUTPUT = 2, + kNUM = 3 +}; + +enum class EventType : int32_t +{ + kINPUT_S = 0, + kINPUT_E = 1, + kCOMPUTE_S = 2, + kCOMPUTE_E = 3, + kOUTPUT_S = 4, + kOUTPUT_E = 5, + kNUM = 6 +}; + +using MultiStream = std::array(StreamType::kNUM)>; + +using MultiEvent = std::array, static_cast(EventType::kNUM)>; + +using EnqueueTimes = std::array; + +//! +//! \class Iteration +//! \brief Inference iteration and streams management +//! +class Iteration +{ + +public: + Iteration(int32_t id, InferenceOptions const& inference, nvinfer1::IExecutionContext& context, Bindings& bindings) + : mBindings(bindings) + , mStreamId(id) + , mDepth(1 + inference.overlap) + , mActive(mDepth) + , mEvents(mDepth) + , mEnqueueTimes(mDepth) + , mContext(&context) + { + for (int32_t d = 0; d < mDepth; ++d) + { + for (int32_t e = 0; e < static_cast(EventType::kNUM); ++e) + { + mEvents[d][e].reset(new TrtCudaEvent(!inference.spin)); + } + } + createEnqueueFunction(inference, context, bindings); + } + + bool query(bool skipTransfers) + { + if (mActive[mNext]) + { + return true; + } + + if (!skipTransfers) + { + record(EventType::kINPUT_S, StreamType::kINPUT); + setInputData(false); + record(EventType::kINPUT_E, StreamType::kINPUT); + wait(EventType::kINPUT_E, StreamType::kCOMPUTE); // Wait for input DMA before compute + } + + record(EventType::kCOMPUTE_S, StreamType::kCOMPUTE); + recordEnqueueTime(); + if (!mEnqueue(getStream(StreamType::kCOMPUTE))) + { + return false; + } + recordEnqueueTime(); + record(EventType::kCOMPUTE_E, StreamType::kCOMPUTE); + + if (!skipTransfers) + { + wait(EventType::kCOMPUTE_E, 
StreamType::kOUTPUT); // Wait for compute before output DMA + record(EventType::kOUTPUT_S, StreamType::kOUTPUT); + fetchOutputData(false); + record(EventType::kOUTPUT_E, StreamType::kOUTPUT); + } + + mActive[mNext] = true; + moveNext(); + return true; + } + + float sync( + TimePoint const& cpuStart, TrtCudaEvent const& gpuStart, std::vector& trace, bool skipTransfers) + { + if (mActive[mNext]) + { + if (skipTransfers) + { + getEvent(EventType::kCOMPUTE_E).synchronize(); + } + else + { + getEvent(EventType::kOUTPUT_E).synchronize(); + } + trace.emplace_back(getTrace(cpuStart, gpuStart, skipTransfers)); + mActive[mNext] = false; + return getEvent(EventType::kCOMPUTE_S) - gpuStart; + } + return 0; + } + + void syncAll( + TimePoint const& cpuStart, TrtCudaEvent const& gpuStart, std::vector& trace, bool skipTransfers) + { + for (int32_t d = 0; d < mDepth; ++d) + { + sync(cpuStart, gpuStart, trace, skipTransfers); + moveNext(); + } + } + + void wait(TrtCudaEvent& gpuStart) + { + getStream(StreamType::kINPUT).wait(gpuStart); + } + + void setInputData(bool sync) + { + mBindings.transferInputToDevice(getStream(StreamType::kINPUT)); + // additional sync to avoid overlapping with inference execution. + if (sync) + { + getStream(StreamType::kINPUT).synchronize(); + } + } + + void fetchOutputData(bool sync) + { + mBindings.transferOutputToHost(getStream(StreamType::kOUTPUT)); + // additional sync to avoid overlapping with inference execution. 
+ if (sync) + { + getStream(StreamType::kOUTPUT).synchronize(); + } + } + +private: + void moveNext() + { + mNext = mDepth - 1 - mNext; + } + + TrtCudaStream& getStream(StreamType t) + { + return mStream[static_cast(t)]; + } + + TrtCudaEvent& getEvent(EventType t) + { + return *mEvents[mNext][static_cast(t)]; + } + + void record(EventType e, StreamType s) + { + getEvent(e).record(getStream(s)); + } + + void recordEnqueueTime() + { + mEnqueueTimes[mNext][enqueueStart] = getCurrentTime(); + enqueueStart = 1 - enqueueStart; + } + + TimePoint getEnqueueTime(bool start) + { + return mEnqueueTimes[mNext][start ? 0 : 1]; + } + + void wait(EventType e, StreamType s) + { + getStream(s).wait(getEvent(e)); + } + + InferenceTrace getTrace(TimePoint const& cpuStart, TrtCudaEvent const& gpuStart, bool skipTransfers) + { + float is + = skipTransfers ? getEvent(EventType::kCOMPUTE_S) - gpuStart : getEvent(EventType::kINPUT_S) - gpuStart; + float ie + = skipTransfers ? getEvent(EventType::kCOMPUTE_S) - gpuStart : getEvent(EventType::kINPUT_E) - gpuStart; + float os + = skipTransfers ? getEvent(EventType::kCOMPUTE_E) - gpuStart : getEvent(EventType::kOUTPUT_S) - gpuStart; + float oe + = skipTransfers ? 
getEvent(EventType::kCOMPUTE_E) - gpuStart : getEvent(EventType::kOUTPUT_E) - gpuStart; + + return InferenceTrace(mStreamId, + std::chrono::duration(getEnqueueTime(true) - cpuStart).count(), + std::chrono::duration(getEnqueueTime(false) - cpuStart).count(), is, ie, + getEvent(EventType::kCOMPUTE_S) - gpuStart, getEvent(EventType::kCOMPUTE_E) - gpuStart, os, oe); + } + + void createEnqueueFunction( + InferenceOptions const& inference, nvinfer1::IExecutionContext& context, Bindings& bindings) + { + mEnqueue = EnqueueFunction(EnqueueExplicit(context, mBindings)); + if (inference.graph) + { + sample::gLogInfo << "Capturing CUDA graph for the current execution context" << std::endl; + + TrtCudaStream& stream = getStream(StreamType::kCOMPUTE); + // Avoid capturing initialization calls by executing the enqueue function at least + // once before starting CUDA graph capture. + auto const ret = mEnqueue(stream); + if (!ret) + { + throw std::runtime_error("Inference enqueue failed."); + } + stream.synchronize(); + + mGraph.beginCapture(stream); + // The built TRT engine may contain operations that are not permitted under CUDA graph capture mode. + // When the stream is capturing, the enqueue call may return false if the current CUDA graph capture fails. + if (mEnqueue(stream)) + { + mGraph.endCapture(stream); + mEnqueue = EnqueueFunction(EnqueueGraph(context, mGraph)); + sample::gLogInfo << "Successfully captured CUDA graph for the current execution context" << std::endl; + } + else + { + mGraph.endCaptureOnError(stream); + // Ensure any CUDA error has been cleaned up. + cudaCheck(cudaGetLastError()); + sample::gLogWarning << "The built TensorRT engine contains operations that are not permitted under " + "CUDA graph capture mode." + << std::endl; + sample::gLogWarning << "The specified --useCudaGraph flag has been ignored. The inference will be " + "launched without using CUDA graph launch." 
+ << std::endl; + } + } + } + + Bindings& mBindings; + + TrtCudaGraph mGraph; + EnqueueFunction mEnqueue; + + int32_t mStreamId{0}; + int32_t mNext{0}; + int32_t mDepth{2}; // default to double buffer to hide DMA transfers + + std::vector mActive; + MultiStream mStream; + std::vector mEvents; + + int32_t enqueueStart{0}; + std::vector mEnqueueTimes; + nvinfer1::IExecutionContext* mContext{nullptr}; +}; + +bool inferenceLoop(std::vector>& iStreams, TimePoint const& cpuStart, + TrtCudaEvent const& gpuStart, int iterations, float maxDurationMs, float warmupMs, + std::vector& trace, bool skipTransfers, float idleMs) +{ + float durationMs = 0; + int32_t skip = 0; + + if (maxDurationMs == -1.F) + { + sample::gLogWarning << "--duration=-1 is specified, inference will run in an endless loop until" + << " aborted with CTRL-C (SIGINT)" << std::endl; + while (true) + { + for (auto& s : iStreams) + { + if (!s->query(skipTransfers)) + { + return false; + } + } + for (auto& s : iStreams) + { + s->sync(cpuStart, gpuStart, trace, skipTransfers); + } + } + } + + for (int32_t i = 0; i < iterations + skip || durationMs < maxDurationMs; ++i) + { + for (auto& s : iStreams) + { + if (!s->query(skipTransfers)) + { + return false; + } + } + for (auto& s : iStreams) + { + durationMs = std::max(durationMs, s->sync(cpuStart, gpuStart, trace, skipTransfers)); + } + if (durationMs < warmupMs) // Warming up + { + if (durationMs) // Skip complete iterations + { + ++skip; + } + continue; + } + if (idleMs != 0.F) + { + std::this_thread::sleep_for(std::chrono::duration(idleMs)); + } + } + for (auto& s : iStreams) + { + s->syncAll(cpuStart, gpuStart, trace, skipTransfers); + } + return true; +} + +void inferenceExecution(InferenceOptions const& inference, InferenceEnvironment& iEnv, SyncStruct& sync, + int32_t const threadIdx, int32_t const streamsPerThread, int32_t device, + std::vector& trace) noexcept +{ + try + { + float warmupMs = inference.warmup; + float durationMs = -1.F; + if 
(inference.duration != -1.F) + { + durationMs = inference.duration * 1000.F + warmupMs; + } + + cudaCheck(cudaSetDevice(device)); + + std::vector> iStreams; + + for (int32_t s = 0; s < streamsPerThread; ++s) + { + int32_t const streamId{threadIdx * streamsPerThread + s}; + auto* iteration = new Iteration(streamId, inference, *iEnv.getContext(streamId), *iEnv.bindings[streamId]); + if (inference.skipTransfers) + { + iteration->setInputData(true); + } + iStreams.emplace_back(iteration); + } + + for (auto& s : iStreams) + { + s->wait(sync.gpuStart); + } + + std::vector localTrace; + if (!inferenceLoop(iStreams, sync.cpuStart, sync.gpuStart, inference.iterations, durationMs, warmupMs, + localTrace, inference.skipTransfers, inference.idle)) + { + sync.mutex.lock(); + iEnv.error = true; + sync.mutex.unlock(); + } + + if (inference.skipTransfers) + { + for (auto& s : iStreams) + { + s->fetchOutputData(true); + } + } + + sync.mutex.lock(); + trace.insert(trace.end(), localTrace.begin(), localTrace.end()); + sync.mutex.unlock(); + } + catch (...) 
+ { + sync.mutex.lock(); + iEnv.error = true; + sync.mutex.unlock(); + } +} + +inline std::thread makeThread(InferenceOptions const& inference, InferenceEnvironment& iEnv, SyncStruct& sync, + int32_t threadIdx, int32_t streamsPerThread, int32_t device, std::vector& trace) +{ + return std::thread(inferenceExecution, std::cref(inference), std::ref(iEnv), std::ref(sync), threadIdx, + streamsPerThread, device, std::ref(trace)); +} + +} // namespace + +bool runInference( + InferenceOptions const& inference, InferenceEnvironment& iEnv, int32_t device, std::vector& trace) +{ + SMP_RETVAL_IF_FALSE(!iEnv.safe, "Safe inference is not supported!", false, sample::gLogError); + cudaCheck(cudaProfilerStart()); + + trace.resize(0); + + SyncStruct sync; + sync.sleep = inference.sleep; + sync.mainStream.sleep(&sync.sleep); + sync.cpuStart = getCurrentTime(); + sync.gpuStart.record(sync.mainStream); + + // When multiple streams are used, trtexec can run inference in two modes: + // (1) if inference.threads is true, then run each stream on each thread. + // (2) if inference.threads is false, then run all streams on the same thread. + int32_t const numThreads = inference.threads ? inference.infStreams : 1; + int32_t const streamsPerThread = inference.threads ? 
1 : inference.infStreams; + + std::vector threads; + for (int32_t threadIdx = 0; threadIdx < numThreads; ++threadIdx) + { + threads.emplace_back(makeThread(inference, iEnv, sync, threadIdx, streamsPerThread, device, trace)); + } + for (auto& th : threads) + { + th.join(); + } + + cudaCheck(cudaProfilerStop()); + + auto cmpTrace = [](InferenceTrace const& a, InferenceTrace const& b) { return a.h2dStart < b.h2dStart; }; + std::sort(trace.begin(), trace.end(), cmpTrace); + + return !iEnv.error; +} + +bool runMultiTasksInference(std::vector>& tEnvList) +{ + cudaCheck(cudaProfilerStart()); + cudaSetDeviceFlags(cudaDeviceScheduleSpin); + + SyncStruct sync; + sync.sleep = 0; + sync.mainStream.sleep(&sync.sleep); + sync.cpuStart = getCurrentTime(); + sync.gpuStart.record(sync.mainStream); + + std::vector threads; + for (size_t i = 0; i < tEnvList.size(); ++i) + { + auto& tEnv = tEnvList[i]; + threads.emplace_back(makeThread( + tEnv->iOptions, *(tEnv->iEnv), sync, /*threadIdx*/ 0, /*streamsPerThread*/ 1, tEnv->device, tEnv->trace)); + } + for (auto& th : threads) + { + th.join(); + } + + cudaCheck(cudaProfilerStop()); + + auto cmpTrace = [](InferenceTrace const& a, InferenceTrace const& b) { return a.h2dStart < b.h2dStart; }; + for (auto& tEnv : tEnvList) + { + std::sort(tEnv->trace.begin(), tEnv->trace.end(), cmpTrace); + } + + return std::none_of(tEnvList.begin(), tEnvList.end(), + [](std::unique_ptr& tEnv) { return tEnv->iEnv->error; }); +} + +namespace +{ +size_t reportGpuMemory() +{ + static size_t prevFree{0}; + size_t free{0}; + size_t total{0}; + size_t newlyAllocated{0}; + cudaCheck(cudaMemGetInfo(&free, &total)); + sample::gLogInfo << "Free GPU memory = " << free / 1024.0_MiB << " GiB"; + if (prevFree != 0) + { + newlyAllocated = (prevFree - free); + sample::gLogInfo << ", newly allocated GPU memory = " << newlyAllocated / 1024.0_MiB << " GiB"; + } + sample::gLogInfo << ", total GPU memory = " << total / 1024.0_MiB << " GiB" << std::endl; + prevFree = free; + 
return newlyAllocated; +} +} // namespace + +//! Returns true if deserialization is slower than expected or fails. +bool timeDeserialize(InferenceEnvironment& iEnv, SystemOptions const& sys) +{ + constexpr int32_t kNB_ITERS{20}; + std::unique_ptr rt{createRuntime()}; + std::unique_ptr engine; + + SMP_RETVAL_IF_FALSE(!iEnv.safe, "Safe inference is not supported!", false, sample::gLogError); + + auto timeDeserializeFn = [&]() -> float { + bool deserializeOK{false}; + engine.reset(nullptr); + auto startClock = std::chrono::high_resolution_clock::now(); + SMP_RETVAL_IF_FALSE(!iEnv.safe, "Safe inference is not supported!", false, sample::gLogError); +#if (NV_TENSORRT_MAJOR > 8) + auto& reader = iEnv.engine.getFileReader(); + reader.reset(); + ASSERT(reader.isOpen()); +#endif +#if !TRT_WINML + for (auto const& pluginPath : sys.dynamicPlugins) + { + rt->getPluginRegistry().loadLibrary(pluginPath.c_str()); + } +#endif +#if (NV_TENSORRT_MAJOR > 8) + engine.reset(rt->deserializeCudaEngine(reader)); +#else + std::cerr << "FileReader is not implemented! Use TensorRT 10.x and higher" << std::endl; +#endif + deserializeOK = (engine != nullptr); + auto endClock = std::chrono::high_resolution_clock::now(); + // return NAN if deserialization failed. + return deserializeOK ? std::chrono::duration(endClock - startClock).count() : NAN; + }; + + // Warmup the caches to make sure that cache thrashing isn't throwing off the results + { + sample::gLogInfo << "Begin deserialization warmup..." << std::endl; + for (int32_t i = 0, e = 2; i < e; ++i) + { + timeDeserializeFn(); + } + } + sample::gLogInfo << "Begin deserialization engine timing..." << std::endl; + float const first = timeDeserializeFn(); + + // Check if first deserialization succeeded. + if (std::isnan(first)) + { + sample::gLogError << "Engine deserialization failed." 
<< std::endl; + return true; + } + + sample::gLogInfo << "First deserialization time = " << first << " milliseconds" << std::endl; + + // Record initial gpu memory state. + reportGpuMemory(); + + float totalTime{0.F}; + for (int32_t i = 0; i < kNB_ITERS; ++i) + { + totalTime += timeDeserializeFn(); + } + auto const averageTime = totalTime / kNB_ITERS; + // reportGpuMemory sometimes reports zero after a single deserialization of a small engine, + // so use the size of memory for all the iterations. + auto const totalEngineSizeGpu = reportGpuMemory(); + sample::gLogInfo << "Total deserialization time = " << totalTime << " milliseconds in " << kNB_ITERS + << " iterations, average time = " << averageTime << " milliseconds, first time = " << first + << " milliseconds." << std::endl; + sample::gLogInfo << "Deserialization Bandwidth = " << 1E-6 * totalEngineSizeGpu / totalTime << " GB/s" << std::endl; + + // If the first deserialization is more than tolerance slower than + // the average deserialization, return true, which means an error occurred. + // The tolerance is set to 2x since the deserialization time is quick and susceptible + // to caching issues causing problems in the first timing. + auto const tolerance = 2.0F; + bool const isSlowerThanExpected = first > averageTime * tolerance; + if (isSlowerThanExpected) + { + sample::gLogInfo << "First deserialization time divided by average time is " << (first / averageTime) + << ". Exceeds tolerance of " << tolerance << "x." 
<< std::endl; + } + return isSlowerThanExpected; +} + +std::string getLayerInformation( + nvinfer1::ICudaEngine* engine, nvinfer1::IExecutionContext* context, nvinfer1::LayerInformationFormat format) +{ + auto runtime = std::unique_ptr{createRuntime()}; + auto inspector = std::unique_ptr(engine->createEngineInspector()); + if (context != nullptr) + { + inspector->setExecutionContext(context); + } + std::string result = inspector->getEngineInformation(format); + return result; +} + +void Binding::fill(std::string const& fileName) +{ + loadFromFile(fileName, static_cast(buffer->getHostBuffer()), buffer->getSize()); +} + +void Binding::fill() +{ + switch (dataType) + { + case nvinfer1::DataType::kBOOL: + { + fillBuffer(buffer->getHostBuffer(), volume, 0, 1); + break; + } + case nvinfer1::DataType::kINT32: + { + fillBuffer(buffer->getHostBuffer(), volume, -128, 127); + break; + } +#if (NV_TENSORRT_MAJOR > 8) + case nvinfer1::DataType::kINT64: + { + fillBuffer(buffer->getHostBuffer(), volume, -128, 127); + break; + } +#endif + case nvinfer1::DataType::kINT8: + { + fillBuffer(buffer->getHostBuffer(), volume, -128, 127); + break; + } + case nvinfer1::DataType::kFLOAT: + { + fillBuffer(buffer->getHostBuffer(), volume, -1.0F, 1.0F); + break; + } + case nvinfer1::DataType::kHALF: + { + fillBuffer<__half>(buffer->getHostBuffer(), volume, -1.0F, 1.0F); + break; + } +#if (NV_TENSORRT_MAJOR > 8) + case nvinfer1::DataType::kBF16: + { + fillBuffer(buffer->getHostBuffer(), volume, -1.0F, 1.0F); + break; + } +#endif + case nvinfer1::DataType::kUINT8: + { + fillBuffer(buffer->getHostBuffer(), volume, 0, 255); + break; + } + case nvinfer1::DataType::kFP8: + { +#if 0 + ASSERT(false && "FP8 is not supported"); +#else + fillBuffer<__nv_fp8_e4m3>(buffer->getHostBuffer(), volume, __nv_fp8_e4m3(- 1.0f), __nv_fp8_e4m3(1.0f)); +#endif + break; + } +#if (NV_TENSORRT_MAJOR > 8) + case nvinfer1::DataType::kINT4: ASSERT(false && "INT4 is not supported"); +#endif + } +} + +void 
Binding::dump(std::ostream& os, Dims dims, Dims strides, int32_t vectorDim, int32_t spv, + std::string const separator /*= " "*/) const +{ + void* outputBuffer{}; + if (outputAllocator != nullptr) + { + outputBuffer = outputAllocator->getBuffer()->getHostBuffer(); + // Overwrite dimensions with those reported by the output allocator. + dims = outputAllocator->getFinalDims(); + os << "Final shape is " << dims << " reported by the output allocator." << std::endl; + } + else + { + outputBuffer = buffer->getHostBuffer(); + } + switch (dataType) + { + case nvinfer1::DataType::kBOOL: + { + dumpBuffer(outputBuffer, separator, os, dims, strides, vectorDim, spv); + break; + } + case nvinfer1::DataType::kINT32: + { + dumpBuffer(outputBuffer, separator, os, dims, strides, vectorDim, spv); + break; + } + case nvinfer1::DataType::kINT8: + { + dumpBuffer(outputBuffer, separator, os, dims, strides, vectorDim, spv); + break; + } + case nvinfer1::DataType::kFLOAT: + { + dumpBuffer(outputBuffer, separator, os, dims, strides, vectorDim, spv); + break; + } + case nvinfer1::DataType::kHALF: + { + dumpBuffer<__half>(outputBuffer, separator, os, dims, strides, vectorDim, spv); + break; + } +#if (NV_TENSORRT_MAJOR > 8) + case nvinfer1::DataType::kBF16: + { + dumpBuffer(outputBuffer, separator, os, dims, strides, vectorDim, spv); + break; + } +#endif + case nvinfer1::DataType::kUINT8: + { + dumpBuffer(outputBuffer, separator, os, dims, strides, vectorDim, spv); + break; + } +#if (NV_TENSORRT_MAJOR > 8) + case nvinfer1::DataType::kINT64: + { + dumpBuffer(outputBuffer, separator, os, dims, strides, vectorDim, spv); + break; + } +#endif + case nvinfer1::DataType::kFP8: + { +#if 0 + ASSERT(false && "FP8 is not supported"); +#else + dumpBuffer<__nv_fp8_e4m3>(outputBuffer, separator, os, dims, strides, vectorDim, spv); +#endif + break; + } +#if (NV_TENSORRT_MAJOR > 8) + case nvinfer1::DataType::kINT4: ASSERT(false && "INT4 is not supported"); +#endif + } +} + +void 
Bindings::addBinding(TensorInfo const& tensorInfo, std::string const& fileName /*= ""*/) +{ + auto const b = tensorInfo.bindingIndex; + while (mBindings.size() <= static_cast(b)) + { + mBindings.emplace_back(); + mDevicePointers.emplace_back(); + } + mNames[tensorInfo.name] = b; + mBindings[b].isInput = tensorInfo.isInput; + mBindings[b].volume = tensorInfo.vol; + mBindings[b].dataType = tensorInfo.dataType; + if (tensorInfo.isDynamic) + { + ASSERT(!tensorInfo.isInput); // Only output shape can be possibly unknown because of DDS. + if (mBindings[b].outputAllocator == nullptr) + { + if (mUseManaged) + { + mBindings[b].outputAllocator.reset(new OutputAllocator(new UnifiedMirroredBuffer)); + } + else + { + mBindings[b].outputAllocator.reset(new OutputAllocator(new DiscreteMirroredBuffer)); + } + } + } + else + { + if (mBindings[b].buffer == nullptr) + { + if (mUseManaged) + { + mBindings[b].buffer.reset(new UnifiedMirroredBuffer); + } + else + { + mBindings[b].buffer.reset(new DiscreteMirroredBuffer); + } + } + // Some memory allocators return nullptr when allocating zero bytes, but TensorRT requires a non-null ptr + // even for empty tensors, so allocate a dummy byte. 
+ if (tensorInfo.vol == 0) + { + mBindings[b].buffer->allocate(1); + } + else + { + mBindings[b].buffer->allocate( + static_cast(tensorInfo.vol) * static_cast(dataTypeSize(tensorInfo.dataType))); + } + mDevicePointers[b] = mBindings[b].buffer->getDeviceBuffer(); + } + if (tensorInfo.isInput) + { + if (fileName.empty()) + { + fill(b); + } + else + { + fill(b, fileName); + } + } +} + +void** Bindings::getDeviceBuffers() +{ + return mDevicePointers.data(); +} + +void Bindings::transferInputToDevice(TrtCudaStream& stream) +{ + for (auto& b : mNames) + { + if (mBindings[b.second].isInput) + { + mBindings[b.second].buffer->hostToDevice(stream); + } + } +} + +void Bindings::transferOutputToHost(TrtCudaStream& stream) +{ + for (auto& b : mNames) + { + if (!mBindings[b.second].isInput) + { + if (mBindings[b.second].outputAllocator != nullptr) + { + mBindings[b.second].outputAllocator->getBuffer()->deviceToHost(stream); + } + else + { + mBindings[b.second].buffer->deviceToHost(stream); + } + } + } +} + +void Bindings::dumpBindingValues(nvinfer1::IExecutionContext const& context, int32_t binding, std::ostream& os, + std::string const& separator /*= " "*/, int32_t batch /*= 1*/) const +{ + auto const tensorName = context.getEngine().getIOTensorName(binding); + Dims dims = context.getTensorShape(tensorName); + Dims strides = context.getTensorStrides(tensorName); + int32_t vectorDim = context.getEngine().getTensorVectorizedDim(tensorName); + int32_t const spv = context.getEngine().getTensorComponentsPerElement(tensorName); + + mBindings[binding].dump(os, dims, strides, vectorDim, spv, separator); +} + +namespace +{ + +std::string genFilenameSafeString(std::string const& s) +{ + std::string res = s; + static std::string const allowedSpecialChars{"._-,"}; + for (auto& c : res) + { + if (!isalnum(c) && allowedSpecialChars.find(c) == std::string::npos) + { + c = '_'; + } + } + return res; +} + +Dims getBindingDimensions(nvinfer1::IExecutionContext const& context, std::string const& 
name) +{ + return context.getTensorShape(name.c_str()); +} +} // namespace + +void Bindings::dumpRawBindingToFiles(nvinfer1::IExecutionContext const& context, std::ostream& os) const +{ + os << "Dumping I/O Bindings to RAW Files:" << std::endl; + for (auto const& n : mNames) + { + auto name = n.first; + auto bIndex = n.second; + auto const& binding = mBindings[bIndex]; + void* outputBuffer{}; + if (binding.outputAllocator != nullptr) + { + outputBuffer = binding.outputAllocator->getBuffer()->getHostBuffer(); + } + else + { + outputBuffer = binding.buffer->getHostBuffer(); + } + + Dims dims = getBindingDimensions(context, name); + std::string dimsStr; + std::string dotStr; + + for (int32_t i = 0; i < dims.nbDims; i++) + { + dimsStr += dotStr + std::to_string(dims.d[i]); + dotStr = "."; + } + + std::string const bindingTypeStr = (binding.isInput ? "input" : "output"); + + std::stringstream fileName; + fileName << genFilenameSafeString(name) << "." << bindingTypeStr << "." << dimsStr << "." << binding.dataType + << ".raw"; + + os << "Writing file for " << bindingTypeStr << " binding " << name << " (with datatype " << binding.dataType + << " and dimensions " << dimsStr << ") to " << fileName.str() << std::endl; + + std::ofstream f(fileName.str(), std::ios::out | std::ios::binary); + ASSERT(f && "Cannot open file for write"); + f.write(static_cast(outputBuffer), binding.volume * samplesCommon::elementSize(binding.dataType)); + f.close(); + } +} + +void Bindings::dumpBindingDimensions( + std::string const& name, nvinfer1::IExecutionContext const& context, std::ostream& os) const +{ + auto const dims = context.getTensorShape(name.c_str()); + // Do not add a newline terminator, because the caller may be outputting a JSON string. 
+ os << dims; +} + +std::unordered_map Bindings::getBindings(std::function predicate) const +{ + std::unordered_map bindings; + for (auto const& n : mNames) + { + auto const binding = n.second; + if (predicate(mBindings[binding])) + { + bindings.insert(n); + } + } + return bindings; +} + +bool Bindings::setTensorAddresses(nvinfer1::IExecutionContext& context) const +{ + for (auto const& b : mNames) + { + auto const name = b.first.c_str(); + auto const location = context.getEngine().getTensorLocation(name); + if (location == TensorLocation::kDEVICE) + { + if (mBindings[b.second].outputAllocator != nullptr) + { + if (!context.setOutputAllocator(name, mBindings[b.second].outputAllocator.get())) + { + return false; + } + } + else + { + if (!context.setTensorAddress(name, mDevicePointers[b.second])) + { + return false; + } + } + } + } + return true; +} +#if (NV_TENSORRT_MAJOR > 8) +bool DebugTensorWriter::processDebugTensor(void const* addr, nvinfer1::TensorLocation location, nvinfer1::DataType type, + nvinfer1::Dims const& shape, char const* name, cudaStream_t stream) +{ + CHECK(cudaStreamSynchronize(stream)); + // Store data from callback. 
+ int64_t size = std::accumulate(shape.d, shape.d + shape.nbDims, 1LL, std::multiplies{}) + * samplesCommon::elementSize(type); + std::vector hostDataOut(size, 0); + CHECK(cudaMemcpy(hostDataOut.data(), addr, size, cudaMemcpyDeviceToHost)); + + auto it = mDebugTensorFileNames.find(name); + ASSERT(it != mDebugTensorFileNames.end()); + std::string fileName = it->second; + + std::ofstream f(fileName, std::ios::out | std::ios::binary); + ASSERT(f && "Cannot open file for write"); + sample::gLogInfo << "Writing to file " << fileName << " for debug tensor " << name << std::endl; + f.write(hostDataOut.data(), size); + f.close(); + + CHECK(cudaStreamSynchronize(stream)); + return true; +} +#endif +} // namespace sample diff --git a/src/Detector/tensorrt_onnx/common/sampleInference.h b/src/Detector/tensorrt_onnx/common/sampleInference.h new file mode 100644 index 000000000..b1d24df67 --- /dev/null +++ b/src/Detector/tensorrt_onnx/common/sampleInference.h @@ -0,0 +1,263 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef TRT_SAMPLE_INFERENCE_H +#define TRT_SAMPLE_INFERENCE_H + +#include "sampleDevice.h" +#include "sampleEngines.h" +#include "sampleReporting.h" +#include "sampleUtils.h" + +#include +#include +#include +#include +#include +#include + +namespace sample +{ +#if (NV_TENSORRT_MAJOR > 8) +// IDebugListener class for writing debug tensors to output file. +class DebugTensorWriter : public nvinfer1::IDebugListener +{ +public: + DebugTensorWriter(std::unordered_map fileNames) + : mDebugTensorFileNames(fileNames) + { + } + + bool processDebugTensor(void const* addr, nvinfer1::TensorLocation location, nvinfer1::DataType type, + nvinfer1::Dims const& shape, char const* name, cudaStream_t stream) override; + +private: + std::unordered_map mDebugTensorFileNames; +}; +#endif + +struct InferenceEnvironment +{ + InferenceEnvironment() = delete; + InferenceEnvironment(InferenceEnvironment const& other) = delete; + InferenceEnvironment(InferenceEnvironment&& other) = delete; + InferenceEnvironment(BuildEnvironment& bEnv) : engine(std::move(bEnv.engine)), safe(bEnv.engine.isSafe()) + { + } + + LazilyDeserializedEngine engine; + std::unique_ptr profiler; + std::vector> contexts; + std::vector + deviceMemory; //< Device memory used for inference when the allocation strategy is not static. + std::vector> bindings; +#if (NV_TENSORRT_MAJOR > 8) + std::unique_ptr listener; +#endif + bool error{false}; + + bool safe{false}; + + inline nvinfer1::IExecutionContext* getContext(int32_t streamIdx); + + //! Storage for input shape tensors. + //! + //! It's important that the addresses of the data do not change between the calls to + //! setTensorAddress/setInputShape (which tells TensorRT where the input shape tensor is) + //! and enqueueV3 (when TensorRT might use the input shape tensor). + //! + //! The input shape tensors could alternatively be handled via member bindings, + //! but it simplifies control-flow to store the data here since it's shared across + //! the bindings. 
+ std::list> inputShapeTensorValues; +}; + +inline nvinfer1::IExecutionContext* InferenceEnvironment::getContext(int32_t streamIdx) +{ + return contexts[streamIdx].get(); +} + +//! +//! \brief Set up contexts and bindings for inference +//! +bool setUpInference(InferenceEnvironment& iEnv, InferenceOptions const& inference, SystemOptions const& system); + +//! +//! \brief Deserialize the engine and time how long it takes. +//! +bool timeDeserialize(InferenceEnvironment& iEnv, SystemOptions const& sys); + +//! +//! \brief Run inference and collect timing, return false if any error hit during inference +//! +bool runInference( + InferenceOptions const& inference, InferenceEnvironment& iEnv, int32_t device, std::vector& trace); + +//! +//! \brief Get layer information of the engine. +//! +std::string getLayerInformation( + nvinfer1::ICudaEngine* engine, nvinfer1::IExecutionContext* context, nvinfer1::LayerInformationFormat format); + +struct Binding +{ + bool isInput{false}; + std::unique_ptr buffer; + std::unique_ptr outputAllocator; + int64_t volume{0}; + nvinfer1::DataType dataType{nvinfer1::DataType::kFLOAT}; + + void fill(std::string const& fileName); + + void fill(); + + void dump(std::ostream& os, nvinfer1::Dims dims, nvinfer1::Dims strides, int32_t vectorDim, int32_t spv, + std::string const separator = " ") const; +}; + +struct TensorInfo +{ + int32_t bindingIndex{-1}; + char const* name{nullptr}; + nvinfer1::Dims dims{}; + bool isDynamic{}; + int32_t comps{-1}; + nvinfer1::Dims strides{}; + int32_t vectorDimIndex{-1}; + bool isInput{}; + nvinfer1::DataType dataType{}; + int64_t vol{-1}; + + void updateVolume(int32_t batch) + { + vol = volume(dims, strides, vectorDimIndex, comps, batch); + } +}; + +class Bindings +{ +public: + Bindings() = delete; + explicit Bindings(bool useManaged) + : mUseManaged(useManaged) + { + } + + void addBinding(TensorInfo const& tensorInfo, std::string const& fileName = ""); + + void** getDeviceBuffers(); + + void 
transferInputToDevice(TrtCudaStream& stream); + + void transferOutputToHost(TrtCudaStream& stream); + + void fill(int binding, std::string const& fileName) + { + mBindings[binding].fill(fileName); + } + + void fill(int binding) + { + mBindings[binding].fill(); + } + + void dumpBindingDimensions( + std::string const& name, nvinfer1::IExecutionContext const& context, std::ostream& os) const; + + void dumpBindingValues(nvinfer1::IExecutionContext const& context, int32_t binding, std::ostream& os, + std::string const& separator = " ", int32_t batch = 1) const; + + void dumpRawBindingToFiles(nvinfer1::IExecutionContext const& context, std::ostream& os) const; + + void dumpInputs(nvinfer1::IExecutionContext const& context, std::ostream& os) const + { + auto isInput = [](Binding const& b) { return b.isInput; }; + dumpBindings(context, isInput, os); + } + + void dumpOutputs(nvinfer1::IExecutionContext const& context, std::ostream& os) const; + + void dumpBindings(nvinfer1::IExecutionContext const& context, std::ostream& os) const + { + auto all = [](Binding const& b) { return true; }; + dumpBindings(context, all, os); + } + + void dumpBindings(nvinfer1::IExecutionContext const& context, std::function predicate, + std::ostream& os) const + { + for (auto const& n : mNames) + { + auto const name = n.first; + auto const binding = n.second; + if (predicate(mBindings[binding])) + { + os << n.first << ": ("; + dumpBindingDimensions(name, context, os); + os << ")" << std::endl; + + dumpBindingValues(context, binding, os); + os << std::endl; + } + } + } + + std::unordered_map getInputBindings() const + { + auto isInput = [](Binding const& b) { return b.isInput; }; + return getBindings(isInput); + } + + std::unordered_map getOutputBindings() const + { + auto isOutput = [](Binding const& b) { return !b.isInput; }; + return getBindings(isOutput); + } + + std::unordered_map getBindings() const + { + auto all = [](Binding const& b) { return true; }; + return getBindings(all); + } + + 
std::unordered_map getBindings(std::function predicate) const; + + bool setTensorAddresses(nvinfer1::IExecutionContext& context) const; + +private: + std::unordered_map mNames; + std::vector mBindings; + std::vector mDevicePointers; + bool mUseManaged{false}; +}; + +struct TaskInferenceEnvironment +{ + TaskInferenceEnvironment(std::string engineFile, InferenceOptions inference, int32_t deviceId = 0, + int32_t DLACore = -1, int32_t bs = batchNotProvided); + InferenceOptions iOptions{}; + int32_t device{defaultDevice}; + int32_t batch{batchNotProvided}; + std::unique_ptr iEnv; + std::vector trace; +}; + +bool runMultiTasksInference(std::vector>& tEnvList); + +} // namespace sample + +#endif // TRT_SAMPLE_INFERENCE_H diff --git a/src/Detector/tensorrt_onnx/common/sampleOptions.cpp b/src/Detector/tensorrt_onnx/common/sampleOptions.cpp new file mode 100644 index 000000000..2baa4e2a1 --- /dev/null +++ b/src/Detector/tensorrt_onnx/common/sampleOptions.cpp @@ -0,0 +1,2827 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "NvInfer.h" + +#include "logger.h" +#include "sampleOptions.h" +#include "sampleUtils.h" +using namespace nvinfer1; +namespace sample +{ + +namespace +{ + +static const std::map> kUNIT_MULTIPLIERS{ + {'B', {1, "Bytes"}}, + {'K', {1 << 10, "Kibibytes"}}, + {'M', {1 << 20, "Mebibytes"}}, + {'G', {1 << 30, "Gibibytes"}}, +}; + +std::string addDefaultUnitSuffixIfNotSpecified(std::string const& option, char defaultUnit) +{ + char lastChar = option.at(option.size() - 1); + return std::isdigit(lastChar) ? option + defaultUnit : option; +} + +// Returns "B (Bytes), K (Kilobytes), ..." +std::string getAvailableUnitSuffixes() +{ + std::ostringstream ss; + for (auto it = kUNIT_MULTIPLIERS.begin(); it != kUNIT_MULTIPLIERS.end(); ++it) + { + if (it != kUNIT_MULTIPLIERS.begin()) + { + ss << ", "; + } + ss << it->first << " (" << it->second.second << ")"; + } + return ss.str(); +} + +// Numeric trtexec arguments can have unit specifiers in similar to polygraphy. +// E.g. --weightStreamingBudget=20M would be 20 Mebibytes (base 2). +int64_t getUnitMultiplier(std::string const& option) +{ + char lastChar = option.at(option.size() - 1); + if (!std::isdigit(lastChar)) + { + char unit = std::toupper(lastChar); + auto found = kUNIT_MULTIPLIERS.find(unit); + if (found == kUNIT_MULTIPLIERS.end()) + { + std::ostringstream ss; + ss << "Error parsing \"" << option << "\": invalid unit specifier '" << unit + << "'. 
Valid base-2 unit suffixes include: "; + ss << getAvailableUnitSuffixes() << "."; + throw std::invalid_argument(ss.str()); + } + return found->second.first; + } + + // Return bytes by default + return kUNIT_MULTIPLIERS.at('B').first; +} + +template +T stringToValue(const std::string& option) +{ + return T{option}; +} + +template <> +int32_t stringToValue(const std::string& option) +{ + return std::stoi(option); +} + +template <> +size_t stringToValue(const std::string& option) +{ + return std::stoi(option) * getUnitMultiplier(option); +} + +template <> +float stringToValue(const std::string& option) +{ + return std::stof(option); +} + +template <> +double stringToValue(const std::string& option) +{ + return std::stod(option) * getUnitMultiplier(option); +} + +template <> +bool stringToValue(const std::string& option) +{ + return true; +} + +template <> +std::vector stringToValue>(const std::string& option) +{ + std::vector shape; + if (option == "scalar") + { + return shape; + } + std::vector dimsStrings = splitToStringVec(option, 'x'); + for (const auto& d : dimsStrings) + { + shape.push_back(stringToValue(d)); + } + return shape; +} + +template <> +nvinfer1::DataType stringToValue(const std::string& option) +{ + const std::unordered_map strToDT{{"fp32", nvinfer1::DataType::kFLOAT}, + {"fp16", nvinfer1::DataType::kHALF}, +#if (NV_TENSORRT_MAJOR > 8) + {"bf16", nvinfer1::DataType::kBF16}, +#endif + {"int8", nvinfer1::DataType::kINT8}, + {"fp8", nvinfer1::DataType::kFP8}, + {"int32", nvinfer1::DataType::kINT32}, +#if (NV_TENSORRT_MAJOR > 8) + {"int64", nvinfer1::DataType::kINT64}, +#endif + {"bool", nvinfer1::DataType::kBOOL}, + {"uint8", nvinfer1::DataType::kUINT8} +#if (NV_TENSORRT_MAJOR > 8) + , + {"int4", nvinfer1::DataType::kINT4} +#endif + }; + const auto& dt = strToDT.find(option); + if (dt == strToDT.end()) + { + throw std::invalid_argument("Invalid DataType " + option); + } + return dt->second; +} + +template <> +nvinfer1::DeviceType 
stringToValue(std::string const& option) +{ + std::unordered_map const strToDevice = { + {"GPU", nvinfer1::DeviceType::kGPU}, + {"DLA", nvinfer1::DeviceType::kDLA}, + }; + auto const& device = strToDevice.find(option); + if (device == strToDevice.end()) + { + throw std::invalid_argument("Invalid Device Type " + option); + } + return device->second; +} + +template <> +nvinfer1::TensorFormats stringToValue(const std::string& option) +{ + std::vector optionStrings = splitToStringVec(option, '+'); + const std::unordered_map strToFmt{{"chw", nvinfer1::TensorFormat::kLINEAR}, + {"chw2", nvinfer1::TensorFormat::kCHW2}, {"chw4", nvinfer1::TensorFormat::kCHW4}, + {"hwc8", nvinfer1::TensorFormat::kHWC8}, {"chw16", nvinfer1::TensorFormat::kCHW16}, + {"chw32", nvinfer1::TensorFormat::kCHW32}, {"dhwc8", nvinfer1::TensorFormat::kDHWC8}, + {"cdhw32", nvinfer1::TensorFormat::kCDHW32}, {"hwc", nvinfer1::TensorFormat::kHWC}, + {"dhwc", nvinfer1::TensorFormat::kDHWC}, {"dla_linear", nvinfer1::TensorFormat::kDLA_LINEAR}, + {"dla_hwc4", nvinfer1::TensorFormat::kDLA_HWC4}}; + nvinfer1::TensorFormats formats{}; + for (auto f : optionStrings) + { + const auto& tf = strToFmt.find(f); + if (tf == strToFmt.end()) + { + throw std::invalid_argument(std::string("Invalid TensorFormat ") + f); + } + formats |= 1U << static_cast(tf->second); + } + + return formats; +} + +template <> +IOFormat stringToValue(const std::string& option) +{ + IOFormat ioFormat{}; + const size_t colon = option.find(':'); + + if (colon == std::string::npos) + { + throw std::invalid_argument(std::string("Invalid IOFormat ") + option); + } + + ioFormat.first = stringToValue(option.substr(0, colon)); + ioFormat.second = stringToValue(option.substr(colon + 1)); + + return ioFormat; +} + +template <> +SparsityFlag stringToValue(std::string const& option) +{ + std::unordered_map const table{ + {"disable", SparsityFlag::kDISABLE}, {"enable", SparsityFlag::kENABLE}, {"force", SparsityFlag::kFORCE}}; + auto search = 
table.find(option); + if (search == table.end()) + { + throw std::invalid_argument(std::string("Unknown sparsity mode: ") + option); + } + if (search->second == SparsityFlag::kFORCE) + { + sample::gLogWarning << "--sparsity=force has been deprecated. " + << "Please use to rewrite the weights to a sparsity pattern " + << "and then run with --sparsity=enable" << std::endl; + } + + return search->second; +} + +template <> +WeightStreamingBudget stringToValue(std::string const& option) +{ + WeightStreamingBudget budget; + if (option.find('%') != std::string::npos) + { + double percent = std::stod(option); + if (!(percent >= 0 && percent <= 100.0)) + { + std::ostringstream err; + err << "The weight streaming percent must be between 0 and 100."; + throw std::invalid_argument(err.str()); + } + budget.percent = percent; + } + else + { + double bytes = stringToValue(option); + if (!(bytes == WeightStreamingBudget::kAUTOMATIC || bytes == WeightStreamingBudget::kDISABLE || bytes >= 0)) + { + std::ostringstream err; + err << "The weight streaming budget must be " << WeightStreamingBudget::kDISABLE << ", " + << WeightStreamingBudget::kAUTOMATIC << ", or at least 0."; + throw std::invalid_argument(err.str()); + } + budget.bytes = static_cast(bytes); + } + return budget; +} + +template +std::pair splitNameAndValue(const std::string& s) +{ + std::string tensorName; + std::string valueString; + + // Support 'inputName':Path format for --loadInputs flag when dealing with Windows paths. + // i.e. 'inputName':c:\inputData + std::vector quoteNameRange{ splitToStringVec(s, '\'') }; + // splitToStringVec returns the entire string when delimiter is not found, so it's size is always at least 1 + if (quoteNameRange.size() != 1) + { + if (quoteNameRange.size() != 3) + { + std::string errorMsg = std::string("Found invalid number of \'s when parsing ") + s + + std::string(". Expected: 2, received: ") + std::to_string(quoteNameRange.size() -1) + + ". 
Please ensure that a singular comma is used within each comma-separated key-value pair for options like --inputIOFormats, --optShapes, --optShapesCalib, --layerPrecisions, etc."; + throw std::invalid_argument(errorMsg); + } + // Everything before the second "'" is the name. + tensorName = quoteNameRange[0] + quoteNameRange[1]; + // Path is the last string - ignoring leading ":" so slice it with [1:] + valueString = quoteNameRange[2].substr(1); + return std::pair(tensorName, stringToValue(valueString)); + } + + // Split on the last : + std::vector nameRange{splitToStringVec(s, ':')}; + // Everything before the last : is the name + tensorName = nameRange[0]; + for (size_t i = 1; i < nameRange.size() - 1; i++) + { + tensorName += ":" + nameRange[i]; + } + // Value is the string element after the last : + valueString = nameRange[nameRange.size() - 1]; + return std::pair(tensorName, stringToValue(valueString)); +} + +template +void splitInsertKeyValue(const std::vector& kvList, T& map) +{ + for (const auto& kv : kvList) + { + map.insert(splitNameAndValue(kv)); + } +} + +const char* boolToEnabled(bool enable) +{ + return enable ? "Enabled" : "Disabled"; +} + +//! A helper function similar to sep.join(list) in Python. +template +std::string joinValuesToString(std::vector const& list, std::string const& sep) +{ + std::ostringstream os; + for (int32_t i = 0, n = list.size(); i < n; ++i) + { + os << list[i]; + if (i != n - 1) + { + os << sep; + } + } + return os.str(); +} + +template +std::string joinValuesToString(std::array const& list, std::string const& sep) +{ + return joinValuesToString(std::vector(list.begin(), list.end()), sep); +} + +//! Check if input option exists in input arguments. +//! If it does: set its value, and return true +//! If it does not: return false. 
+template +bool getOption(Arguments& arguments, const std::string& option, T& value) +{ + auto const match = arguments.find(option); + if (match != arguments.end()) + { + value = stringToValue(match->second.first); + return true; + } + + return false; +} + +//! Check if input option exists in input arguments. +//! If it does: set its value, erase the argument and return true. +//! If it does not: return false. +template +bool getAndDelOption(Arguments& arguments, const std::string& option, T_& value) +{ + bool found = getOption(arguments, option, value); + if (found) + { + const auto match = arguments.find(option); + arguments.erase(match); + } + + return found; +} + +//! Check if input option exists in input arguments. +//! If it does: set its value and position, erase the argument and return true. +//! If it does not: return false. +template +bool getAndDelOptionWithPosition(Arguments& arguments, std::string const& option, T_& value, int32_t& pos) +{ + auto const match = arguments.find(option); + if (match != arguments.end()) + { + value = stringToValue(match->second.first); + pos = match->second.second; + arguments.erase(match); + return true; + } + + return false; +} + +//! Check if input option exists in input arguments behind the position spcecified by pos. +//! If it does: set its value, erase the argument and return true. +//! If it does not: return false. +template +bool getAndDelOptionBehind(Arguments& arguments, std::string const& option, int32_t pos, T_& value) +{ + auto const match = arguments.equal_range(option); + if (match.first == match.second) + { + return false; + } + for (auto i = match.first; i != match.second; ++i) + { + if (i->second.second - pos == 1) + { + value = stringToValue(i->second.first); + arguments.erase(i); + return true; + } + } + return false; +} + +//! Check if input option exists in input arguments. +//! If it does: set false in value, erase the argument and return true. +//! If it does not: return false. 
+bool getAndDelNegOption(Arguments& arguments, const std::string& option, bool& value) +{ + bool dummy; + if (getAndDelOption(arguments, option, dummy)) + { + value = false; + return true; + } + return false; +} + +//! Check if input option exists in input arguments. +//! If it does: add all the matched arg values to values vector, erase the argument and return true. +//! If it does not: return false. +template +bool getAndDelRepeatedOption(Arguments& arguments, const std::string& option, std::vector& values) +{ + const auto match = arguments.equal_range(option); + if (match.first == match.second) + { + return false; + } + + auto addToValues + = [&values](Arguments::value_type& argValue) { values.emplace_back(stringToValue(argValue.second.first)); }; + std::for_each(match.first, match.second, addToValues); + arguments.erase(match.first, match.second); + + return true; +} + +void insertShapesBuild(BuildOptions::ShapeProfile& shapes, nvinfer1::OptProfileSelector selector, + const std::string& name, const std::vector& dims) +{ + shapes[name][static_cast(selector)] = dims; +} + +void insertShapesInference( + InferenceOptions::ShapeProfile& shapes, std::string const& name, std::vector const& dims) +{ + shapes[name] = dims; +} + +std::string removeSingleQuotationMarks(std::string& str) +{ + std::vector strList{splitToStringVec(str, '\'')}; + // Remove all the escaped single quotation marks + std::string retVal; + // Do not really care about unterminated sequences + for (size_t i = 0; i < strList.size(); i++) + { + retVal += strList[i]; + } + return retVal; +} + +void getLayerPrecisions(Arguments& arguments, char const* argument, LayerPrecisions& layerPrecisions) +{ + std::string list; + if (!getAndDelOption(arguments, argument, list)) + { + return; + } + + // The layerPrecisions flag contains comma-separated layerName:precision pairs. 
+ std::vector precisionList{splitToStringVec(list, ',')}; + for (auto const& s : precisionList) + { + auto namePrecisionPair = splitNameAndValue(s); + auto const layerName = removeSingleQuotationMarks(namePrecisionPair.first); + layerPrecisions[layerName] = namePrecisionPair.second; + } +} + +void getLayerOutputTypes(Arguments& arguments, char const* argument, LayerOutputTypes& layerOutputTypes) +{ + std::string list; + if (!getAndDelOption(arguments, argument, list)) + { + return; + } + + // The layerOutputTypes flag contains comma-separated layerName:types pairs. + std::vector precisionList{splitToStringVec(list, ',')}; + for (auto const& s : precisionList) + { + auto namePrecisionPair = splitNameAndValue(s); + auto const layerName = removeSingleQuotationMarks(namePrecisionPair.first); + auto const typeStrings = splitToStringVec(namePrecisionPair.second, '+'); + std::vector typeVec(typeStrings.size(), nvinfer1::DataType::kFLOAT); + std::transform(typeStrings.begin(), typeStrings.end(), typeVec.begin(), stringToValue); + layerOutputTypes[layerName] = typeVec; + } +} + +void getLayerDeviceTypes(Arguments& arguments, char const* argument, LayerDeviceTypes& layerDeviceTypes) +{ + std::string list; + if (!getAndDelOption(arguments, argument, list)) + { + return; + } + + // The layerDeviceTypes flag contains comma-separated layerName:deviceType pairs. + std::vector deviceList{splitToStringVec(list, ',')}; + for (auto const& s : deviceList) + { + auto nameDevicePair = splitNameAndValue(s); + auto const layerName = removeSingleQuotationMarks(nameDevicePair.first); + layerDeviceTypes[layerName] = stringToValue(nameDevicePair.second); + } +} + +void getStringsSet(Arguments& arguments, char const* argument, StringSet& stringSet) +{ + std::string list; + if (!getAndDelOption(arguments, argument, list)) + { + return; + } + + // The layerPrecisions flag contains comma-separated layerName:precision pairs. 
+ std::vector strings{splitToStringVec(list, ',')}; + for (auto const& s : strings) + { + stringSet.insert(s); + } +} + +bool getShapesBuild(Arguments& arguments, BuildOptions::ShapeProfile& shapes, char const* argument, + nvinfer1::OptProfileSelector selector) +{ + std::string list; + bool retVal = getAndDelOption(arguments, argument, list); + std::vector shapeList{splitToStringVec(list, ',')}; + for (const auto& s : shapeList) + { + auto nameDimsPair = splitNameAndValue>(s); + auto tensorName = removeSingleQuotationMarks(nameDimsPair.first); + auto dims = nameDimsPair.second; + insertShapesBuild(shapes, selector, tensorName, dims); + } + return retVal; +} + +bool getShapesInference(Arguments& arguments, InferenceOptions::ShapeProfile& shapes, const char* argument) +{ + std::string list; + bool retVal = getAndDelOption(arguments, argument, list); + std::vector shapeList{splitToStringVec(list, ',')}; + for (const auto& s : shapeList) + { + auto nameDimsPair = splitNameAndValue>(s); + auto tensorName = removeSingleQuotationMarks(nameDimsPair.first); + auto dims = nameDimsPair.second; + insertShapesInference(shapes, tensorName, dims); + } + return retVal; +} + +void fillShapes(BuildOptions::ShapeProfile& shapes, std::string const& name, ShapeRange const& sourceShapeRange, + nvinfer1::OptProfileSelector minDimsSource, nvinfer1::OptProfileSelector optDimsSource, + nvinfer1::OptProfileSelector maxDimsSource) +{ + insertShapesBuild( + shapes, nvinfer1::OptProfileSelector::kMIN, name, sourceShapeRange[static_cast(minDimsSource)]); + insertShapesBuild( + shapes, nvinfer1::OptProfileSelector::kOPT, name, sourceShapeRange[static_cast(optDimsSource)]); + insertShapesBuild( + shapes, nvinfer1::OptProfileSelector::kMAX, name, sourceShapeRange[static_cast(maxDimsSource)]); +} + +void processShapes(BuildOptions::ShapeProfile& shapes, bool minShapes, bool optShapes, bool maxShapes, bool calib) +{ + // Only accept optShapes only or all three of minShapes, optShapes, maxShapes when 
calib is set + if (((minShapes || maxShapes) && !optShapes) // minShapes only, maxShapes only, both minShapes and maxShapes + || (minShapes && !maxShapes && optShapes) // both minShapes and optShapes + || (!minShapes && maxShapes && optShapes)) // both maxShapes and optShapes + { + if (calib) + { + throw std::invalid_argument( + "Must specify only --optShapesCalib or all of --minShapesCalib, --optShapesCalib, --maxShapesCalib"); + } + } + + if (!minShapes && !optShapes && !maxShapes) + { + return; + } + + BuildOptions::ShapeProfile newShapes; + for (auto& s : shapes) + { + nvinfer1::OptProfileSelector minDimsSource, optDimsSource, maxDimsSource; + minDimsSource = nvinfer1::OptProfileSelector::kMIN; + optDimsSource = nvinfer1::OptProfileSelector::kOPT; + maxDimsSource = nvinfer1::OptProfileSelector::kMAX; + + // Populate missing minShapes + if (!minShapes) + { + if (optShapes) + { + minDimsSource = optDimsSource; + sample::gLogWarning << "optShapes is being broadcasted to minShapes for tensor " << s.first + << std::endl; + } + else + { + minDimsSource = maxDimsSource; + sample::gLogWarning << "maxShapes is being broadcasted to minShapes for tensor " << s.first + << std::endl; + } + } + + // Populate missing optShapes + if (!optShapes) + { + if (maxShapes) + { + optDimsSource = maxDimsSource; + sample::gLogWarning << "maxShapes is being broadcasted to optShapes for tensor " << s.first + << std::endl; + } + else + { + optDimsSource = minDimsSource; + sample::gLogWarning << "minShapes is being broadcasted to optShapes for tensor " << s.first + << std::endl; + } + } + + // Populate missing maxShapes + if (!maxShapes) + { + if (optShapes) + { + maxDimsSource = optDimsSource; + sample::gLogWarning << "optShapes is being broadcasted to maxShapes for tensor " << s.first + << std::endl; + } + else + { + maxDimsSource = minDimsSource; + sample::gLogWarning << "minShapes is being broadcasted to maxShapes for tensor " << s.first + << std::endl; + } + } + + fillShapes(newShapes, 
s.first, s.second, minDimsSource, optDimsSource, maxDimsSource); + } + shapes = newShapes; +} + +bool getOptimizationProfiles( + Arguments& arguments, std::vector& optProfiles, char const* argument) +{ + bool retValue{false}; + int32_t pos{}; + size_t profileIndex{}; + + auto getShapes + = [](BuildOptions::ShapeProfile& shapes, std::string const& list, nvinfer1::OptProfileSelector selector) { + std::vector shapeList{splitToStringVec(list, ',')}; + for (auto const& s : shapeList) + { + auto nameDimsPair = splitNameAndValue>(s); + auto tensorName = removeSingleQuotationMarks(nameDimsPair.first); + auto dims = nameDimsPair.second; + insertShapesBuild(shapes, selector, tensorName, dims); + } + }; + + while (getAndDelOptionWithPosition(arguments, argument, profileIndex, pos)) + { + BuildOptions::ShapeProfile optProfile{}; + bool minShapes{false}, maxShapes{false}, optShapes{false}; + for (int32_t i = 0; i < nvinfer1::EnumMax(); i++, pos++) + { + std::string value; + + if (!minShapes && getAndDelOptionBehind(arguments, "--minShapes", pos, value)) + { + minShapes = true; + getShapes(optProfile, value, nvinfer1::OptProfileSelector::kMIN); + } + else if (!maxShapes && getAndDelOptionBehind(arguments, "--maxShapes", pos, value)) + { + maxShapes = true; + getShapes(optProfile, value, nvinfer1::OptProfileSelector::kMAX); + } + else if (!optShapes && getAndDelOptionBehind(arguments, "--optShapes", pos, value)) + { + optShapes = true; + getShapes(optProfile, value, nvinfer1::OptProfileSelector::kOPT); + } + else + { + break; + } + } + processShapes(optProfile, minShapes, optShapes, maxShapes, false); + if (profileIndex >= optProfiles.size()) + { + optProfiles.resize(profileIndex + 1); + } + if (!optProfiles[profileIndex].empty()) + { + throw std::invalid_argument("Optimization profile index cannot be the same."); + } + optProfiles[profileIndex] = optProfile; + retValue = true; + } + + profileIndex = 0; + for (auto const& optProfile : optProfiles) + { + if (optProfile.empty()) + 
{ + throw std::invalid_argument(std::string("Found invalid or missing shape spec at profile index ") + + std::to_string(profileIndex) + std::string(". ")); + } + ++profileIndex; + } + return retValue; +} + +template +void printShapes(std::ostream& os, char const* phase, T const& shapes, int32_t profileIndex) +{ + if (shapes.empty()) + { + os << "Input " << phase << " shapes: model" << std::endl; + } + else + { + std::string profileString = (profileIndex != -1 && strcmp(phase, "build") == 0) + ? "(profile " + std::to_string(profileIndex) + ")" + : ""; + for (auto const& s : shapes) + { + os << "Input " << phase << " shape " << profileString << ": " << s.first << "=" << s.second << std::endl; + } + } +} + +std::ostream& printTacticSources( + std::ostream& os, nvinfer1::TacticSources enabledSources, nvinfer1::TacticSources disabledSources) +{ + if (!enabledSources && !disabledSources) + { + os << "Using default tactic sources"; + } + else + { + auto const addSource = [&](uint32_t source, std::string const& name) { + if (enabledSources & source) + { + os << name << " [ON], "; + } + else if (disabledSources & source) + { + os << name << " [OFF], "; + } + }; + + addSource(1U << static_cast(nvinfer1::TacticSource::kCUBLAS), "cublas"); + addSource(1U << static_cast(nvinfer1::TacticSource::kCUBLAS_LT), "cublasLt"); + addSource(1U << static_cast(nvinfer1::TacticSource::kCUDNN), "cudnn"); + addSource(1U << static_cast(nvinfer1::TacticSource::kEDGE_MASK_CONVOLUTIONS), "edge mask convolutions"); + addSource(1U << static_cast(nvinfer1::TacticSource::kJIT_CONVOLUTIONS), "JIT convolutions"); + } + return os; +} + +std::ostream& printPrecision(std::ostream& os, BuildOptions const& options) +{ + if (options.stronglyTyped) + { + os << "Strongly Typed"; + return os; + } + os << "FP32"; + if (options.fp16) + { + os << "+FP16"; + } + if (options.bf16) + { + os << "+BF16"; + } + if (options.int8) + { + os << "+INT8"; + } + if (options.fp8) + { + os << "+FP8"; + } + if (options.int4) + { 
+ os << "+INT4"; + } + if (options.precisionConstraints == PrecisionConstraints::kOBEY) + { + os << " (obey precision constraints)"; + } + if (options.precisionConstraints == PrecisionConstraints::kPREFER) + { + os << " (prefer precision constraints)"; + } + return os; +} + +std::ostream& printTempfileControls(std::ostream& os, TempfileControlFlags const tempfileControls) +{ + auto getFlag = [&](TempfileControlFlag f) -> char const* { + bool allowed = !!(tempfileControls & (1U << static_cast(f))); + return allowed ? "allow" : "deny"; + }; + auto const inMemory = getFlag(TempfileControlFlag::kALLOW_IN_MEMORY_FILES); + auto const temporary = getFlag(TempfileControlFlag::kALLOW_TEMPORARY_FILES); + + os << "{ in_memory: " << inMemory << ", temporary: " << temporary << " }"; + + return os; +} + +std::ostream& printTimingCache(std::ostream& os, TimingCacheMode const& timingCacheMode) +{ + switch (timingCacheMode) + { + case TimingCacheMode::kGLOBAL: os << "global"; break; + case TimingCacheMode::kLOCAL: os << "local"; break; + case TimingCacheMode::kDISABLE: os << "disable"; break; + } + return os; +} + +std::ostream& printSparsity(std::ostream& os, BuildOptions const& options) +{ + switch (options.sparsity) + { + case SparsityFlag::kDISABLE: os << "Disabled"; break; + case SparsityFlag::kENABLE: os << "Enabled"; break; + case SparsityFlag::kFORCE: os << "Forced"; break; + } + + return os; +} + +std::ostream& printMemoryPools(std::ostream& os, BuildOptions const& options) +{ + auto const printValueOrDefault = [&os](double const val, char const* unit = "MiB") { + if (val >= 0) + { + os << val << " " << unit; + } + else + { + os << "default"; + } + }; + os << "workspace: "; + printValueOrDefault(options.workspace); + os << ", "; + os << "dlaSRAM: "; + printValueOrDefault(options.dlaSRAM); + os << ", "; + os << "dlaLocalDRAM: "; + printValueOrDefault(options.dlaLocalDRAM); + os << ", "; + os << "dlaGlobalDRAM: "; + printValueOrDefault(options.dlaGlobalDRAM); + os << ", "; + 
os << "tacticSharedMem: "; + printValueOrDefault(options.tacticSharedMem, "KiB"); + return os; +} + +std::string previewFeatureToString(PreviewFeature feature) +{ + // clang-format off + switch (feature) + { + case PreviewFeature::kPROFILE_SHARING_0806: + { + gLogWarning << "profileSharing0806 is on by default in TensorRT 10.0. This flag is deprecated and has no effect." << std::endl; + break; + } +#if (NV_TENSORRT_MAJOR > 8) + case PreviewFeature::kALIASED_PLUGIN_IO_10_03: return "kALIASED_PLUGIN_IO_10_03"; +#endif + } + return "Invalid Preview Feature"; + // clang-format on +} + +std::ostream& printPreviewFlags(std::ostream& os, BuildOptions const& options) +{ + if (options.previewFeatures.empty()) + { + os << "Use default preview flags."; + return os; + } + + auto const addFlag = [&](PreviewFeature feat) { + int32_t featVal = static_cast(feat); + if (options.previewFeatures.find(featVal) != options.previewFeatures.end()) + { + os << previewFeatureToString(feat) << (options.previewFeatures.at(featVal) ? 
" [ON], " : " [OFF], "); + } + }; +#if (NV_TENSORRT_MAJOR > 8) + addFlag(PreviewFeature::kALIASED_PLUGIN_IO_10_03); +#endif + return os; +} + +} // namespace + +Arguments argsToArgumentsMap(int32_t argc, char* argv[]) +{ + Arguments arguments; + for (int32_t i = 1; i < argc; ++i) + { + auto valuePtr = strchr(argv[i], '='); + if (valuePtr) + { + std::string value{valuePtr + 1}; + arguments.emplace(std::string(argv[i], valuePtr - argv[i]), std::make_pair(value, i)); + } + else + { + arguments.emplace(argv[i], std::make_pair(std::string(""), i)); + } + } + return arguments; +} + +namespace +{ +std::string resolveHomeDirectoryOnLinux(std::string const& model) +{ + std::string filePath{model}; +#ifndef _WIN32 + if (filePath[0] == '~') + { + char const* home = std::getenv("HOME"); + if (home) + { + filePath.replace(0, 1, home); + } + } +#endif + return filePath; +} +} // namespace + +void BaseModelOptions::parse(Arguments& arguments) +{ + if (getAndDelOption(arguments, "--onnx", model)) + { + format = ModelFormat::kONNX; + model = resolveHomeDirectoryOnLinux(model); + } +} + +void ModelOptions::parse(Arguments& arguments) +{ + baseModel.parse(arguments); + + switch (baseModel.format) + { + case ModelFormat::kONNX: + case ModelFormat::kANY: + { + break; + } + } + + if (baseModel.format == ModelFormat::kONNX) + { + if (!outputs.empty()) + { + throw std::invalid_argument("The --output flag should not be used with ONNX models."); + } + } +} + +void getTempfileControls(Arguments& arguments, char const* argument, TempfileControlFlags& tempfileControls) +{ + std::string list; + if (!getAndDelOption(arguments, argument, list)) + { + return; + } + + std::vector controlList{splitToStringVec(list, ',')}; + for (auto const& s : controlList) + { + auto controlAllowPair = splitNameAndValue(s); + bool allowed{false}; + int32_t offset{-1}; + + if (controlAllowPair.second.compare("allow") == 0) + { + allowed = true; + } + else if (controlAllowPair.second.compare("deny") != 0) + { + throw 
std::invalid_argument("--tempfileControls value should be `deny` or `allow`"); + } + + if (controlAllowPair.first.compare("in_memory") == 0) + { + offset = static_cast(TempfileControlFlag::kALLOW_IN_MEMORY_FILES); + } + else if (controlAllowPair.first.compare("temporary") == 0) + { + offset = static_cast(TempfileControlFlag::kALLOW_TEMPORARY_FILES); + } + else + { + throw std::invalid_argument(std::string{"Unknown --tempfileControls key "} + controlAllowPair.first); + } + + if (allowed) + { + tempfileControls |= (1U << offset); + } + else + { + tempfileControls &= ~(1U << offset); + } + } +} + +void BuildOptions::parse(Arguments& arguments) +{ + auto getFormats = [&arguments](std::vector& formatsVector, const char* argument) { + std::string list; + getAndDelOption(arguments, argument, list); + std::vector formats{splitToStringVec(list, ',')}; + for (const auto& f : formats) + { + formatsVector.push_back(stringToValue(f)); + } + }; + + getFormats(inputFormats, "--inputIOFormats"); + getFormats(outputFormats, "--outputIOFormats"); + + bool getCalibProfile = getAndDelOption(arguments, "--calibProfile", calibProfile); + if (!getOptimizationProfiles(arguments, optProfiles, "--profile")) + { + ShapeProfile shapes; + bool minShapes{false}, optShapes{false}, maxShapes{false}; + try + { + minShapes = getShapesBuild(arguments, shapes, "--minShapes", nvinfer1::OptProfileSelector::kMIN); + optShapes = getShapesBuild(arguments, shapes, "--optShapes", nvinfer1::OptProfileSelector::kOPT); + maxShapes = getShapesBuild(arguments, shapes, "--maxShapes", nvinfer1::OptProfileSelector::kMAX); + } + catch (std::invalid_argument const& arg) + { + throw std::invalid_argument(arg.what() + + std::string(" conversion failure: failed to parse minShapes/optShapes/maxShapes. 
Please double check " + "your input string.")); + } + + processShapes(shapes, minShapes, optShapes, maxShapes, false); + optProfiles.emplace_back(shapes); + } + + if (calibProfile >= optProfiles.size()) + { + throw std::invalid_argument( + std::string("--calibProfile shouldn't greater than the size of optimization profile.")); + } + + BuildOptions::ShapeProfile dummyShapes; + + bool remainingMinShapes = getShapesBuild(arguments, dummyShapes, "--minShapes", nvinfer1::OptProfileSelector::kMIN); + bool remainingOptShapes = getShapesBuild(arguments, dummyShapes, "--optShapes", nvinfer1::OptProfileSelector::kOPT); + bool remainingMaxShapes = getShapesBuild(arguments, dummyShapes, "--maxShapes", nvinfer1::OptProfileSelector::kMAX); + if (remainingMinShapes || remainingOptShapes || remainingMaxShapes) + { + throw std::invalid_argument("Multiple --minShapes/--optShapes/--maxShapes without --profile are not allowed. "); + } + + bool minShapesCalib{false}, optShapesCalib{false}, maxShapesCalib{false}; + try + { + minShapesCalib = getShapesBuild(arguments, shapesCalib, "--minShapesCalib", nvinfer1::OptProfileSelector::kMIN); + optShapesCalib = getShapesBuild(arguments, shapesCalib, "--optShapesCalib", nvinfer1::OptProfileSelector::kOPT); + maxShapesCalib = getShapesBuild(arguments, shapesCalib, "--maxShapesCalib", nvinfer1::OptProfileSelector::kMAX); + } + catch (std::invalid_argument const& arg) + { + throw std::invalid_argument(arg.what() + + std::string(" conversion failure: failed to parse minShapesCalib/optShapesCalib/maxShapesCalib. 
Please " + "double check your input string.")); + } + + processShapes(shapesCalib, minShapesCalib, optShapesCalib, maxShapesCalib, true); + + std::string memPoolSizes; + getAndDelOption(arguments, "--memPoolSize", memPoolSizes); + std::vector memPoolSpecs{splitToStringVec(memPoolSizes, ',')}; + for (auto const& memPoolSpec : memPoolSpecs) + { + std::string memPoolName; + double memPoolSize; + try + { + std::string strPoolSize; + std::tie(memPoolName, strPoolSize) = splitNameAndValue(memPoolSpec); + memPoolSize = stringToValue(addDefaultUnitSuffixIfNotSpecified(strPoolSize, 'M')); + } + catch (std::invalid_argument const& arg) + { + throw std::invalid_argument(arg.what() + + std::string( + " conversion failure: failed to parse --memPoolSize. Please double check your input string.")); + } + + if (memPoolSize < 0) + { + throw std::invalid_argument(std::string("Negative memory pool size: ") + std::to_string(memPoolSize)); + } + if (memPoolName == "workspace") + { + // use unit in MB. + workspace = memPoolSize / 1.0_MiB; + } + else if (memPoolName == "dlaSRAM") + { + // use unit in MB. + dlaSRAM = memPoolSize / 1.0_MiB; + } + else if (memPoolName == "dlaLocalDRAM") + { + // use unit in MB. + dlaLocalDRAM = memPoolSize / 1.0_MiB; + } + else if (memPoolName == "dlaGlobalDRAM") + { + // use unit in MB. + dlaGlobalDRAM = memPoolSize / 1.0_MiB; + } + else if (memPoolName == "tacticSharedMem") + { + // use unit in KB. 
+ tacticSharedMem = memPoolSize / 1.0_KiB; + } + else if (!memPoolName.empty()) + { + throw std::invalid_argument(std::string("Unknown memory pool: ") + memPoolName); + } + } + + getAndDelOption(arguments, "--avgTiming", avgTiming); + + bool best{false}; + getAndDelOption(arguments, "--best", best); + if (best) + { + int8 = true; + fp16 = true; + + // BF16 only supported on Ampere+ + if (samplesCommon::getSMVersion() >= 0x0800) + { + bf16 = true; + } + } + + getAndDelOption(arguments, "--refit", refittable); + + getAndDelOption(arguments, "--weightless", stripWeights); + getAndDelOption(arguments, "--stripWeights", stripWeights); + + bool stripAllWeights{}; + getAndDelOption(arguments, "--stripAllWeights", stripAllWeights); + if (stripAllWeights) + { + refittable = true; + stripWeights = true; + } + + // --vc and --versionCompatible are synonyms + getAndDelOption(arguments, "--vc", versionCompatible); + if (!versionCompatible) + { + getAndDelOption(arguments, "--versionCompatible", versionCompatible); + } + +#if !TRT_WINML + // --pi and --pluginInstanceNorm are synonyms + getAndDelOption(arguments, "--pi", pluginInstanceNorm); + if (!pluginInstanceNorm) + { + getAndDelOption(arguments, "--pluginInstanceNorm", pluginInstanceNorm); + } +#endif + + getAndDelOption(arguments, "--excludeLeanRuntime", excludeLeanRuntime); + getAndDelOption(arguments, "--noCompilationCache", disableCompilationCache); + getAndDelNegOption(arguments, "--noTF32", tf32); + getAndDelOption(arguments, "--fp16", fp16); + getAndDelOption(arguments, "--bf16", bf16); + getAndDelOption(arguments, "--int8", int8); + getAndDelOption(arguments, "--fp8", fp8); + getAndDelOption(arguments, "--int4", int4); + getAndDelOption(arguments, "--stronglyTyped", stronglyTyped); + if (stronglyTyped) + { + auto disableAndLog = [](bool& flag, std::string mode, std::string type) { + if (flag) + { + flag = false; + sample::gLogWarning << "Invalid usage, setting " << mode + << " mode is not allowed if graph is strongly 
typed. Disabling BuilderFlag::" + << type << "." << std::endl; + } + }; + disableAndLog(fp16, "fp16", "kFP16"); + disableAndLog(int8, "int8", "kINT8"); + disableAndLog(bf16, "bf16", "kBF16"); + disableAndLog(fp8, "fp8", "kFP8"); + disableAndLog(int4, "int4", "kINT4"); + } + + if (fp8 && int8) + { + throw std::invalid_argument("Invalid usage, fp8 and int8 aren't allowed to be enabled together."); + } + getAndDelOption(arguments, "--safe", safe); + getAndDelOption(arguments, "--buildDLAStandalone", buildDLAStandalone); + getAndDelOption(arguments, "--allowGPUFallback", allowGPUFallback); + getAndDelOption(arguments, "--restricted", restricted); + getAndDelOption(arguments, "--skipInference", skipInference); + getAndDelOption(arguments, "--directIO", directIO); + + std::string precisionConstraintsString; + getAndDelOption(arguments, "--precisionConstraints", precisionConstraintsString); + if (!precisionConstraintsString.empty()) + { + const std::unordered_map precisionConstraintsMap + = {{"obey", PrecisionConstraints::kOBEY}, {"prefer", PrecisionConstraints::kPREFER}, + {"none", PrecisionConstraints::kNONE}}; + auto it = precisionConstraintsMap.find(precisionConstraintsString); + if (it == precisionConstraintsMap.end()) + { + throw std::invalid_argument(std::string("Unknown precision constraints: ") + precisionConstraintsString); + } + precisionConstraints = it->second; + } + else + { + precisionConstraints = PrecisionConstraints::kNONE; + } + + getLayerPrecisions(arguments, "--layerPrecisions", layerPrecisions); + getLayerOutputTypes(arguments, "--layerOutputTypes", layerOutputTypes); + getLayerDeviceTypes(arguments, "--layerDeviceTypes", layerDeviceTypes); + + if (layerPrecisions.empty() && layerOutputTypes.empty() && precisionConstraints != PrecisionConstraints::kNONE) + { + sample::gLogWarning << R"(When --precisionConstraints flag is set to "obey" or "prefer", please add )" + << "--layerPrecision/--layerOutputTypes flags to set layer-wise precisions and output " 
+ << "types." << std::endl; + } + else if ((!layerPrecisions.empty() || !layerOutputTypes.empty()) + && precisionConstraints == PrecisionConstraints::kNONE) + { + sample::gLogWarning << "--layerPrecision/--layerOutputTypes flags have no effect when --precisionConstraints " + << R"(flag is set to "none".)" << std::endl; + } + + getStringsSet(arguments, "--markDebug", debugTensors); + + getAndDelOption(arguments, "--sparsity", sparsity); + + bool calibCheck = getAndDelOption(arguments, "--calib", calibration); + if (int8 && calibCheck && !optProfiles[calibProfile].empty() && shapesCalib.empty()) + { + shapesCalib = optProfiles[calibProfile]; + } + else if (!shapesCalib.empty() && getCalibProfile) + { + sample::gLogWarning + << "--calibProfile have no effect when --minShapesCalib/--optShapesCalib/--maxShapesCalib is set." + << std::endl; + } + + std::string profilingVerbosityString; + + getAndDelOption(arguments, "--profilingVerbosity", profilingVerbosityString); + if (profilingVerbosityString == "layer_names_only") + { + profilingVerbosity = nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY; + } + else if (profilingVerbosityString == "none") + { + profilingVerbosity = nvinfer1::ProfilingVerbosity::kNONE; + } + else if (profilingVerbosityString == "detailed") + { + profilingVerbosity = nvinfer1::ProfilingVerbosity::kDETAILED; + } + else if (profilingVerbosityString == "default") + { + sample::gLogWarning << "--profilingVerbosity=default has been deprecated by " + "--profilingVerbosity=layer_names_only." + << std::endl; + profilingVerbosity = nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY; + } + else if (profilingVerbosityString == "verbose") + { + sample::gLogWarning << "--profilingVerbosity=verbose has been deprecated by --profilingVerbosity=detailed." 
+ << std::endl; + profilingVerbosity = nvinfer1::ProfilingVerbosity::kDETAILED; + } + else if (!profilingVerbosityString.empty()) + { + throw std::invalid_argument(std::string("Unknown profilingVerbosity: ") + profilingVerbosityString); + } + + if (getAndDelOption(arguments, "--loadEngine", engine)) + { + load = true; + } + getAndDelOption(arguments, "--getPlanVersionOnly", getPlanVersionOnly); + + if (getAndDelOption(arguments, "--saveEngine", engine)) + { + save = true; + } + if (load && save) + { + throw std::invalid_argument("Incompatible load and save engine options selected"); + } + + std::string tacticSourceArgs; + if (getAndDelOption(arguments, "--tacticSources", tacticSourceArgs)) + { + std::vector tacticList = splitToStringVec(tacticSourceArgs, ','); + for (auto& t : tacticList) + { + bool enable{false}; + if (t.front() == '+') + { + enable = true; + } + else if (t.front() != '-') + { + throw std::invalid_argument( + "Tactic source must be prefixed with + or -, indicating whether it should be enabled or disabled " + "respectively."); + } + t.erase(0, 1); + + const auto toUpper = [](std::string& sourceName) { + std::transform( + sourceName.begin(), sourceName.end(), sourceName.begin(), [](char c) { return std::toupper(c); }); + return sourceName; + }; + + nvinfer1::TacticSource source{}; + t = toUpper(t); + if (t == "CUBLAS") + { + source = nvinfer1::TacticSource::kCUBLAS; + } + else if (t == "CUBLASLT" || t == "CUBLAS_LT") + { + source = nvinfer1::TacticSource::kCUBLAS_LT; + } + else if (t == "CUDNN") + { + source = nvinfer1::TacticSource::kCUDNN; + } + else if (t == "EDGE_MASK_CONVOLUTIONS") + { + source = nvinfer1::TacticSource::kEDGE_MASK_CONVOLUTIONS; + } + else if (t == "JIT_CONVOLUTIONS") + { + source = nvinfer1::TacticSource::kJIT_CONVOLUTIONS; + } + else + { + throw std::invalid_argument(std::string("Unknown tactic source: ") + t); + } + + uint32_t sourceBit = 1U << static_cast(source); + + if (enable) + { + enabledTactics |= sourceBit; + } + else 
+ { + disabledTactics |= sourceBit; + } + + if (enabledTactics & disabledTactics) + { + throw std::invalid_argument(std::string("Cannot enable and disable ") + t); + } + } + } + + bool noBuilderCache{false}; + getAndDelOption(arguments, "--noBuilderCache", noBuilderCache); + getAndDelOption(arguments, "--timingCacheFile", timingCacheFile); + if (noBuilderCache) + { + timingCacheMode = TimingCacheMode::kDISABLE; + } + else if (!timingCacheFile.empty()) + { + timingCacheMode = TimingCacheMode::kGLOBAL; + } + else + { + timingCacheMode = TimingCacheMode::kLOCAL; + } + getAndDelOption(arguments, "--errorOnTimingCacheMiss", errorOnTimingCacheMiss); + getAndDelOption(arguments, "--builderOptimizationLevel", builderOptimizationLevel); + getAndDelOption(arguments, "--maxTactics", maxTactics); +#if (NV_TENSORRT_MAJOR > 8) + std::string runtimePlatformArgs; + getAndDelOption(arguments, "--runtimePlatform", runtimePlatformArgs); + if (runtimePlatformArgs == "SameAsBuild" || runtimePlatformArgs.empty()) + { + runtimePlatform = RuntimePlatform::kSAME_AS_BUILD; + } + else if (runtimePlatformArgs == "WindowsAMD64") + { + runtimePlatform = RuntimePlatform::kWINDOWS_AMD64; + } + else + { + throw std::invalid_argument(std::string("Unknown runtime platform: ") + runtimePlatformArgs + + ". Valid options: SameAsBuild, WindowsAMD64."); + } +#endif + std::string hardwareCompatibleArgs; + getAndDelOption(arguments, "--hardwareCompatibilityLevel", hardwareCompatibleArgs); + if (hardwareCompatibleArgs == "none" || hardwareCompatibleArgs.empty()) + { + hardwareCompatibilityLevel = HardwareCompatibilityLevel::kNONE; + } + else if (samplesCommon::toLower(hardwareCompatibleArgs) == "ampere+") + { + hardwareCompatibilityLevel = HardwareCompatibilityLevel::kAMPERE_PLUS; + } + else + { + throw std::invalid_argument(std::string("Unknown hardwareCompatibilityLevel: ") + hardwareCompatibleArgs + + ". 
Valid options: none, ampere+."); + } + + if (pluginInstanceNorm && (versionCompatible || hardwareCompatibilityLevel == HardwareCompatibilityLevel::kAMPERE_PLUS)) + { + throw std::invalid_argument("Plugin InstanceNorm cannot be used with version compatible or hardware compatible engines!"); + } + + getAndDelOption(arguments, "--maxAuxStreams", maxAuxStreams); + + std::string previewFeaturesBuf; + getAndDelOption(arguments, "--preview", previewFeaturesBuf); + std::vector previewFeaturesVec{splitToStringVec(previewFeaturesBuf, ',')}; + for (auto featureName : previewFeaturesVec) + { + bool enable{false}; + if (featureName.front() == '+') + { + enable = true; + } + else if (featureName.front() != '-') + { + throw std::invalid_argument( + "Preview features must be prefixed with + or -, indicating whether it should be enabled or disabled " + "respectively."); + } + featureName.erase(0, 1); + + PreviewFeature feat{}; + if (featureName == "profileSharing0806") + { + sample::gLogWarning + << "profileSharing0806 is on by default in TensorRT 10.0. This flag is deprecated and has no effect." 
+ << std::endl; + } +#if (NV_TENSORRT_MAJOR > 8) + else if (featureName == "aliasedPluginIO1003") + { + feat = PreviewFeature::kALIASED_PLUGIN_IO_10_03; + } +#endif + else + { + throw std::invalid_argument(std::string("Unknown preview feature: ") + featureName); + } + previewFeatures[static_cast(feat)] = enable; + } + + getAndDelOption(arguments, "--tempdir", tempdir); + getTempfileControls(arguments, "--tempfileControls", tempfileControls); + + std::string runtimeMode; + getAndDelOption(arguments, "--useRuntime", runtimeMode); + if (runtimeMode == "full") + { + useRuntime = RuntimeMode::kFULL; + } + else if (runtimeMode == "dispatch") + { + useRuntime = RuntimeMode::kDISPATCH; + } + else if (runtimeMode == "lean") + { + useRuntime = RuntimeMode::kLEAN; + } + else if (!runtimeMode.empty()) + { + throw std::invalid_argument(std::string("Unknown useRuntime: ") + runtimeMode); + } + + if ((useRuntime == RuntimeMode::kDISPATCH || useRuntime == RuntimeMode::kLEAN) && !versionCompatible) + { + versionCompatible = true; + sample::gLogWarning << "Implicitly enabling --versionCompatible since --useRuntime=" << runtimeMode + << " is set." << std::endl; + } + + if (useRuntime != RuntimeMode::kFULL && !load) + { + throw std::invalid_argument(std::string("Building a TensorRT engine requires --useRuntime=full.")); + } + + getAndDelOption(arguments, "--leanDLLPath", leanDLLPath); + + // Don't delete the option because the inference option parser requires it + getOption(arguments, "--allowWeightStreaming", allowWeightStreaming); +} + +void SystemOptions::parse(Arguments& arguments) +{ + getAndDelOption(arguments, "--device", device); + getAndDelOption(arguments, "--useDLACore", DLACore); +#if !TRT_WINML + std::string pluginName; + while (getAndDelOption(arguments, "--plugins", pluginName)) + { + sample::gLogWarning << "--plugins flag has been deprecated, use --staticPlugins flag instead." 
<< std::endl; + plugins.emplace_back(pluginName); + } + while (getAndDelOption(arguments, "--staticPlugins", pluginName)) + { + plugins.emplace_back(pluginName); + } + while (getAndDelOption(arguments, "--setPluginsToSerialize", pluginName)) + { + setPluginsToSerialize.emplace_back(pluginName); + } + while (getAndDelOption(arguments, "--dynamicPlugins", pluginName)) + { + dynamicPlugins.emplace_back(pluginName); + } + getAndDelOption(arguments, "--ignoreParsedPluginLibs", ignoreParsedPluginLibs); +#endif +} + +constexpr int64_t WeightStreamingBudget::kDISABLE; +constexpr int64_t WeightStreamingBudget::kAUTOMATIC; + +void InferenceOptions::parse(Arguments& arguments) +{ + + if (getAndDelOption(arguments, "--streams", infStreams)) + { + sample::gLogWarning << "--streams flag has been deprecated, use --infStreams flag instead." << std::endl; + } + getAndDelOption(arguments, "--infStreams", infStreams); + + getAndDelOption(arguments, "--iterations", iterations); + getAndDelOption(arguments, "--duration", duration); + getAndDelOption(arguments, "--warmUp", warmup); + getAndDelOption(arguments, "--sleepTime", sleep); + getAndDelOption(arguments, "--idleTime", idle); + bool exposeDMA{false}; + if (getAndDelOption(arguments, "--exposeDMA", exposeDMA)) + { + overlap = !exposeDMA; + } + getAndDelOption(arguments, "--noDataTransfers", skipTransfers); + getAndDelOption(arguments, "--useManagedMemory", useManaged); + getAndDelOption(arguments, "--useSpinWait", spin); + getAndDelOption(arguments, "--threads", threads); + getAndDelOption(arguments, "--useCudaGraph", graph); + getAndDelOption(arguments, "--separateProfileRun", rerun); + getAndDelOption(arguments, "--timeDeserialize", timeDeserialize); + getAndDelOption(arguments, "--timeRefit", timeRefit); + getAndDelOption(arguments, "--persistentCacheRatio", persistentCacheRatio); + + std::string list; + getAndDelOption(arguments, "--loadInputs", list); + std::vector inputsList{splitToStringVec(list, ',')}; + 
splitInsertKeyValue(inputsList, inputs); + + getShapesInference(arguments, shapes, "--shapes"); + setOptProfile = getAndDelOption(arguments, "--useProfile", optProfileIndex); + + std::string allocationStrategyString; + getAndDelOption(arguments, "--allocationStrategy", allocationStrategyString); + if (allocationStrategyString == "static") + { + memoryAllocationStrategy = MemoryAllocationStrategy::kSTATIC; + } + else if (allocationStrategyString == "profile") + { + memoryAllocationStrategy = MemoryAllocationStrategy::kPROFILE; + } + else if (allocationStrategyString == "runtime") + { + memoryAllocationStrategy = MemoryAllocationStrategy::kRUNTIME; + } + else if (!allocationStrategyString.empty()) + { + throw std::invalid_argument(std::string("Unknown allocationStrategy: ") + allocationStrategyString); + } + + bool allowWs{false}; + getAndDelOption(arguments, "--allowWeightStreaming", allowWs); + bool wsBudgetFound = getAndDelOption(arguments, "--weightStreamingBudget", weightStreamingBudget); + if (wsBudgetFound && !allowWs) + { + throw std::invalid_argument( + "The weight streaming budget can only be set with --allowWeightStreaming specified."); + } + if (allowWs && weightStreamingBudget.isDisabled()) + { + sample::gLogWarning << "The engine can stream its weights but it will not at runtime because " + "--weightStreamingBudget unset or set to " + << WeightStreamingBudget::kDISABLE << "." 
<< std::endl; + } + + std::string debugTensorList; + getAndDelOption(arguments, "--saveDebugTensors", debugTensorList); + std::vector fileNames{splitToStringVec(debugTensorList, ',')}; + splitInsertKeyValue(fileNames, debugTensorFileNames); +} + +void ReportingOptions::parse(Arguments& arguments) +{ + getAndDelOption(arguments, "--avgRuns", avgs); + getAndDelOption(arguments, "--verbose", verbose); + getAndDelOption(arguments, "--dumpRefit", refit); + getAndDelOption(arguments, "--dumpOutput", output); + getAndDelOption(arguments, "--dumpRawBindingsToFile", dumpRawBindings); + getAndDelOption(arguments, "--dumpProfile", profile); + getAndDelOption(arguments, "--dumpLayerInfo", layerInfo); + getAndDelOption(arguments, "--dumpOptimizationProfile", optProfileInfo); + getAndDelOption(arguments, "--exportTimes", exportTimes); + getAndDelOption(arguments, "--exportOutput", exportOutput); + getAndDelOption(arguments, "--exportProfile", exportProfile); + getAndDelOption(arguments, "--exportLayerInfo", exportLayerInfo); + + std::string percentileString; + getAndDelOption(arguments, "--percentile", percentileString); + std::vector percentileStrings = splitToStringVec(percentileString, ','); + if (!percentileStrings.empty()) + { + percentiles.clear(); + } + for (const auto& p : percentileStrings) + { + percentiles.push_back(stringToValue(p)); + } + + for (auto percentile : percentiles) + { + if (percentile < 0.F || percentile > 100.F) + { + throw std::invalid_argument(std::string("Percentile ") + std::to_string(percentile) + "is not in [0,100]"); + } + } +} + +bool parseHelp(Arguments& arguments) +{ + bool helpLong{false}; + bool helpShort{false}; + getAndDelOption(arguments, "--help", helpLong); + getAndDelOption(arguments, "-h", helpShort); + return helpLong || helpShort; +} + +void AllOptions::parse(Arguments& arguments) +{ + model.parse(arguments); + build.parse(arguments); + system.parse(arguments); + inference.parse(arguments); + + if (build.useRuntime != 
RuntimeMode::kFULL && inference.timeRefit) + { + throw std::invalid_argument("--timeRefit requires --useRuntime=full."); + } + + if (inference.optProfileIndex < static_cast(build.optProfiles.size())) + { + // Propagate shape profile between builder and inference + for (auto const& s : build.optProfiles[inference.optProfileIndex]) + { + if (inference.shapes.find(s.first) == inference.shapes.end()) + { + insertShapesInference( + inference.shapes, s.first, s.second[static_cast(nvinfer1::OptProfileSelector::kOPT)]); + } + } + for (auto const& s : inference.shapes) + { + if (build.optProfiles[inference.optProfileIndex].find(s.first) + == build.optProfiles[inference.optProfileIndex].end()) + { + // assume min/opt/max all the same + insertShapesBuild(build.optProfiles[inference.optProfileIndex], nvinfer1::OptProfileSelector::kMIN, + s.first, s.second); + insertShapesBuild(build.optProfiles[inference.optProfileIndex], nvinfer1::OptProfileSelector::kOPT, + s.first, s.second); + insertShapesBuild(build.optProfiles[inference.optProfileIndex], nvinfer1::OptProfileSelector::kMAX, + s.first, s.second); + } + } + } + + // Set nvtxVerbosity to be the same as build-time profilingVerbosity. + inference.nvtxVerbosity = build.profilingVerbosity; + + reporting.parse(arguments); + helps = parseHelp(arguments); + + if (!helps) + { + if (!build.load && model.baseModel.format == ModelFormat::kANY) + { + throw std::invalid_argument("Model missing or format not recognized"); + } + if (build.safe && system.DLACore >= 0) + { + build.buildDLAStandalone = true; + } +#if (NV_TENSORRT_MAJOR > 8) + if (build.runtimePlatform != nvinfer1::RuntimePlatform::kSAME_AS_BUILD) + { + build.skipInference = true; + } +#endif + if (build.buildDLAStandalone) + { + build.skipInference = true; + auto checkSafeDLAFormats = [](std::vector const& fmt, bool isInput) { + return fmt.empty() ? 
false : std::all_of(fmt.begin(), fmt.end(), [&](IOFormat const& pair) { + bool supported{false}; + bool const isDLA_LINEAR{ + pair.second == 1U << static_cast(nvinfer1::TensorFormat::kDLA_LINEAR)}; + bool const isHWC4{pair.second == 1U << static_cast(nvinfer1::TensorFormat::kCHW4) + || pair.second == 1U << static_cast(nvinfer1::TensorFormat::kDLA_HWC4)}; + bool const isCHW32{pair.second == 1U << static_cast(nvinfer1::TensorFormat::kCHW32)}; + bool const isCHW16{pair.second == 1U << static_cast(nvinfer1::TensorFormat::kCHW16)}; + supported |= pair.first == nvinfer1::DataType::kINT8 + && (isDLA_LINEAR || (isInput ? isHWC4 : false) || isCHW32); + supported |= pair.first == nvinfer1::DataType::kHALF + && (isDLA_LINEAR || (isInput ? isHWC4 : false) || isCHW16); + return supported; + }); + }; + if (!checkSafeDLAFormats(build.inputFormats, true) || !checkSafeDLAFormats(build.outputFormats, false)) + { + throw std::invalid_argument( + "I/O formats for safe DLA capability are restricted to fp16/int8:dla_linear, fp16/int8:hwc4, " + "fp16:chw16 or " + "int8:chw32"); + } + if (build.allowGPUFallback) + { + throw std::invalid_argument("GPU fallback (--allowGPUFallback) not allowed for DLA standalone mode"); + } + } + } +} + +void TaskInferenceOptions::parse(Arguments& arguments) +{ + getAndDelOption(arguments, "engine", engine); + getAndDelOption(arguments, "device", device); + getAndDelOption(arguments, "batch", batch); + getAndDelOption(arguments, "DLACore", DLACore); + getAndDelOption(arguments, "graph", graph); + getAndDelOption(arguments, "persistentCacheRatio", persistentCacheRatio); +} + +void SafeBuilderOptions::parse(Arguments& arguments) +{ + auto getFormats = [&arguments](std::vector& formatsVector, const char* argument) { + std::string list; + getAndDelOption(arguments, argument, list); + std::vector formats{splitToStringVec(list, ',')}; + for (const auto& f : formats) + { + formatsVector.push_back(stringToValue(f)); + } + }; + + getAndDelOption(arguments, 
"--serialized", serialized); + getAndDelOption(arguments, "--onnx", onnxModelFile); + getAndDelOption(arguments, "--help", help); + getAndDelOption(arguments, "-h", help); + getAndDelOption(arguments, "--verbose", verbose); + getAndDelOption(arguments, "-v", verbose); + getFormats(inputFormats, "--inputIOFormats"); + getFormats(outputFormats, "--outputIOFormats"); + getAndDelOption(arguments, "--int8", int8); + getAndDelOption(arguments, "--calib", calibFile); + getAndDelOption(arguments, "--std", standard); +#if !TRT_WINML + std::string pluginName; + while (getAndDelOption(arguments, "--plugins", pluginName)) + { + sample::gLogWarning << "--plugins flag has been deprecated, use --staticPlugins flag instead." << std::endl; + plugins.emplace_back(pluginName); + } + while (getAndDelOption(arguments, "--staticPlugins", pluginName)) + { + plugins.emplace_back(pluginName); + } +#endif + bool noBuilderCache{false}; + getAndDelOption(arguments, "--noBuilderCache", noBuilderCache); + getAndDelOption(arguments, "--timingCacheFile", timingCacheFile); + getAndDelOption(arguments, "--avgTiming", avgTiming); + if (noBuilderCache) + { + timingCacheMode = TimingCacheMode::kDISABLE; + } + else if (!timingCacheFile.empty()) + { + timingCacheMode = TimingCacheMode::kGLOBAL; + } + else + { + timingCacheMode = TimingCacheMode::kLOCAL; + } + getAndDelOption(arguments, "--sparsity", sparsity); +} + +std::ostream& operator<<(std::ostream& os, const BaseModelOptions& options) +{ + os << "=== Model Options ===" << std::endl; + + os << "Format: "; + switch (options.format) + { + case ModelFormat::kONNX: + { + os << "ONNX"; + break; + } + case ModelFormat::kANY: os << "*"; break; + } + os << std::endl << "Model: " << options.model << std::endl; + + return os; +} + +std::ostream& operator<<(std::ostream& os, const ModelOptions& options) +{ + os << options.baseModel; + switch (options.baseModel.format) + { + case ModelFormat::kONNX: // Fallthrough: No options to report for ONNX or the generic 
case + case ModelFormat::kANY: break; + } + + os << "Output:"; + for (const auto& o : options.outputs) + { + os << " " << o; + } + os << std::endl; + + return os; +} + +std::ostream& operator<<(std::ostream& os, nvinfer1::DataType dtype) +{ + switch (dtype) + { + case nvinfer1::DataType::kFLOAT: + { + os << "fp32"; + break; + } + case nvinfer1::DataType::kHALF: + { + os << "fp16"; + break; + } +#if (NV_TENSORRT_MAJOR > 8) + case nvinfer1::DataType::kBF16: + { + os << "bf16"; + break; + } +#endif + case nvinfer1::DataType::kINT8: + { + os << "int8"; + break; + } + case nvinfer1::DataType::kINT32: + { + os << "int32"; + break; + } + case nvinfer1::DataType::kBOOL: + { + os << "bool"; + break; + } + case nvinfer1::DataType::kUINT8: + { + os << "uint8"; + break; + } + case nvinfer1::DataType::kFP8: + { + os << "fp8"; + break; + } +#if (NV_TENSORRT_MAJOR > 8) + case nvinfer1::DataType::kINT64: + { + os << "int64"; + break; + } + case nvinfer1::DataType::kINT4: + { + os << "int4"; + break; + } +#endif + } + return os; +} + +std::ostream& operator<<(std::ostream& os, IOFormat const& format) +{ + os << format.first << ":"; + + for (int32_t f = 0; f < nvinfer1::EnumMax(); ++f) + { + if ((1U << f) & format.second) + { + if (f) + { + os << "+"; + } + switch (nvinfer1::TensorFormat(f)) + { + case nvinfer1::TensorFormat::kLINEAR: + { + os << "chw"; + break; + } + case nvinfer1::TensorFormat::kCHW2: + { + os << "chw2"; + break; + } + case nvinfer1::TensorFormat::kHWC8: + { + os << "hwc8"; + break; + } + case nvinfer1::TensorFormat::kHWC16: + { + os << "hwc16"; + break; + } + case nvinfer1::TensorFormat::kCHW4: + { + os << "chw4"; + break; + } + case nvinfer1::TensorFormat::kCHW16: + { + os << "chw16"; + break; + } + case nvinfer1::TensorFormat::kCHW32: + { + os << "chw32"; + break; + } + case nvinfer1::TensorFormat::kDHWC8: + { + os << "dhwc8"; + break; + } + case nvinfer1::TensorFormat::kCDHW32: + { + os << "cdhw32"; + break; + } + case nvinfer1::TensorFormat::kHWC: + { + os << 
"hwc"; + break; + } + case nvinfer1::TensorFormat::kDHWC: + { + os << "dhwc"; + break; + } + case nvinfer1::TensorFormat::kDLA_LINEAR: + { + os << "dla_linear"; + break; + } + case nvinfer1::TensorFormat::kDLA_HWC4: + { + os << "dla_hwc4"; + break; + } + } + } + } + return os; +} + +std::ostream& operator<<(std::ostream& os, nvinfer1::DeviceType devType) +{ + switch (devType) + { + case nvinfer1::DeviceType::kGPU: + { + os << "GPU"; + break; + } + case nvinfer1::DeviceType::kDLA: + { + os << "DLA"; + break; + } + } + return os; +} + +#if (NV_TENSORRT_MAJOR > 8) +std::ostream& operator<<(std::ostream& os, nvinfer1::RuntimePlatform platform) +{ + switch (platform) + { + case nvinfer1::RuntimePlatform::kSAME_AS_BUILD: + { + os << "Same As Build"; + break; + } + case nvinfer1::RuntimePlatform::kWINDOWS_AMD64: + { + os << "Windows AMD64"; + break; + } + } + return os; +} +#endif + +std::ostream& operator<<(std::ostream& os, const ShapeRange& dims) +{ + int32_t i = 0; + for (const auto& d : dims) + { + if (!d.size()) + { + break; + } + os << (i ? "+" : "") << d; + ++i; + } + return os; +} + +std::ostream& operator<<(std::ostream& os, LayerPrecisions const& layerPrecisions) +{ + int32_t i = 0; + for (auto const& layerPrecision : layerPrecisions) + { + os << (i ? "," : "") << layerPrecision.first << ":" << layerPrecision.second; + ++i; + } + return os; +} + +std::ostream& operator<<(std::ostream& os, LayerDeviceTypes const& layerDeviceTypes) +{ + int32_t i = 0; + for (auto const& layerDevicePair : layerDeviceTypes) + { + os << (i++ ? ", " : "") << layerDevicePair.first << ":" << layerDevicePair.second; + } + return os; +} + +std::ostream& operator<<(std::ostream& os, StringSet const& stringSet) +{ + int64_t i = 0; + for (auto const& s : stringSet) + { + os << (i ? 
"," : "") << s; + ++i; + } + return os; +} + +std::ostream& operator<<(std::ostream& os, const BuildOptions& options) +{ + // if loadEngine is specified, BuildOptions are N/A + if (options.load) + { + os << std::endl; + return os; + } + // clang-format off + os << "=== Build Options ===" << std::endl << + "Memory Pools: "; printMemoryPools(os, options) << std::endl << + "avgTiming: " << options.avgTiming << std::endl << + "Precision: "; printPrecision(os, options) << std::endl << + "LayerPrecisions: " << options.layerPrecisions << std::endl << + "Layer Device Types: " << options.layerDeviceTypes << std::endl << + "Calibration: " << (options.int8 && options.calibration.empty() ? "Dynamic" : options.calibration.c_str()) << std::endl << + "Refit: " << boolToEnabled(options.refittable) << std::endl << + "Strip weights: " << boolToEnabled(options.stripWeights) << std::endl << + "Version Compatible: " << boolToEnabled(options.versionCompatible) << std::endl << +#if !TRT_WINML + "ONNX Plugin InstanceNorm: " << boolToEnabled(options.pluginInstanceNorm) << std::endl << +#endif + "TensorRT runtime: " << options.useRuntime << std::endl << + "Lean DLL Path: " << options.leanDLLPath << std::endl << + "Tempfile Controls: "; printTempfileControls(os, options.tempfileControls) << std::endl << + "Exclude Lean Runtime: " << boolToEnabled(options.excludeLeanRuntime) << std::endl << + "Sparsity: "; printSparsity(os, options) << std::endl << + "Safe mode: " << boolToEnabled(options.safe) << std::endl << + "Build DLA standalone loadable: " << boolToEnabled(options.buildDLAStandalone) << std::endl << + "Allow GPU fallback for DLA: " << boolToEnabled(options.allowGPUFallback) << std::endl << + "DirectIO mode: " << boolToEnabled(options.directIO) << std::endl << + "Restricted mode: " << boolToEnabled(options.restricted) << std::endl << + "Skip inference: " << boolToEnabled(options.skipInference) << std::endl << + "Save engine: " << (options.save ? 
options.engine : "") << std::endl << + "Load engine: " << (options.load ? options.engine : "") << std::endl << + "Profiling verbosity: " << static_cast(options.profilingVerbosity) << std::endl << + "Tactic sources: "; printTacticSources(os, options.enabledTactics, options.disabledTactics) << std::endl << + "timingCacheMode: "; printTimingCache(os, options.timingCacheMode) << std::endl << + "timingCacheFile: " << options.timingCacheFile << std::endl << + "Enable Compilation Cache: "<< boolToEnabled(!options.disableCompilationCache) << std::endl << + "errorOnTimingCacheMiss: " << boolToEnabled(options.errorOnTimingCacheMiss) << std::endl << + "Preview Features: "; printPreviewFlags(os, options) << std::endl << + "MaxAuxStreams: " << options.maxAuxStreams << std::endl << + "BuilderOptimizationLevel: " << options.builderOptimizationLevel << std::endl << + "MaxTactics: " << options.maxTactics << std::endl << + "Calibration Profile Index: " << options.calibProfile << std::endl << + "Weight Streaming: " << boolToEnabled(options.allowWeightStreaming) << std::endl << +#if (NV_TENSORRT_MAJOR > 8) + "Runtime Platform: " << options.runtimePlatform << std::endl << +#endif + "Debug Tensors: " << options.debugTensors << std::endl; + // clang-format on + + auto printIOFormats = [](std::ostream& os, const char* direction, const std::vector formats) { + if (formats.empty()) + { + os << direction << "s format: fp32:CHW" << std::endl; + } + else + { + for (const auto& f : formats) + { + os << direction << ": " << f << std::endl; + } + } + }; + + printIOFormats(os, "Input(s)", options.inputFormats); + printIOFormats(os, "Output(s)", options.outputFormats); + for (size_t i = 0; i < options.optProfiles.size(); i++) + { + printShapes(os, "build", options.optProfiles[i], i); + } + printShapes(os, "calibration", options.shapesCalib, -1); + + return os; +} + +std::ostream& operator<<(std::ostream& os, const SystemOptions& options) +{ + // clang-format off + os << "=== System Options ===" << 
std::endl << + + "Device: " << options.device << std::endl << + "DLACore: " << (options.DLACore != -1 ? std::to_string(options.DLACore) : "") << std::endl; +#if !TRT_WINML + os << "Plugins:"; + + for (const auto& p : options.plugins) + { + os << " " << p; + } + os << std::endl; + + os << "setPluginsToSerialize:"; + + for (const auto& p : options.setPluginsToSerialize) + { + os << " " << p; + } + os << std::endl; + + os << "dynamicPlugins:"; + + for (const auto& p : options.dynamicPlugins) + { + os << " " << p; + } + os << std::endl; + + os << "ignoreParsedPluginLibs: " << options.ignoreParsedPluginLibs << std::endl; + os << std::endl; +#endif + return os; + // clang-format on +} + +std::ostream& operator<<(std::ostream& os, const InferenceOptions& options) +{ + // clang-format off + os << "=== Inference Options ===" << std::endl << + + "Batch: "; + if (options.batch && options.shapes.empty()) + { + os << options.batch << std::endl; + } + else + { + os << "Explicit" << std::endl; + } + printShapes(os, "inference", options.shapes, options.optProfileIndex); + + std::string wsBudget{"Disabled"}; + if (options.weightStreamingBudget.bytes == WeightStreamingBudget::kAUTOMATIC) + { + wsBudget = "Automatic"; + } + else if (options.weightStreamingBudget.bytes != WeightStreamingBudget::kDISABLE) + { + wsBudget = std::to_string(options.weightStreamingBudget.bytes) + " bytes"; + } + else if (options.weightStreamingBudget.percent != WeightStreamingBudget::kDISABLE) + { + wsBudget = std::to_string(options.weightStreamingBudget.percent) + "%"; + } + + os << "Iterations: " << options.iterations << std::endl << + "Duration: " << options.duration << "s (+ " + << options.warmup << "ms warm up)" << std::endl << + "Sleep time: " << options.sleep << "ms" << std::endl << + "Idle time: " << options.idle << "ms" << std::endl << + "Inference Streams: " << options.infStreams << std::endl << + "ExposeDMA: " << boolToEnabled(!options.overlap) << std::endl << + "Data transfers: " << 
boolToEnabled(!options.skipTransfers) << std::endl << + "Spin-wait: " << boolToEnabled(options.spin) << std::endl << + "Multithreading: " << boolToEnabled(options.threads) << std::endl << + "CUDA Graph: " << boolToEnabled(options.graph) << std::endl << + "Separate profiling: " << boolToEnabled(options.rerun) << std::endl << + "Time Deserialize: " << boolToEnabled(options.timeDeserialize) << std::endl << + "Time Refit: " << boolToEnabled(options.timeRefit) << std::endl << + "NVTX verbosity: " << static_cast(options.nvtxVerbosity) << std::endl << + "Persistent Cache Ratio: " << static_cast(options.persistentCacheRatio) << std::endl << + "Optimization Profile Index: "<< options.optProfileIndex << std::endl << + "Weight Streaming Budget: " << wsBudget << std::endl; + // clang-format on + + os << "Inputs:" << std::endl; + for (const auto& input : options.inputs) + { + os << input.first << "<-" << input.second << std::endl; + } + + os << "Debug Tensor Save Destinations:" << std::endl; + for (auto const& fileName : options.debugTensorFileNames) + { + os << fileName.first << ": " << fileName.second << std::endl; + } + + return os; +} + +std::ostream& operator<<(std::ostream& os, const ReportingOptions& options) +{ + // clang-format off + os << "=== Reporting Options ===" << std::endl << + "Verbose: " << boolToEnabled(options.verbose) << std::endl << + "Averages: " << options.avgs << " inferences" << std::endl << + "Percentiles: " << joinValuesToString(options.percentiles, ",") << std::endl << + "Dump refittable layers:" << boolToEnabled(options.refit) << std::endl << + "Dump output: " << boolToEnabled(options.output) << std::endl << + "Profile: " << boolToEnabled(options.profile) << std::endl << + "Export timing to JSON file: " << options.exportTimes << std::endl << + "Export output to JSON file: " << options.exportOutput << std::endl << + "Export profile to JSON file: " << options.exportProfile << std::endl; + // clang-format on + + return os; +} + +std::ostream& 
operator<<(std::ostream& os, const AllOptions& options) +{ + os << options.model << options.build << options.system << options.inference << options.reporting << std::endl; + return os; +} + +std::ostream& operator<<(std::ostream& os, const SafeBuilderOptions& options) +{ + auto printIOFormats = [](std::ostream& os, const char* direction, const std::vector formats) { + if (formats.empty()) + { + os << direction << "s format: fp32:CHW" << std::endl; + } + else + { + for (const auto& f : formats) + { + os << direction << ": " << f << std::endl; + } + } + }; + + os << "=== Build Options ===" << std::endl; + os << "Model ONNX: " << options.onnxModelFile << std::endl; + + os << "Precision: FP16"; + if (options.int8) + { + os << " + INT8"; + } + if (options.fp8) + { + os << " + FP8"; + } + if (options.int4) + { + os << " + INT4"; + } + os << std::endl; + os << "Calibration file: " << options.calibFile << std::endl; + os << "Serialized Network: " << options.serialized << std::endl; + + printIOFormats(os, "Input(s)", options.inputFormats); + printIOFormats(os, "Output(s)", options.outputFormats); +#if !TRT_WINML + os << "Plugins:"; + for (const auto& p : options.plugins) + { + os << " " << p; + } +#endif + os << "timingCacheMode: "; + printTimingCache(os, options.timingCacheMode) << std::endl; + os << "timingCacheFile: " << options.timingCacheFile << std::endl; + os << std::endl; + return os; +} + +void BaseModelOptions::help(std::ostream& os) +{ + // clang-format off + os << " --onnx= ONNX model" << std::endl; + // clang-format on +} + +void ModelOptions::help(std::ostream& os) +{ + // clang-format off + os << "=== Model Options ===" << std::endl; + BaseModelOptions::help(os); + // clang-format on +} + +void BuildOptions::help(std::ostream& os) +{ + // clang-format off + os << "=== Build Options ===" "\n" + " --minShapes=spec Build with dynamic shapes using a profile with the min shapes provided" "\n" + " --optShapes=spec Build with dynamic shapes using a profile with the 
opt shapes provided" "\n" + " --maxShapes=spec Build with dynamic shapes using a profile with the max shapes provided" "\n" + " --minShapesCalib=spec Calibrate with dynamic shapes using a profile with the min shapes provided" "\n" + " --optShapesCalib=spec Calibrate with dynamic shapes using a profile with the opt shapes provided" "\n" + " --maxShapesCalib=spec Calibrate with dynamic shapes using a profile with the max shapes provided" "\n" + " Note: All three of min, opt and max shapes must be supplied." "\n" + " However, if only opt shapes is supplied then it will be expanded so" "\n" + " that min shapes and max shapes are set to the same values as opt shapes." "\n" + " Input names can be wrapped with escaped single quotes (ex: 'Input:0')." "\n" + " Example input shapes spec: input0:1x3x256x256,input1:1x3x128x128" "\n" + " For scalars (0-D shapes), use input0:scalar or simply input0: with nothing after the colon." "\n" + " Each input shape is supplied as a key-value pair where key is the input name and" "\n" + " value is the dimensions (including the batch dimension) to be used for that input." "\n" + " Each key-value pair has the key and value separated using a colon (:)." "\n" + " Multiple input shapes can be provided via comma-separated key-value pairs, and each input name can" "\n" + " contain at most one wildcard ('*') character." "\n" + " --inputIOFormats=spec Type and format of each of the input tensors (default = all inputs in fp32:chw)" "\n" + " See --outputIOFormats help for the grammar of type and format list." "\n" + " Note: If this option is specified, please set comma-separated types and formats for all" "\n" + " inputs following the same order as network inputs ID (even if only one input" "\n" + " needs specifying IO format) or set the type and format once for broadcasting." 
"\n" + " --outputIOFormats=spec Type and format of each of the output tensors (default = all outputs in fp32:chw)" "\n" + " Note: If this option is specified, please set comma-separated types and formats for all" "\n" + " outputs following the same order as network outputs ID (even if only one output" "\n" + " needs specifying IO format) or set the type and format once for broadcasting." "\n" + R"( IO Formats: spec ::= IOfmt[","spec])" "\n" + " IOfmt ::= type:fmt" "\n" + R"( type ::= "fp32"|"fp16"|"bf16"|"int32"|"int64"|"int8"|"uint8"|"bool")" "\n" + R"( fmt ::= ("chw"|"chw2"|"chw4"|"hwc8"|"chw16"|"chw32"|"dhwc8"|)" "\n" + R"( "cdhw32"|"hwc"|"dla_linear"|"dla_hwc4")["+"fmt])" "\n" + " --memPoolSize=poolspec Specify the size constraints of the designated memory pool(s)" "\n" + " Supports the following base-2 suffixes: " << getAvailableUnitSuffixes() << "." "\n" + " If none of suffixes is appended, the defualt unit is in MiB." "\n" + " Note: Also accepts decimal sizes, e.g. 0.25M. Will be rounded down to the nearest integer bytes." "\n" + " In particular, for dlaSRAM the bytes will be rounded down to the nearest power of 2." "\n" + R"( Pool constraint: poolspec ::= poolfmt[","poolspec])" "\n" + " poolfmt ::= pool:size" "\n" + R"( pool ::= "workspace"|"dlaSRAM"|"dlaLocalDRAM"|"dlaGlobalDRAM"|"tacticSharedMem")" "\n" + " --profilingVerbosity=mode Specify profiling verbosity. mode ::= layer_names_only|detailed|none (default = layer_names_only)." "\n" + " Please only assign once." "\n" + " --avgTiming=M Set the number of times averaged in each iteration for kernel selection (default = " + << defaultAvgTiming << ")" "\n" + " --refit Mark the engine as refittable. This will allow the inspection of refittable layers " "\n" + " and weights within the engine." "\n" + " --stripWeights Strip weights from plan. This flag works with either refit or refit with identical weights. 
Default""\n" + " to latter, but you can switch to the former by enabling both --stripWeights and --refit at the same""\n" + " time." "\n" + " --stripAllWeights Alias for combining the --refit and --stripWeights options. It marks all weights as refittable," "\n" + " disregarding any performance impact. Additionally, it strips all refittable weights after the " "\n" + " engine is built." "\n" + " --weightless [Deprecated] this knob has been deprecated. Please use --stripWeights" "\n" + " --versionCompatible, --vc Mark the engine as version compatible. This allows the engine to be used with newer versions" "\n" + " of TensorRT on the same host OS, as well as TensorRT's dispatch and lean runtimes." "\n" +#if !TRT_WINML + " --pluginInstanceNorm, --pi Set `kNATIVE_INSTANCENORM` to false in the ONNX parser. This will cause the ONNX parser to use" "\n" + " a plugin InstanceNorm implementation over the native implementation when parsing." "\n" +#endif + R"( --useRuntime=runtime TensorRT runtime to execute engine. "lean" and "dispatch" require loading VC engine and do)" "\n" + " not support building an engine." "\n" + R"( runtime::= "full"|"lean"|"dispatch")" "\n" + " --leanDLLPath= External lean runtime DLL to use in version compatiable mode." "\n" + " --excludeLeanRuntime When --versionCompatible is enabled, this flag indicates that the generated engine should" "\n" + " not include an embedded lean runtime. If this is set, the user must explicitly specify a" "\n" + " valid lean runtime to use when loading the engine." "\n" + " --sparsity=spec Control sparsity (default = disabled). 
" "\n" + R"( Sparsity: spec ::= "disable", "enable", "force")" "\n" + " Note: Description about each of these options is as below" "\n" + " disable = do not enable sparse tactics in the builder (this is the default)" "\n" + " enable = enable sparse tactics in the builder (but these tactics will only be" "\n" + " considered if the weights have the right sparsity pattern)" "\n" + " force = enable sparse tactics in the builder and force-overwrite the weights to have" "\n" + " a sparsity pattern (even if you loaded a model yourself)" "\n" + " [Deprecated] this knob has been deprecated." "\n" + " Please use to rewrite the weights." "\n" + " --noTF32 Disable tf32 precision (default is to enable tf32, in addition to fp32)" "\n" + " --fp16 Enable fp16 precision, in addition to fp32 (default = disabled)" "\n" + " --bf16 Enable bf16 precision, in addition to fp32 (default = disabled)" "\n" + " --int8 Enable int8 precision, in addition to fp32 (default = disabled)" "\n" + " --fp8 Enable fp8 precision, in addition to fp32 (default = disabled)" "\n" + " --int4 Enable int4 precision, in addition to fp32 (default = disabled)" "\n" + " --best Enable all precisions to achieve the best performance (default = disabled)" "\n" + " --stronglyTyped Create a strongly typed network. (default = disabled)" "\n" + " --directIO Avoid reformatting at network boundaries. (default = disabled)" "\n" + " --precisionConstraints=spec Control precision constraint setting. (default = none)" "\n" + R"( Precision Constraints: spec ::= "none" | "obey" | "prefer")" "\n" + " none = no constraints" "\n" + " prefer = meet precision constraints set by --layerPrecisions/--layerOutputTypes if possible" "\n" + " obey = meet precision constraints set by --layerPrecisions/--layerOutputTypes or fail" "\n" + " otherwise" "\n" + " --layerPrecisions=spec Control per-layer precision constraints. Effective only when precisionConstraints is set to" "\n" + R"( "obey" or "prefer". 
(default = none))" "\n" + R"( The specs are read left-to-right, and later ones override earlier ones. Each layer name can)" "\n" + " contain at most one wildcard ('*') character." "\n" + R"( Per-layer precision spec ::= layerPrecision[","spec])" "\n" + R"( layerPrecision ::= layerName":"precision)" "\n" + R"( precision ::= "fp32"|"fp16"|"bf16"|"int32"|"int8")" "\n" + " --layerOutputTypes=spec Control per-layer output type constraints. Effective only when precisionConstraints is set to" "\n" + R"( "obey" or "prefer". (default = none)" "\n" + R"( The specs are read left-to-right, and later ones override earlier ones. Each layer name can)" "\n" + " contain at most one wildcard ('*') character. If a layer has more than" "\n" + R"( one output, then multiple types separated by "+" can be provided for this layer.)" "\n" + R"( Per-layer output type spec ::= layerOutputTypes[","spec])" "\n" + R"( layerOutputTypes ::= layerName":"type)" "\n" + R"( type ::= "fp32"|"fp16"|"bf16"|"int32"|"int8"["+"type])" "\n" + " --layerDeviceTypes=spec Specify layer-specific device type." "\n" + " The specs are read left-to-right, and later ones override earlier ones. If a layer does not have" "\n" + " a device type specified, the layer will opt for the default device type." "\n" + R"( Per-layer device type spec ::= layerDeviceTypePair[","spec])" "\n" + R"( layerDeviceTypePair ::= layerName":"deviceType)" "\n" + R"( deviceType ::= "GPU"|"DLA")" "\n" + " --calib= Read INT8 calibration cache file" "\n" + " --safe Enable build safety certified engine, if DLA is enable, --buildDLAStandalone will be specified" "\n" + " automatically (default = disabled)" "\n" + " --buildDLAStandalone Enable build DLA standalone loadable which can be loaded by cuDLA, when this option is enabled, " "\n" + " --allowGPUFallback is disallowed and --skipInference is enabled by default. 
Additionally, " "\n" + " specifying --inputIOFormats and --outputIOFormats restricts I/O data type and memory layout" "\n" + " (default = disabled)" "\n" + " --allowGPUFallback When DLA is enabled, allow GPU fallback for unsupported layers (default = disabled)" "\n" + " --restricted Enable safety scope checking with kSAFETY_SCOPE build flag" "\n" + " --saveEngine= Save the serialized engine" "\n" + " --loadEngine= Load a serialized engine" "\n" + " --getPlanVersionOnly Print TensorRT version when loaded plan was created. Works without deserialization of the plan." "\n" + " Use together with --loadEngine. Supported only for engines created with 8.6 and forward." "\n" + " --tacticSources=tactics Specify the tactics to be used by adding (+) or removing (-) tactics from the default " "\n" + " tactic sources (default = all available tactics)." "\n" + " Note: Currently only cuDNN, cuBLAS, cuBLAS-LT, and edge mask convolutions are listed as optional" "\n" + " tactics." "\n" + R"( Tactic Sources: tactics ::= [","tactic])" "\n" + " tactic ::= (+|-)lib" "\n" + R"( lib ::= "CUBLAS"|"CUBLAS_LT"|"CUDNN"|"EDGE_MASK_CONVOLUTIONS")" "\n" + R"( |"JIT_CONVOLUTIONS")" "\n" + " For example, to disable cudnn and enable cublas: --tacticSources=-CUDNN,+CUBLAS" "\n" + " --noBuilderCache Disable timing cache in builder (default is to enable timing cache)" "\n" + " --noCompilationCache Disable Compilation cache in builder, and the cache is part of timing cache (default is to enable compilation cache)" "\n" + " --errorOnTimingCacheMiss Emit error when a tactic being timed is not present in the timing cache (default = false)" "\n" + " --timingCacheFile= Save/load the serialized global timing cache" "\n" + " --preview=features Specify preview feature to be used by adding (+) or removing (-) preview features from the default" "\n" + R"( Preview Features: features ::= [","feature])" "\n" + " feature ::= (+|-)flag" "\n" + R"( flag ::= "aliasedPluginIO1003")" "\n" + R"( |"profileSharing0806")" 
"\n" + " --builderOptimizationLevel Set the builder optimization level. (default is 3)" "\n" + " Higher level allows TensorRT to spend more building time for more optimization options." "\n" + " Valid values include integers from 0 to the maximum optimization level, which is currently 5." "\n" + " --maxTactics Set the maximum number of tactics to time when there is a choice of tactics. (default is -1)" "\n" + " Larger number of tactics allow TensorRT to spend more building time on evaluating tactics." "\n" + " Default value -1 means TensorRT can decide the number of tactics based on its own heuristic." "\n" + " --hardwareCompatibilityLevel=mode Make the engine file compatible with other GPU architectures. (default = none)" "\n" + R"( Hardware Compatibility Level: mode ::= "none" | "ampere+")" "\n" + " none = no compatibility" "\n" + " ampere+ = compatible with Ampere and newer GPUs" "\n" + " --runtimePlatform=platform Set the target platform for runtime execution. (default = SameAsBuild)" "\n" + " When this option is enabled, --skipInference is enabled by default." "\n" + R"( RuntimePlatfrom: platform ::= "SameAsBuild" | "WindowsAMD64")" "\n" + " SameAsBuild = no requirement for cross-platform compatibility." "\n" + " WindowsAMD64 = set the target platform for engine execution as Windows AMD64 system" "\n" + " --tempdir= Overrides the default temporary directory TensorRT will use when creating temporary files." "\n" + " See IRuntime::setTemporaryDirectory API documentation for more information." "\n" + " --tempfileControls=controls Controls what TensorRT is allowed to use when creating temporary executable files." "\n" + " Should be a comma-separated list with entries in the format (in_memory|temporary):(allow|deny)." "\n" + " in_memory: Controls whether TensorRT is allowed to create temporary in-memory executable files." 
"\n" + " temporary: Controls whether TensorRT is allowed to create temporary executable files in the" "\n" + " filesystem (in the directory given by --tempdir)." "\n" + " For example, to allow in-memory files and disallow temporary files:" "\n" + " --tempfileControls=in_memory:allow,temporary:deny" "\n" + R"( If a flag is unspecified, the default behavior is "allow".)" "\n" + " --maxAuxStreams=N Set maximum number of auxiliary streams per inference stream that TRT is allowed to use to run " "\n" + " kernels in parallel if the network contains ops that can run in parallel, with the cost of more " "\n" + " memory usage. Set this to 0 for optimal memory usage. (default = using heuristics)" "\n" + " --profile Build with dynamic shapes using a profile with the min/max/opt shapes provided. Can be specified" "\n" + " multiple times to create multiple profiles with contiguous index." "\n" + " (ex: --profile=0 --minShapes= --optShapes= --maxShapes= --profile=1 ...)" "\n" + " --calibProfile Select the optimization profile to calibrate by index. (default = " + << defaultOptProfileIndex << ")" "\n" + " --allowWeightStreaming Enable a weight streaming engine. Must be specified with --stronglyTyped. TensorRT will disable" "\n" + " weight streaming at runtime unless --weightStreamingBudget is specified." "\n" + " --markDebug Specify list of names of tensors to be marked as debug tensors. 
Separate names with a comma" "\n" + ; + // clang-format on + os << std::flush; +} + +void SystemOptions::help(std::ostream& os) +{ + // clang-format off + os << "=== System Options ===" << std::endl << + " --device=N Select cuda device N (default = " << defaultDevice << ")" << std::endl << + " --useDLACore=N Select DLA core N for layers that support DLA (default = none)" << std::endl << +#if TRT_WINML + std::endl; +#else + " --staticPlugins Plugin library (.so) to load statically (can be specified multiple times)" << std::endl << + " --dynamicPlugins Plugin library (.so) to load dynamically and may be serialized with the engine if they are included in --setPluginsToSerialize (can be specified multiple times)" << std::endl << + " --setPluginsToSerialize Plugin library (.so) to be serialized with the engine (can be specified multiple times)" << std::endl << + " --ignoreParsedPluginLibs By default, when building a version-compatible engine, plugin libraries specified by the ONNX parser " << std::endl << + " are implicitly serialized with the engine (unless --excludeLeanRuntime is specified) and loaded dynamically. " << std::endl << + " Enable this flag to ignore these plugin libraries instead." << std::endl; +#endif + // clang-format on +} + +void InferenceOptions::help(std::ostream& os) +{ + // clang-format off + os << "=== Inference Options ===" << std::endl << + " --shapes=spec Set input shapes for dynamic shapes inference inputs." << std::endl << + R"( Note: Input names can be wrapped with escaped single quotes (ex: 'Input:0').)" << std::endl << + " Example input shapes spec: input0:1x3x256x256, input1:1x3x128x128" << std::endl << + " For scalars (0-D shapes), use input0:scalar or simply input0: with nothing after the colon."<< std::endl << + " Each input shape is supplied as a key-value pair where key is the input name and" << std::endl << + " value is the dimensions (including the batch dimension) to be used for that input." 
<< std::endl << + " Each key-value pair has the key and value separated using a colon (:)." << std::endl << + " Multiple input shapes can be provided via comma-separated key-value pairs, and each input " << std::endl << + " name can contain at most one wildcard ('*') character." << std::endl << + " --loadInputs=spec Load input values from files (default = generate random inputs). Input names can be " + "wrapped with single quotes (ex: 'Input:0')" << std::endl << + R"( Input values spec ::= Ival[","spec])" << std::endl << + R"( Ival ::= name":"file)" << std::endl << + " Consult the README for more information on generating files for custom inputs." << std::endl << + " --iterations=N Run at least N inference iterations (default = " << defaultIterations << ")" << std::endl << + " --warmUp=N Run for N milliseconds to warmup before measuring performance (default = " + << defaultWarmUp << ")" << std::endl << + " --duration=N Run performance measurements for at least N seconds wallclock time (default = " + << defaultDuration << ")" << std::endl << + " If -1 is specified, inference will keep running unless stopped manually" << std::endl << + " --sleepTime=N Delay inference start with a gap of N milliseconds between launch and compute " + "(default = " << defaultSleep << ")" << std::endl << + " --idleTime=N Sleep N milliseconds between two continuous iterations" + "(default = " << defaultIdle << ")" << std::endl << + " --infStreams=N Instantiate N execution contexts to run inference concurrently " + "(default = " << defaultStreams << ")" << std::endl << + " --exposeDMA Serialize DMA transfers to and from device (default = disabled)." << std::endl << + " --noDataTransfers Disable DMA transfers to and from device (default = enabled)." << std::endl << + " --useManagedMemory Use managed memory instead of separate host and device allocations (default = disabled)." << std::endl << + " --useSpinWait Actively synchronize on GPU events. 
This option may decrease synchronization time but " + "increase CPU usage and power (default = disabled)" << std::endl << + " --threads Enable multithreading to drive engines with independent threads" + " or speed up refitting (default = disabled) " << std::endl << + " --useCudaGraph Use CUDA graph to capture engine execution and then launch inference (default = disabled)." << std::endl << + " This flag may be ignored if the graph capture fails." << std::endl << + " --timeDeserialize Time the amount of time it takes to deserialize the network and exit." << std::endl << + " --timeRefit Time the amount of time it takes to refit the engine before inference." << std::endl << + " --separateProfileRun Do not attach the profiler in the benchmark run; if profiling is enabled, a second " + "profile run will be executed (default = disabled)" << std::endl << + " --skipInference Exit after the engine has been built and skip inference perf measurement " + "(default = disabled)" << std::endl << + " --persistentCacheRatio Set the persistentCacheLimit in ratio, 0.5 represent half of max persistent L2 size " + "(default = 0)" << std::endl << + " --useProfile Set the optimization profile for the inference context " + "(default = " << defaultOptProfileIndex << " )." << std::endl << + " --allocationStrategy=spec Specify how the internal device memory for inference is allocated." << std::endl << + R"( Strategy: spec ::= "static", "profile", "runtime")" << std::endl << + " static = Allocate device memory based on max size across all profiles." << std::endl << + " profile = Allocate device memory based on max size of the current profile." << std::endl << + " runtime = Allocate device memory based on the actual input shapes." << std::endl << + " --saveDebugTensors Specify list of names of tensors to turn on the debug state" << std::endl << + " and filename to save raw outputs to." << std::endl << + " These tensors must be specified as debug tensors during build time." 
<< std::endl << + R"( Input values spec ::= Ival[","spec])" << std::endl << + R"( Ival ::= name":"file)" << std::endl << + " --weightStreamingBudget Set the maximum amount of GPU memory TensorRT is allowed to use for weights." << std::endl << + " It can take on the following values:" << std::endl << + " -2: (default) Disable weight streaming at runtime." << std::endl << + " -1: TensorRT will automatically decide the budget." << std::endl << + " 0-100%: Percentage of streamable weights that reside on the GPU." << std::endl << + " 0% saves the most memory but will have the worst performance." << std::endl << + " Requires the % character." << std::endl << + " >=0B: The exact amount of streamable weights that reside on the GPU. Supports the " << std::endl << + " following base-2 suffixes: " << getAvailableUnitSuffixes() << "." << std::endl; + // clang-format on +} + +void ReportingOptions::help(std::ostream& os) +{ + // clang-format off + os << "=== Reporting Options ===" << std::endl << + " --verbose Use verbose logging (default = false)" << std::endl << + " --avgRuns=N Report performance measurements averaged over N consecutive " + "iterations (default = " << defaultAvgRuns << ")" << std::endl << + " --percentile=P1,P2,P3,... Report performance for the P1,P2,P3,... 
percentages (0<=P_i<=100, 0 " + "representing max perf, and 100 representing min perf; (default" + " = " << joinValuesToString(defaultPercentiles, ",") << "%)" << std::endl << + " --dumpRefit Print the refittable layers and weights from a refittable " + "engine" << std::endl << + " --dumpOutput Print the output tensor(s) of the last inference iteration " + "(default = disabled)" << std::endl << + " --dumpRawBindingsToFile Print the input/output tensor(s) of the last inference iteration to file" + "(default = disabled)" << std::endl << + " --dumpProfile Print profile information per layer (default = disabled)" << std::endl << + " --dumpLayerInfo Print layer information of the engine to console " + "(default = disabled)" << std::endl << + " --dumpOptimizationProfile Print the optimization profile(s) information " + "(default = disabled)" << std::endl << + " --exportTimes= Write the timing results in a json file (default = disabled)" << std::endl << + " --exportOutput= Write the output tensors to a json file (default = disabled)" << std::endl << + " --exportProfile= Write the profile information per layer in a json file " + "(default = disabled)" << std::endl << + " --exportLayerInfo= Write the layer information of the engine in a json file " + "(default = disabled)" << std::endl; + // clang-format on +} + +void TaskInferenceOptions::help(std::ostream& os) +{ + // clang-format off + os << "=== Task Inference Options ===" << std::endl << + " engine= Specify a serialized engine for this task" << std::endl << + " device=N Specify a GPU device for this task" << std::endl << + " DLACore=N Specify a DLACore for this task" << std::endl << + " batch=N Set batch size for implicit batch engines (default = " << defaultBatch << ")" << std::endl << + " This option should not be used for explicit batch engines" << std::endl << + " graph=1 Use cuda graph for this task" << std::endl << + " persistentCacheRatio=[0-1] Set the persistentCacheLimit ratio for this task (default = 0)" << 
std::endl; + // clang-format on +} + +void helpHelp(std::ostream& os) +{ + // clang-format off + os << "=== Help ===" << std::endl << + " --help, -h Print this message" << std::endl; + // clang-format on +} + +void AllOptions::help(std::ostream& os) +{ + ModelOptions::help(os); + os << std::endl; + BuildOptions::help(os); + os << std::endl; + InferenceOptions::help(os); + os << std::endl; + ReportingOptions::help(os); + os << std::endl; + SystemOptions::help(os); + os << std::endl; + helpHelp(os); +} + +void SafeBuilderOptions::printHelp(std::ostream& os) +{ + // clang-format off + os << "=== Mandatory ===" << std::endl << + " --onnx= ONNX model" << std::endl << + " " << std::endl << + "=== Optional ===" << std::endl << + " --inputIOFormats=spec Type and format of each of the input tensors (default = all inputs in fp32:chw)" << std::endl << + " See --outputIOFormats help for the grammar of type and format list." << std::endl << + " Note: If this option is specified, please set comma-separated types and formats for all" << std::endl << + " inputs following the same order as network inputs ID (even if only one input" << std::endl << + " needs specifying IO format) or set the type and format once for broadcasting." << std::endl << + " --outputIOFormats=spec Type and format of each of the output tensors (default = all outputs in fp32:chw)" << std::endl << + " Note: If this option is specified, please set comma-separated types and formats for all" << std::endl << + " outputs following the same order as network outputs ID (even if only one output" << std::endl << + " needs specifying IO format) or set the type and format once for broadcasting." 
<< std::endl << + R"( IO Formats: spec ::= IOfmt[","spec])" << std::endl << + " IOfmt ::= type:fmt" << std::endl << + R"( type ::= "fp32"|"fp16"|"int32"|"int8")" << std::endl << + R"( fmt ::= ("chw"|"chw2"|"chw4"|"hwc8"|"chw16"|"chw32"|"dhwc8"|)" << std::endl << + R"( "cdhw32"|"hwc"|"dla_linear"|"dla_hwc4")["+"fmt])" << std::endl << + " --int8 Enable int8 precision, in addition to fp16 (default = disabled)" << std::endl << + " --std Build standard serialized engine, (default = disabled)" << std::endl << + " --calib= Read INT8 calibration cache file" << std::endl << + " --serialized= Save the serialized network" << std::endl << +#if !TRT_WINML + " --staticPlugins Plugin library (.so) to load statically (can be specified multiple times)" << std::endl << +#endif + " --verbose or -v Use verbose logging (default = false)" << std::endl << + " --help or -h Print this message" << std::endl << + " --noBuilderCache Disable timing cache in builder (default is to enable timing cache)" << std::endl << + " --timingCacheFile= Save/load the serialized global timing cache" << std::endl << + " --sparsity=spec Control sparsity (default = disabled). 
" << std::endl << + R"( Sparsity: spec ::= "disable", "enable", "force")" << std::endl << + " Note: Description about each of these options is as below" << std::endl << + " disable = do not enable sparse tactics in the builder (this is the default)" << std::endl << + " enable = enable sparse tactics in the builder (but these tactics will only be" << std::endl << + " considered if the weights have the right sparsity pattern)" << std::endl << + " force = enable sparse tactics in the builder and force-overwrite the weights to have" << std::endl << + " a sparsity pattern" << std::endl << + " --avgTiming=M Set the number of times averaged in each iteration for kernel selection (default = " << std::endl << + "" << defaultAvgTiming << ")" << std::endl << + "" << std::endl; + // clang-format on +} + +} // namespace sample diff --git a/src/Detector/tensorrt_onnx/common/sampleOptions.h b/src/Detector/tensorrt_onnx/common/sampleOptions.h new file mode 100644 index 000000000..d374b3a4b --- /dev/null +++ b/src/Detector/tensorrt_onnx/common/sampleOptions.h @@ -0,0 +1,495 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef TRT_SAMPLE_OPTIONS_H +#define TRT_SAMPLE_OPTIONS_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "NvInfer.h" + +namespace sample +{ + +// Build default params +constexpr int32_t defaultAvgTiming{8}; +constexpr int32_t defaultMaxAuxStreams{-1}; +constexpr int32_t defaultBuilderOptimizationLevel{-1}; +constexpr int32_t defaultMaxTactics{-1}; + +// System default params +constexpr int32_t defaultDevice{0}; + +// Inference default params +constexpr int32_t defaultBatch{1}; +constexpr int32_t batchNotProvided{0}; +constexpr int32_t defaultStreams{1}; +constexpr int32_t defaultIterations{10}; +constexpr int32_t defaultOptProfileIndex{0}; +constexpr float defaultWarmUp{200.F}; +constexpr float defaultDuration{3.F}; +constexpr float defaultSleep{}; +constexpr float defaultIdle{}; +constexpr float defaultPersistentCacheRatio{0}; + +// Reporting default params +constexpr int32_t defaultAvgRuns{10}; +constexpr std::array defaultPercentiles{90, 95, 99}; + +enum class PrecisionConstraints +{ + kNONE, + kOBEY, + kPREFER +}; + +enum class ModelFormat +{ + kANY, + kONNX +}; + +enum class SparsityFlag +{ + kDISABLE, + kENABLE, + kFORCE +}; + +enum class TimingCacheMode +{ + kDISABLE, + kLOCAL, + kGLOBAL +}; + +enum class MemoryAllocationStrategy +{ + kSTATIC, //< Allocate device memory based on max size across all profiles. + kPROFILE, //< Allocate device memory based on max size of the current profile. + kRUNTIME, //< Allocate device memory based on the current input shapes. +}; + +//! +//! \enum RuntimeMode +//! +//! \brief Used to dictate which TensorRT runtime library to dynamically load. +//! +enum class RuntimeMode +{ + //! Maps to libnvinfer.so or nvinfer.dll + kFULL, + + //! Maps to libnvinfer_dispatch.so or nvinfer_dispatch.dll + kDISPATCH, + + //! 
Maps to libnvinfer_lean.so or nvinfer_lean.dll + kLEAN, +}; + +inline std::ostream& operator<<(std::ostream& os, RuntimeMode const mode) +{ + switch (mode) + { + case RuntimeMode::kFULL: + { + os << "full"; + break; + } + case RuntimeMode::kDISPATCH: + { + os << "dispatch"; + break; + } + case RuntimeMode::kLEAN: + { + os << "lean"; + break; + } + } + + return os; +} + +using Arguments = std::unordered_multimap>; + +using IOFormat = std::pair; + +using ShapeRange = std::array, nvinfer1::EnumMax()>; + +using LayerPrecisions = std::unordered_map; +using LayerOutputTypes = std::unordered_map>; +using LayerDeviceTypes = std::unordered_map; + +using StringSet = std::unordered_set; + +class WeightStreamingBudget +{ +public: + static constexpr int64_t kDISABLE{-2}; + static constexpr int64_t kAUTOMATIC{-1}; + int64_t bytes{kDISABLE}; + double percent{static_cast(100.0)}; + + bool isDisabled() + { + return bytes == kDISABLE && percent == kDISABLE; + } +}; + +class Options +{ +public: + virtual ~Options() = default; + virtual void parse(Arguments& arguments) = 0; +}; + +class BaseModelOptions : public Options +{ +public: + ModelFormat format{ModelFormat::kANY}; + std::string model; + + void parse(Arguments& arguments) override; + + static void help(std::ostream& out); +}; + +class ModelOptions : public Options +{ +public: + BaseModelOptions baseModel; + std::string prototxt; + std::vector outputs; + + void parse(Arguments& arguments) override; + + static void help(std::ostream& out); +}; + +constexpr nvinfer1::TempfileControlFlags getTempfileControlDefaults() +{ + using F = nvinfer1::TempfileControlFlag; + return (1U << static_cast(F::kALLOW_TEMPORARY_FILES)) + | (1U << static_cast(F::kALLOW_IN_MEMORY_FILES)); +} + +class BuildOptions : public Options +{ +public: + // Unit in MB. + double workspace{-1.0}; + // Unit in MB. + double dlaSRAM{-1.0}; + // Unit in MB. + double dlaLocalDRAM{-1.0}; + // Unit in MB. + double dlaGlobalDRAM{-1.0}; + // Unit in KB. 
+ double tacticSharedMem{-1.0}; + int32_t avgTiming{defaultAvgTiming}; + size_t calibProfile{defaultOptProfileIndex}; + bool tf32{true}; + bool fp16{false}; + bool bf16{false}; + bool int8{false}; + bool fp8{false}; + bool int4{false}; + bool stronglyTyped{false}; + bool directIO{false}; + PrecisionConstraints precisionConstraints{PrecisionConstraints::kNONE}; + LayerPrecisions layerPrecisions; + LayerOutputTypes layerOutputTypes; + LayerDeviceTypes layerDeviceTypes; + StringSet debugTensors; + StringSet debugTensorStates; + bool safe{false}; + bool buildDLAStandalone{false}; + bool allowGPUFallback{false}; + bool restricted{false}; + bool skipInference{false}; + bool save{false}; + bool load{false}; + bool refittable{false}; + bool stripWeights{false}; + bool versionCompatible{false}; + bool pluginInstanceNorm{false}; + bool excludeLeanRuntime{false}; + bool disableCompilationCache{false}; + int32_t builderOptimizationLevel{defaultBuilderOptimizationLevel}; + int32_t maxTactics{defaultMaxTactics}; + SparsityFlag sparsity{SparsityFlag::kDISABLE}; + nvinfer1::ProfilingVerbosity profilingVerbosity{nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY}; + std::string engine; + std::string calibration; + using ShapeProfile = std::unordered_map; + std::vector optProfiles; + ShapeProfile shapesCalib; + std::vector inputFormats; + std::vector outputFormats; + nvinfer1::TacticSources enabledTactics{0}; + nvinfer1::TacticSources disabledTactics{0}; + TimingCacheMode timingCacheMode{TimingCacheMode::kLOCAL}; + std::string timingCacheFile{}; + bool errorOnTimingCacheMiss{false}; + // C++11 does not automatically generate hash function for enum class. + // Use int32_t to support C++11 compilers. 
+ std::unordered_map previewFeatures; + nvinfer1::HardwareCompatibilityLevel hardwareCompatibilityLevel{nvinfer1::HardwareCompatibilityLevel::kNONE}; +#if (NV_TENSORRT_MAJOR > 8) + nvinfer1::RuntimePlatform runtimePlatform{nvinfer1::RuntimePlatform::kSAME_AS_BUILD}; +#endif + std::string tempdir{}; + nvinfer1::TempfileControlFlags tempfileControls{getTempfileControlDefaults()}; + RuntimeMode useRuntime{RuntimeMode::kFULL}; + std::string leanDLLPath{}; + int32_t maxAuxStreams{defaultMaxAuxStreams}; + bool getPlanVersionOnly{false}; + + bool allowWeightStreaming{false}; + + void parse(Arguments& arguments) override; + + static void help(std::ostream& out); +}; + +class SystemOptions : public Options +{ +public: + int32_t device{defaultDevice}; + int32_t DLACore{-1}; + bool ignoreParsedPluginLibs{false}; + std::vector plugins; + std::vector setPluginsToSerialize; + std::vector dynamicPlugins; + + void parse(Arguments& arguments) override; + + static void help(std::ostream& out); +}; + +class InferenceOptions : public Options +{ +public: + int32_t batch{batchNotProvided}; + int32_t iterations{defaultIterations}; + int32_t infStreams{defaultStreams}; + int32_t optProfileIndex{defaultOptProfileIndex}; + float warmup{defaultWarmUp}; + float duration{defaultDuration}; + float sleep{defaultSleep}; + float idle{defaultIdle}; + float persistentCacheRatio{defaultPersistentCacheRatio}; + bool overlap{true}; + bool skipTransfers{false}; + bool useManaged{false}; + bool spin{false}; + bool threads{false}; + bool graph{false}; + bool rerun{false}; + bool timeDeserialize{false}; + bool timeRefit{false}; + bool setOptProfile{false}; + std::unordered_map inputs; + using ShapeProfile = std::unordered_map>; + ShapeProfile shapes; + nvinfer1::ProfilingVerbosity nvtxVerbosity{nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY}; + MemoryAllocationStrategy memoryAllocationStrategy{MemoryAllocationStrategy::kSTATIC}; + std::unordered_map debugTensorFileNames; + + WeightStreamingBudget 
weightStreamingBudget; + + void parse(Arguments& arguments) override; + + static void help(std::ostream& out); +}; + +class ReportingOptions : public Options +{ +public: + bool verbose{false}; + int32_t avgs{defaultAvgRuns}; + std::vector percentiles{defaultPercentiles.begin(), defaultPercentiles.end()}; + bool refit{false}; + bool output{false}; + bool dumpRawBindings{false}; + bool profile{false}; + bool layerInfo{false}; + bool optProfileInfo{false}; + std::string exportTimes; + std::string exportOutput; + std::string exportProfile; + std::string exportLayerInfo; + + void parse(Arguments& arguments) override; + + static void help(std::ostream& out); +}; + +class SafeBuilderOptions : public Options +{ +public: + std::string serialized{}; + std::string onnxModelFile{}; + bool help{false}; + bool verbose{false}; + std::vector inputFormats; + std::vector outputFormats; + bool int8{false}; + bool fp8{false}; + bool int4{false}; + std::string calibFile{}; + std::vector plugins; + bool standard{false}; + TimingCacheMode timingCacheMode{TimingCacheMode::kLOCAL}; + std::string timingCacheFile{}; + SparsityFlag sparsity{SparsityFlag::kDISABLE}; + int32_t avgTiming{defaultAvgTiming}; + + void parse(Arguments& arguments) override; + + static void printHelp(std::ostream& out); +}; + +class AllOptions : public Options +{ +public: + ModelOptions model; + BuildOptions build; + SystemOptions system; + InferenceOptions inference; + ReportingOptions reporting; + bool helps{false}; + + void parse(Arguments& arguments) override; + + static void help(std::ostream& out); +}; + +class TaskInferenceOptions : public Options +{ +public: + std::string engine; + int32_t device{defaultDevice}; + int32_t DLACore{-1}; + int32_t batch{batchNotProvided}; + bool graph{false}; + float persistentCacheRatio{defaultPersistentCacheRatio}; + void parse(Arguments& arguments) override; + static void help(std::ostream& out); +}; + + +Arguments argsToArgumentsMap(int32_t argc, char* argv[]); + +bool 
parseHelp(Arguments& arguments); + +void helpHelp(std::ostream& out); + +// Functions to print options + +std::ostream& operator<<(std::ostream& os, const BaseModelOptions& options); + +std::ostream& operator<<(std::ostream& os, const IOFormat& format); + +std::ostream& operator<<(std::ostream& os, const ShapeRange& dims); + +std::ostream& operator<<(std::ostream& os, const ModelOptions& options); + +std::ostream& operator<<(std::ostream& os, const BuildOptions& options); + +std::ostream& operator<<(std::ostream& os, const SystemOptions& options); + +std::ostream& operator<<(std::ostream& os, const InferenceOptions& options); + +std::ostream& operator<<(std::ostream& os, const ReportingOptions& options); + +std::ostream& operator<<(std::ostream& os, const AllOptions& options); + +std::ostream& operator<<(std::ostream& os, const SafeBuilderOptions& options); + +std::ostream& operator<<(std::ostream& os, nvinfer1::DataType dtype); + +std::ostream& operator<<(std::ostream& os, nvinfer1::DeviceType devType); + +inline std::ostream& operator<<(std::ostream& os, const nvinfer1::Dims& dims) +{ + for (int32_t i = 0; i < dims.nbDims; ++i) + { + os << (i ? "x" : "") << dims.d[i]; + } + return os; +} +inline std::ostream& operator<<(std::ostream& os, const nvinfer1::WeightsRole role) +{ + switch (role) + { + case nvinfer1::WeightsRole::kKERNEL: + { + os << "Kernel"; + break; + } + case nvinfer1::WeightsRole::kBIAS: + { + os << "Bias"; + break; + } + case nvinfer1::WeightsRole::kSHIFT: + { + os << "Shift"; + break; + } + case nvinfer1::WeightsRole::kSCALE: + { + os << "Scale"; + break; + } + case nvinfer1::WeightsRole::kCONSTANT: + { + os << "Constant"; + break; + } + case nvinfer1::WeightsRole::kANY: + { + os << "Any"; + break; + } + } + + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const std::vector& vec) +{ + for (int32_t i = 0, e = static_cast(vec.size()); i < e; ++i) + { + os << (i ? 
"x" : "") << vec[i]; + } + return os; +} + +} // namespace sample + +#endif // TRT_SAMPLES_OPTIONS_H diff --git a/src/Detector/tensorrt_onnx/common/sampleReporting.cpp b/src/Detector/tensorrt_onnx/common/sampleReporting.cpp new file mode 100644 index 000000000..e9dda6e0a --- /dev/null +++ b/src/Detector/tensorrt_onnx/common/sampleReporting.cpp @@ -0,0 +1,609 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "sampleInference.h" +#include "sampleOptions.h" +#include "sampleReporting.h" + +using namespace nvinfer1; + +namespace sample +{ + +namespace +{ + +//! +//! \brief Find percentile in an ascending sequence of timings +//! \note percentile must be in [0, 100]. Otherwise, an exception is thrown. +//! +template +float findPercentile(float percentile, std::vector const& timings, T const& toFloat) +{ + int32_t const all = static_cast(timings.size()); + int32_t const exclude = static_cast((1 - percentile / 100) * all); + if (timings.empty()) + { + return std::numeric_limits::infinity(); + } + if (percentile < 0.F || percentile > 100.F) + { + throw std::runtime_error("percentile is not in [0, 100]!"); + } + return toFloat(timings[std::max(all - 1 - exclude, 0)]); +} + +//! +//! 
\brief Find median in a sorted sequence of timings +//! +template +float findMedian(std::vector const& timings, T const& toFloat) +{ + if (timings.empty()) + { + return std::numeric_limits::infinity(); + } + + int32_t const m = timings.size() / 2; + if (timings.size() % 2) + { + return toFloat(timings[m]); + } + + return (toFloat(timings[m - 1]) + toFloat(timings[m])) / 2; +} + +//! +//! \brief Find coefficient of variance (which is std / mean) in a sorted sequence of timings given the mean +//! +template +float findCoeffOfVariance(std::vector const& timings, T const& toFloat, float mean) +{ + if (timings.empty()) + { + return 0; + } + + if (mean == 0.F) + { + return std::numeric_limits::infinity(); + } + + auto const metricAccumulator = [toFloat, mean](float acc, InferenceTime const& a) { + float const diff = toFloat(a) - mean; + return acc + diff * diff; + }; + float const variance = std::accumulate(timings.begin(), timings.end(), 0.F, metricAccumulator) / timings.size(); + + return std::sqrt(variance) / mean * 100.F; +} + +inline InferenceTime traceToTiming(const InferenceTrace& a) +{ + return InferenceTime( + (a.enqEnd - a.enqStart), (a.h2dEnd - a.h2dStart), (a.computeEnd - a.computeStart), (a.d2hEnd - a.d2hStart)); +} + +inline std::string dimsToString(Dims const& shape) +{ + std::stringstream ss; + + if (shape.nbDims == 0) + { + ss << "scalar"; + } + else + { + for (int32_t i = 0; i < shape.nbDims; i++) + { + ss << shape.d[i] << (i != shape.nbDims - 1 ? 
"x" : ""); + } + } + return ss.str(); +} + +} // namespace + +void printProlog(int32_t warmups, int32_t timings, float warmupMs, float benchTimeMs, std::ostream& os) +{ + os << "Warmup completed " << warmups << " queries over " << warmupMs << " ms" << std::endl; + os << "Timing trace has " << timings << " queries over " << benchTimeMs / 1000 << " s" << std::endl; +} + +void printTiming(std::vector const& timings, int32_t runsPerAvg, std::ostream& os) +{ + int64_t count = 0; + InferenceTime sum; + + os << std::endl; + os << "=== Trace details ===" << std::endl; + os << "Trace averages of " << runsPerAvg << " runs:" << std::endl; + + // Show only the first N lines and the last N lines, where N = kTIMING_PRINT_THRESHOLD. + constexpr int64_t kTIMING_PRINT_THRESHOLD{200}; + int64_t const maxNbTimings{kTIMING_PRINT_THRESHOLD * runsPerAvg}; + + for (int64_t idx = 0, size = timings.size(); idx < size; ++idx) + { + // Omit some latency printing to avoid very long logs. + if (size > 2 * maxNbTimings && idx == maxNbTimings) + { + os << "... Omitting " << (size - 2 * maxNbTimings) << " lines" << std::endl; + idx = size - kTIMING_PRINT_THRESHOLD * runsPerAvg - 1; + } + + sum += timings[idx]; + + if (++count == runsPerAvg) + { + // clang-format off + os << "Average on " << runsPerAvg << " runs - GPU latency: " << sum.compute / runsPerAvg + << " ms - Host latency: " << sum.latency() / runsPerAvg << " ms (enqueue " << sum.enq / runsPerAvg + << " ms)" << std::endl; + // clang-format on + count = 0; + sum.enq = 0; + sum.h2d = 0; + sum.compute = 0; + sum.d2h = 0; + } + } +} + +void printMetricExplanations(std::ostream& os) +{ + os << std::endl; + os << "=== Explanations of the performance metrics ===" << std::endl; + os << "Total Host Walltime: the host walltime from when the first query (after warmups) is enqueued to when the " + "last query is completed." + << std::endl; + os << "GPU Compute Time: the GPU latency to execute the kernels for a query." 
<< std::endl; + os << "Total GPU Compute Time: the summation of the GPU Compute Time of all the queries. If this is significantly " + "shorter than Total Host Walltime, the GPU may be under-utilized because of host-side overheads or data " + "transfers." + << std::endl; + os << "Throughput: the observed throughput computed by dividing the number of queries by the Total Host Walltime. " + "If this is significantly lower than the reciprocal of GPU Compute Time, the GPU may be under-utilized " + "because of host-side overheads or data transfers." + << std::endl; + os << "Enqueue Time: the host latency to enqueue a query. If this is longer than GPU Compute Time, the GPU may be " + "under-utilized." + << std::endl; + os << "H2D Latency: the latency for host-to-device data transfers for input tensors of a single query." + << std::endl; + os << "D2H Latency: the latency for device-to-host data transfers for output tensors of a single query." + << std::endl; + os << "Latency: the summation of H2D Latency, GPU Compute Time, and D2H Latency. This is the latency to infer a " + "single query." 
+ << std::endl; +} + +PerformanceResult getPerformanceResult(std::vector const& timings, + std::function metricGetter, std::vector const& percentiles) +{ + auto const metricComparator + = [metricGetter](InferenceTime const& a, InferenceTime const& b) { return metricGetter(a) < metricGetter(b); }; + auto const metricAccumulator = [metricGetter](float acc, InferenceTime const& a) { return acc + metricGetter(a); }; + std::vector newTimings = timings; + std::sort(newTimings.begin(), newTimings.end(), metricComparator); + PerformanceResult result; + result.min = metricGetter(newTimings.front()); + result.max = metricGetter(newTimings.back()); + result.mean = std::accumulate(newTimings.begin(), newTimings.end(), 0.0F, metricAccumulator) / newTimings.size(); + result.median = findMedian(newTimings, metricGetter); + for (auto percentile : percentiles) + { + result.percentiles.emplace_back(findPercentile(percentile, newTimings, metricGetter)); + } + result.coeffVar = findCoeffOfVariance(newTimings, metricGetter, result.mean); + return result; +} + +void printEpilog(std::vector const& timings, float walltimeMs, std::vector const& percentiles, + int32_t batchSize, int32_t infStreams, std::ostream& osInfo, std::ostream& osWarning, std::ostream& osVerbose) +{ + float const throughput = batchSize * timings.size() / walltimeMs * 1000; + + auto const getLatency = [](InferenceTime const& t) { return t.latency(); }; + auto const latencyResult = getPerformanceResult(timings, getLatency, percentiles); + + auto const getEnqueue = [](InferenceTime const& t) { return t.enq; }; + auto const enqueueResult = getPerformanceResult(timings, getEnqueue, percentiles); + + auto const getH2d = [](InferenceTime const& t) { return t.h2d; }; + auto const h2dResult = getPerformanceResult(timings, getH2d, percentiles); + + auto const getCompute = [](InferenceTime const& t) { return t.compute; }; + auto const gpuComputeResult = getPerformanceResult(timings, getCompute, percentiles); + + auto const 
getD2h = [](InferenceTime const& t) { return t.d2h; }; + auto const d2hResult = getPerformanceResult(timings, getD2h, percentiles); + + auto const toPerfString = [&](const PerformanceResult& r) { + std::stringstream s; + s << "min = " << r.min << " ms, max = " << r.max << " ms, mean = " << r.mean << " ms, " + << "median = " << r.median << " ms"; + for (int32_t i = 0, n = percentiles.size(); i < n; ++i) + { + s << ", percentile(" << percentiles[i] << "%) = " << r.percentiles[i] << " ms"; + } + return s.str(); + }; + + osInfo << std::endl; + osInfo << "=== Performance summary ===" << std::endl; + osInfo << "Throughput: " << throughput << " qps" << std::endl; + osInfo << "Latency: " << toPerfString(latencyResult) << std::endl; + osInfo << "Enqueue Time: " << toPerfString(enqueueResult) << std::endl; + osInfo << "H2D Latency: " << toPerfString(h2dResult) << std::endl; + osInfo << "GPU Compute Time: " << toPerfString(gpuComputeResult) << std::endl; + osInfo << "D2H Latency: " << toPerfString(d2hResult) << std::endl; + osInfo << "Total Host Walltime: " << walltimeMs / 1000 << " s" << std::endl; + osInfo << "Total GPU Compute Time: " << gpuComputeResult.mean * timings.size() / 1000 << " s" << std::endl; + + // Report warnings if the throughput is bound by other factors than GPU Compute Time. + constexpr float kENQUEUE_BOUND_REPORTING_THRESHOLD{0.8F}; + if (enqueueResult.median > kENQUEUE_BOUND_REPORTING_THRESHOLD * gpuComputeResult.median) + { + osWarning + << "* Throughput may be bound by Enqueue Time rather than GPU Compute and the GPU may be under-utilized." + << std::endl; + osWarning << " If not already in use, --useCudaGraph (utilize CUDA graphs where possible) may increase the " + "throughput." + << std::endl; + } + if (h2dResult.median >= gpuComputeResult.median) + { + osWarning << "* Throughput may be bound by host-to-device transfers for the inputs rather than GPU Compute and " + "the GPU may be under-utilized." 
+ << std::endl; + osWarning << " Add --noDataTransfers flag to disable data transfers." << std::endl; + } + if (d2hResult.median >= gpuComputeResult.median) + { + osWarning << "* Throughput may be bound by device-to-host transfers for the outputs rather than GPU Compute " + "and the GPU may be under-utilized." + << std::endl; + osWarning << " Add --noDataTransfers flag to disable data transfers." << std::endl; + } + + // Report warnings if the GPU Compute Time is unstable. + constexpr float kUNSTABLE_PERF_REPORTING_THRESHOLD{1.0F}; + if (gpuComputeResult.coeffVar > kUNSTABLE_PERF_REPORTING_THRESHOLD) + { + osWarning << "* GPU compute time is unstable, with coefficient of variance = " << gpuComputeResult.coeffVar + << "%." << std::endl; + osWarning << " If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the " + << "stability." << std::endl; + } + + // Report warnings if multiple inference streams are used. + if (infStreams > 1) + { + osWarning << "* Multiple inference streams are used. Latencies may not be accurate since inferences may run in " + << " parallel. Please use \"Throughput\" as the performance metric instead." << std::endl; + } + + // Explain what the metrics mean. + osInfo << "Explanations of the performance metrics are printed in the verbose logs." 
<< std::endl; + printMetricExplanations(osVerbose); + + osInfo << std::endl; +} + +void printPerformanceReport(std::vector const& trace, ReportingOptions const& reportingOpts, + InferenceOptions const& infOpts, std::ostream& osInfo, std::ostream& osWarning, std::ostream& osVerbose) +{ + int32_t batchSize = infOpts.batch; + float const warmupMs = infOpts.warmup; + auto const isNotWarmup = [&warmupMs](const InferenceTrace& a) { return a.computeStart >= warmupMs; }; + auto const noWarmup = std::find_if(trace.begin(), trace.end(), isNotWarmup); + int32_t const warmups = noWarmup - trace.begin(); + float const benchTime = trace.back().d2hEnd - noWarmup->h2dStart; + // treat inference with explicit batch as a single query and report the throughput + batchSize = batchSize ? batchSize : 1; + printProlog(warmups * batchSize, (trace.size() - warmups) * batchSize, warmupMs, benchTime, osInfo); + + std::vector timings(trace.size() - warmups); + std::transform(noWarmup, trace.end(), timings.begin(), traceToTiming); + printTiming(timings, reportingOpts.avgs, osInfo); + printEpilog( + timings, benchTime, reportingOpts.percentiles, batchSize, infOpts.infStreams, osInfo, osWarning, osVerbose); + + if (!reportingOpts.exportTimes.empty()) + { + exportJSONTrace(trace, reportingOpts.exportTimes, warmups); + } +} + +//! Printed format: +//! [ value, ...] +//! value ::= { "start enq : time, "end enq" : time, "start h2d" : time, "end h2d" : time, "start compute" : time, +//! "end compute" : time, "start d2h" : time, "end d2h" : time, "h2d" : time, "compute" : time, +//! "d2h" : time, "latency" : time } +//! 
+void exportJSONTrace(std::vector const& trace, std::string const& fileName, int32_t const nbWarmups) +{ + std::ofstream os(fileName, std::ofstream::trunc); + os << "[" << std::endl; + char const* sep = " "; + for (auto iter = trace.begin() + nbWarmups; iter < trace.end(); ++iter) + { + auto const& t = *iter; + InferenceTime const it(traceToTiming(t)); + os << sep << "{ "; + sep = ", "; + // clang-format off + os << "\"startEnqMs\" : " << t.enqStart << sep << "\"endEnqMs\" : " << t.enqEnd << sep + << "\"startH2dMs\" : " << t.h2dStart << sep << "\"endH2dMs\" : " << t.h2dEnd << sep + << "\"startComputeMs\" : " << t.computeStart << sep << "\"endComputeMs\" : " << t.computeEnd << sep + << "\"startD2hMs\" : " << t.d2hStart << sep << "\"endD2hMs\" : " << t.d2hEnd << sep + << "\"h2dMs\" : " << it.h2d << sep << "\"computeMs\" : " << it.compute << sep + << "\"d2hMs\" : " << it.d2h << sep << "\"latencyMs\" : " << it.latency() << " }" + << std::endl; + // clang-format on + } + os << "]" << std::endl; +} + +void Profiler::reportLayerTime(char const* layerName, float timeMs) noexcept +{ + if (mIterator == mLayers.end()) + { + bool const first = !mLayers.empty() && mLayers.begin()->name == layerName; + mUpdatesCount += mLayers.empty() || first; + if (first) + { + mIterator = mLayers.begin(); + } + else + { + mLayers.emplace_back(); + mLayers.back().name = layerName; + mIterator = mLayers.end() - 1; + } + } + + mIterator->timeMs.push_back(timeMs); + ++mIterator; +} + +void Profiler::print(std::ostream& os) const noexcept +{ + std::string const nameHdr(" Layer"); + std::string const timeHdr(" Time(ms)"); + std::string const avgHdr(" Avg.(ms)"); + std::string const medHdr(" Median(ms)"); + std::string const percentageHdr(" Time(%)"); + + float const totalTimeMs = getTotalTime(); + + auto const timeLength = timeHdr.size(); + auto const avgLength = avgHdr.size(); + auto const medLength = medHdr.size(); + auto const percentageLength = percentageHdr.size(); + + os << std::endl + << 
"=== Profile (" << mUpdatesCount << " iterations ) ===" << std::endl + << timeHdr << avgHdr << medHdr << percentageHdr << nameHdr << std::endl; + + for (auto const& p : mLayers) + { + if (p.timeMs.empty() || getTotalTime(p) == 0.F) + { + // there is no point to print profiling for layer that didn't run at all + continue; + } + // clang-format off + os << std::setw(timeLength) << std::fixed << std::setprecision(2) << getTotalTime(p) + << std::setw(avgLength) << std::fixed << std::setprecision(4) << getAvgTime(p) + << std::setw(medLength) << std::fixed << std::setprecision(4) << getMedianTime(p) + << std::setw(percentageLength) << std::fixed << std::setprecision(1) << getTotalTime(p) / totalTimeMs * 100 + << " " << p.name << std::endl; + } + { + os << std::setw(timeLength) << std::fixed << std::setprecision(2) + << totalTimeMs << std::setw(avgLength) << std::fixed << std::setprecision(4) << totalTimeMs / mUpdatesCount + << std::setw(medLength) << std::fixed << std::setprecision(4) << getMedianTime() + << std::setw(percentageLength) << std::fixed << std::setprecision(1) << 100.0 + << " Total" << std::endl; + // clang-format on + } + os << std::endl; +} + +void Profiler::exportJSONProfile(std::string const& fileName) const noexcept +{ + std::ofstream os(fileName, std::ofstream::trunc); + os << "[" << std::endl << " { \"count\" : " << mUpdatesCount << " }" << std::endl; + + auto const totalTimeMs = getTotalTime(); + + for (auto const& l : mLayers) + { + // clang-format off + os << ", {" << R"( "name" : ")" << l.name << R"(")" + R"(, "timeMs" : )" << getTotalTime(l) + << R"(, "averageMs" : )" << getAvgTime(l) + << R"(, "medianMs" : )" << getMedianTime(l) + << R"(, "percentage" : )" << getTotalTime(l) / totalTimeMs * 100 + << " }" << std::endl; + // clang-format on + } + os << "]" << std::endl; +} + +void dumpInputs(nvinfer1::IExecutionContext const& context, Bindings const& bindings, std::ostream& os) +{ + os << "Input Tensors:" << std::endl; + 
bindings.dumpInputs(context, os); +} + +void dumpOutputs(nvinfer1::IExecutionContext const& context, Bindings const& bindings, std::ostream& os) +{ + auto isOutput = [](Binding const& b) { return !b.isInput; }; + bindings.dumpBindings(context, isOutput, os); +} + +void dumpRawBindingsToFiles(nvinfer1::IExecutionContext const& context, Bindings const& bindings, std::ostream& os) +{ + bindings.dumpRawBindingToFiles(context, os); +} + +void exportJSONOutput( + nvinfer1::IExecutionContext const& context, Bindings const& bindings, std::string const& fileName, int32_t batch) +{ + std::ofstream os(fileName, std::ofstream::trunc); + std::string sep = " "; + auto const output = bindings.getOutputBindings(); + os << "[" << std::endl; + for (auto const& binding : output) + { + // clang-format off + os << sep << R"({ "name" : ")" << binding.first << "\"" << std::endl; + sep = ", "; + os << " " << sep << R"("dimensions" : ")"; + bindings.dumpBindingDimensions(binding.first, context, os); + os << "\"" << std::endl; + os << " " << sep << "\"values\" : [ "; + bindings.dumpBindingValues(context, binding.second, os, sep, batch); + os << " ]" << std::endl << " }" << std::endl; + // clang-format on + } + os << "]" << std::endl; +} + +void exportJSONOutput( + nvinfer1::IExecutionContext const& context, Bindings const& bindings, std::string const& fileName, int32_t batch); + +void printLayerInfo( + ReportingOptions const& reporting, nvinfer1::ICudaEngine* engine, nvinfer1::IExecutionContext* context) +{ + if (reporting.layerInfo) + { + sample::gLogInfo << "Layer Information:" << std::endl; + sample::gLogInfo << getLayerInformation(engine, context, nvinfer1::LayerInformationFormat::kONELINE) + << std::flush; + } + if (!reporting.exportLayerInfo.empty()) + { + std::ofstream os(reporting.exportLayerInfo, std::ofstream::trunc); + os << getLayerInformation(engine, context, nvinfer1::LayerInformationFormat::kJSON) << std::flush; + } +} + +void printOptimizationProfileInfo(ReportingOptions 
const& reporting, nvinfer1::ICudaEngine const* engine) +{ + if (reporting.optProfileInfo) + { + sample::gLogInfo << "Optimization Profile Information:" << std::endl; + for (int32_t i = 0; i < engine->getNbOptimizationProfiles(); i++) + { + for (int32_t j = 0, e = engine->getNbIOTensors(); j < e; j++) + { + auto const tensorName = engine->getIOTensorName(j); + + if (engine->getTensorIOMode(tensorName) == nvinfer1::TensorIOMode::kINPUT) + { + auto tensorMinShape = engine->getProfileShape(tensorName, i, nvinfer1::OptProfileSelector::kMIN); + auto tensorOptShape = engine->getProfileShape(tensorName, i, nvinfer1::OptProfileSelector::kOPT); + auto tensorMaxShape = engine->getProfileShape(tensorName, i, nvinfer1::OptProfileSelector::kMAX); + + sample::gLogInfo << "Model input " << tensorName << " (profile " << i << "): " + << "min=" << dimsToString(tensorMinShape) + << ", opt=" << dimsToString(tensorOptShape) + << ", max=" << dimsToString(tensorMaxShape) << std::endl; + } + } + } + } +} + +void printPerformanceProfile(ReportingOptions const& reporting, InferenceEnvironment& iEnv) +{ + if (reporting.profile) + { + iEnv.profiler->print(sample::gLogInfo); + } + if (!reporting.exportProfile.empty()) + { + iEnv.profiler->exportJSONProfile(reporting.exportProfile); + } + + // Print an warning about total per-layer latency when auxiliary streams are used. + if (!iEnv.safe && (reporting.profile || !reporting.exportProfile.empty())) + { + int32_t const nbAuxStreams = iEnv.engine.get()->getNbAuxStreams(); + if (nbAuxStreams > 0) + { + sample::gLogWarning << "The engine uses " << nbAuxStreams << " auxiliary streams, so the \"Total\" latency " + << "may not be accurate because some layers may have run in parallel!" << std::endl; + } + } +} + +namespace details +{ +void dump(std::unique_ptr const& context, std::unique_ptr const& binding, + ReportingOptions const& reporting, int32_t batch) +{ + if (!context) + { + sample::gLogError << "Empty context! Skip printing outputs." 
<< std::endl; + return; + } + if (reporting.output) + { + dumpOutputs(*context, *binding, sample::gLogInfo); + } + if (reporting.dumpRawBindings) + { + dumpRawBindingsToFiles(*context, *binding, sample::gLogInfo); + } + if (!reporting.exportOutput.empty()) + { + exportJSONOutput(*context, *binding, reporting.exportOutput, batch); + } +} +} // namespace details + +void printOutput(ReportingOptions const& reporting, InferenceEnvironment const& iEnv, int32_t batch) +{ + auto const& binding = iEnv.bindings.at(0); + if (!binding) + { + sample::gLogError << "Empty bindings! Skip printing outputs." << std::endl; + return; + } + if (iEnv.safe) + { + sample::gLogError << "Safe inferernce is not supported!" << std::endl; + return; + } + auto const& context = iEnv.contexts.at(0); + details::dump(context, binding, reporting, batch); +} + +} // namespace sample diff --git a/src/Detector/tensorrt_onnx/common/sampleReporting.h b/src/Detector/tensorrt_onnx/common/sampleReporting.h new file mode 100644 index 000000000..922ef3c8b --- /dev/null +++ b/src/Detector/tensorrt_onnx/common/sampleReporting.h @@ -0,0 +1,298 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef TRT_SAMPLE_REPORTING_H +#define TRT_SAMPLE_REPORTING_H + +#include +#include +#include + +#include "sampleOptions.h" + +namespace sample +{ + +class Bindings; + +//! +//! \struct InferenceTime +//! \brief Measurement times in milliseconds +//! +struct InferenceTime +{ + InferenceTime(float q, float i, float c, float o) + : enq(q) + , h2d(i) + , compute(c) + , d2h(o) + { + } + + InferenceTime() = default; + InferenceTime(InferenceTime const&) = default; + InferenceTime(InferenceTime&&) = default; + InferenceTime& operator=(InferenceTime const&) = default; + InferenceTime& operator=(InferenceTime&&) = default; + ~InferenceTime() = default; + + float enq{0}; // Enqueue + float h2d{0}; // Host to Device + float compute{0}; // Compute + float d2h{0}; // Device to Host + + // ideal latency + float latency() const + { + return h2d + compute + d2h; + } +}; + +//! +//! \struct InferenceTrace +//! \brief Measurement points in milliseconds +//! +struct InferenceTrace +{ + InferenceTrace(int32_t s, float es, float ee, float is, float ie, float cs, float ce, float os, float oe) + : stream(s) + , enqStart(es) + , enqEnd(ee) + , h2dStart(is) + , h2dEnd(ie) + , computeStart(cs) + , computeEnd(ce) + , d2hStart(os) + , d2hEnd(oe) + { + } + + InferenceTrace() = default; + InferenceTrace(InferenceTrace const&) = default; + InferenceTrace(InferenceTrace&&) = default; + InferenceTrace& operator=(InferenceTrace const&) = default; + InferenceTrace& operator=(InferenceTrace&&) = default; + ~InferenceTrace() = default; + + int32_t stream{0}; + float enqStart{0}; + float enqEnd{0}; + float h2dStart{0}; + float h2dEnd{0}; + float computeStart{0}; + float computeEnd{0}; + float d2hStart{0}; + float d2hEnd{0}; +}; + +inline InferenceTime operator+(InferenceTime const& a, InferenceTime const& b) +{ + return InferenceTime(a.enq + b.enq, a.h2d + b.h2d, a.compute + b.compute, a.d2h + b.d2h); +} + +inline InferenceTime operator+=(InferenceTime& a, InferenceTime const& b) +{ + return 
a = a + b; +} + +//! +//! \struct PerformanceResult +//! \brief Performance result of a performance metric +//! +struct PerformanceResult +{ + float min{0.F}; + float max{0.F}; + float mean{0.F}; + float median{0.F}; + std::vector percentiles; + float coeffVar{0.F}; // coefficient of variation +}; + +//! +//! \brief Print benchmarking time and number of traces collected +//! +void printProlog(int32_t warmups, int32_t timings, float warmupMs, float walltime, std::ostream& os); + +//! +//! \brief Print a timing trace +//! +void printTiming(std::vector const& timings, int32_t runsPerAvg, std::ostream& os); + +//! +//! \brief Print the performance summary of a trace +//! +void printEpilog(std::vector const& timings, std::vector const& percentiles, int32_t batchSize, + std::ostream& osInfo, std::ostream& osWarning, std::ostream& osVerbose); + +//! +//! \brief Get the result of a specific performance metric from a trace +//! +PerformanceResult getPerformanceResult(std::vector const& timings, + std::function metricGetter, std::vector const& percentiles); + +//! +//! \brief Print the explanations of the performance metrics printed in printEpilog() function. +//! +void printMetricExplanations(std::ostream& os); + +//! +//! \brief Print and summarize a timing trace +//! +void printPerformanceReport(std::vector const& trace, ReportingOptions const& reportingOpts, + InferenceOptions const& infOpts, std::ostream& osInfo, std::ostream& osWarning, std::ostream& osVerbose); + +//! +//! \brief Export a timing trace to JSON file +//! +void exportJSONTrace( + std::vector const& InferenceTime, std::string const& fileName, int32_t const nbWarmups); + +//! +//! \brief Print input tensors to stream +//! +void dumpInputs(nvinfer1::IExecutionContext const& context, Bindings const& bindings, std::ostream& os); + +//! +//! \brief Print output tensors to stream +//! 
+void dumpOutputs(nvinfer1::IExecutionContext const& context, Bindings const& bindings, std::ostream& os); + +void dumpRawBindingsToFiles(nvinfer1::IExecutionContext const& context, Bindings const& bindings, std::ostream& os); + +//! +//! \brief Export output tensors to JSON file +//! +void exportJSONOutput( + nvinfer1::IExecutionContext const& context, Bindings const& bindings, std::string const& fileName, int32_t batch); + +//! +//! \struct LayerProfile +//! \brief Layer profile information +//! +struct LayerProfile +{ + std::string name; + std::vector timeMs; +}; + +//! +//! \class Profiler +//! \brief Collect per-layer profile information, assuming times are reported in the same order +//! +class Profiler : public nvinfer1::IProfiler +{ + +public: + void reportLayerTime(char const* layerName, float timeMs) noexcept override; + + void print(std::ostream& os) const noexcept; + + //! + //! \brief Export a profile to JSON file + //! + void exportJSONProfile(std::string const& fileName) const noexcept; + +private: + float getTotalTime() const noexcept + { + auto const plusLayerTime = [](float accumulator, LayerProfile const& lp) { + return accumulator + std::accumulate(lp.timeMs.begin(), lp.timeMs.end(), 0.F, std::plus()); + }; + return std::accumulate(mLayers.begin(), mLayers.end(), 0.0F, plusLayerTime); + } + + float getMedianTime() const noexcept + { + if (mLayers.empty()) + { + return 0.F; + } + std::vector totalTime; + for (size_t run = 0; run < mLayers[0].timeMs.size(); ++run) + { + auto const layerTime + = [&run](float accumulator, LayerProfile const& lp) { return accumulator + lp.timeMs[run]; }; + auto t = std::accumulate(mLayers.begin(), mLayers.end(), 0.F, layerTime); + totalTime.push_back(t); + } + return median(totalTime); + } + + float getMedianTime(LayerProfile const& p) const noexcept + { + return median(p.timeMs); + } + + static float median(std::vector vals) + { + if (vals.empty()) + { + return 0.F; + } + std::sort(vals.begin(), vals.end()); + if 
(vals.size() % 2U == 1U) + { + return vals[vals.size() / 2U]; + } + return (vals[vals.size() / 2U - 1U] + vals[vals.size() / 2U]) * 0.5F; + } + + //! return the total runtime of given layer profile + float getTotalTime(LayerProfile const& p) const noexcept + { + auto const& vals = p.timeMs; + return std::accumulate(vals.begin(), vals.end(), 0.F, std::plus()); + } + + float getAvgTime(LayerProfile const& p) const noexcept + { + return getTotalTime(p) / p.timeMs.size(); + } + + std::vector mLayers; + std::vector::iterator mIterator{mLayers.begin()}; + int32_t mUpdatesCount{0}; +}; + +//! +//! \brief Print layer info to logger or export it to output JSON file. +//! +void printLayerInfo( + ReportingOptions const& reporting, nvinfer1::ICudaEngine* engine, nvinfer1::IExecutionContext* context); + +//! +//! \brief Print optimization profile info to logger. +//! +void printOptimizationProfileInfo(ReportingOptions const& reporting, nvinfer1::ICudaEngine const* engine); + +//! Forward declaration. +struct InferenceEnvironment; + +//! +//! \brief Print per-layer perf profile data to logger or export it to output JSON file. +//! +void printPerformanceProfile(ReportingOptions const& reporting, InferenceEnvironment& iEnv); + +//! +//! \brief Print binding output values to logger or export them to output JSON file. +//! +void printOutput(ReportingOptions const& reporting, InferenceEnvironment const& iEnv, int32_t batch); + +} // namespace sample + +#endif // TRT_SAMPLE_REPORTING_H diff --git a/src/Detector/tensorrt_onnx/common/sampleUtils.cpp b/src/Detector/tensorrt_onnx/common/sampleUtils.cpp new file mode 100644 index 000000000..89a128ee1 --- /dev/null +++ b/src/Detector/tensorrt_onnx/common/sampleUtils.cpp @@ -0,0 +1,608 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "sampleUtils.h" +#include "bfloat16.h" +#include "half.h" +#include + +using namespace nvinfer1; + +namespace sample +{ + +size_t dataTypeSize(nvinfer1::DataType dataType) +{ + switch (dataType) + { +#if (NV_TENSORRT_MAJOR > 8) + case nvinfer1::DataType::kINT64: return 8U; +#endif + case nvinfer1::DataType::kINT32: + case nvinfer1::DataType::kFLOAT: return 4U; +#if (NV_TENSORRT_MAJOR > 8) + case nvinfer1::DataType::kBF16: +#endif + case nvinfer1::DataType::kHALF: return 2U; + case nvinfer1::DataType::kBOOL: + case nvinfer1::DataType::kUINT8: + case nvinfer1::DataType::kINT8: + case nvinfer1::DataType::kFP8: return 1U; +#if (NV_TENSORRT_MAJOR > 8) + case nvinfer1::DataType::kINT4: + ASSERT(false && "Element size is not implemented for sub-byte data-types."); +#endif + } + return 0; +} + +int64_t volume(nvinfer1::Dims const& dims, nvinfer1::Dims const& strides, int32_t vecDim, int32_t comps, int32_t batch) +{ + int64_t maxNbElems = 1; + for (int32_t i = 0; i < dims.nbDims; ++i) + { + // Get effective length of axis. + int64_t d = dims.d[i]; + // Any dimension is 0, it is an empty tensor. + if (d == 0) + { + return 0; + } + if (i == vecDim) + { + d = samplesCommon::divUp(d, comps); + } + maxNbElems = std::max(maxNbElems, d * strides.d[i]); + } + return maxNbElems * batch * (vecDim < 0 ? 
1 : comps); +} + +nvinfer1::Dims toDims(std::vector const& vec) +{ + int32_t limit = static_cast(nvinfer1::Dims::MAX_DIMS); + if (static_cast(vec.size()) > limit) + { + sample::gLogWarning << "Vector too long, only first 8 elements are used in dimension." << std::endl; + } + // Pick first nvinfer1::Dims::MAX_DIMS elements + nvinfer1::Dims dims{std::min(static_cast(vec.size()), limit), {}}; + std::copy_n(vec.begin(), dims.nbDims, std::begin(dims.d)); + return dims; +} + +void loadFromFile(std::string const& fileName, char* dst, size_t size) +{ + ASSERT(dst); + + std::ifstream file(fileName, std::ios::in | std::ios::binary); + if (file.is_open()) + { + file.seekg(0, std::ios::end); + int64_t fileSize = static_cast(file.tellg()); + // Due to change from int32_t to int64_t VC engines created with earlier versions + // may expect input of the half of the size + if (fileSize != static_cast(size) && fileSize != static_cast(size * 2)) + { + std::ostringstream msg; + msg << "Unexpected file size for input file: " << fileName << ". Note: Input binding size is: " << size + << " bytes but the file size is " << fileSize + << " bytes. Double check the size and datatype of the provided data."; + throw std::invalid_argument(msg.str()); + } + // Move file pointer back to the beginning after reading file size. + file.seekg(0, std::ios::beg); + file.read(dst, size); + size_t const nbBytesRead = file.gcount(); + file.close(); + if (nbBytesRead != size) + { + std::ostringstream msg; + msg << "Unexpected file size for input file: " << fileName << ". 
Note: Expected: " << size + << " bytes but only read: " << nbBytesRead << " bytes"; + throw std::invalid_argument(msg.str()); + } + } + else + { + std::ostringstream msg; + msg << "Cannot open file " << fileName << "!"; + throw std::invalid_argument(msg.str()); + } +} + +std::vector splitToStringVec(std::string const& s, char separator, int64_t maxSplit) +{ + std::vector splitted; + + for (size_t start = 0; start < s.length();) + { + // If maxSplit is specified and we have reached maxSplit, emplace back the rest of the string and break the + // loop. + if (maxSplit >= 0 && static_cast(splitted.size()) == maxSplit) + { + splitted.emplace_back(s.substr(start, s.length() - start)); + break; + } + + size_t separatorIndex = s.find(separator, start); + if (separatorIndex == std::string::npos) + { + separatorIndex = s.length(); + } + splitted.emplace_back(s.substr(start, separatorIndex - start)); + + // If the separator is the last character, then we should push an empty string at the end. + if (separatorIndex == s.length() - 1) + { + splitted.emplace_back(""); + } + + start = separatorIndex + 1; + } + + return splitted; +} + +bool broadcastIOFormats(std::vector const& formats, size_t nbBindings, bool isInput /*= true*/) +{ + bool broadcast = formats.size() == 1; + bool validFormatsCount = broadcast || (formats.size() == nbBindings); + if (!formats.empty() && !validFormatsCount) + { + if (isInput) + { + throw std::invalid_argument( + "The number of inputIOFormats must match network's inputs or be one for broadcasting."); + } + + throw std::invalid_argument( + "The number of outputIOFormats must match network's outputs or be one for broadcasting."); + } + return broadcast; +} + +void sparsifyMatMulKernelWeights(nvinfer1::INetworkDefinition& network, std::vector>& sparseWeights) +{ + using TensorToLayer = std::unordered_map; + using LayerToTensor = std::unordered_map; + + // 1. Collect layers and tensors information from the network. 
+ TensorToLayer matmulI2L; + TensorToLayer constO2L; + TensorToLayer shuffleI2L; + LayerToTensor shuffleL2O; + auto collectMappingInfo = [&](int32_t const idx) + { + ILayer* l = network.getLayer(idx); + switch (l->getType()) + { + case nvinfer1::LayerType::kMATRIX_MULTIPLY: + { + // assume weights on the second input. + matmulI2L.insert({l->getInput(1), l}); + break; + } + case nvinfer1::LayerType::kCONSTANT: + { + DataType const dtype = static_cast(l)->getWeights().type; + if (dtype == nvinfer1::DataType::kFLOAT || dtype == nvinfer1::DataType::kHALF) + { + // Sparsify float only. + constO2L.insert({l->getOutput(0), l}); + } + break; + } + case nvinfer1::LayerType::kSHUFFLE: + { + shuffleI2L.insert({l->getInput(0), l}); + shuffleL2O.insert({l, l->getOutput(0)}); + break; + } + default: break; + } + }; + int32_t const nbLayers = network.getNbLayers(); + for (int32_t i = 0; i < nbLayers; ++i) + { + collectMappingInfo(i); + } + if (matmulI2L.size() == 0 || constO2L.size() == 0) + { + // No MatrixMultiply or Constant layer found, no weights to sparsify. 
+ return; + } + + // Helper for analysis + auto isTranspose + = [](nvinfer1::Permutation const& perm) -> bool { return (perm.order[0] == 1 && perm.order[1] == 0); }; + auto is2D = [](nvinfer1::Dims const& dims) -> bool { return dims.nbDims == 2; }; + auto isIdenticalReshape = [](nvinfer1::Dims const& dims) -> bool + { + for (int32_t i = 0; i < dims.nbDims; ++i) + { + if (dims.d[i] != i || dims.d[i] != -1) + { + return false; + } + } + return true; + }; + auto tensorReachedViaTranspose = [&](nvinfer1::ITensor* t, bool& needTranspose) -> ITensor* + { + while (shuffleI2L.find(t) != shuffleI2L.end()) + { + nvinfer1::IShuffleLayer* s = static_cast(shuffleI2L.at(t)); + if (!is2D(s->getInput(0)->getDimensions()) || !is2D(s->getReshapeDimensions()) + || !isIdenticalReshape(s->getReshapeDimensions())) + { + break; + } + + if (isTranspose(s->getFirstTranspose())) + { + needTranspose = !needTranspose; + } + if (isTranspose(s->getSecondTranspose())) + { + needTranspose = !needTranspose; + } + + t = shuffleL2O.at(s); + } + return t; + }; + + // 2. Forward analysis to collect the Constant layers connected to MatMul via Transpose + std::unordered_map constantLayerToSparse; + for (auto& o2l : constO2L) + { + // If need to transpose the weights of the Constant layer. + // Need to transpose by default due to semantic difference. + bool needTranspose{true}; + ITensor* t = tensorReachedViaTranspose(o2l.first, needTranspose); + if (matmulI2L.find(t) == matmulI2L.end()) + { + continue; + } + + // check MatMul params... 
+ IMatrixMultiplyLayer* mm = static_cast(matmulI2L.at(t)); + bool const twoInputs = mm->getNbInputs() == 2; + bool const all2D = is2D(mm->getInput(0)->getDimensions()) && is2D(mm->getInput(1)->getDimensions()); + bool const isSimple = mm->getOperation(0) == nvinfer1::MatrixOperation::kNONE + && mm->getOperation(1) != nvinfer1::MatrixOperation::kVECTOR; + if (!(twoInputs && all2D && isSimple)) + { + continue; + } + if (mm->getOperation(1) == nvinfer1::MatrixOperation::kTRANSPOSE) + { + needTranspose = !needTranspose; + } + + constantLayerToSparse.insert({static_cast(o2l.second), needTranspose}); + } + + // 3. Finally, sparsify the weights + auto sparsifyConstantWeights = [&sparseWeights](nvinfer1::IConstantLayer* layer, bool const needTranspose) + { + Dims dims = layer->getOutput(0)->getDimensions(); + ASSERT(dims.nbDims == 2); + int32_t const idxN = needTranspose ? 1 : 0; + int32_t const n = dims.d[idxN]; + int32_t const k = dims.d[1 - idxN]; + sparseWeights.emplace_back(); + std::vector& spw = sparseWeights.back(); + Weights w = layer->getWeights(); + DataType const dtype = w.type; + ASSERT(dtype == nvinfer1::DataType::kFLOAT + || dtype == nvinfer1::DataType::kHALF); // non-float weights should have been ignored. 
+ + if (needTranspose) + { + if (dtype == nvinfer1::DataType::kFLOAT) + { + spw.resize(w.count * sizeof(float)); + transpose2DWeights(spw.data(), w.values, k, n); + } + else if (dtype == nvinfer1::DataType::kHALF) + { + spw.resize(w.count * sizeof(half_float::half)); + transpose2DWeights(spw.data(), w.values, k, n); + } + + w.values = spw.data(); + std::vector tmpW; + sparsify(w, n, 1, tmpW); + + if (dtype == nvinfer1::DataType::kFLOAT) + { + transpose2DWeights(spw.data(), tmpW.data(), n, k); + } + else if (dtype == nvinfer1::DataType::kHALF) + { + transpose2DWeights(spw.data(), tmpW.data(), n, k); + } + } + else + { + sparsify(w, n, 1, spw); + } + + w.values = spw.data(); + layer->setWeights(w); + }; + for (auto& l : constantLayerToSparse) + { + sparsifyConstantWeights(l.first, l.second); + } +} + +template +void setSparseWeights(L& l, int32_t k, int32_t trs, std::vector& sparseWeights) +{ + auto weights = l.getKernelWeights(); + sparsify(weights, k, trs, sparseWeights); + weights.values = sparseWeights.data(); + l.setKernelWeights(weights); +} + +// Explicit instantiation +template void setSparseWeights( + IConvolutionLayer& l, int32_t k, int32_t trs, std::vector& sparseWeights); + +void sparsify(nvinfer1::INetworkDefinition& network, std::vector>& sparseWeights) +{ + for (int32_t l = 0; l < network.getNbLayers(); ++l) + { + auto* layer = network.getLayer(l); + auto const t = layer->getType(); + if (t == nvinfer1::LayerType::kCONVOLUTION) + { + auto& conv = *static_cast(layer); + auto const& dims = conv.getKernelSizeNd(); + ASSERT(dims.nbDims == 2 || dims.nbDims == 3); + auto const k = conv.getNbOutputMaps(); + auto const trs = std::accumulate(dims.d, dims.d + dims.nbDims, 1, std::multiplies()); + sparseWeights.emplace_back(); + setSparseWeights(conv, k, trs, sparseWeights.back()); + } + } + + sparsifyMatMulKernelWeights(network, sparseWeights); + sample::gLogVerbose << "--sparsity=force pruned " << sparseWeights.size() << " weights to be sparsity pattern." 
<< std::endl; + sample::gLogVerbose << "--sparsity=force has been deprecated. Please use to rewrite the weights to a sparsity pattern and then run with --sparsity=enable" << std::endl; +} + +void sparsify(Weights const& weights, int32_t k, int32_t trs, std::vector& sparseWeights) +{ + switch (weights.type) + { + case DataType::kFLOAT: + sparsify(static_cast(weights.values), weights.count, k, trs, sparseWeights); + break; + case DataType::kHALF: + sparsify(static_cast(weights.values), weights.count, k, trs, sparseWeights); + break; +#if (NV_TENSORRT_MAJOR > 8) + case DataType::kBF16: + sparsify(static_cast(weights.values), weights.count, k, trs, sparseWeights); + break; +#endif + case DataType::kINT8: + case DataType::kINT32: + case DataType::kUINT8: + case DataType::kBOOL: +#if (NV_TENSORRT_MAJOR > 8) + case DataType::kINT4: +#endif + case DataType::kFP8: +#if (NV_TENSORRT_MAJOR > 8) + case DataType::kINT64: +#endif + ASSERT(false && "Unsupported data type"); + } +} + +template +void print(std::ostream& os, T v) +{ + os << v; +} + +void print(std::ostream& os, int8_t v) +{ + os << static_cast(v); +} + +void print(std::ostream& os, __half v) +{ + os << static_cast(v); +} + +void print(std::ostream& os, __nv_fp8_e4m3 v) +{ + os << static_cast(v); +} + +template +void dumpBuffer(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, + Dims const& strides, int32_t vectorDim, int32_t spv) +{ + auto const vol = volume(dims); + T const* typedBuffer = static_cast(buffer); + std::string sep; + for (int64_t v = 0; v < vol; ++v) + { + int64_t curV = v; + int32_t dataOffset = 0; + for (int32_t dimIndex = dims.nbDims - 1; dimIndex >= 0; --dimIndex) + { + int32_t dimVal = curV % dims.d[dimIndex]; + if (dimIndex == vectorDim) + { + dataOffset += (dimVal / spv) * strides.d[dimIndex] * spv + dimVal % spv; + } + else + { + dataOffset += dimVal * strides.d[dimIndex] * (vectorDim == -1 ? 
1 : spv); + } + curV /= dims.d[dimIndex]; + ASSERT(curV >= 0); + } + + os << sep; + sep = separator; + print(os, typedBuffer[dataOffset]); + } +} + +// Explicit instantiation +template void dumpBuffer(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, + Dims const& strides, int32_t vectorDim, int32_t spv); +template void dumpBuffer(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, + Dims const& strides, int32_t vectorDim, int32_t spv); +template void dumpBuffer(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, + Dims const& strides, int32_t vectorDim, int32_t spv); +template void dumpBuffer(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, + Dims const& strides, int32_t vectorDim, int32_t spv); +template void dumpBuffer<__half>(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, + Dims const& strides, int32_t vectorDim, int32_t spv); +template void dumpBuffer(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, + Dims const& strides, int32_t vectorDim, int32_t spv); +template void dumpBuffer(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, + Dims const& strides, int32_t vectorDim, int32_t spv); +template void dumpBuffer(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, + Dims const& strides, int32_t vectorDim, int32_t spv); +template void dumpBuffer<__nv_fp8_e4m3>(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims, + Dims const& strides, int32_t vectorDim, int32_t spv); + +template +void sparsify(T const* values, int64_t count, int32_t k, int32_t trs, std::vector& sparseWeights) +{ + auto const c = count / (k * trs); + sparseWeights.resize(count * sizeof(T)); + auto* sparseValues = reinterpret_cast(sparseWeights.data()); + + constexpr int32_t window = 4; 
+ constexpr int32_t nonzeros = 2; + + int32_t const crs = c * trs; + auto const getIndex = [=](int32_t ki, int32_t ci, int32_t rsi) { return ki * crs + ci * trs + rsi; }; + + for (int64_t ki = 0; ki < k; ++ki) + { + for (int64_t rsi = 0; rsi < trs; ++rsi) + { + int32_t w = 0; + int32_t nz = 0; + for (int64_t ci = 0; ci < c; ++ci) + { + auto const index = getIndex(ki, ci, rsi); + if (nz < nonzeros) + { + sparseValues[index] = values[index]; + ++nz; + } + else + { + sparseValues[index] = 0; + } + if (++w == window) + { + w = 0; + nz = 0; + } + } + } + } +} + +// Explicit instantiation +template void sparsify( + float const* values, int64_t count, int32_t k, int32_t trs, std::vector& sparseWeights); +template void sparsify( + half_float::half const* values, int64_t count, int32_t k, int32_t trs, std::vector& sparseWeights); + +template +void transpose2DWeights(void* dst, void const* src, int32_t const m, int32_t const n) +{ + ASSERT(dst != src); + T* tdst = reinterpret_cast(dst); + T const* tsrc = reinterpret_cast(src); + for (int32_t mi = 0; mi < m; ++mi) + { + for (int32_t ni = 0; ni < n; ++ni) + { + int32_t const isrc = mi * n + ni; + int32_t const idst = ni * m + mi; + tdst[idst] = tsrc[isrc]; + } + } +} + +// Explicit instantiation +template void transpose2DWeights(void* dst, void const* src, int32_t const m, int32_t const n); +template void transpose2DWeights(void* dst, void const* src, int32_t const m, int32_t const n); + +template ::value, bool>::type> +void fillBuffer(void* buffer, int64_t volume, T min, T max) +{ + T* typedBuffer = static_cast(buffer); + std::default_random_engine engine; + std::uniform_int_distribution distribution(min, max); + auto generator = [&engine, &distribution]() { return static_cast(distribution(engine)); }; + std::generate(typedBuffer, typedBuffer + volume, generator); +} + +template ::value, int32_t>::type> +void fillBuffer(void* buffer, int64_t volume, T min, T max) +{ + T* typedBuffer = static_cast(buffer); + 
std::default_random_engine engine; + std::uniform_real_distribution distribution((float)min, (float)max); + auto generator = [&engine, &distribution]() { return static_cast(distribution(engine)); }; + std::generate(typedBuffer, typedBuffer + volume, generator); +} + +// Explicit instantiation +template void fillBuffer(void* buffer, int64_t volume, bool min, bool max); +template void fillBuffer(void* buffer, int64_t volume, float min, float max); +template void fillBuffer(void* buffer, int64_t volume, int32_t min, int32_t max); +template void fillBuffer(void* buffer, int64_t volume, int64_t min, int64_t max); +template void fillBuffer(void* buffer, int64_t volume, int8_t min, int8_t max); +template void fillBuffer<__half>(void* buffer, int64_t volume, __half min, __half max); +template void fillBuffer(void* buffer, int64_t volume, BFloat16 min, BFloat16 max); +template void fillBuffer(void* buffer, int64_t volume, uint8_t min, uint8_t max); +template void fillBuffer<__nv_fp8_e4m3>(void* buffer, int64_t volume, __nv_fp8_e4m3 min, __nv_fp8_e4m3 max); + +bool matchStringWithOneWildcard(std::string const& pattern, std::string const& target) +{ + auto const splitPattern = splitToStringVec(pattern, '*', 1); + + // If there is no wildcard, return if the two strings match exactly. + if (splitPattern.size() == 1) + { + return pattern == target; + } + + // Otherwise, target must follow prefix+anything+postfix pattern. + return target.size() >= (splitPattern[0].size() + splitPattern[1].size()) && target.find(splitPattern[0]) == 0 + && target.rfind(splitPattern[1]) == (target.size() - splitPattern[1].size()); +} + +} // namespace sample diff --git a/src/Detector/tensorrt_onnx/common/sampleUtils.h b/src/Detector/tensorrt_onnx/common/sampleUtils.h new file mode 100644 index 000000000..6cd4280b9 --- /dev/null +++ b/src/Detector/tensorrt_onnx/common/sampleUtils.h @@ -0,0 +1,127 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. 
All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef TRT_SAMPLE_UTILS_H +#define TRT_SAMPLE_UTILS_H + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "NvInfer.h" + +#include "common.h" +#include "logger.h" + +#define SMP_RETVAL_IF_FALSE(condition, msg, retval, err) \ + { \ + if ((condition) == false) \ + { \ + (err) << (msg) << std::endl; \ + return retval; \ + } \ + } + +namespace sample +{ + +size_t dataTypeSize(nvinfer1::DataType dataType); + +template +inline T roundUp(T m, T n) +{ + return ((m + n - 1) / n) * n; +} + +//! comps is the number of components in a vector. Ignored if vecDim < 0. 
+int64_t volume(nvinfer1::Dims const& dims, nvinfer1::Dims const& strides, int32_t vecDim, int32_t comps, int32_t batch); + +using samplesCommon::volume; + +nvinfer1::Dims toDims(std::vector const& vec); + +template ::value, bool>::type = true> +void fillBuffer(void* buffer, int64_t volume, T min, T max); + +template ::value, int32_t>::type = 0> +void fillBuffer(void* buffer, int64_t volume, T min, T max); + +template +void dumpBuffer(void const* buffer, std::string const& separator, std::ostream& os, nvinfer1::Dims const& dims, + nvinfer1::Dims const& strides, int32_t vectorDim, int32_t spv); + +void loadFromFile(std::string const& fileName, char* dst, size_t size); + +std::vector splitToStringVec(std::string const& option, char separator, int64_t maxSplit = -1); + +bool broadcastIOFormats(std::vector const& formats, size_t nbBindings, bool isInput = true); + +int32_t getCudaDriverVersion(); + +int32_t getCudaRuntimeVersion(); + +void sparsify(nvinfer1::INetworkDefinition& network, std::vector>& sparseWeights); +void sparsify(nvinfer1::Weights const& weights, int32_t k, int32_t rs, std::vector& sparseWeights); + +// Walk the weights elements and overwrite (at most) 2 out of 4 elements to 0. +template +void sparsify(T const* values, int64_t count, int32_t k, int32_t rs, std::vector& sparseWeights); + +template +void setSparseWeights(L& l, int32_t k, int32_t rs, std::vector& sparseWeights); + +// Sparsify the weights of Constant layers that are fed to MatMul via Shuffle layers. +// Forward analysis on the API graph to determine which weights to sparsify. +void sparsifyMatMulKernelWeights( + nvinfer1::INetworkDefinition& network, std::vector>& sparseWeights); + +template +void transpose2DWeights(void* dst, void const* src, int32_t const m, int32_t const n); + +//! A helper function to match a target string with a pattern where the pattern can contain up to one wildcard ('*') +//! character that matches to any strings. 
+bool matchStringWithOneWildcard(std::string const& pattern, std::string const& target); + +//! A helper method to find an item from an unordered_map. If the exact match exists, this is identical to +//! map.find(target). If the exact match does not exist, it returns the first plausible match, taking up to one wildcard +//! into account. If there is no plausible match, then it returns map.end(). +template +typename std::unordered_map::const_iterator findPlausible( + std::unordered_map const& map, std::string const& target) +{ + auto res = map.find(target); + if (res == map.end()) + { + res = std::find_if( + map.begin(), map.end(), [&](typename std::unordered_map::value_type const& item) { + return matchStringWithOneWildcard(item.first, target); + }); + } + return res; +} + +} // namespace sample + +#endif // TRT_SAMPLE_UTILS_H diff --git a/src/Detector/tensorrt_onnx/common/streamReader.h b/src/Detector/tensorrt_onnx/common/streamReader.h new file mode 100644 index 000000000..5bb321448 --- /dev/null +++ b/src/Detector/tensorrt_onnx/common/streamReader.h @@ -0,0 +1,82 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef STREAM_READER_H +#define STREAM_READER_H + +#include "NvInferRuntime.h" +#include "sampleUtils.h" +#include + +#if (NV_TENSORRT_MAJOR > 8) + +namespace samplesCommon +{ + +//! 
Implements the TensorRT IStreamReader to allow deserializing an engine directly from the plan file. +class FileStreamReader final : public nvinfer1::IStreamReader +{ +public: + bool open(std::string filepath) + { + mFile.open(filepath, std::ios::binary); + return mFile.is_open(); + } + + void close() + { + if (mFile.is_open()) + { + mFile.close(); + } + } + + ~FileStreamReader() final + { + close(); + } + + int64_t read(void* dest, int64_t bytes) final + { + if (!mFile.good()) + { + return -1; + } + mFile.read(static_cast(dest), bytes); + return mFile.gcount(); + } + + void reset() + { + assert(mFile.good()); + mFile.seekg(0); + } + + bool isOpen() const + { + return mFile.is_open(); + } + +private: + std::ifstream mFile; +}; + +} // namespace samplesCommon + +#endif // #if (NV_TENSORRT_MAJOR > 8) + +#endif // STREAM_READER_H diff --git a/src/Detector/tensorrt_onnx/common/timingCache.cpp b/src/Detector/tensorrt_onnx/common/timingCache.cpp new file mode 100644 index 000000000..18e85ba40 --- /dev/null +++ b/src/Detector/tensorrt_onnx/common/timingCache.cpp @@ -0,0 +1,157 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "timingCache.h" +#include "NvInfer.h" +#include "fileLock.h" +#include "sampleUtils.h" +#include +#include +#include +#include +#include +#include +using namespace nvinfer1; +namespace nvinfer1 +{ +namespace utils +{ +std::vector loadTimingCacheFile(ILogger& logger, std::string const& inFileName) +{ + try + { + std::unique_ptr fileLock{new FileLock(logger, inFileName)}; + std::ifstream iFile(inFileName, std::ios::in | std::ios::binary); + if (!iFile) + { + std::stringstream ss; + ss << "Could not read timing cache from: " << inFileName + << ". A new timing cache will be generated and written."; + logger.log(ILogger::Severity::kWARNING, ss.str().c_str()); + return std::vector(); + } + iFile.seekg(0, std::ifstream::end); + size_t fsize = iFile.tellg(); + iFile.seekg(0, std::ifstream::beg); + std::vector content(fsize); + iFile.read(content.data(), fsize); + iFile.close(); + std::stringstream ss; + ss << "Loaded " << fsize << " bytes of timing cache from " << inFileName; + logger.log(ILogger::Severity::kINFO, ss.str().c_str()); + return content; + } + catch (std::exception const& e) + { + std::cerr << "Exception detected: " << e.what() << std::endl; + } + return {}; +} + +std::unique_ptr buildTimingCacheFromFile( + ILogger& logger, IBuilderConfig& config, std::string const& timingCacheFile, std::ostream& err) +{ + std::unique_ptr timingCache{}; + auto timingCacheContents = loadTimingCacheFile(logger, timingCacheFile); + timingCache.reset(config.createTimingCache(timingCacheContents.data(), timingCacheContents.size())); + SMP_RETVAL_IF_FALSE(timingCache != nullptr, "TimingCache creation failed", nullptr, err); + config.clearFlag(BuilderFlag::kDISABLE_TIMING_CACHE); + SMP_RETVAL_IF_FALSE( + config.setTimingCache(*timingCache, true), "IBuilderConfig setTimingCache failed", nullptr, err); + return timingCache; +} + +void saveTimingCacheFile(ILogger& logger, std::string const& outFileName, IHostMemory const* blob) +{ + try + { + std::unique_ptr 
fileLock{new FileLock(logger, outFileName)}; + std::ofstream oFile(outFileName, std::ios::out | std::ios::binary); + if (!oFile) + { + std::stringstream ss; + ss << "Could not write timing cache to: " << outFileName; + logger.log(ILogger::Severity::kWARNING, ss.str().c_str()); + return; + } + oFile.write(reinterpret_cast(blob->data()), blob->size()); + oFile.close(); + std::stringstream ss; + ss << "Saved " << blob->size() << " bytes of timing cache to " << outFileName; + logger.log(ILogger::Severity::kINFO, ss.str().c_str()); + } + catch (std::exception const& e) + { + std::cerr << "Exception detected: " << e.what() << std::endl; + } +} + +void updateTimingCacheFile(nvinfer1::ILogger& logger, std::string const& fileName, + nvinfer1::ITimingCache const* timingCache, nvinfer1::IBuilder& builder) +{ + try + { + // Prepare empty timingCache in case that there is no existing file to read + std::unique_ptr config{builder.createBuilderConfig()}; + std::unique_ptr fileTimingCache{config->createTimingCache(static_cast(nullptr), 0)}; + + std::unique_ptr fileLock{new FileLock(logger, fileName)}; + std::ifstream iFile(fileName, std::ios::in | std::ios::binary); + if (iFile) + { + iFile.seekg(0, std::ifstream::end); + size_t fsize = iFile.tellg(); + iFile.seekg(0, std::ifstream::beg); + std::vector content(fsize); + iFile.read(content.data(), fsize); + iFile.close(); + std::stringstream ss; + ss << "Loaded " << fsize << " bytes of timing cache from " << fileName; + logger.log(ILogger::Severity::kINFO, ss.str().c_str()); + fileTimingCache.reset(config->createTimingCache(static_cast(content.data()), content.size())); + if (!fileTimingCache) + { + throw std::runtime_error("Failed to create timingCache from " + fileName + "!"); + } + } + fileTimingCache->combine(*timingCache, false); + std::unique_ptr blob{fileTimingCache->serialize()}; + if (!blob) + { + throw std::runtime_error("Failed to serialize ITimingCache!"); + } + std::ofstream oFile(fileName, std::ios::out | 
std::ios::binary); + if (!oFile) + { + std::stringstream ss; + ss << "Could not write timing cache to: " << fileName; + logger.log(ILogger::Severity::kWARNING, ss.str().c_str()); + return; + } + oFile.write(reinterpret_cast(blob->data()), blob->size()); + oFile.close(); + std::stringstream ss; + ss << "Saved " << blob->size() << " bytes of timing cache to " << fileName; + logger.log(ILogger::Severity::kINFO, ss.str().c_str()); + } + catch (std::exception const& e) + { + std::cerr << "Exception detected: " << e.what() << std::endl; + } +} +} // namespace utils +} // namespace nvinfer1 diff --git a/src/Detector/tensorrt_onnx/common/timingCache.h b/src/Detector/tensorrt_onnx/common/timingCache.h new file mode 100644 index 000000000..c4c76e376 --- /dev/null +++ b/src/Detector/tensorrt_onnx/common/timingCache.h @@ -0,0 +1,38 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef TENSORRT_SAMPLES_COMMON_TIMINGCACHE_H_ +#define TENSORRT_SAMPLES_COMMON_TIMINGCACHE_H_ +#include "NvInfer.h" +#include +#include +#include +#include + +namespace nvinfer1 +{ +namespace utils +{ +std::vector loadTimingCacheFile(nvinfer1::ILogger& logger, std::string const& inFileName); +std::unique_ptr buildTimingCacheFromFile( + ILogger& logger, IBuilderConfig& config, std::string const& timingCacheFile, std::ostream& err); +void saveTimingCacheFile(nvinfer1::ILogger& logger, std::string const& outFileName, nvinfer1::IHostMemory const* blob); +void updateTimingCacheFile(nvinfer1::ILogger& logger, std::string const& fileName, + nvinfer1::ITimingCache const* timingCache, nvinfer1::IBuilder& builder); +} // namespace utils +} // namespace nvinfer1 + +#endif // TENSORRT_SAMPLES_COMMON_TIMINGCACHE_H_ diff --git a/src/Detector/tensorrt_yolo/calibrator.cpp b/src/Detector/tensorrt_yolo/calibrator.cpp deleted file mode 100644 index 31403f3c6..000000000 --- a/src/Detector/tensorrt_yolo/calibrator.cpp +++ /dev/null @@ -1,115 +0,0 @@ -/** -MIT License - -Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -* -*/ - -#include "calibrator.h" -#include -#include -#include -#include - -Int8EntropyCalibrator::Int8EntropyCalibrator(const uint32_t& batchSize, const std::string& calibImages, - const std::string& calibImagesPath, - const std::string& calibTableFilePath, - const uint64_t& inputSize, const uint32_t& inputH, - const uint32_t& inputW, const std::string& inputBlobName) : - m_BatchSize(batchSize), - m_InputH(inputH), - m_InputW(inputW), - m_InputSize(inputSize), - m_InputCount(batchSize * inputSize), - m_InputBlobName(inputBlobName), - m_CalibTableFilePath(calibTableFilePath), - m_ImageIndex(0) -{ - if (!fileExists(m_CalibTableFilePath, false)) - { - std::random_device rng; - std::mt19937 urng(rng()); - - m_ImageList = loadImageList(calibImages, calibImagesPath); - m_ImageList.resize(static_cast(m_ImageList.size() / m_BatchSize) * m_BatchSize); - std::shuffle(m_ImageList.begin(), m_ImageList.end(), urng); - } - - NV_CUDA_CHECK(cudaMalloc(&m_DeviceInput, m_InputCount * sizeof(float))); -} - -Int8EntropyCalibrator::~Int8EntropyCalibrator() { NV_CUDA_CHECK(cudaFree(m_DeviceInput)); } - -bool Int8EntropyCalibrator::getBatch(void* bindings[], const char* names[], int /*nbBindings*/) -{ - if (m_ImageIndex + m_BatchSize >= m_ImageList.size()) return false; - - // Load next batch - std::vector dsImages(m_BatchSize); - for (uint32_t j = m_ImageIndex; j < m_ImageIndex + m_BatchSize; ++j) - { - dsImages.at(j - m_ImageIndex) = DsImage(m_ImageList.at(j), m_InputH, m_InputW); - } - m_ImageIndex += m_BatchSize; - - cv::Mat trtInput = blobFromDsImages(dsImages, m_InputH, m_InputW); - - NV_CUDA_CHECK(cudaMemcpy(m_DeviceInput, trtInput.ptr(0), m_InputCount * sizeof(float), - cudaMemcpyHostToDevice)); - 
assert(!strcmp(names[0], m_InputBlobName.c_str())); - bindings[0] = m_DeviceInput; - return true; -} - -const void* Int8EntropyCalibrator::readCalibrationCache(size_t& length) -{ - void* output; - m_CalibrationCache.clear(); - assert(!m_CalibTableFilePath.empty()); - std::ifstream input(m_CalibTableFilePath, std::ios::binary | std::ios::in); - input >> std::noskipws; - if (m_ReadCache && input.good()) - std::copy(std::istream_iterator(input), std::istream_iterator(), - std::back_inserter(m_CalibrationCache)); - - length = m_CalibrationCache.size(); - if (length) - { - std::cout << "Using cached calibration table to build the engine" << std::endl; - output = &m_CalibrationCache[0]; - } - - else - { - std::cout << "New calibration table will be created to build the engine" << std::endl; - output = nullptr; - } - - return output; -} - -void Int8EntropyCalibrator::writeCalibrationCache(const void* cache, size_t length) -{ - assert(!m_CalibTableFilePath.empty()); - std::ofstream output(m_CalibTableFilePath, std::ios::binary); - output.write(reinterpret_cast(cache), length); - output.close(); -} \ No newline at end of file diff --git a/src/Detector/tensorrt_yolo/calibrator.h b/src/Detector/tensorrt_yolo/calibrator.h deleted file mode 100644 index 50012f9a4..000000000 --- a/src/Detector/tensorrt_yolo/calibrator.h +++ /dev/null @@ -1,61 +0,0 @@ -/** -MIT License - -Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
-* -*/ -#ifndef _CALIBRATOR_H_ -#define _CALIBRATOR_H_ - -#include "NvInfer.h" -#include "ds_image.h" -#include "trt_utils.h" - -class Int8EntropyCalibrator : public nvinfer1::IInt8EntropyCalibrator -{ -public: - Int8EntropyCalibrator(const uint32_t& batchSize, const std::string& calibImages, - const std::string& calibImagesPath, const std::string& calibTableFilePath, - const uint64_t& inputSize, const uint32_t& inputH, const uint32_t& inputW, - const std::string& inputBlobName); - virtual ~Int8EntropyCalibrator(); - - int getBatchSize() const override { return m_BatchSize; } - bool getBatch(void* bindings[], const char* names[], int nbBindings) override; - const void* readCalibrationCache(size_t& length) override; - void writeCalibrationCache(const void* cache, size_t length) override; - -private: - const uint32_t m_BatchSize; - const uint32_t m_InputH; - const uint32_t m_InputW; - const uint64_t m_InputSize; - const uint64_t m_InputCount; - const std::string m_InputBlobName; - const std::string m_CalibTableFilePath{nullptr}; - uint32_t m_ImageIndex; - bool m_ReadCache{true}; - void* m_DeviceInput{nullptr}; - std::vector m_ImageList; - std::vector m_CalibrationCache; -}; - -#endif \ No newline at end of file diff --git a/src/Detector/tensorrt_yolo/chunk.cu b/src/Detector/tensorrt_yolo/chunk.cu deleted file mode 100644 index 1eb497b64..000000000 --- a/src/Detector/tensorrt_yolo/chunk.cu +++ /dev/null @@ -1,194 +0,0 @@ -#include -#include -#include -#include -#include "chunk.h" -#include -#define NV_CUDA_CHECK(status) \ - { \ - if (status != 0) \ - { \ - std::cout << "Cuda failure: " << cudaGetErrorString(status) << " in file " << __FILE__ \ - << " at line " << __LINE__ << std::endl; \ - abort(); \ - } \ - } - - -namespace nvinfer1 -{ - Chunk::Chunk() - { - - } - Chunk::Chunk(const void* buffer, size_t size) - { - assert(size == sizeof(_n_size_split)); - _n_size_split = *reinterpret_cast(buffer); - } - Chunk::~Chunk() - { - - } - int Chunk::getNbOutputs() const - { 
- return 2; - } - - Dims Chunk::getOutputDimensions(int index, const Dims* inputs, int nbInputDims) - { - assert(nbInputDims == 1); - assert(index == 0 || index == 1); - return Dims3(inputs[0].d[0] / 2, inputs[0].d[1], inputs[0].d[2]); - } - - int Chunk::initialize() - { - return 0; - } - - void Chunk::terminate() - { - } - - size_t Chunk::getWorkspaceSize(int maxBatchSize) const - { - return 0; - } - - int Chunk::enqueue(int batchSize, - const void* const* inputs, - void** outputs, - void* workspace, - cudaStream_t stream) - { - //batch - for (int b = 0; b < batchSize; ++b) - { - NV_CUDA_CHECK(cudaMemcpy((char*)outputs[0] + b * _n_size_split, (char*)inputs[0] + b * 2 * _n_size_split, _n_size_split, cudaMemcpyDeviceToDevice)); - NV_CUDA_CHECK(cudaMemcpy((char*)outputs[1] + b * _n_size_split, (char*)inputs[0] + b * 2 * _n_size_split + _n_size_split, _n_size_split, cudaMemcpyDeviceToDevice)); - } - // NV_CUDA_CHECK(cudaMemcpy(outputs[0], inputs[0], _n_size_split, cudaMemcpyDeviceToDevice)); - // NV_CUDA_CHECK(cudaMemcpy(outputs[1], (void*)((char*)inputs[0] + _n_size_split), _n_size_split, cudaMemcpyDeviceToDevice)); - return 0; - } - - size_t Chunk::getSerializationSize() const - { - return sizeof(_n_size_split); - } - - void Chunk::serialize(void *buffer)const - { - *reinterpret_cast(buffer) = _n_size_split; - } - - const char* Chunk::getPluginType()const - { - return "CHUNK_TRT"; - } - const char* Chunk::getPluginVersion() const - { - return "1.0"; - } - - void Chunk::destroy() - { - delete this; - } - - void Chunk::setPluginNamespace(const char* pluginNamespace) - { - _s_plugin_namespace = pluginNamespace; - } - - const char* Chunk::getPluginNamespace() const - { - return _s_plugin_namespace.c_str(); - } - - DataType Chunk::getOutputDataType(int index, - const nvinfer1::DataType* inputTypes, - int nbInputs) const - { - assert(index == 0 || index == 1); - return DataType::kFLOAT; - } - - bool Chunk::isOutputBroadcastAcrossBatch(int outputIndex, const bool* 
inputIsBroadcasted, int nbInputs) const - { - return false; - } - - bool Chunk::canBroadcastInputAcrossBatch(int inputIndex) const - { - return false; - } - - void Chunk::attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) {} - - void Chunk::configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) - { - _n_size_split = in->dims.d[0] / 2 * in->dims.d[1] * in->dims.d[2] *sizeof(float); - } - void Chunk::detachFromContext() {} - - // Clone the plugin - IPluginV2IOExt* Chunk::clone() const - { - Chunk *p = new Chunk(); - p->_n_size_split = _n_size_split; - p->setPluginNamespace(_s_plugin_namespace.c_str()); - return p; - } - - //---------------------------- - PluginFieldCollection ChunkPluginCreator::_fc{}; - std::vector ChunkPluginCreator::_vec_plugin_attributes; - - ChunkPluginCreator::ChunkPluginCreator() - { - _vec_plugin_attributes.clear(); - _fc.nbFields = _vec_plugin_attributes.size(); - _fc.fields = _vec_plugin_attributes.data(); - } - - const char* ChunkPluginCreator::getPluginName() const - { - return "CHUNK_TRT"; - } - - const char* ChunkPluginCreator::getPluginVersion() const - { - return "1.0"; - } - - const PluginFieldCollection* ChunkPluginCreator::getFieldNames() - { - return &_fc; - } - - IPluginV2IOExt* ChunkPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc) - { - Chunk* obj = new Chunk(); - obj->setPluginNamespace(_s_name_space.c_str()); - return obj; - } - - IPluginV2IOExt* ChunkPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength) - { - Chunk* obj = new Chunk(serialData,serialLength); - obj->setPluginNamespace(_s_name_space.c_str()); - return obj; - } - - void ChunkPluginCreator::setPluginNamespace(const char* libNamespace) - { - _s_name_space = libNamespace; - } - - const char* ChunkPluginCreator::getPluginNamespace() const - { - return _s_name_space.c_str(); - } -}//namespace nvinfer1 
diff --git a/src/Detector/tensorrt_yolo/chunk.h b/src/Detector/tensorrt_yolo/chunk.h deleted file mode 100644 index 7d36ab32a..000000000 --- a/src/Detector/tensorrt_yolo/chunk.h +++ /dev/null @@ -1,80 +0,0 @@ -#ifndef CHUNK_H_ -#define CHUNK_H_ - -//#include "NvInfer.h" -//#include "NvInferPlugin.h" -//#include "NvInferRuntimeCommon.h" -//#include -//#include -//#include -//#include -//#include -//#include -//#include - -#include -#include -#include "NvInfer.h" - -namespace nvinfer1 -{ - class Chunk : public IPluginV2IOExt - { - public: - Chunk(); - Chunk(const void* buffer, size_t length); - ~Chunk(); - int getNbOutputs()const override; - Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override; - int initialize() override; - void terminate() override; - size_t getWorkspaceSize(int maxBatchSize) const override; - int enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream)override; - size_t getSerializationSize() const override; - void serialize(void* buffer) const override; - const char* getPluginType() const override; - const char* getPluginVersion() const override; - void destroy() override; - void setPluginNamespace(const char* pluginNamespace) override; - const char* getPluginNamespace() const override; - DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const override; - bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const override; - bool canBroadcastInputAcrossBatch(int inputIndex) const override; - void attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) override; - void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) override; - void detachFromContext() override; - bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int /*nbInputs*/, int /*nbOutputs*/) const override - { - 
return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT; - } - IPluginV2IOExt* clone() const override; - private: - std::string _s_plugin_namespace; - int _n_size_split; - }; - - class ChunkPluginCreator : public IPluginCreator - { - public: - ChunkPluginCreator(); - ~ChunkPluginCreator() override = default; - const char* getPluginName()const override; - const char* getPluginVersion() const override; - const PluginFieldCollection* getFieldNames() override; - IPluginV2IOExt* createPlugin(const char* name, const PluginFieldCollection* fc) override; - IPluginV2IOExt* deserializePlugin(const char* name, const void* serialData, size_t serialLength) override; - void setPluginNamespace(const char* libNamespace) override; - const char* getPluginNamespace() const override; - private: - std::string _s_name_space; - static PluginFieldCollection _fc; - static std::vector _vec_plugin_attributes; - }; - -}//nampespace nvinfer1 - - -#endif - - diff --git a/src/Detector/tensorrt_yolo/class_detector.cpp b/src/Detector/tensorrt_yolo/class_detector.cpp deleted file mode 100644 index 0b946e3c9..000000000 --- a/src/Detector/tensorrt_yolo/class_detector.cpp +++ /dev/null @@ -1,44 +0,0 @@ -#include "class_detector.h" -#include "class_yolo_detector.hpp" - -namespace tensor_rt -{ - class Detector::Impl - { - public: - Impl() {} - - ~Impl() {} - - YoloDectector _detector; - }; - - Detector::Detector() - { - _impl = new Impl(); - } - - Detector::~Detector() - { - if (_impl) - { - delete _impl; - _impl = nullptr; - } - } - - void Detector::init(const Config &config) - { - _impl->_detector.init(config); - } - - void Detector::detect(const std::vector &mat_image, std::vector &vec_batch_result) - { - _impl->_detector.detect(mat_image, vec_batch_result); - } - - cv::Size Detector::get_input_size() const - { - return _impl->_detector.get_input_size(); - } -} diff --git a/src/Detector/tensorrt_yolo/class_detector.h b/src/Detector/tensorrt_yolo/class_detector.h 
deleted file mode 100644 index e3ce147f5..000000000 --- a/src/Detector/tensorrt_yolo/class_detector.h +++ /dev/null @@ -1,82 +0,0 @@ -#ifndef CLASS_DETECTOR_H_ -#define CLASS_DETECTOR_H_ - -#include "API.h" -#include -#include - -namespace tensor_rt -{ - struct Result - { - int id = -1; - float prob = 0.f; - cv::Rect rect; - - Result(int id_, float prob_, cv::Rect r) - : id(id_), prob(prob_), rect(r) - { - } - }; - - typedef std::vector BatchResult; - - enum ModelType - { - YOLOV2 = 0, - YOLOV3, - YOLOV2_TINY, - YOLOV3_TINY, - YOLOV4, - YOLOV4_TINY, - YOLOV5 - }; - - enum Precision - { - INT8 = 0, - FP16, - FP32 - }; - - struct Config - { - std::string file_model_cfg = "yolov4.cfg"; - - std::string file_model_weights = "yolov4.weights"; - - float detect_thresh = 0.9f; - - ModelType net_type = YOLOV3; - - Precision inference_precison = FP32; - - int gpu_id = 0; - - uint32_t batch_size = 1; - - std::string calibration_image_list_file_txt = "configs/calibration_images.txt"; - }; - - class API Detector - { - public: - explicit Detector(); - - ~Detector(); - - void init(const Config &config); - - void detect(const std::vector &mat_image, std::vector &vec_batch_result); - - cv::Size get_input_size() const; - - private: - - Detector(const Detector &); - const Detector &operator =(const Detector &); - class Impl; - Impl *_impl = nullptr; - }; -} -#endif // !CLASS_QH_DETECTOR_H_ diff --git a/src/Detector/tensorrt_yolo/class_timer.hpp b/src/Detector/tensorrt_yolo/class_timer.hpp deleted file mode 100644 index eeb63b322..000000000 --- a/src/Detector/tensorrt_yolo/class_timer.hpp +++ /dev/null @@ -1,37 +0,0 @@ -#pragma once - -#include -#include -#include - -class Timer -{ -public: - Timer() : beg_(clock_::now()) - {} - void reset() - { - beg_ = clock_::now(); - } - - double elapsed() const - { - return std::chrono::duration_cast(clock_::now() - beg_).count(); - } - - void out(std::string message = "") - { - double t = elapsed(); - std::cout << message << " elasped time:" << t 
<< "ms" << std::endl; - reset(); - } - - double get_duration()const - { - return elapsed(); - } -private: - typedef std::chrono::high_resolution_clock clock_; - typedef std::chrono::duration second_; - std::chrono::time_point beg_; -}; diff --git a/src/Detector/tensorrt_yolo/class_yolo_detector.hpp b/src/Detector/tensorrt_yolo/class_yolo_detector.hpp deleted file mode 100644 index 0a17c349a..000000000 --- a/src/Detector/tensorrt_yolo/class_yolo_detector.hpp +++ /dev/null @@ -1,157 +0,0 @@ -#ifndef CLASS_YOLO_DETECTOR_HPP_ -#define CLASS_YOLO_DETECTOR_HPP_ - -#include -#include "ds_image.h" -#include "trt_utils.h" -#include "yolo.h" -#include "yolov2.h" -#include "yolov3.h" -#include "yolov4.h" -#include "yolov5.h" - -#include -#include -#include -#include /* defines FILENAME_MAX */ - -#include "class_detector.h" -#include "class_timer.hpp" -class YoloDectector -{ -public: - YoloDectector() - { - - } - ~YoloDectector() - { - - } - - void init(const tensor_rt::Config &config) - { - _config = config; - - this->set_gpu_id(_config.gpu_id); - - this->parse_config(); - - this->build_net(); - } - - void detect(const std::vector &vec_image, - std::vector &vec_batch_result) - { - std::vector vec_ds_images; - vec_batch_result.clear(); - if (vec_batch_result.capacity() < vec_image.size()) - vec_batch_result.reserve(vec_image.size()); - for (const auto &img:vec_image) - { - vec_ds_images.emplace_back(img, _p_net->getInputH(), _p_net->getInputW()); - } - cv::Mat trtInput = blobFromDsImages(vec_ds_images, _p_net->getInputH(),_p_net->getInputW()); - _p_net->doInference(trtInput.data, static_cast(vec_ds_images.size())); - for (size_t i = 0; i < vec_ds_images.size(); ++i) - { - auto curImage = vec_ds_images.at(i); - auto binfo = _p_net->decodeDetections(static_cast(i), curImage.getImageHeight(), curImage.getImageWidth()); - auto remaining = nmsAllClasses(_p_net->getNMSThresh(), - binfo, - _p_net->getNumClasses(), - _vec_net_type[_config.net_type]); - - std::vector vec_result; - if 
(!remaining.empty()) - { - vec_result.reserve(remaining.size()); - for (const auto &b : remaining) - { - const int x = cvRound(b.box.x1); - const int y = cvRound(b.box.y1); - const int w = cvRound(b.box.x2 - b.box.x1); - const int h = cvRound(b.box.y2 - b.box.y1); - vec_result.emplace_back(b.label, b.prob, cv::Rect(x, y, w, h)); - } - } - vec_batch_result.emplace_back(vec_result); - } - } - - cv::Size get_input_size() const - { - return cv::Size(_p_net->getInputH(), _p_net->getInputW()); - } - -private: - - void set_gpu_id(const int id = 0) - { - cudaError_t status = cudaSetDevice(id); - if (status != cudaSuccess) - { - std::cout << "gpu id :" + std::to_string(id) + " not exist !" << std::endl; - assert(0); - } - } - - void parse_config() - { - _yolo_info.networkType = _vec_net_type[_config.net_type]; - _yolo_info.configFilePath = _config.file_model_cfg; - _yolo_info.wtsFilePath = _config.file_model_weights; - _yolo_info.precision = _vec_precision[_config.inference_precison]; - _yolo_info.deviceType = "kGPU"; - auto npos = _yolo_info.wtsFilePath.find(".weights"); - assert(npos != std::string::npos - && "wts file file not recognised. 
File needs to be of '.weights' format"); - _yolo_info.data_path = _yolo_info.wtsFilePath.substr(0, npos); - _yolo_info.calibrationTablePath = _yolo_info.data_path + "-calibration.table"; - _yolo_info.inputBlobName = "data"; - - _infer_param.printPerfInfo = false; - _infer_param.printPredictionInfo = false; - _infer_param.calibImages = _config.calibration_image_list_file_txt; - _infer_param.calibImagesPath = ""; - _infer_param.probThresh = _config.detect_thresh; - _infer_param.nmsThresh = 0.5; - _infer_param.batchSize = _config.batch_size; - } - - void build_net() - { - if ((_config.net_type == tensor_rt::YOLOV2) || (_config.net_type == tensor_rt::YOLOV2_TINY)) - { - _p_net = std::unique_ptr{ new YoloV2(_yolo_info, _infer_param) }; - } - else if ((_config.net_type == tensor_rt::YOLOV3) || (_config.net_type == tensor_rt::YOLOV3_TINY)) - { - _p_net = std::unique_ptr{ new YoloV3(_yolo_info, _infer_param) }; - } - else if ((_config.net_type == tensor_rt::YOLOV4) || (_config.net_type == tensor_rt::YOLOV4_TINY)) - { - _p_net = std::unique_ptr{ new YoloV4(_yolo_info,_infer_param) }; - } - else if (_config.net_type == tensor_rt::YOLOV5) - { - _p_net = std::unique_ptr{ new YoloV5(_yolo_info,_infer_param) }; - } - else - { - assert(false && "Unrecognised network_type."); - } - } - -private: - tensor_rt::Config _config; - NetworkInfo _yolo_info; - InferParams _infer_param; - std::vector _vec_net_type{ "yolov2", "yolov3", "yolov2-tiny", "yolov3-tiny", "yolov4", "yolov4-tiny", "yolov5" }; - std::vector _vec_precision{ "kINT8","kHALF","kFLOAT" }; - std::unique_ptr _p_net = nullptr; - Timer _m_timer; -}; - - -#endif diff --git a/src/Detector/tensorrt_yolo/cmake/FindCUDNN.cmake b/src/Detector/tensorrt_yolo/cmake/FindCUDNN.cmake deleted file mode 100644 index 37388d30b..000000000 --- a/src/Detector/tensorrt_yolo/cmake/FindCUDNN.cmake +++ /dev/null @@ -1,180 +0,0 @@ -# Distributed under the OSI-approved BSD 3-Clause License. 
See accompanying -# file Copyright.txt or https://cmake.org/licensing for details. -#.rst: -# FindCUDNN -# ------- -# -# Find CUDNN library -# -# Valiables that affect result: -# , , : as usual -# -# : as usual, plus we do find '5.1' version if you wanted '5' -# (not if you wanted '5.0', as usual) -# -# Result variables -# ^^^^^^^^^^^^^^^^ -# -# This module will set the following variables in your project: -# -# ``CUDNN_INCLUDE`` -# where to find cudnn.h. -# ``CUDNN_LIBRARY`` -# the libraries to link against to use CUDNN. -# ``CUDNN_FOUND`` -# If false, do not try to use CUDNN. -# ``CUDNN_VERSION`` -# Version of the CUDNN library we looked for -# -# Exported functions -# ^^^^^^^^^^^^^^^^ -# function(CUDNN_INSTALL version __dest_libdir [__dest_incdir]) -# This function will try to download and install CUDNN. -# CUDNN5 and CUDNN6 are supported. -# -# - -function(CUDNN_INSTALL version dest_libdir dest_incdir dest_bindir) - message(STATUS "CUDNN_INSTALL: Installing CUDNN ${version}, lib:${dest_libdir}, inc:${dest_incdir}, bin:${dest_bindir}") - string(REGEX REPLACE "-rc$" "" version_base "${version}") - set(tar_libdir cuda/lib64) - set(tar_incdir cuda/include) - - if(${CMAKE_SYSTEM_NAME} MATCHES "Linux") - set(url_extension tgz) - if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64") - set(url_arch_name linux-x64 ) - elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "ppc") - set(url_arch_name linux-ppc64le ) - # TX1 has to be installed via JetPack - endif() - elseif (APPLE) - set(url_extension tgz) - set(tar_libdir cuda/lib) - set(url_arch_name osx-x64) - elseif(WIN32) - set(url_extension zip) - set(tar_bindir cuda/bin) - set(tar_libdir cuda/lib/x64) - if(CMAKE_SYSTEM_VERSION MATCHES "10") - set(url_arch_name windows10-x64) - else() - set(url_arch_name windows7-x64) - endif() - endif() - - # Download and install CUDNN locally if not found on the system - if(url_arch_name) - set(download_dir ${CMAKE_CURRENT_BINARY_DIR}/downloads/cudnn${version}) - file(MAKE_DIRECTORY 
${download_dir}) - set(cudnn_filename cudnn-${CUDA_VERSION}-${url_arch_name}-v${version}.${url_extension}) - set(base_url http://developer.download.nvidia.com/compute/redist/cudnn) - set(cudnn_url ${base_url}/v${version_base}/${cudnn_filename}) - set(cudnn_file ${download_dir}/${cudnn_filename}) - - if(NOT EXISTS ${cudnn_file}) - message(STATUS "Downloading CUDNN library from NVIDIA...") - file(DOWNLOAD ${cudnn_url} ${cudnn_file} - SHOW_PROGRESS STATUS cudnn_status - ) - execute_process(COMMAND ${CMAKE_COMMAND} -E tar xzvf ${cudnn_file} WORKING_DIRECTORY ${download_dir} RESULT_VARIABLE cudnn_status) - - if(NOT "${cudnn_status}" MATCHES "0") - message(STATUS "Was not able to download CUDNN from ${cudnn_url}. Please install CuDNN manually from https://developer.nvidia.com/cuDNN") - endif() - endif() - - if(dest_bindir AND tar_bindir) - file(COPY ${download_dir}/${tar_bindir}/ DESTINATION ${dest_bindir}) - endif() - - if(dest_incdir) - file(COPY ${download_dir}/${tar_incdir}/ DESTINATION ${dest_incdir}) - endif() - - file(COPY ${download_dir}/${tar_libdir}/ DESTINATION ${dest_libdir} ) - - get_filename_component(dest_dir ${dest_libdir} DIRECTORY) - - set(CUDNN_ROOT_DIR ${dest_dir} PARENT_SCOPE) - unset(CUDNN_LIBRARY CACHE) - unset(CUDNN_INCLUDE_DIR CACHE) - - endif(url_arch_name) -endfunction() - -##################################################### - -find_package(PkgConfig) -pkg_check_modules(PC_CUDNN QUIET CUDNN) - -get_filename_component(__libpath_cudart "${CUDA_CUDART_LIBRARY}" PATH) - -# We use major only in library search as major/minor is not entirely consistent among platforms. -# Also, looking for exact minor version of .so is in general not a good idea. -# More strict enforcement of minor/patch version is done if/when the header file is examined. 
-if(CUDNN_FIND_VERSION_EXACT) - SET(__cudnn_ver_suffix ".${CUDNN_FIND_VERSION_MAJOR}") - SET(__cudnn_lib_win_name cudnn64_${CUDNN_FIND_VERSION_MAJOR}) -else() - SET(__cudnn_lib_win_name cudnn64) -endif() - -find_library(CUDNN_LIBRARY - NAMES libcudnn.so${__cudnn_ver_suffix} libcudnn${__cudnn_ver_suffix}.dylib ${__cudnn_lib_win_name} - PATHS $ENV{LD_LIBRARY_PATH} ${__libpath_cudart} ${CUDNN_ROOT_DIR} ${PC_CUDNN_LIBRARY_DIRS} ${CMAKE_INSTALL_PREFIX} - PATH_SUFFIXES lib lib64 bin - DOC "CUDNN library." ) - -if(CUDNN_LIBRARY) - SET(CUDNN_MAJOR_VERSION ${CUDNN_FIND_VERSION_MAJOR}) - set(CUDNN_VERSION ${CUDNN_MAJOR_VERSION}) - get_filename_component(__found_cudnn_root ${CUDNN_LIBRARY} PATH) - find_path(CUDNN_INCLUDE_DIR - NAMES cudnn.h - HINTS ${PC_CUDNN_INCLUDE_DIRS} ${CUDNN_ROOT_DIR} ${CUDA_TOOLKIT_INCLUDE} ${__found_cudnn_root} - PATH_SUFFIXES include - DOC "Path to CUDNN include directory." ) -endif() - -if(CUDNN_LIBRARY AND CUDNN_INCLUDE_DIR) - file(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_VERSION_FILE_CONTENTS) - string(REGEX MATCH "define CUDNN_MAJOR * +([0-9]+)" - CUDNN_MAJOR_VERSION "${CUDNN_VERSION_FILE_CONTENTS}") - string(REGEX REPLACE "define CUDNN_MAJOR * +([0-9]+)" "\\1" - CUDNN_MAJOR_VERSION "${CUDNN_MAJOR_VERSION}") - string(REGEX MATCH "define CUDNN_MINOR * +([0-9]+)" - CUDNN_MINOR_VERSION "${CUDNN_VERSION_FILE_CONTENTS}") - string(REGEX REPLACE "define CUDNN_MINOR * +([0-9]+)" "\\1" - CUDNN_MINOR_VERSION "${CUDNN_MINOR_VERSION}") - string(REGEX MATCH "define CUDNN_PATCHLEVEL * +([0-9]+)" - CUDNN_PATCH_VERSION "${CUDNN_VERSION_FILE_CONTENTS}") - string(REGEX REPLACE "define CUDNN_PATCHLEVEL * +([0-9]+)" "\\1" - CUDNN_PATCH_VERSION "${CUDNN_PATCH_VERSION}") - set(CUDNN_VERSION ${CUDNN_MAJOR_VERSION}.${CUDNN_MINOR_VERSION}) -endif() - -if(CUDNN_MAJOR_VERSION) - ## Fixing the case where 5.1 does not fit 'exact' 5. 
- if(CUDNN_FIND_VERSION_EXACT AND NOT CUDNN_FIND_VERSION_MINOR) - if("${CUDNN_MAJOR_VERSION}" STREQUAL "${CUDNN_FIND_VERSION_MAJOR}") - set(CUDNN_VERSION ${CUDNN_FIND_VERSION}) - endif() - endif() -else() - # Try to set CUDNN version from config file - set(CUDNN_VERSION ${PC_CUDNN_CFLAGS_OTHER}) -endif() - -find_package_handle_standard_args( - CUDNN - FOUND_VAR CUDNN_FOUND - REQUIRED_VARS CUDNN_LIBRARY - VERSION_VAR CUDNN_VERSION - ) - -if(CUDNN_FOUND) - set(CUDNN_LIBRARIES ${CUDNN_LIBRARY}) - set(CUDNN_INCLUDE_DIRS ${CUDNN_INCLUDE_DIR}) - set(CUDNN_DEFINITIONS ${PC_CUDNN_CFLAGS_OTHER}) -endif() diff --git a/src/Detector/tensorrt_yolo/detect.cu b/src/Detector/tensorrt_yolo/detect.cu deleted file mode 100644 index 4d8f20e28..000000000 --- a/src/Detector/tensorrt_yolo/detect.cu +++ /dev/null @@ -1,210 +0,0 @@ -//sys -#include -#include -#include -#include -#include -#include -#include -#include -//my -#include "detect.h" - -#define NV_CUDA_CHECK(status) \ - { \ - if (status != 0) \ - { \ - std::cout << "Cuda failure: " << cudaGetErrorString(status) << " in file " << __FILE__ \ - << " at line " << __LINE__ << std::endl; \ - abort(); \ - } \ - } - -namespace nvinfer1 -{ - Detect::Detect() - { - } - - Detect::Detect(const void* data, size_t length) - { - const char *d = reinterpret_cast(data), *a = d; - read(d,_n_anchor); - read(d,_n_classes); - read(d,_n_grid_h); - read(d, _n_grid_w); - read(d, _n_output_size); - //printf("anchor:%d,classes:%d,gh:%d,gw:%d,size:%d\n", _n_anchor, _n_classes, _n_grid_h, _n_grid_w, _n_output_size); - assert(d == a + length); - } - - Detect::Detect(const uint32_t n_anchor_, const uint32_t n_classes_, - const uint32_t n_grid_h_, const uint32_t n_grid_w_/*, - const uint32_t &n_stride_h_, const uint32_t &n_stride_w_*/): - _n_anchor(n_anchor_), - _n_classes(n_classes_), - _n_grid_h(n_grid_h_), - _n_grid_w(n_grid_w_) - { - _n_output_size = (5 + _n_classes)*_n_anchor*_n_grid_h*_n_grid_w; - } - Detect::~Detect() - {} - - inline __device__ float 
sigmoidGPU(const float& x) { return 1.0f / (1.0f + __expf(-x)); } - - __global__ void gpu_detect_layer(const float *input_, - float* output_, - const uint32_t n_grid_h_, - const uint32_t n_grid_w_, - const uint32_t n_classes_, - const uint32_t n_anchor_) - { - uint32_t x_id = blockIdx.x * blockDim.x + threadIdx.x; - uint32_t y_id = blockIdx.y * blockDim.y + threadIdx.y; - uint32_t z_id = blockIdx.z * blockDim.z + threadIdx.z; - - if ((x_id >= n_grid_w_) || (y_id >= n_grid_h_) || (z_id >= n_anchor_)) - { - return; - } - // printf("grid_h:%d,grid_w:%d,class:%d,anchor:%d\n", n_grid_h_, n_grid_w_, n_classes_, n_anchor_); - const int numGridCells = n_grid_h_ * n_grid_w_; - const int bbindex = y_id * n_grid_w_ + x_id; - - output_[bbindex + numGridCells * (z_id * (5 + n_classes_) + 0)] - = 2.f * sigmoidGPU(input_[bbindex + numGridCells * (z_id * (5 + n_classes_) + 0)])-0.5f; - - output_[bbindex + numGridCells * (z_id * (5 + n_classes_) + 1)] - = 2.f * sigmoidGPU(input_[bbindex + numGridCells * (z_id * (5 + n_classes_) + 1)])-0.5f; - - float w = 2.f * sigmoidGPU(input_[bbindex + numGridCells * (z_id * (5 + n_classes_) + 2)]); - output_[bbindex + numGridCells * (z_id * (5 + n_classes_) + 2)] - = w*w; - - float h = 2.f* sigmoidGPU(input_[bbindex + numGridCells * (z_id * (5 + n_classes_) + 3)]); - output_[bbindex + numGridCells * (z_id * (5 + n_classes_) + 3)] - = h*h; - - output_[bbindex + numGridCells * (z_id * (5 + n_classes_) + 4)] - = sigmoidGPU(input_[bbindex + numGridCells * (z_id * (5 + n_classes_) + 4)]); - for (uint32_t i = 0; i < n_classes_; ++i) - { - output_[bbindex + numGridCells * (z_id * (5 + n_classes_) + (5 + i))] - = sigmoidGPU(input_[bbindex + numGridCells * (z_id * (5 + n_classes_) + (5 + i))]); - } - } - - cudaError_t cuda_detect_layer(const void* input_, - void* output_, - const uint32_t& batch_size_, - const uint32_t& grid_h_, - const uint32_t& grid_w_, - const uint32_t& n_classes_, - const uint32_t& n_anchor_, - uint64_t n_output_size_, - cudaStream_t 
stream_) - { - dim3 threads_per_block(16, 16, 4); - dim3 number_of_blocks((grid_w_ / threads_per_block.x) + 1, - (grid_h_ / threads_per_block.y) + 1, - (n_anchor_ / threads_per_block.z) + 1); - for (int batch = 0; batch < batch_size_; ++batch) - { - gpu_detect_layer << > >( - reinterpret_cast(input_) + (batch * n_output_size_), - reinterpret_cast(output_) + (batch * n_output_size_), - grid_h_, - grid_w_, - n_classes_, - n_anchor_); - } - return cudaGetLastError(); - } - - int Detect::enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, - cudaStream_t stream) - { - NV_CUDA_CHECK(cuda_detect_layer(inputs[0], outputs[0], batchSize, _n_grid_h, _n_grid_w, _n_classes, _n_anchor, _n_output_size, stream)); - return 0; - } - - size_t Detect::getSerializationSize() const - { - return sizeof(_n_anchor) + sizeof(_n_classes) + sizeof(_n_grid_h) + sizeof(_n_grid_w) - + sizeof(_n_output_size); - } - - void Detect::serialize(void *buffer) const - { - char *d = static_cast(buffer), *a = d; - write(d,_n_anchor); - write(d, _n_classes); - write(d, _n_grid_h); - write(d, _n_grid_w); - write(d, _n_output_size); - assert(d == a + getSerializationSize()); - } - - void Detect::configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) - { - - } - IPluginV2IOExt* Detect::clone() const - { - Detect *p = new Detect(_n_anchor,_n_classes,_n_grid_h,_n_grid_w); - p->setPluginNamespace(_s_plugin_namespace.c_str()); - return p; - } - - - // - PluginFieldCollection DetectPluginCreator::_fc{}; - std::vector DetectPluginCreator::_vec_plugin_attributes; - - DetectPluginCreator::DetectPluginCreator() - { - _vec_plugin_attributes.clear(); - _fc.nbFields = _vec_plugin_attributes.size(); - _fc.fields = _vec_plugin_attributes.data(); - } - - const char* DetectPluginCreator::getPluginName() const - { - return "DETECT_TRT"; - } - - const char* DetectPluginCreator::getPluginVersion() const - { - return "1.0"; - } - - const 
PluginFieldCollection* DetectPluginCreator::getFieldNames() - { - return &_fc; - } - - IPluginV2IOExt* DetectPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc) - { - Detect* obj = new Detect(); - obj->setPluginNamespace(_s_name_space.c_str()); - return obj; - } - - IPluginV2IOExt* DetectPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength) - { - Detect* obj = new Detect(serialData, serialLength); - obj->setPluginNamespace(_s_name_space.c_str()); - return obj; - } - - void DetectPluginCreator::setPluginNamespace(const char* libNamespace) - { - _s_name_space = libNamespace; - } - - const char* DetectPluginCreator::getPluginNamespace() const - { - return _s_name_space.c_str(); - } -}//end namespace nvinfer1 diff --git a/src/Detector/tensorrt_yolo/detect.h b/src/Detector/tensorrt_yolo/detect.h deleted file mode 100644 index bd88b624b..000000000 --- a/src/Detector/tensorrt_yolo/detect.h +++ /dev/null @@ -1,132 +0,0 @@ -#ifndef _DETECT_H_ -#define _DETECT_H_ - -#include -#include -#include "NvInfer.h" - -namespace nvinfer1 -{ - template - void write(char*& buffer, const T& val) - { - *reinterpret_cast(buffer) = val; - buffer += sizeof(T); - } - - template - void read(const char*& buffer, T& val) - { - val = *reinterpret_cast(buffer); - buffer += sizeof(T); - } - - class Detect :public IPluginV2IOExt - { - public: - Detect(); - Detect(const void* data, size_t length); - Detect(const uint32_t n_anchor_, const uint32_t _n_classes_, - const uint32_t n_grid_h_, const uint32_t n_grid_w_/*, - const uint32_t &n_stride_h_, const uint32_t &n_stride_w_*/); - ~Detect(); - int getNbOutputs()const override - { - return 1; - } - Dims getOutputDimensions(int /*index*/, const Dims* inputs, int /*nbInputDims*/) override - { - return inputs[0]; - } - int initialize() override - { - return 0; - } - void terminate() override - { - } - size_t getWorkspaceSize(int /*maxBatchSize*/) const override - { - return 0; - } - int 
enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream)override; - size_t getSerializationSize() const override; - void serialize(void* buffer) const override; - const char* getPluginType() const override - { - return "DETECT_TRT"; - } - const char* getPluginVersion() const override - { - return "1.0"; - } - void destroy() override - { - delete this; - } - void setPluginNamespace(const char* pluginNamespace) override - { - _s_plugin_namespace = pluginNamespace; - } - const char* getPluginNamespace() const override - { - return _s_plugin_namespace.c_str(); - } - DataType getOutputDataType(int /*index*/, const nvinfer1::DataType* /*inputTypes*/, int /*nbInputs*/) const override - { - return DataType::kFLOAT; - } - bool isOutputBroadcastAcrossBatch(int /*outputIndex*/, const bool* /*inputIsBroadcasted*/, int /*nbInputs*/) const override - { - return false; - } - bool canBroadcastInputAcrossBatch(int /*inputIndex*/) const override - { - return false; - } - void attachToContext( - cudnnContext* /*cudnnContext*/, cublasContext* /*cublasContext*/, IGpuAllocator* /*gpuAllocator*/) override - {} - void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) override; - void detachFromContext() override - {} - bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int /*nbInputs*/, int /*nbOutputs*/) const override - { - return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT; - } - IPluginV2IOExt* clone() const override; - private: - - uint32_t _n_anchor; - uint32_t _n_classes; - uint32_t _n_grid_h; - uint32_t _n_grid_w; - //uint32_t _n_stride_h; - // uint32_t _n_stride_w; - uint64_t _n_output_size; - std::string _s_plugin_namespace; - }; //end detect - - class DetectPluginCreator : public IPluginCreator - { - public: - DetectPluginCreator(); - ~DetectPluginCreator() override = default; - const char* getPluginName()const override; - 
const char* getPluginVersion() const override; - const PluginFieldCollection* getFieldNames() override; - IPluginV2IOExt* createPlugin(const char* name, const PluginFieldCollection* fc) override; - IPluginV2IOExt* deserializePlugin(const char* name, const void* serialData, size_t serialLength) override; - void setPluginNamespace(const char* libNamespace) override; - const char* getPluginNamespace() const override; - private: - std::string _s_name_space; - static PluginFieldCollection _fc; - static std::vector _vec_plugin_attributes; - };//end detect creator - -}//end namespace nvinfer1 - - - -#endif diff --git a/src/Detector/tensorrt_yolo/ds_image.cpp b/src/Detector/tensorrt_yolo/ds_image.cpp deleted file mode 100644 index f7ed6f78b..000000000 --- a/src/Detector/tensorrt_yolo/ds_image.cpp +++ /dev/null @@ -1,204 +0,0 @@ -/** -MIT License - -Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
-* -*/ -#include "ds_image.h" - -#ifdef HAVE_FILESYSTEM -#include -namespace fs = std::filesystem; -#else -#include -namespace fs = std::experimental::filesystem; -#endif - -DsImage::DsImage() : - m_Height(0), - m_Width(0), - m_XOffset(0), - m_YOffset(0), - m_ScalingFactor(0.0), - m_RNG(cv::RNG(unsigned(std::time(0)))), - m_ImageName() -{ -} - -DsImage::DsImage(const cv::Mat& mat_image_, const int& inputH, const int& inputW) : - m_Height(0), - m_Width(0), - m_XOffset(0), - m_YOffset(0), - m_ScalingFactor(0.0), - m_RNG(cv::RNG(unsigned(std::time(0)))), - m_ImageName() -{ - m_OrigImage = mat_image_; - - if (!m_OrigImage.data || m_OrigImage.cols <= 0 || m_OrigImage.rows <= 0) - { - std::cout << "empty image !"<< std::endl; - assert(0); - } - if (m_OrigImage.channels() != 3) - { - std::cout << "Non RGB images are not supported "<< std::endl; - assert(0); - } - -// m_OrigImage.copyTo(m_MarkedImage); - m_Height = m_OrigImage.rows; - m_Width = m_OrigImage.cols; - - //// resize the DsImage with scale - //float dim = std::max(m_Height, m_Width); - //int resizeH = ((m_Height / dim) * inputH); - //int resizeW = ((m_Width / dim) * inputW); - //m_ScalingFactor = static_cast(resizeH) / static_cast(m_Height); - - //// Additional checks for images with non even dims - //if ((inputW - resizeW) % 2) resizeW--; - //if ((inputH - resizeH) % 2) resizeH--; - //assert((inputW - resizeW) % 2 == 0); - //assert((inputH - resizeH) % 2 == 0); - - //m_XOffset = (inputW - resizeW) / 2; - //m_YOffset = (inputH - resizeH) / 2; - - //assert(2 * m_XOffset + resizeW == inputW); - //assert(2 * m_YOffset + resizeH == inputH); - - // resizing - cv::resize(mat_image_, m_LetterboxImage, cv::Size(inputW, inputH), 0, 0, cv::INTER_LINEAR); - // letterboxing - /*cv::copyMakeBorder(m_LetterboxImage, m_LetterboxImage, m_YOffset, m_YOffset, m_XOffset, - m_XOffset, cv::BORDER_CONSTANT, cv::Scalar(128, 128, 128));*/ - // converting to RGB - //cv::cvtColor(m_LetterboxImage, m_LetterboxImage, cv::COLOR_BGR2RGB); -} 
-DsImage::DsImage(const std::string& path, const int& inputH, const int& inputW) : - m_Height(0), - m_Width(0), - m_XOffset(0), - m_YOffset(0), - m_ScalingFactor(0.0), - m_RNG(cv::RNG(unsigned(std::time(0)))), - m_ImageName() -{ - m_ImageName = fs::path(path).stem().string(); - m_OrigImage = cv::imread(path, cv::IMREAD_UNCHANGED); - - if (!m_OrigImage.data || m_OrigImage.cols <= 0 || m_OrigImage.rows <= 0) - { - std::cout << "Unable to open image : " << path << std::endl; - assert(0); - } - - if (m_OrigImage.channels() != 3) - { - std::cout << "Non RGB images are not supported : " << path << std::endl; - assert(0); - } - - m_OrigImage.copyTo(m_MarkedImage); - m_Height = m_OrigImage.rows; - m_Width = m_OrigImage.cols; - - // resize the DsImage with scale - int dim = std::max(m_Height, m_Width); - int resizeH = (m_Height * inputH) / dim; - int resizeW = (m_Width * inputW) / dim; - m_ScalingFactor = static_cast(resizeH) / static_cast(m_Height); - - // Additional checks for images with non even dims - if ((inputW - resizeW) % 2) resizeW--; - if ((inputH - resizeH) % 2) resizeH--; - assert((inputW - resizeW) % 2 == 0); - assert((inputH - resizeH) % 2 == 0); - - m_XOffset = (inputW - resizeW) / 2; - m_YOffset = (inputH - resizeH) / 2; - - assert(2 * m_XOffset + resizeW == inputW); - assert(2 * m_YOffset + resizeH == inputH); - - // resizing - cv::resize(m_OrigImage, m_LetterboxImage, cv::Size(inputW, inputH), 0, 0, cv::INTER_CUBIC); - // letterboxing - /*cv::copyMakeBorder(m_LetterboxImage, m_LetterboxImage, m_YOffset, m_YOffset, m_XOffset, - m_XOffset, cv::BORDER_CONSTANT, cv::Scalar(128, 128, 128));*/ - // converting to RGB - cv::cvtColor(m_LetterboxImage, m_LetterboxImage,cv::COLOR_BGR2RGB); -} - -void DsImage::addBBox(BBoxInfo box, const std::string& labelName) -{ - m_Bboxes.push_back(box); - const int x = cvRound(box.box.x1); - const int y = cvRound(box.box.y1); - const int w = cvRound(box.box.x2 - box.box.x1); - const int h = cvRound(box.box.y2 - box.box.y1); - 
const cv::Scalar color - = cv::Scalar(m_RNG.uniform(0, 255), m_RNG.uniform(0, 255), m_RNG.uniform(0, 255)); - - cv::rectangle(m_MarkedImage, cv::Rect(x, y, w, h), color, 1); - const cv::Size tsize - = cv::getTextSize(labelName, cv::FONT_HERSHEY_COMPLEX_SMALL, 0.5, 1, nullptr); - cv::rectangle(m_MarkedImage, cv::Rect(x, y, tsize.width + 3, tsize.height + 4), color, -1); - cv::putText(m_MarkedImage, labelName.c_str(), cv::Point(x, y + tsize.height), - cv::FONT_HERSHEY_COMPLEX_SMALL, 0.5, cv::Scalar(255, 255, 255), 1); -} - -void DsImage::showImage() const -{ - cv::namedWindow(m_ImageName); - cv::imshow(m_ImageName.c_str(), m_MarkedImage); - cv::waitKey(0); -} - -void DsImage::saveImageJPEG(const std::string& dirPath) const -{ - cv::imwrite(dirPath + m_ImageName + ".jpeg", m_MarkedImage); -} -std::string DsImage::exportJson() const -{ - if (m_Bboxes.size() == 0) return ""; - std::stringstream json; - json.precision(2); - json << std::fixed; - for (uint32_t i = 0; i < m_Bboxes.size(); ++i) - { - json << "\n{\n"; - json << " \"image_id\" : " << std::stoi(m_ImageName) << ",\n"; - json << " \"category_id\" : " << m_Bboxes.at(i).classId << ",\n"; - json << " \"bbox\" : "; - json << "[" << m_Bboxes.at(i).box.x1 << ", " << m_Bboxes.at(i).box.y1 << ", "; - json << m_Bboxes.at(i).box.x2 - m_Bboxes.at(i).box.x1 << ", " - << m_Bboxes.at(i).box.y2 - m_Bboxes.at(i).box.y1 << "],\n"; - json << " \"score\" : " << m_Bboxes.at(i).prob << "\n"; - if (i != m_Bboxes.size() - 1) - json << "},"; - else - json << "}"; - } - return json.str(); -} diff --git a/src/Detector/tensorrt_yolo/ds_image.h b/src/Detector/tensorrt_yolo/ds_image.h deleted file mode 100644 index 268d202c0..000000000 --- a/src/Detector/tensorrt_yolo/ds_image.h +++ /dev/null @@ -1,67 +0,0 @@ -/** -MIT License - -Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
-* -*/ -#ifndef __IMAGE_H__ -#define __IMAGE_H__ - -#include "trt_utils.h" - -struct BBoxInfo; - -class DsImage -{ -public: - DsImage(); - DsImage(const std::string& path, const int& inputH, const int& inputW); - DsImage(const cv::Mat& mat_image_, const int& inputH, const int& inputW); - int getImageHeight() const { return m_Height; } - int getImageWidth() const { return m_Width; } - cv::Mat getLetterBoxedImage() const { return m_LetterboxImage; } - cv::Mat getOriginalImage() const { return m_OrigImage; } - std::string getImageName() const { return m_ImageName; } - void addBBox(BBoxInfo box, const std::string& labelName); - void showImage() const; - void saveImageJPEG(const std::string& dirPath) const; - std::string exportJson() const; - -private: - int m_Height; - int m_Width; - int m_XOffset; - int m_YOffset; - float m_ScalingFactor; - std::string m_ImagePath; - cv::RNG m_RNG; - std::string m_ImageName; - std::vector m_Bboxes; - - // unaltered original Image - cv::Mat m_OrigImage; - // letterboxed Image given to the network as input - cv::Mat m_LetterboxImage; - // final image marked with the bounding boxes - cv::Mat m_MarkedImage; -}; - -#endif diff --git a/src/Detector/tensorrt_yolo/hardswish.cu b/src/Detector/tensorrt_yolo/hardswish.cu deleted file mode 100644 index 088942fd4..000000000 --- a/src/Detector/tensorrt_yolo/hardswish.cu +++ /dev/null @@ -1,162 +0,0 @@ -//sys -#include -#include -#include -#include -#include -#include -#include -#include -//my -#include "hardswish.h" - -#define NV_CUDA_CHECK(status) \ - { \ - if (status != 0) \ - { \ - std::cout << "Cuda failure: " << cudaGetErrorString(status) << " in file " << __FILE__ \ - << " at line " << __LINE__ << std::endl; \ - abort(); \ - } \ - } - -namespace nvinfer1 -{ - Hardswish::Hardswish() - { - cudaDeviceProp prop; - cudaGetDeviceProperties(&prop, 0); - _n_max_thread_pre_block = prop.maxThreadsPerBlock; - } - - Hardswish::Hardswish(const void* data, size_t length) - { - const char *d = 
reinterpret_cast(data), *a = d; - r(d, _n_max_thread_pre_block); - r(d, _n_output_size); - assert(d == a + length); - } - - Hardswish::~Hardswish() - {} - - __global__ void kernel_hardswish(const float *input_, float *output_, int n_data_size_) - { - int i = threadIdx.x + blockIdx.x * blockDim.x; - if (i >= n_data_size_)return; - if (input_[i] >= 3.0f) - { - output_[i] = input_[i]; - } - else if (input_[i] <= -3.0f) - { - output_[i] = 0.0f; - } - else - { - output_[i] = input_[i] * (input_[i] + 3.0f) / 6.0f; - } - } - - cudaError_t cuda_hardswish_layer(const void* input_, - void* output_, - const int n_batch_size_, - const int n_output_size_, - const int threads_, - cudaStream_t stream_) - { - int n_data_size = n_batch_size_ * n_output_size_; - kernel_hardswish << <(n_data_size + threads_ -1)/threads_, threads_ >> >( - reinterpret_cast(input_), - reinterpret_cast(output_), - n_data_size); - return cudaGetLastError(); - } - - int Hardswish::enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, - cudaStream_t stream) - { - //printf("batch_size:%d,output_size:%d,threads:%d\n", batchSize, _n_output_size, _n_max_thread_pre_block); - NV_CUDA_CHECK(cuda_hardswish_layer(inputs[0], outputs[0], batchSize, _n_output_size , _n_max_thread_pre_block,stream)); - return 0; - } - - size_t Hardswish::getSerializationSize() const - { - return sizeof(_n_max_thread_pre_block) +sizeof(_n_output_size); - } - - void Hardswish::serialize(void *buffer) const - { - char *d = static_cast(buffer), *a = d; - w(d, _n_max_thread_pre_block); - w(d, _n_output_size); - assert(d == a + getSerializationSize()); - } - - void Hardswish::configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) - { - - _n_output_size = in->dims.d[0] * in->dims.d[1] * in->dims.d[2]; - // printf("output_size:%d,threads:%d\n", _n_output_size, _n_max_thread_pre_block); - } - IPluginV2IOExt* Hardswish::clone() const - { - Hardswish *p = new Hardswish(); - 
p->setPluginNamespace(_s_plugin_namespace.c_str()); - p->_n_max_thread_pre_block = _n_max_thread_pre_block; - p->_n_output_size = _n_output_size; - return p; - } - - - // - PluginFieldCollection HardswishPluginCreator::_fc{}; - std::vector HardswishPluginCreator::_vec_plugin_attributes; - - HardswishPluginCreator::HardswishPluginCreator() - { - _vec_plugin_attributes.clear(); - _fc.nbFields = _vec_plugin_attributes.size(); - _fc.fields = _vec_plugin_attributes.data(); - } - - const char* HardswishPluginCreator::getPluginName() const - { - return "HARDSWISH_TRT"; - } - - const char* HardswishPluginCreator::getPluginVersion() const - { - return "1.0"; - } - - const PluginFieldCollection* HardswishPluginCreator::getFieldNames() - { - return &_fc; - } - - IPluginV2IOExt* HardswishPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc) - { - Hardswish* obj = new Hardswish(); - obj->setPluginNamespace(_s_name_space.c_str()); - return obj; - } - - IPluginV2IOExt* HardswishPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength) - { - Hardswish* obj = new Hardswish(serialData, serialLength); - obj->setPluginNamespace(_s_name_space.c_str()); - return obj; - } - - void HardswishPluginCreator::setPluginNamespace(const char* libNamespace) - { - _s_name_space = libNamespace; - } - - const char* HardswishPluginCreator::getPluginNamespace() const - { - return _s_name_space.c_str(); - } -}//end namespace nvinfer1 diff --git a/src/Detector/tensorrt_yolo/hardswish.h b/src/Detector/tensorrt_yolo/hardswish.h deleted file mode 100644 index 4eb9feafb..000000000 --- a/src/Detector/tensorrt_yolo/hardswish.h +++ /dev/null @@ -1,125 +0,0 @@ - -#ifndef _HARDSWISH_H_ -#define _HARDSWISH_H_ - -#include -#include -#include "NvInfer.h" - -namespace nvinfer1 -{ - template - void w(char*& buffer, const T& val) - { - *reinterpret_cast(buffer) = val; - buffer += sizeof(T); - } - - template - void r(const char*& buffer, T& val) - { - val = 
*reinterpret_cast(buffer); - buffer += sizeof(T); - } - - class Hardswish :public IPluginV2IOExt - { - public: - Hardswish(); - Hardswish(const void* data, size_t length); - ~Hardswish(); - int getNbOutputs()const override - { - return 1; - } - Dims getOutputDimensions(int /*index*/, const Dims* inputs, int /*nbInputDims*/) override - { - return inputs[0]; - } - int initialize() override - { - return 0; - } - void terminate() override - { - } - size_t getWorkspaceSize(int /*maxBatchSize*/) const override - { - return 0; - } - int enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream)override; - size_t getSerializationSize() const override; - void serialize(void* buffer) const override; - const char* getPluginType() const override - { - return "HARDSWISH_TRT"; - } - const char* getPluginVersion() const override - { - return "1.0"; - } - void destroy() override - { - delete this; - } - void setPluginNamespace(const char* pluginNamespace) override - { - _s_plugin_namespace = pluginNamespace; - } - const char* getPluginNamespace() const override - { - return _s_plugin_namespace.c_str(); - } - DataType getOutputDataType(int /*index*/, const nvinfer1::DataType* /*inputTypes*/, int /*nbInputs*/) const override - { - return DataType::kFLOAT; - } - bool isOutputBroadcastAcrossBatch(int /*outputIndex*/, const bool* /*inputIsBroadcasted*/, int /*nbInputs*/) const override - { - return false; - } - bool canBroadcastInputAcrossBatch(int /*inputIndex*/) const override - { - return false; - } - void attachToContext( - cudnnContext* /*cudnnContext*/, cublasContext* /*cublasContext*/, IGpuAllocator* /*gpuAllocator*/) override - {} - void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) override; - void detachFromContext() override - {} - bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int /*nbInputs*/, int /*nbOutputs*/) const override - { - return 
inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT; - } - IPluginV2IOExt* clone() const override; - private: - - uint32_t _n_max_thread_pre_block; - uint32_t _n_output_size; - std::string _s_plugin_namespace; - }; //end detect - - class HardswishPluginCreator : public IPluginCreator - { - public: - HardswishPluginCreator(); - ~HardswishPluginCreator() override = default; - const char* getPluginName()const override; - const char* getPluginVersion() const override; - const PluginFieldCollection* getFieldNames() override; - IPluginV2IOExt* createPlugin(const char* name, const PluginFieldCollection* fc) override; - IPluginV2IOExt* deserializePlugin(const char* name, const void* serialData, size_t serialLength) override; - void setPluginNamespace(const char* libNamespace) override; - const char* getPluginNamespace() const override; - private: - std::string _s_name_space; - static PluginFieldCollection _fc; - static std::vector _vec_plugin_attributes; - };//end detect creator - -}//end namespace nvinfer1 - - - -#endif diff --git a/src/Detector/tensorrt_yolo/kernel.cu b/src/Detector/tensorrt_yolo/kernel.cu deleted file mode 100644 index 81e7f3853..000000000 --- a/src/Detector/tensorrt_yolo/kernel.cu +++ /dev/null @@ -1,65 +0,0 @@ - -#include -#include -#include -#include -#include - -inline __device__ float sigmoidGPU(const float& x) { return 1.0f / (1.0f + __expf(-x)); } - -__global__ void gpuYoloLayerV3(const float* input, float* output, const uint32_t grid_h_, - const uint32_t grid_w_, const uint32_t numOutputClasses, - const uint32_t numBBoxes) -{ - uint32_t x_id = blockIdx.x * blockDim.x + threadIdx.x; - uint32_t y_id = blockIdx.y * blockDim.y + threadIdx.y; - uint32_t z_id = blockIdx.z * blockDim.z + threadIdx.z; - - if ((x_id >= grid_w_) || (y_id >= grid_h_) || (z_id >= numBBoxes)) - { - return; - } - - const int numGridCells = grid_h_ * grid_w_; - const int bbindex = y_id * grid_w_ + x_id; - - output[bbindex + numGridCells * (z_id * 
(5 + numOutputClasses) + 0)] - = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 0)]); - - output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)] - = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 1)]); - - output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)] - = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 2)]); - - output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)] - = __expf(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 3)]); - - output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)] - = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + 4)]); - - for (uint32_t i = 0; i < numOutputClasses; ++i) - { - output[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))] - = sigmoidGPU(input[bbindex + numGridCells * (z_id * (5 + numOutputClasses) + (5 + i))]); - } -} - -cudaError_t cudaYoloLayerV3(const void* input, void* output, const uint32_t& batchSize, - const uint32_t& n_grid_h_,const uint32_t& n_grid_w_, - const uint32_t& numOutputClasses, const uint32_t& numBBoxes, - uint64_t outputSize, cudaStream_t stream) -{ - dim3 threads_per_block(16, 16, 4); - dim3 number_of_blocks((n_grid_w_ / threads_per_block.x) + 1, - (n_grid_h_ / threads_per_block.y) + 1, - (numBBoxes / threads_per_block.z) + 1); - for (int batch = 0; batch < batchSize; ++batch) - { - gpuYoloLayerV3<<>>( - reinterpret_cast(input) + (batch * outputSize), - reinterpret_cast(output) + (batch * outputSize), n_grid_h_, n_grid_w_, numOutputClasses, - numBBoxes); - } - return cudaGetLastError(); -} diff --git a/src/Detector/tensorrt_yolo/mish.cu b/src/Detector/tensorrt_yolo/mish.cu deleted file mode 100644 index d05f609a5..000000000 --- a/src/Detector/tensorrt_yolo/mish.cu +++ /dev/null @@ -1,196 +0,0 @@ -#include -#include -#include -#include -#include "mish.h" - -namespace nvinfer1 -{ - MishPlugin::MishPlugin() - { 
- } - - MishPlugin::~MishPlugin() - { - } - - // create the plugin at runtime from a byte stream - MishPlugin::MishPlugin(const void* data, size_t length) - { - assert(length == sizeof(input_size_)); - input_size_ = *reinterpret_cast(data); - } - - void MishPlugin::serialize(void* buffer) const - { - *reinterpret_cast(buffer) = input_size_; - } - - size_t MishPlugin::getSerializationSize() const - { - return sizeof(input_size_); - } - - int MishPlugin::initialize() - { - return 0; - } - - Dims MishPlugin::getOutputDimensions(int index, const Dims* inputs, int nbInputDims) - { - assert(nbInputDims == 1); - assert(index == 0); - input_size_ = inputs[0].d[0] * inputs[0].d[1] * inputs[0].d[2]; - // Output dimensions - return Dims3(inputs[0].d[0], inputs[0].d[1], inputs[0].d[2]); - } - - // Set plugin namespace - void MishPlugin::setPluginNamespace(const char* pluginNamespace) - { - mPluginNamespace = pluginNamespace; - } - - const char* MishPlugin::getPluginNamespace() const - { - return mPluginNamespace; - } - - // Return the DataType of the plugin output at the requested index - DataType MishPlugin::getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const - { - return DataType::kFLOAT; - } - - // Return true if output tensor is broadcast across a batch. - bool MishPlugin::isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const - { - return false; - } - - // Return true if plugin can use input that is broadcast across batch without replication. - bool MishPlugin::canBroadcastInputAcrossBatch(int inputIndex) const - { - return false; - } - - void MishPlugin::configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) - { - } - - // Attach the plugin object to an execution context and grant the plugin the access to some context resource. 
- void MishPlugin::attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) - { - } - - // Detach the plugin object from its execution context. - void MishPlugin::detachFromContext() {} - - const char* MishPlugin::getPluginType() const - { - return "Mish_TRT"; - } - - const char* MishPlugin::getPluginVersion() const - { - return "1"; - } - - void MishPlugin::destroy() - { - delete this; - } - - // Clone the plugin - IPluginV2IOExt* MishPlugin::clone() const - { - MishPlugin *p = new MishPlugin(); - p->input_size_ = input_size_; - p->setPluginNamespace(mPluginNamespace); - return p; - } - - __device__ float tanh_activate_kernel(float x){return (2/(1 + expf(-2*x)) - 1);} - - __device__ float softplus_kernel(float x, float threshold = 20) { - if (x > threshold) return x; // too large - else if (x < -threshold) return expf(x); // too small - return logf(expf(x) + 1); - } - - __global__ void mish_kernel(const float *input, float *output, int num_elem) { - - int idx = threadIdx.x + blockDim.x * blockIdx.x; - if (idx >= num_elem) return; - - //float t = exp(input[idx]); - //if (input[idx] > 20.0) { - // t *= t; - // output[idx] = (t - 1.0) / (t + 1.0); - //} else { - // float tt = t * t; - // output[idx] = (tt + 2.0 * t) / (tt + 2.0 * t + 2.0); - //} - //output[idx] *= input[idx]; - output[idx] = input[idx] * tanh_activate_kernel(softplus_kernel(input[idx])); - } - - void MishPlugin::forwardGpu(const float *const * inputs, float* output, cudaStream_t stream, int batchSize) { - int block_size = thread_count_; - int grid_size = (input_size_ * batchSize + block_size - 1) / block_size; - mish_kernel<<>>(inputs[0], output, input_size_ * batchSize); - } - - int MishPlugin::enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream) - { - //assert(batchSize == 1); - //GPU - //CUDA_CHECK(cudaStreamSynchronize(stream)); - forwardGpu((const float *const *)inputs, (float*)outputs[0], stream, 
batchSize); - return 0; - } - - PluginFieldCollection MishPluginCreator::mFC{}; - std::vector MishPluginCreator::mPluginAttributes; - - MishPluginCreator::MishPluginCreator() - { - mPluginAttributes.clear(); - - mFC.nbFields = mPluginAttributes.size(); - mFC.fields = mPluginAttributes.data(); - } - - const char* MishPluginCreator::getPluginName() const - { - return "Mish_TRT"; - } - - const char* MishPluginCreator::getPluginVersion() const - { - return "1"; - } - - const PluginFieldCollection* MishPluginCreator::getFieldNames() - { - return &mFC; - } - - IPluginV2IOExt* MishPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc) - { - MishPlugin* obj = new MishPlugin(); - obj->setPluginNamespace(mNamespace.c_str()); - return obj; - } - - IPluginV2IOExt* MishPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength) - { - // This object will be deleted when the network is destroyed, which will - // call MishPlugin::destroy() - MishPlugin* obj = new MishPlugin(serialData, serialLength); - obj->setPluginNamespace(mNamespace.c_str()); - return obj; - } - -} - diff --git a/src/Detector/tensorrt_yolo/mish.h b/src/Detector/tensorrt_yolo/mish.h deleted file mode 100644 index cc1ef68d7..000000000 --- a/src/Detector/tensorrt_yolo/mish.h +++ /dev/null @@ -1,108 +0,0 @@ -#ifndef _MISH_PLUGIN_H -#define _MISH_PLUGIN_H - -#include -#include -#include "NvInfer.h" - - -//https://github.com/wang-xinyu/tensorrtx -namespace nvinfer1 -{ - class MishPlugin: public IPluginV2IOExt - { - public: - explicit MishPlugin(); - MishPlugin(const void* data, size_t length); - - ~MishPlugin(); - - int getNbOutputs() const override - { - return 1; - } - - Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override; - - int initialize() override; - - virtual void terminate() override {} - - virtual size_t getWorkspaceSize(int /*maxBatchSize*/) const override { return 0;} - - virtual int enqueue(int batchSize, const 
void*const * inputs, void** outputs, void* workspace, cudaStream_t stream) override; - - virtual size_t getSerializationSize() const override; - - virtual void serialize(void* buffer) const override; - - bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int /*nbInputs*/, int /*nbOutputs*/) const override { - return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT; - } - - const char* getPluginType() const override; - - const char* getPluginVersion() const override; - - void destroy() override; - - IPluginV2IOExt* clone() const override; - - void setPluginNamespace(const char* pluginNamespace) override; - - const char* getPluginNamespace() const override; - - DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const override; - - bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const override; - - bool canBroadcastInputAcrossBatch(int inputIndex) const override; - - void attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) override; - - void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) override; - - void detachFromContext() override; - - int input_size_; - private: - void forwardGpu(const float *const * inputs, float* output, cudaStream_t stream, int batchSize = 1); - int thread_count_ = 256; - const char* mPluginNamespace; - }; - - class MishPluginCreator : public IPluginCreator - { - public: - MishPluginCreator(); - - ~MishPluginCreator() override = default; - - const char* getPluginName() const override; - - const char* getPluginVersion() const override; - - const PluginFieldCollection* getFieldNames() override; - - IPluginV2IOExt* createPlugin(const char* name, const PluginFieldCollection* fc) override; - - IPluginV2IOExt* deserializePlugin(const char* name, const void* serialData, size_t serialLength) override; - - 
void setPluginNamespace(const char* libNamespace) override - { - mNamespace = libNamespace; - } - - const char* getPluginNamespace() const override - { - return mNamespace.c_str(); - } - - private: - std::string mNamespace; - static PluginFieldCollection mFC; - static std::vector mPluginAttributes; - }; -} -#endif diff --git a/src/Detector/tensorrt_yolo/plugin_factory.cpp b/src/Detector/tensorrt_yolo/plugin_factory.cpp deleted file mode 100644 index 0dba7a951..000000000 --- a/src/Detector/tensorrt_yolo/plugin_factory.cpp +++ /dev/null @@ -1,176 +0,0 @@ -/** -MIT License - -Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
-* -*/ - -#include "plugin_factory.h" -#include "trt_utils.h" - -PluginFactory::PluginFactory() : m_ReorgLayer{nullptr}, m_RegionLayer{nullptr} -{ - for (int i = 0; i < m_MaxLeakyLayers; ++i) m_LeakyReLULayers[i] = nullptr; -} - -nvinfer1::IPlugin* PluginFactory::createPlugin(const char* layerName, const void* serialData, - size_t serialLength) -{ - assert(isPlugin(layerName)); - if (std::string(layerName).find("leaky") != std::string::npos) - { - assert(m_LeakyReLUCount >= 0 && m_LeakyReLUCount <= m_MaxLeakyLayers); - assert(m_LeakyReLULayers[m_LeakyReLUCount] == nullptr); - /*m_LeakyReLULayers[m_LeakyReLUCount] - = unique_ptr_INvPlugin(nvinfer1::plugin::createPReLUPlugin(serialData, serialLength));*/ - ++m_LeakyReLUCount; - return m_LeakyReLULayers[m_LeakyReLUCount - 1].get(); - } - else if (std::string(layerName).find("reorg") != std::string::npos) - { - assert(m_ReorgLayer == nullptr); - /*m_ReorgLayer = unique_ptr_INvPlugin( - nvinfer1::plugin::createYOLOReorgPlugin(serialData, serialLength));*/ - return m_ReorgLayer.get(); - } - else if (std::string(layerName).find("region") != std::string::npos) - { - assert(m_RegionLayer == nullptr); - /*m_RegionLayer = unique_ptr_INvPlugin( - nvinfer1::plugin::createYOLORegionPlugin(serialData, serialLength));*/ - return m_RegionLayer.get(); - } - else if (std::string(layerName).find("yolo") != std::string::npos) - { - assert(m_YoloLayerCount >= 0 && m_YoloLayerCount < m_MaxYoloLayers); - assert(m_YoloLayers[m_YoloLayerCount] == nullptr); - m_YoloLayers[m_YoloLayerCount] - = unique_ptr_IPlugin(new YoloLayerV3(serialData, serialLength)); - ++m_YoloLayerCount; - return m_YoloLayers[m_YoloLayerCount - 1].get(); - } - else - { - std::cerr << "ERROR: Unrecognised layer : " << layerName << std::endl; - assert(0); - return nullptr; - } -} - -bool PluginFactory::isPlugin(const char* name) -{ - return ((std::string(name).find("leaky") != std::string::npos) - || (std::string(name).find("reorg") != std::string::npos) - || 
(std::string(name).find("region") != std::string::npos) - || (std::string(name).find("yolo") != std::string::npos)); -} - -void PluginFactory::destroy() -{ - m_ReorgLayer.reset(); - m_RegionLayer.reset(); - - for (int i = 0; i < m_MaxLeakyLayers; ++i) - { - m_LeakyReLULayers[i].reset(); - } - - for (int i = 0; i < m_MaxYoloLayers; ++i) - { - m_YoloLayers[i].reset(); - } - - m_LeakyReLUCount = 0; - m_YoloLayerCount = 0; -} - -/******* Yolo Layer V3 *******/ -/*****************************/ -YoloLayerV3::YoloLayerV3(const void* data, size_t length) -{ - const char *d = static_cast(data), *a = d; - read(d, m_NumBoxes); - read(d, m_NumClasses); - read(d,_n_grid_h); - read(d,_n_grid_w); - read(d, m_OutputSize); - assert(d = a + length); -} - -YoloLayerV3::YoloLayerV3(const uint32_t& numBoxes, const uint32_t& numClasses, const uint32_t& grid_h_,const uint32_t &grid_w_): - m_NumBoxes(numBoxes), - m_NumClasses(numClasses), - _n_grid_h(grid_h_), - _n_grid_w(grid_w_) -{ - assert(m_NumBoxes > 0); - assert(m_NumClasses > 0); - assert(_n_grid_h > 0); - assert(_n_grid_w > 0); - m_OutputSize = _n_grid_h * _n_grid_w * (m_NumBoxes * (4 + 1 + m_NumClasses)); -} - -int YoloLayerV3::getNbOutputs() const { return 1; } - -nvinfer1::Dims YoloLayerV3::getOutputDimensions(int index, const nvinfer1::Dims* inputs, - int nbInputDims) -{ - assert(index == 0); - assert(nbInputDims == 1); - return inputs[0]; -} - -void YoloLayerV3::configure(const nvinfer1::Dims* inputDims, int nbInputs, - const nvinfer1::Dims* /*outputDims*/, int /*nbOutputs*/, int /*maxBatchSize*/) -{ - assert(nbInputs == 1); - assert(inputDims != nullptr); -} - -int YoloLayerV3::initialize() { return 0; } - -void YoloLayerV3::terminate() {} - -size_t YoloLayerV3::getWorkspaceSize(int maxBatchSize) const { return 0; } - -int YoloLayerV3::enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, - cudaStream_t stream) -{ - NV_CUDA_CHECK(cudaYoloLayerV3(inputs[0], outputs[0], 
batchSize,_n_grid_h,_n_grid_w, m_NumClasses, - m_NumBoxes, m_OutputSize, stream)); - return 0; -} - -size_t YoloLayerV3::getSerializationSize() -{ - return sizeof(m_NumBoxes) + sizeof(m_NumClasses) + sizeof(_n_grid_w)+sizeof(_n_grid_h) + sizeof(m_OutputSize); -} - -void YoloLayerV3::serialize(void* buffer) -{ - char *d = static_cast(buffer), *a = d; - write(d, m_NumBoxes); - write(d, m_NumClasses); - write(d,_n_grid_h); - write(d,_n_grid_w); - write(d, m_OutputSize); - assert(d == a + getSerializationSize()); -} diff --git a/src/Detector/tensorrt_yolo/plugin_factory.h b/src/Detector/tensorrt_yolo/plugin_factory.h deleted file mode 100644 index 68fb34902..000000000 --- a/src/Detector/tensorrt_yolo/plugin_factory.h +++ /dev/null @@ -1,144 +0,0 @@ -/** -MIT License - -Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
-* -*/ - -#ifndef __PLUGIN_LAYER_H__ -#define __PLUGIN_LAYER_H__ - -#include -#include -#include -#include -#include - -#include "NvInferPlugin.h" - -#define NV_CUDA_CHECK(status) \ - { \ - if (status != 0) \ - { \ - std::cout << "Cuda failure: " << cudaGetErrorString(status) << " in file " << __FILE__ \ - << " at line " << __LINE__ << std::endl; \ - abort(); \ - } \ - } - -// Forward declaration of cuda kernels -cudaError_t cudaYoloLayerV3(const void* input, void* output, const uint32_t& batchSize, - const uint32_t& n_grid_h_, const uint32_t& n_grid_w_, - const uint32_t& numOutputClasses, const uint32_t& numBBoxes, - uint64_t outputSize, cudaStream_t stream); - -class PluginFactory : public nvinfer1::IPluginFactory -{ - -public: - PluginFactory(); - nvinfer1::IPlugin* createPlugin(const char* layerName, const void* serialData, - size_t serialLength) override; - bool isPlugin(const char* name); - void destroy(); - -private: - static const int m_MaxLeakyLayers = 72; - static const int m_ReorgStride = 2; - static constexpr float m_LeakyNegSlope = 0.1f; - static const int m_NumBoxes = 5; - static const int m_NumCoords = 4; - static const int m_NumClasses = 80; - static const int m_MaxYoloLayers = 3; - int m_LeakyReLUCount = 0; - int m_YoloLayerCount = 0; - // nvinfer1::plugin::RegionParameters m_RegionParameters{m_NumBoxes, m_NumCoords, m_NumClasses, nullptr}; - - struct INvPluginDeleter - { - void operator()(nvinfer1::plugin::INvPlugin* ptr) - { - if (ptr) - { - ptr->destroy(); - } - } - }; - struct IPluginDeleter - { - void operator()(nvinfer1::IPlugin* ptr) - { - if (ptr) - { - ptr->terminate(); - } - } - }; - typedef std::unique_ptr unique_ptr_INvPlugin; - typedef std::unique_ptr unique_ptr_IPlugin; - - unique_ptr_INvPlugin m_ReorgLayer; - unique_ptr_INvPlugin m_RegionLayer; - unique_ptr_INvPlugin m_LeakyReLULayers[m_MaxLeakyLayers]; - unique_ptr_IPlugin m_YoloLayers[m_MaxYoloLayers]; -}; - -class YoloLayerV3 : public nvinfer1::IPlugin -{ -public: - 
YoloLayerV3(const void* data, size_t length); - YoloLayerV3(const uint32_t& numBoxes, const uint32_t& numClasses, const uint32_t& grid_h_,const uint32_t &grid_w_); - int getNbOutputs() const override; - nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, - int nbInputDims) override; - void configure(const nvinfer1::Dims* inputDims, int nbInputs, const nvinfer1::Dims* outputDims, - int nbOutputs, int maxBatchSize) override; - int initialize() override; - void terminate() override; - size_t getWorkspaceSize(int maxBatchSize) const override; - int enqueue(int batchSize, const void* const* intputs, void** outputs, void* workspace, - cudaStream_t stream) override; - size_t getSerializationSize() override; - void serialize(void* buffer) override; - -private: - template - void write(char*& buffer, const T& val) - { - *reinterpret_cast(buffer) = val; - buffer += sizeof(T); - } - - template - void read(const char*& buffer, T& val) - { - val = *reinterpret_cast(buffer); - buffer += sizeof(T); - } - uint32_t m_NumBoxes; - uint32_t m_NumClasses; - uint32_t m_GridSize; - uint64_t m_OutputSize; - uint32_t _n_grid_h; - uint32_t _n_grid_w; -}; - -#endif // __PLUGIN_LAYER_H__ diff --git a/src/Detector/tensorrt_yolo/trt_utils.cpp b/src/Detector/tensorrt_yolo/trt_utils.cpp deleted file mode 100644 index 484cfbf44..000000000 --- a/src/Detector/tensorrt_yolo/trt_utils.cpp +++ /dev/null @@ -1,1304 +0,0 @@ -/** -MIT License - -Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
-* -*/ - -#include "trt_utils.h" -#include - -#ifdef HAVE_FILESYSTEM -#include -namespace fs = std::filesystem; -#else -#include -namespace fs = std::experimental::filesystem; -#endif - -#include -#include -using namespace nvinfer1; -REGISTER_TENSORRT_PLUGIN(MishPluginCreator); -REGISTER_TENSORRT_PLUGIN(ChunkPluginCreator); -REGISTER_TENSORRT_PLUGIN(HardswishPluginCreator); - -cv::Mat blobFromDsImages(const std::vector& inputImages, - const int& inputH, - const int& inputW) -{ - std::vector letterboxStack; - letterboxStack.reserve(inputImages.size()); - for (uint32_t i = 0; i < inputImages.size(); ++i) - { - letterboxStack.emplace_back(inputImages[i].getLetterBoxedImage()); - } - return cv::dnn::blobFromImages(letterboxStack, 1.0, cv::Size(inputW, inputH), cv::Scalar(0.0, 0.0, 0.0),true); -} - -static void leftTrim(std::string& s) -{ - s.erase(s.begin(), find_if(s.begin(), s.end(), [](int ch) { return !isspace(ch); })); -} - -static void rightTrim(std::string& s) -{ - s.erase(find_if(s.rbegin(), s.rend(), [](int ch) { return !isspace(ch); }).base(), s.end()); -} - -std::string trim(std::string s) -{ - leftTrim(s); - rightTrim(s); - return s; -} - -std::string triml(std::string s,const char* t) -{ - s.erase(0, s.find_first_not_of(t)); - return s; -} -std::string trimr(std::string s, const char* t) -{ - s.erase(s.find_last_not_of(t) + 1); - return s; -} - -float clamp(const float val, const float minVal, const float maxVal) -{ - assert(minVal <= maxVal); - return std::min(maxVal, std::max(minVal, val)); -} - -bool fileExists(const std::string fileName, bool verbose) -{ - if (!fs::exists(fs::path(fileName))) - { - if (verbose) std::cout << "File does not exist : " << fileName << std::endl; - return false; - } - return true; -} - -BBox convertBBoxNetRes(const float& bx, const float& by, const float& bw, const float& bh, - const uint32_t& stride, const uint32_t& netW, const uint32_t& netH) -{ - BBox b; - // Restore coordinates to network input resolution - float x = bx 
* stride; - float y = by * stride; - - b.x1 = x - bw / 2; - b.x2 = x + bw / 2; - - b.y1 = y - bh / 2; - b.y2 = y + bh / 2; - - b.x1 = clamp(b.x1, 0.f, static_cast(netW)); - b.x2 = clamp(b.x2, 0.f, static_cast(netW)); - b.y1 = clamp(b.y1, 0.f, static_cast(netH)); - b.y2 = clamp(b.y2, 0.f, static_cast(netH)); - - return b; -} - -void convertBBoxImgRes(const float /*scalingFactor*/, - //const float& xOffset, -// const float& yOffset, - const uint32_t &input_w_, - const uint32_t &input_h_, - const uint32_t &image_w_, - const uint32_t &image_h_, - BBox& bbox) -{ - //// Undo Letterbox - //bbox.x1 -= xOffset; - //bbox.x2 -= xOffset; - //bbox.y1 -= yOffset; - //bbox.y2 -= yOffset; - - //// Restore to input resolution - //bbox.x1 /= scalingFactor; - //bbox.x2 /= scalingFactor; - //bbox.y1 /= scalingFactor; - //bbox.y2 /= scalingFactor; - bbox.x1 = ((float)bbox.x1 / (float)input_w_)*(float)image_w_; - bbox.y1 = ((float)bbox.y1 / (float)input_h_)*(float)image_h_; - bbox.x2 = ((float)bbox.x2 / (float)input_w_)*(float)image_w_; - bbox.y2 = ((float)bbox.y2 / (float)input_h_)*(float)image_h_; -} - -void printPredictions(const BBoxInfo& b, const std::string& className) -{ - std::cout << " label:" << b.label << "(" << className << ")" - << " confidence:" << b.prob << " xmin:" << b.box.x1 << " ymin:" << b.box.y1 - << " xmax:" << b.box.x2 << " ymax:" << b.box.y2 << std::endl; -} - -std::vector loadListFromTextFile(const std::string filename) -{ - assert(fileExists(filename)); - std::vector list; - - std::ifstream f(filename); - if (!f) - { - std::cout << "failed to open " << filename; - assert(0); - } - - std::string line; - while (std::getline(f, line)) - { - if (line.empty()) - continue; - else - list.push_back(trim(line)); - } - return list; -} - -std::vector loadImageList(const std::string filename, const std::string prefix) -{ - std::vector fileList = loadListFromTextFile(filename); - for (auto& file : fileList) - { - if (fileExists(file, false)) - continue; - else - { - 
std::string prefixed = prefix + file; - if (fileExists(prefixed, false)) - file = prefixed; - else - std::cerr << "WARNING: couldn't find: " << prefixed - << " while loading: " << filename << std::endl; - } - } - return fileList; -} - - -std::vector nmsAllClasses(const float nmsThresh, - std::vector& binfo, - const uint32_t numClasses, - const std::string &model_type) -{ - std::vector result; - std::vector> splitBoxes(numClasses); - for (auto& box : binfo) - { - splitBoxes.at(box.label).push_back(box); - } - - for (auto& boxes : splitBoxes) - { - if (("yolov5"== model_type)||("yolov4" == model_type)||("yolov4-tiny"== model_type)) - { - boxes = diou_nms(nmsThresh, boxes); - } - else - { - boxes = nonMaximumSuppression(nmsThresh, boxes); - } - result.insert(result.end(), boxes.begin(), boxes.end()); - } - - return result; -} - - -std::vector diou_nms(const float nmsThresh, std::vector binfo) -{ - auto overlap1D = [](float x1min, float x1max, float x2min, float x2max) -> float - { - if (x1min > x2min) - { - std::swap(x1min, x2min); - std::swap(x1max, x2max); - } - return x1max < x2min ? 0 : std::min(x1max, x2max) - x2min; - }; - auto computeIoU = [&overlap1D](BBox& bbox1, BBox& bbox2) -> float - { - float overlapX = overlap1D(bbox1.x1, bbox1.x2, bbox2.x1, bbox2.x2); - float overlapY = overlap1D(bbox1.y1, bbox1.y2, bbox2.y1, bbox2.y2); - float area1 = (bbox1.x2 - bbox1.x1) * (bbox1.y2 - bbox1.y1); - float area2 = (bbox2.x2 - bbox2.x1) * (bbox2.y2 - bbox2.y1); - float overlap2D = overlapX * overlapY; - float u = area1 + area2 - overlap2D; - return u == 0 ? 
0 : overlap2D / u; - }; - - //https://arxiv.org/pdf/1911.08287.pdf - auto R = [](BBox &bbox1,BBox &bbox2) ->float - { - float center1_x = (bbox1.x1 + bbox1.x2) / 2.f; - float center1_y = (bbox1.y1 + bbox1.y2) / 2.f; - float center2_x = (bbox2.x1 + bbox2.x2) / 2.f; - float center2_y = (bbox2.y1 + bbox2.y2) / 2.f; - - float d_center = (center1_x - center2_x)* (center1_x - center2_x) - + (center1_y - center2_y)*(center1_y - center2_y); - //smallest_enclosing box - float box_x1 = std::min({ bbox1.x1, bbox1.x2, bbox2.x1, bbox2.x2 }); - float box_y1 = std::min({ bbox1.y1, bbox1.y2, bbox2.y1, bbox2.y2 }); - float box_x2 = std::max({ bbox1.x1, bbox1.x2, bbox2.x1, bbox2.x2 }); - float box_y2 = std::max({ bbox1.y1, bbox1.y2, bbox2.y1, bbox2.y2 }); - - float d_diagonal = (box_x1 - box_x2) * (box_x1 - box_x2) + - (box_y1 - box_y2) * (box_y1 - box_y2); - - return d_center / d_diagonal; - }; - std::stable_sort(binfo.begin(), binfo.end(), - [](const BBoxInfo& b1, const BBoxInfo& b2) { return b1.prob > b2.prob; }); - std::vector out; - for (auto& i : binfo) - { - bool keep = true; - for (auto& j : out) - { - if (keep) - { - float overlap = computeIoU(i.box, j.box); - float r = R(i.box, j.box); - keep = (overlap-r) <= nmsThresh; - } - else - break; - } - if (keep) out.push_back(i); - } - return out; -} - - -std::vector nonMaximumSuppression(const float nmsThresh, std::vector binfo) -{ - auto overlap1D = [](float x1min, float x1max, float x2min, float x2max) -> float - { - if (x1min > x2min) - { - std::swap(x1min, x2min); - std::swap(x1max, x2max); - } - return x1max < x2min ? 
0 : std::min(x1max, x2max) - x2min; - }; - auto computeIoU = [&overlap1D](BBox& bbox1, BBox& bbox2) -> float - { - float overlapX = overlap1D(bbox1.x1, bbox1.x2, bbox2.x1, bbox2.x2); - float overlapY = overlap1D(bbox1.y1, bbox1.y2, bbox2.y1, bbox2.y2); - float area1 = (bbox1.x2 - bbox1.x1) * (bbox1.y2 - bbox1.y1); - float area2 = (bbox2.x2 - bbox2.x1) * (bbox2.y2 - bbox2.y1); - float overlap2D = overlapX * overlapY; - float u = area1 + area2 - overlap2D; - return u == 0 ? 0 : overlap2D / u; - }; - - std::stable_sort(binfo.begin(), binfo.end(), - [](const BBoxInfo& b1, const BBoxInfo& b2) { return b1.prob > b2.prob; }); - std::vector out; - for (auto& i : binfo) - { - bool keep = true; - for (auto& j : out) - { - if (keep) - { - float overlap = computeIoU(i.box, j.box); - keep = overlap <= nmsThresh; - } - else - break; - } - if (keep) out.push_back(i); - } - return out; -} - -nvinfer1::ICudaEngine* loadTRTEngine(const std::string planFilePath, PluginFactory* pluginFactory, - Logger& logger) -{ - // reading the model in memory - std::cout << "Loading TRT Engine..." << std::endl; - assert(fileExists(planFilePath)); - std::stringstream trtModelStream; - trtModelStream.seekg(0, trtModelStream.beg); - std::ifstream cache(planFilePath,std::ios::binary | std::ios::in); - assert(cache.good()); - trtModelStream << cache.rdbuf(); - cache.close(); - - // calculating model size - trtModelStream.seekg(0, std::ios::end); - const auto modelSize = trtModelStream.tellg(); - trtModelStream.seekg(0, std::ios::beg); - void* modelMem = malloc(modelSize); - trtModelStream.read((char*) modelMem, modelSize); - - nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(logger); - nvinfer1::ICudaEngine* engine - = runtime->deserializeCudaEngine(modelMem, modelSize, pluginFactory); - free(modelMem); - runtime->destroy(); - std::cout << "Loading Complete!" 
<< std::endl; - - return engine; -} - -//{ -// std::ifstream file(weightsFilePath, std::ios_base::binary); -// assert(file.good()); -// std::string line; -// file.ignore(4); -// char buf[2]; -// file.read(buf, 1); -// if ((int)(unsigned char)buf[0] == 1) -// { -// file.ignore(11); -// } -// else if ((int)(unsigned char)buf[0] == 2) -// { -// file.ignore(15); -// } -//} -std::vector loadWeights(const std::string weightsFilePath, const std::string& /*networkType*/) -{ - assert(fileExists(weightsFilePath)); - std::cout << "Loading pre-trained weights..." << std::endl; - std::ifstream file(weightsFilePath, std::ios_base::binary); - assert(file.good()); - std::string line; - file.ignore(4); - char buf[2]; - file.read(buf, 1); - if ((int)(unsigned char)buf[0] == 1) - { - file.ignore(11); - } - else if ((int)(unsigned char)buf[0] == 2) - { - file.ignore(15); - } - else - { - std::cout << "Invalid network type" << std::endl; - assert(0); - } - - std::vector weights; - char* floatWeight = new char[4]; - while (!file.eof()) - { - file.read(floatWeight, 4); - assert(file.gcount() == 4); - weights.push_back(*reinterpret_cast(floatWeight)); - if (file.peek() == std::istream::traits_type::eof()) break; - } - std::cout << "Loading complete!" 
<< std::endl; - delete[] floatWeight; - - // std::cout << "Total Number of weights read : " << weights.size() << std::endl; - return weights; -} - -std::string dimsToString(const nvinfer1::Dims d) -{ - std::stringstream s; - assert(d.nbDims >= 1); - for (int i = 0; i < d.nbDims - 1; ++i) - { - s << std::setw(4) << d.d[i] << " x"; - } - s << std::setw(4) << d.d[d.nbDims - 1]; - - return s.str(); -} - -void displayDimType(const nvinfer1::Dims d) -{ - std::cout << "(" << d.nbDims << ") "; - for (int i = 0; i < d.nbDims; ++i) - { - switch (d.type[i]) - { - case nvinfer1::DimensionType::kSPATIAL: std::cout << "kSPATIAL "; break; - case nvinfer1::DimensionType::kCHANNEL: std::cout << "kCHANNEL "; break; - case nvinfer1::DimensionType::kINDEX: std::cout << "kINDEX "; break; - case nvinfer1::DimensionType::kSEQUENCE: std::cout << "kSEQUENCE "; break; - } - } - std::cout << std::endl; -} - -int getNumChannels(nvinfer1::ITensor* t) -{ - nvinfer1::Dims d = t->getDimensions(); - assert(d.nbDims == 3); - - return d.d[0]; -} - -uint64_t get3DTensorVolume(nvinfer1::Dims inputDims) -{ - assert(inputDims.nbDims == 3); - return inputDims.d[0] * inputDims.d[1] * inputDims.d[2]; -} - -nvinfer1::ILayer* netAddMaxpool(int layerIdx, std::map& block, - nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network) -{ - assert(block.at("type") == "maxpool"); - assert(block.find("size") != block.end()); - assert(block.find("stride") != block.end()); - - int size = std::stoi(block.at("size")); - int stride = std::stoi(block.at("stride")); - - nvinfer1::IPoolingLayer* pool - = network->addPoolingNd(*input, nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{size, size}); - assert(pool); - std::string maxpoolLayerName = "maxpool_" + std::to_string(layerIdx); - int pad = (size - 1) / 2; - pool->setPaddingNd(nvinfer1::DimsHW{pad,pad}); - pool->setStrideNd(nvinfer1::DimsHW{stride, stride}); - pool->setName(maxpoolLayerName.c_str()); - - return pool; -} - -nvinfer1::ILayer* netAddConvLinear(int 
layerIdx, std::map& block, - std::vector& weights, - std::vector& trtWeights, int& weightPtr, - int& inputChannels, nvinfer1::ITensor* input, - nvinfer1::INetworkDefinition* network) -{ - assert(block.at("type") == "convolutional"); - assert(block.find("batch_normalize") == block.end()); - assert(block.at("activation") == "linear"); - assert(block.find("filters") != block.end()); - assert(block.find("pad") != block.end()); - assert(block.find("size") != block.end()); - assert(block.find("stride") != block.end()); - - int filters = std::stoi(block.at("filters")); - int padding = std::stoi(block.at("pad")); - int kernelSize = std::stoi(block.at("size")); - int stride = std::stoi(block.at("stride")); - int pad; - if (padding) - pad = (kernelSize - 1) / 2; - else - pad = 0; - // load the convolution layer bias - nvinfer1::Weights convBias{nvinfer1::DataType::kFLOAT, nullptr, filters}; - float* val = new float[filters]; - for (int i = 0; i < filters; ++i) - { - val[i] = weights[weightPtr]; - weightPtr++; - } - convBias.values = val; - trtWeights.push_back(convBias); - // load the convolutional layer weights - int size = filters * inputChannels * kernelSize * kernelSize; - nvinfer1::Weights convWt{nvinfer1::DataType::kFLOAT, nullptr, size}; - val = new float[size]; - for (int i = 0; i < size; ++i) - { - val[i] = weights[weightPtr]; - weightPtr++; - } - convWt.values = val; - trtWeights.push_back(convWt); - nvinfer1::IConvolutionLayer* conv = network->addConvolution( - *input, filters, nvinfer1::DimsHW{kernelSize, kernelSize}, convWt, convBias); - assert(conv != nullptr); - std::string convLayerName = "conv_" + std::to_string(layerIdx); - conv->setName(convLayerName.c_str()); - conv->setStride(nvinfer1::DimsHW{stride, stride}); - conv->setPadding(nvinfer1::DimsHW{pad, pad}); - - return conv; -} - -nvinfer1::ILayer* net_conv_bn_mish(int layerIdx, - std::map& block, - std::vector& weights, - std::vector& trtWeights, - int& weightPtr, - int& inputChannels, - 
nvinfer1::ITensor* input, - nvinfer1::INetworkDefinition* network) -{ - assert(block.at("type") == "convolutional"); - assert(block.find("batch_normalize") != block.end()); - assert(block.at("batch_normalize") == "1"); - assert(block.at("activation") == "mish"); - assert(block.find("filters") != block.end()); - assert(block.find("pad") != block.end()); - assert(block.find("size") != block.end()); - assert(block.find("stride") != block.end()); - - bool batchNormalize, bias; - if (block.find("batch_normalize") != block.end()) - { - batchNormalize = (block.at("batch_normalize") == "1"); - bias = false; - } - else - { - batchNormalize = false; - bias = true; - } - // all conv_bn_leaky layers assume bias is false - assert(batchNormalize == true && bias == false); - - int filters = std::stoi(block.at("filters")); - int padding = std::stoi(block.at("pad")); - int kernelSize = std::stoi(block.at("size")); - int stride = std::stoi(block.at("stride")); - int pad; - if (padding) - pad = (kernelSize - 1) / 2; - else - pad = 0; - - /***** CONVOLUTION LAYER *****/ - /*****************************/ - // batch norm weights are before the conv layer - // load BN biases (bn_biases) - std::vector bnBiases; - for (int i = 0; i < filters; ++i) - { - bnBiases.push_back(weights[weightPtr]); - weightPtr++; - } - // load BN weights - std::vector bnWeights; - for (int i = 0; i < filters; ++i) - { - bnWeights.push_back(weights[weightPtr]); - weightPtr++; - } - // load BN running_mean - std::vector bnRunningMean; - for (int i = 0; i < filters; ++i) - { - bnRunningMean.push_back(weights[weightPtr]); - weightPtr++; - } - // load BN running_var - std::vector bnRunningVar; - for (int i = 0; i < filters; ++i) - { - // 1e-05 for numerical stability - bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5f)); - weightPtr++; - } - // load Conv layer weights (GKCRS) - int size = filters * inputChannels * kernelSize * kernelSize; - nvinfer1::Weights convWt{ nvinfer1::DataType::kFLOAT, nullptr, size }; 
- float* val = new float[size]; - for (int i = 0; i < size; ++i) - { - val[i] = weights[weightPtr]; - weightPtr++; - } - convWt.values = val; - trtWeights.push_back(convWt); - nvinfer1::Weights convBias{ nvinfer1::DataType::kFLOAT, nullptr, 0 }; - trtWeights.push_back(convBias); - nvinfer1::IConvolutionLayer* conv = network->addConvolution( - *input, filters, nvinfer1::DimsHW{ kernelSize, kernelSize }, convWt, convBias); - assert(conv != nullptr); - std::string convLayerName = "conv_" + std::to_string(layerIdx); - conv->setName(convLayerName.c_str()); - conv->setStride(nvinfer1::DimsHW{ stride, stride }); - conv->setPadding(nvinfer1::DimsHW{ pad, pad }); - - /***** BATCHNORM LAYER *****/ - /***************************/ - size = filters; - // create the weights - nvinfer1::Weights shift{ nvinfer1::DataType::kFLOAT, nullptr, size }; - nvinfer1::Weights scale{ nvinfer1::DataType::kFLOAT, nullptr, size }; - nvinfer1::Weights power{ nvinfer1::DataType::kFLOAT, nullptr, size }; - float* shiftWt = new float[size]; - for (int i = 0; i < size; ++i) - { - shiftWt[i] - = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i)); - } - shift.values = shiftWt; - float* scaleWt = new float[size]; - for (int i = 0; i < size; ++i) - { - scaleWt[i] = bnWeights.at(i) / bnRunningVar[i]; - } - scale.values = scaleWt; - float* powerWt = new float[size]; - for (int i = 0; i < size; ++i) - { - powerWt[i] = 1.0; - } - power.values = powerWt; - trtWeights.push_back(shift); - trtWeights.push_back(scale); - trtWeights.push_back(power); - // Add the batch norm layers - nvinfer1::IScaleLayer* bn = network->addScale( - *conv->getOutput(0), nvinfer1::ScaleMode::kCHANNEL, shift, scale, power); - assert(bn != nullptr); - std::string bnLayerName = "batch_norm_" + std::to_string(layerIdx); - bn->setName(bnLayerName.c_str()); - /***** ACTIVATION LAYER *****/ - /****************************/ - auto creator = getPluginRegistry()->getPluginCreator("Mish_TRT", "1"); - const 
nvinfer1::PluginFieldCollection* pluginData = creator->getFieldNames(); - nvinfer1::IPluginV2 *pluginObj = creator->createPlugin(("mish" + std::to_string(layerIdx)).c_str(), pluginData); - nvinfer1::ITensor* inputTensors[] = { bn->getOutput(0) }; - auto mish = network->addPluginV2(&inputTensors[0], 1, *pluginObj); - return mish; -} - -nvinfer1::ILayer * layer_split(const int n_layer_index_, - nvinfer1::ITensor *input_, - nvinfer1::INetworkDefinition* network) -{ - auto creator = getPluginRegistry()->getPluginCreator("CHUNK_TRT", "1.0"); - const nvinfer1::PluginFieldCollection* pluginData = creator->getFieldNames(); - nvinfer1::IPluginV2 *pluginObj = creator->createPlugin(("chunk" + std::to_string(n_layer_index_)).c_str(), pluginData); - auto chunk = network->addPluginV2(&input_, 1, *pluginObj); - return chunk; -} - -std::vector parse_int_list(const std::string s_args_) -{ - std::string s_args = s_args_; - std::vector vec_args; - while (!s_args.empty()) - { - auto npos = s_args.find_first_of(','); - if (npos != std::string::npos) - { - int v = std::stoi(trim(s_args.substr(0, npos))); - vec_args.push_back(v); - s_args.erase(0, npos + 1); - } - else - { - int v = std::stoi(trim(s_args)); - vec_args.push_back(v); - break; - } - } - return vec_args; -} - - - -std::vector dims2chw(const nvinfer1::Dims d) -{ - std::vector chw; - assert(d.nbDims >= 1); - for (int i = 0; i < d.nbDims; ++i) - { - chw.push_back(d.d[i]); - } - return chw; -} - -nvinfer1::ILayer* layer_bottleneck(std::vector &trtWeights_, - std::string s_layer_name_, - std::map> &map_wts_, - nvinfer1::INetworkDefinition* network_, - nvinfer1::ITensor* input_, - const int c2_, - bool shortcut_ = true, - const int gouup_ = 1, - const float e_ = 0.5) -{ - int c_ = int(c2_*e_); - auto cv1 = layer_conv_bn_act(trtWeights_,s_layer_name_ + ".cv1", map_wts_, input_, network_, c_, 1, 1); - auto cv2 = layer_conv_bn_act(trtWeights_,s_layer_name_ + ".cv2", map_wts_, cv1->getOutput(0), network_, c2_, 3, 1,gouup_); - if 
(shortcut_) - { - nvinfer1::IElementWiseLayer* ew - = network_->addElementWise(*input_, - *cv2->getOutput(0), - nvinfer1::ElementWiseOperation::kSUM); - return ew; - } - else - { - return cv2; - } -} - -nvinfer1::ILayer * layer_concate(nvinfer1::ITensor** concatInputs, - const int n_size_, - const int n_axis_, - nvinfer1::INetworkDefinition* network_) -{ - nvinfer1::IConcatenationLayer* concat - = network_->addConcatenation(concatInputs, n_size_); - assert(concat != nullptr); -// concat->setAxis(n_axis_); - return concat; -} -nvinfer1::ILayer * layer_bn(std::vector &trtWeights_, - const std::string s_layer_name_, - std::map>&map_wts_,//conv-bn - nvinfer1::ITensor* input_, - const int n_filters_, - nvinfer1::INetworkDefinition* network_) -{ - std::vector bn_wts = map_wts_[s_layer_name_ + ".bn.weight"]; - std::vector bn_bias = map_wts_[s_layer_name_ + ".bn.bias"]; - std::vector bn_mean = map_wts_[s_layer_name_ + ".bn.running_mean"]; - std::vector bn_var = map_wts_[s_layer_name_ + ".bn.running_var"]; - assert(bn_wts.size() == n_filters_); - assert(bn_bias.size() == n_filters_); - assert(bn_mean.size() == n_filters_); - assert(bn_var.size() == n_filters_); - for (int i = 0; i < n_filters_; ++i) - { - bn_var[i] = sqrt(bn_var[i] + 1.0e-5f); - } - //float bn_num_batches_tracked = map_wts_[s_layer_name_ + ".bn.num_batches_tracked.weight"][0]; - // create the weights - nvinfer1::Weights shift{ nvinfer1::DataType::kFLOAT, nullptr, n_filters_ }; - nvinfer1::Weights scale{ nvinfer1::DataType::kFLOAT, nullptr, n_filters_ }; - nvinfer1::Weights power{ nvinfer1::DataType::kFLOAT, nullptr, n_filters_ }; - float* shiftWt = new float[n_filters_]; - for (int i = 0; i < n_filters_; ++i) - { - shiftWt[i] - = bn_bias.at(i) - ((bn_mean.at(i) * bn_wts.at(i)) / bn_var.at(i)); - } - shift.values = shiftWt; - float* scaleWt = new float[n_filters_]; - for (int i = 0; i < n_filters_; ++i) - { - scaleWt[i] = bn_wts.at(i) / bn_var[i]; - } - scale.values = scaleWt; - float* powerWt = new 
float[n_filters_]; - for (int i = 0; i < n_filters_; ++i) - { - powerWt[i] = 1.0; - } - power.values = powerWt; - // Add the batch norm layers - auto bn = network_->addScale(*input_, nvinfer1::ScaleMode::kCHANNEL, shift, scale, power); - assert(bn != nullptr); - trtWeights_.push_back(shift); - trtWeights_.push_back(scale); - trtWeights_.push_back(power); - return bn; -} - -nvinfer1::ILayer * layer_act(nvinfer1::ITensor* input_, - nvinfer1::INetworkDefinition* network_, - const std::string s_act_) -{ - if (s_act_ == "leaky") - { - auto act = network_->addActivation(*input_, nvinfer1::ActivationType::kLEAKY_RELU); - act->setAlpha(0.1); - assert(act != nullptr); - return act; - } - else if (s_act_ == "hardswish") - { - nvinfer1::IPluginV2 *hardswish_plugin = new nvinfer1::Hardswish(); - auto act = network_->addPluginV2(&input_, 1, *hardswish_plugin); - assert(act != nullptr); - return act; - } - return nullptr; -} - -nvinfer1::ILayer * layer_conv(std::vector &trtWeights_, - const std::string s_layer_name_, - std::map>&map_wts_,//conv-bn - nvinfer1::ITensor* input_, - nvinfer1::INetworkDefinition* network_, - const int n_filters_, - const int n_kernel_size_, - const int n_stride_, - const bool b_bias_, - const int group_ , - const bool b_padding_) -{ - int pad = b_padding_ ? 
((n_kernel_size_ - 1) / 2) : 0; - std::vector chw = dims2chw(input_->getDimensions()); - - //conv - int size = n_filters_ * chw[0] * n_kernel_size_ * n_kernel_size_; - nvinfer1::Weights convWt{ nvinfer1::DataType::kFLOAT, nullptr, size }; - float *conv_wts = new float[size]; - std::vector &vec_wts = map_wts_[s_layer_name_ + ".weight"]; - for (int i = 0; i < size; ++i) - { - conv_wts[i] = vec_wts[i]; - } - assert(size == (map_wts_[s_layer_name_ + ".weight"].size())); - convWt.values = conv_wts; - nvinfer1::Weights convBias{ nvinfer1::DataType::kFLOAT, nullptr, 0 }; - if (b_bias_) - { - int size_bias = n_filters_; - float *conv_bias = new float[size_bias]; - std::vector &vec_bias = map_wts_[s_layer_name_ + ".bias"]; - for (int i = 0; i < size_bias; ++i) - { - conv_bias[i] = vec_bias[i]; - } - assert(size_bias == vec_bias.size()); - convBias.values = conv_bias; - convBias.count = size_bias; - } - nvinfer1::IConvolutionLayer* conv = network_->addConvolutionNd( - *input_, - n_filters_, - nvinfer1::DimsHW{ n_kernel_size_, n_kernel_size_ }, - convWt, - convBias); - assert(conv != nullptr); - conv->setPaddingNd(nvinfer1::DimsHW{ pad,pad }); - conv->setStrideNd(nvinfer1::DimsHW{ n_stride_ ,n_stride_ }); - if (!b_bias_) - { - conv->setNbGroups(group_); - } - trtWeights_.push_back(convWt); - trtWeights_.push_back(convBias); - return conv; -} - -nvinfer1::ILayer * layer_bottleneck_csp(std::vector &trtWeights_, - std::string s_model_name_, - std::map> &map_wts_, - nvinfer1::INetworkDefinition* network_, - nvinfer1::ITensor* input_, - const int c2_, - const int n_depth_, - const bool b_short_cut_ , - const int group_ , - const float e_ ) -{ - std::vector chw=dims2chw(input_->getDimensions()); - //int c1 = dims2chw(input_->getDimensions())[0]; - int c1 = chw[0]; - int c_ = int(c2_*0.5); - //cv1 - auto out = layer_conv_bn_act(trtWeights_, s_model_name_ +".cv1", map_wts_, input_, network_, c_, 1); - //m - for (int d = 0; d < n_depth_; ++d) - { - std::string m_name = s_model_name_ + 
".m." + std::to_string(d); - out = layer_bottleneck(trtWeights_, m_name, map_wts_, network_, out->getOutput(0), c_, b_short_cut_, group_, 1.f); - } - //cv3 - auto cv3 = layer_conv(trtWeights_, s_model_name_ + ".cv3", map_wts_, out->getOutput(0), network_, c_, 1); - //cv2 - auto cv2 = layer_conv(trtWeights_, s_model_name_ + ".cv2", map_wts_, input_, network_, c_, 1); - //concate - nvinfer1::ITensor** concatInputs - = reinterpret_cast(malloc(sizeof(nvinfer1::ITensor*) *2)); - concatInputs[0] = cv3->getOutput(0); - concatInputs[1] = cv2->getOutput(0); - auto cat = layer_concate(concatInputs, 2, 0,network_); - auto bn = layer_bn(trtWeights_, s_model_name_, map_wts_, cat->getOutput(0), 2 * c_, network_); - auto act = layer_act(bn->getOutput(0), network_); - //cv4 - auto cv4 = layer_conv_bn_act(trtWeights_, s_model_name_ + ".cv4", map_wts_, act->getOutput(0), network_, c2_, 1); - return cv4; -} - -nvinfer1::ILayer * layer_spp(std::vector &trtWeights_, - std::string s_model_name_, - std::map> &map_wts_, - nvinfer1::INetworkDefinition* network_, - nvinfer1::ITensor* input_, - const int c2_, - const std::vector &vec_args_) -{ - std::vector chw=dims2chw(input_->getDimensions()); - int c1 = chw[0];//dims2chw(input_->getDimensions())[0]; - int c_ = c1 / 2; - nvinfer1::ILayer * x = layer_conv_bn_act(trtWeights_, s_model_name_ + ".cv1", map_wts_, input_, network_, c_, 1); - nvinfer1::ITensor** concatInputs - = reinterpret_cast(malloc(sizeof(nvinfer1::ITensor*) * (vec_args_.size()+1))); - concatInputs[0] = x->getOutput(0); - for (int ind = 0; ind < vec_args_.size(); ++ind) - { - nvinfer1::IPoolingLayer* pool - = network_->addPoolingNd(*x->getOutput(0), - nvinfer1::PoolingType::kMAX, - nvinfer1::DimsHW{ vec_args_[ind], vec_args_[ind] }); - assert(pool); - int pad = vec_args_[ind] / 2; - pool->setPaddingNd(nvinfer1::DimsHW{ pad,pad }); - pool->setStrideNd(nvinfer1::DimsHW{1, 1}); - concatInputs[ind + 1] = pool->getOutput(0); - } - nvinfer1::IConcatenationLayer* concat = 
network_->addConcatenation(concatInputs, static_cast(vec_args_.size()+1)); - //concat->setAxis(0); - assert(concat != nullptr); - nvinfer1::ILayer *cv2 = layer_conv_bn_act(trtWeights_, s_model_name_ + ".cv2", map_wts_, concat->getOutput(0), network_, c2_, 1); - assert(cv2 != nullptr); - return cv2; -} - - -nvinfer1::ILayer *layer_upsample(std::string s_model_name_, - std::map> &map_wts_, - nvinfer1::INetworkDefinition* network_, - nvinfer1::ITensor* input_, - const int n_scale_) -{ - std::vector chw=dims2chw(input_->getDimensions()); - int c1 = chw[0];//dims2chw(input_->getDimensions())[0]; - float *deval = new float[c1*n_scale_*n_scale_]; - for (int i = 0; i < c1*n_scale_*n_scale_; i++) - { - deval[i] = 1.0; - } - nvinfer1::Weights wts{ DataType::kFLOAT, deval, c1*n_scale_*n_scale_ }; - nvinfer1::Weights bias{ DataType::kFLOAT, nullptr, 0 }; - IDeconvolutionLayer* upsample = network_->addDeconvolutionNd(*input_,c1, DimsHW{ n_scale_, n_scale_ }, wts, bias); - upsample->setStrideNd(DimsHW{ n_scale_, n_scale_ }); - upsample->setNbGroups(c1); - return upsample; -} - -nvinfer1::ILayer * layer_conv_bn_act(std::vector &trtWeights_, - const std::string s_layer_name_, - std::map>&map_wts_,//conv-bn - nvinfer1::ITensor* input_, - nvinfer1::INetworkDefinition* network_, - const int n_filters_, - const int n_kernel_size_, - const int n_stride_, - const int group_, - const bool b_padding_, - const bool b_bn_, - const std::string s_act_) -{ - int pad = b_padding_ ? 
((n_kernel_size_ - 1) / 2) : 0; - std::vector chw = dims2chw(input_->getDimensions()); - - //conv - nvinfer1::ILayer *conv = layer_conv(trtWeights_, s_layer_name_ + ".conv", map_wts_, input_, network_, n_filters_, n_kernel_size_, n_stride_); - nvinfer1::ILayer* bn = layer_bn(trtWeights_, s_layer_name_, map_wts_, conv->getOutput(0), n_filters_, network_); - nvinfer1::ILayer * act = layer_act(bn->getOutput(0), network_,s_act_); - return act; -} - - - -nvinfer1::ILayer* layer_focus(std::vector &trtWeights_, - std::string s_model_name_, - std::map>& map_wts_, - nvinfer1::ITensor* input, - const int out_channels_, - const int kernel_size_, - std::vector& trtWeights, - nvinfer1::INetworkDefinition* network) -{ - std::vector chw = dims2chw(input->getDimensions()); - ISliceLayer *s1 = network->addSlice(*input, Dims3{ 0, 0, 0 }, Dims3{ chw[0], chw[1] / 2, chw[2] / 2 }, Dims3{ 1, 2, 2 }); - ISliceLayer *s2 = network->addSlice(*input, Dims3{ 0, 1, 0 }, Dims3{ chw[0], chw[1] / 2, chw[2] / 2 }, Dims3{ 1, 2, 2 }); - ISliceLayer *s3 = network->addSlice(*input, Dims3{ 0, 0, 1 }, Dims3{ chw[0], chw[1] / 2, chw[2] / 2 }, Dims3{ 1, 2, 2 }); - ISliceLayer *s4 = network->addSlice(*input, Dims3{ 0, 1, 1 }, Dims3{ chw[0], chw[1] / 2, chw[2] / 2 }, Dims3{ 1, 2, 2 }); - ITensor* inputTensors[] = { s1->getOutput(0), s2->getOutput(0), s3->getOutput(0), s4->getOutput(0) }; - auto cat = network->addConcatenation(inputTensors, 4); - auto cat_out = cat->getOutput(0); - auto out = layer_conv_bn_act(trtWeights_, - s_model_name_ +".conv", - map_wts_, - cat_out, - network, - out_channels_, - kernel_size_); - return out; -} - - -nvinfer1::ILayer* netAddConvBNLeaky(int layerIdx, - std::map& block, - std::vector& weights, - std::vector& trtWeights, int& weightPtr, - int& inputChannels, nvinfer1::ITensor* input, - nvinfer1::INetworkDefinition* network) -{ - assert(block.at("type") == "convolutional"); - assert(block.find("batch_normalize") != block.end()); - assert(block.at("batch_normalize") == "1"); - 
assert(block.at("activation") == "leaky"); - assert(block.find("filters") != block.end()); - assert(block.find("pad") != block.end()); - assert(block.find("size") != block.end()); - assert(block.find("stride") != block.end()); - - bool batchNormalize, bias; - if (block.find("batch_normalize") != block.end()) - { - batchNormalize = (block.at("batch_normalize") == "1"); - bias = false; - } - else - { - batchNormalize = false; - bias = true; - } - // all conv_bn_leaky layers assume bias is false - assert(batchNormalize == true && bias == false); - - int filters = std::stoi(block.at("filters")); - int padding = std::stoi(block.at("pad")); - int kernelSize = std::stoi(block.at("size")); - int stride = std::stoi(block.at("stride")); - int pad; - if (padding) - pad = (kernelSize - 1) / 2; - else - pad = 0; - - /***** CONVOLUTION LAYER *****/ - /*****************************/ - // batch norm weights are before the conv layer - // load BN biases (bn_biases) - std::vector bnBiases; - for (int i = 0; i < filters; ++i) - { - bnBiases.push_back(weights[weightPtr]); - weightPtr++; - } - // load BN weights - std::vector bnWeights; - for (int i = 0; i < filters; ++i) - { - bnWeights.push_back(weights[weightPtr]); - weightPtr++; - } - // load BN running_mean - std::vector bnRunningMean; - for (int i = 0; i < filters; ++i) - { - bnRunningMean.push_back(weights[weightPtr]); - weightPtr++; - } - // load BN running_var - std::vector bnRunningVar; - for (int i = 0; i < filters; ++i) - { - // 1e-05 for numerical stability - bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5f)); - weightPtr++; - } - // load Conv layer weights (GKCRS) - int size = filters * inputChannels * kernelSize * kernelSize; - nvinfer1::Weights convWt{nvinfer1::DataType::kFLOAT, nullptr, size}; - float* val = new float[size]; - for (int i = 0; i < size; ++i) - { - val[i] = weights[weightPtr]; - weightPtr++; - } - convWt.values = val; - trtWeights.push_back(convWt); - nvinfer1::Weights 
convBias{nvinfer1::DataType::kFLOAT, nullptr, 0}; - trtWeights.push_back(convBias); - nvinfer1::IConvolutionLayer* conv = network->addConvolution( - *input, filters, nvinfer1::DimsHW{kernelSize, kernelSize}, convWt, convBias); - assert(conv != nullptr); - std::string convLayerName = "conv_" + std::to_string(layerIdx); - conv->setName(convLayerName.c_str()); - conv->setStride(nvinfer1::DimsHW{stride, stride}); - conv->setPadding(nvinfer1::DimsHW{pad, pad}); - - /***** BATCHNORM LAYER *****/ - /***************************/ - size = filters; - // create the weights - nvinfer1::Weights shift{nvinfer1::DataType::kFLOAT, nullptr, size}; - nvinfer1::Weights scale{nvinfer1::DataType::kFLOAT, nullptr, size}; - nvinfer1::Weights power{nvinfer1::DataType::kFLOAT, nullptr, size}; - float* shiftWt = new float[size]; - for (int i = 0; i < size; ++i) - { - shiftWt[i] - = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i)); - } - shift.values = shiftWt; - float* scaleWt = new float[size]; - for (int i = 0; i < size; ++i) - { - scaleWt[i] = bnWeights.at(i) / bnRunningVar[i]; - } - scale.values = scaleWt; - float* powerWt = new float[size]; - for (int i = 0; i < size; ++i) - { - powerWt[i] = 1.0; - } - power.values = powerWt; - trtWeights.push_back(shift); - trtWeights.push_back(scale); - trtWeights.push_back(power); - // Add the batch norm layers - nvinfer1::IScaleLayer* bn = network->addScale( - *conv->getOutput(0), nvinfer1::ScaleMode::kCHANNEL, shift, scale, power); - assert(bn != nullptr); - std::string bnLayerName = "batch_norm_" + std::to_string(layerIdx); - bn->setName(bnLayerName.c_str()); - /***** ACTIVATION LAYER *****/ - /****************************/ - auto leaky = network->addActivation(*bn->getOutput(0),nvinfer1::ActivationType::kLEAKY_RELU); - leaky->setAlpha(0.1f); - /*nvinfer1::IPlugin* leakyRELU = nvinfer1::plugin::createPReLUPlugin(0.1); - assert(leakyRELU != nullptr); - nvinfer1::ITensor* bnOutput = bn->getOutput(0); - 
nvinfer1::IPluginLayer* leaky = network->addPlugin(&bnOutput, 1, *leakyRELU);*/ - assert(leaky != nullptr); - std::string leakyLayerName = "leaky_" + std::to_string(layerIdx); - leaky->setName(leakyLayerName.c_str()); - - return leaky; -} - -nvinfer1::ILayer* netAddUpsample(int layerIdx, std::map& block, - std::vector& /*weights*/, - std::vector& trtWeights, int& /*inputChannels*/, - nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network) -{ - assert(block.at("type") == "upsample"); - nvinfer1::Dims inpDims = input->getDimensions(); - assert(inpDims.nbDims == 3); - // assert(inpDims.d[1] == inpDims.d[2]); - int n_scale = std::stoi(block.at("stride")); - - int c1 = inpDims.d[0]; - float *deval = new float[c1*n_scale*n_scale]; - for (int i = 0; i < c1*n_scale*n_scale; i++) - { - deval[i] = 1.0; - } - nvinfer1::Weights wts{ DataType::kFLOAT, deval, c1*n_scale*n_scale }; - nvinfer1::Weights bias{ DataType::kFLOAT, nullptr, 0 }; - IDeconvolutionLayer* upsample = network->addDeconvolutionNd(*input, c1, DimsHW{ n_scale, n_scale }, wts, bias); - upsample->setStrideNd(DimsHW{ n_scale, n_scale }); - upsample->setNbGroups(c1); - return upsample; - - #if 0 -// add pre multiply matrix as a constant - nvinfer1::Dims preDims{3, - {1, stride * h, w}, - {nvinfer1::DimensionType::kCHANNEL, nvinfer1::DimensionType::kSPATIAL, - nvinfer1::DimensionType::kSPATIAL}}; - int size = stride * h * w; - nvinfer1::Weights preMul{nvinfer1::DataType::kFLOAT, nullptr, size}; - float* preWt = new float[size]; - /* (2*h * w) - [ [1, 0, ..., 0], - [1, 0, ..., 0], - [0, 1, ..., 0], - [0, 1, ..., 0], - ..., - ..., - [0, 0, ..., 1], - [0, 0, ..., 1] ] - */ - for (int i = 0, idx = 0; i < h; ++i) - { - for (int s = 0; s < stride; ++s) - { - for (int j = 0; j < w; ++j, ++idx) - { - preWt[idx] = (i == j) ? 
1.0f : 0.0f; - } - } - } - preMul.values = preWt; - trtWeights.push_back(preMul); - nvinfer1::IConstantLayer* preM = network->addConstant(preDims, preMul); - assert(preM != nullptr); - std::string preLayerName = "preMul_" + std::to_string(layerIdx); - preM->setName(preLayerName.c_str()); - // add post multiply matrix as a constant - nvinfer1::Dims postDims{3, - {1, h, stride * w}, - {nvinfer1::DimensionType::kCHANNEL, nvinfer1::DimensionType::kSPATIAL, - nvinfer1::DimensionType::kSPATIAL}}; - size = stride * h * w; - nvinfer1::Weights postMul{nvinfer1::DataType::kFLOAT, nullptr, size}; - float* postWt = new float[size]; - /* (h * 2*w) - [ [1, 1, 0, 0, ..., 0, 0], - [0, 0, 1, 1, ..., 0, 0], - ..., - ..., - [0, 0, 0, 0, ..., 1, 1] ] - */ - for (int i = 0, idx = 0; i < h; ++i) - { - for (int j = 0; j < stride * w; ++j, ++idx) - { - postWt[idx] = (j / stride == i) ? 1.0f : 0.0f; - } - } - postMul.values = postWt; - trtWeights.push_back(postMul); - nvinfer1::IConstantLayer* post_m = network->addConstant(postDims, postMul); - assert(post_m != nullptr); - std::string postLayerName = "postMul_" + std::to_string(layerIdx); - post_m->setName(postLayerName.c_str()); - // add matrix multiply layers for upsampling - nvinfer1::IMatrixMultiplyLayer* mm1 - = network->addMatrixMultiply(*preM->getOutput(0), nvinfer1::MatrixOperation::kNONE, *input, - nvinfer1::MatrixOperation::kNONE); - assert(mm1 != nullptr); - std::string mm1LayerName = "mm1_" + std::to_string(layerIdx); - mm1->setName(mm1LayerName.c_str()); - nvinfer1::IMatrixMultiplyLayer* mm2 - = network->addMatrixMultiply(*mm1->getOutput(0), nvinfer1::MatrixOperation::kNONE, - *post_m->getOutput(0), nvinfer1::MatrixOperation::kNONE); - assert(mm2 != nullptr); - std::string mm2LayerName = "mm2_" + std::to_string(layerIdx); - mm2->setName(mm2LayerName.c_str()); - return mm2; -#endif -} - -void printLayerInfo(std::string layerIndex, std::string layerName, std::string layerInput, - std::string layerOutput, std::string weightPtr) 
-{ - std::cout << std::setw(6) << std::left << layerIndex << std::setw(15) << std::left << layerName; - std::cout << std::setw(20) << std::left << layerInput << std::setw(20) << std::left - << layerOutput; - std::cout << std::setw(6) << std::left << weightPtr << std::endl; -} diff --git a/src/Detector/tensorrt_yolo/trt_utils.h b/src/Detector/tensorrt_yolo/trt_utils.h deleted file mode 100644 index 5366536bd..000000000 --- a/src/Detector/tensorrt_yolo/trt_utils.h +++ /dev/null @@ -1,267 +0,0 @@ - -/** -MIT License - -Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
-* -*/ - -#ifndef __TRT_UTILS_H__ -#define __TRT_UTILS_H__ - -/* OpenCV headers */ -//#include -#include -#include -#include -#include -#include - -#include "mish.h" -#include "chunk.h" -#include "hardswish.h" -#include -#include -#include -#include "NvInfer.h" - -#include "ds_image.h" -#include "plugin_factory.h" -//#include "logging.h" -class DsImage; -struct BBox -{ - float x1, y1, x2, y2; -}; - -struct BBoxInfo -{ - BBox box; - int label; - int classId; // For coco benchmarking - float prob; -}; - -class Logger : public nvinfer1::ILogger -{ -public: - Logger(Severity severity = Severity::kWARNING) - { - severity = severity; - } - - ~Logger() - { - - } - nvinfer1::ILogger& getTRTLogger() - { - return *this; - } - - void log(nvinfer1::ILogger::Severity severity, const char* msg) override - { - // suppress info-level messages - if (severity == Severity::kINFO) return; - - switch (severity) - { - case Severity::kINTERNAL_ERROR: std::cerr << "INTERNAL_ERROR: " << msg << std::endl; break; - case Severity::kERROR: std::cerr << "ERROR: " << msg << std::endl; break; - case Severity::kWARNING: std::cerr << "WARNING: " << msg << std::endl; break; - case Severity::kINFO: std::cerr << "INFO: " << msg << std::endl; break; - case Severity::kVERBOSE: break; - // default: std::cerr <<"UNKNOW:"<< msg << std::endl;break; - } - } -}; - -class YoloTinyMaxpoolPaddingFormula : public nvinfer1::IOutputDimensionsFormula -{ - -private: - std::set m_SamePaddingLayers; - - nvinfer1::DimsHW compute(nvinfer1::DimsHW inputDims, nvinfer1::DimsHW kernelSize, - nvinfer1::DimsHW stride, nvinfer1::DimsHW padding, - nvinfer1::DimsHW /*dilation*/, const char* layerName) const override - { - // assert(inputDims.d[0] == inputDims.d[1]); - assert(kernelSize.d[0] == kernelSize.d[1]); - assert(stride.d[0] == stride.d[1]); - assert(padding.d[0] == padding.d[1]); - - int output_h, output_w; - // Only layer maxpool_12 makes use of same padding - if (m_SamePaddingLayers.find(layerName) != 
m_SamePaddingLayers.end()) - { - output_h = (inputDims.d[0] + 2 * padding.d[0]) / stride.d[0]; - output_w = (inputDims.d[1] + 2 * padding.d[1]) / stride.d[1]; - } - // Valid Padding - else - { - output_h = (inputDims.d[0] - kernelSize.d[0]) / stride.d[0] + 1; - output_w = (inputDims.d[1] - kernelSize.d[1]) / stride.d[1] + 1; - } - return nvinfer1::DimsHW{output_h, output_w}; - } - -public: - void addSamePaddingLayer(std::string input) { m_SamePaddingLayers.insert(input); } -}; - -// Common helper functions -cv::Mat blobFromDsImages(const std::vector& inputImages, const int& inputH, - const int& inputW); -std::string trim(std::string s); -std::string triml(std::string s, const char* t); -std::string trimr(std::string s, const char* t); -float clamp(const float val, const float minVal, const float maxVal); -bool fileExists(const std::string fileName, bool verbose = true); -BBox convertBBoxNetRes(const float& bx, const float& by, const float& bw, const float& bh, - const uint32_t& stride, const uint32_t& netW, const uint32_t& netH); -void convertBBoxImgRes(const float scalingFactor, - //const float& xOffset, - // const float& yOffset, - const uint32_t &input_w_, - const uint32_t &input_h_, - const uint32_t &image_w_, - const uint32_t &image_h_, - BBox& bbox); -void printPredictions(const BBoxInfo& info, const std::string& className); -std::vector loadListFromTextFile(const std::string filename); -std::vector loadImageList(const std::string filename, const std::string prefix); -std::vector diou_nms(const float numThresh, std::vector binfo); -std::vector nmsAllClasses(const float nmsThresh, std::vector& binfo, - const uint32_t numClasses, const std::string &model_type); -std::vector nonMaximumSuppression(const float nmsThresh, std::vector binfo); -nvinfer1::ICudaEngine* loadTRTEngine(const std::string planFilePath, PluginFactory* pluginFactory, - Logger& logger); -std::vector loadWeights(const std::string weightsFilePath, const std::string& networkType); -std::string 
dimsToString(const nvinfer1::Dims d); -void displayDimType(const nvinfer1::Dims d); -int getNumChannels(nvinfer1::ITensor* t); -uint64_t get3DTensorVolume(nvinfer1::Dims inputDims); - -// Helper functions to create yolo engine -nvinfer1::ILayer* netAddMaxpool(int layerIdx, std::map& block, - nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network); -nvinfer1::ILayer* netAddConvLinear(int layerIdx, std::map& block, - std::vector& weights, - std::vector& trtWeights, int& weightPtr, - int& inputChannels, nvinfer1::ITensor* input, - nvinfer1::INetworkDefinition* network); - -nvinfer1::ILayer* net_conv_bn_mish(int layerIdx, - std::map& block, - std::vector& weights, - std::vector& trtWeights, - int& weightPtr, - int& inputChannels, - nvinfer1::ITensor* input, - nvinfer1::INetworkDefinition* network); - -nvinfer1::ILayer* netAddConvBNLeaky(int layerIdx, std::map& block, - std::vector& weights, - std::vector& trtWeights, int& weightPtr, - int& inputChannels, nvinfer1::ITensor* input, - nvinfer1::INetworkDefinition* network); -nvinfer1::ILayer* netAddUpsample(int layerIdx, std::map& block, - std::vector& weights, - std::vector& trtWeights, int& inputChannels, - nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network); -void printLayerInfo(std::string layerIndex, std::string layerName, std::string layerInput, - std::string layerOutput, std::string weightPtr); - -nvinfer1::ILayer * layer_split(const int n_layer_index_, - nvinfer1::ITensor *input_, - nvinfer1::INetworkDefinition* network); - -std::vector parse_int_list(const std::string s_args_); - -nvinfer1::ILayer* layer_focus(std::vector &trtWeights_, - std::string s_model_name_, - std::map>& map_wts_, - nvinfer1::ITensor* input, - const int out_channels_, - const int kernel_size_, - std::vector& trtWeights, - nvinfer1::INetworkDefinition* network); - -nvinfer1::ILayer * layer_conv_bn_act(std::vector &trtWeights_, - const std::string s_layer_name_, - std::map> &vec_wts_,//conv-bn - nvinfer1::ITensor* 
input_, - nvinfer1::INetworkDefinition* network_, - const int n_filters_, - const int n_kernel_size_ = 3, - const int n_stride_ = 1, - const int group_ =1, - const bool b_padding_ = true, - const bool b_bn_ = true, - const std::string s_act_ = "hardswish"); - -nvinfer1::ILayer * layer_act(nvinfer1::ITensor* input_, - nvinfer1::INetworkDefinition* network_, - const std::string s_act_ = "hardswish"); - -nvinfer1::ILayer * layer_bottleneck_csp(std::vector &trtWeights_, - std::string s_model_name_, - std::map> &map_wts_, - nvinfer1::INetworkDefinition* network_, - nvinfer1::ITensor* input_, - const int c2_, - const int n_depth_ = 1, - const bool b_short_cut_ = true, - const int group_ = 1, - const float e_ = 0.5); - -nvinfer1::ILayer * layer_spp(std::vector &trtWeights_, - std::string s_model_name_, - std::map> &map_wts_, - nvinfer1::INetworkDefinition* network_, - nvinfer1::ITensor* input_, - const int c2_, - const std::vector &vec_args_); - -nvinfer1::ILayer *layer_upsample(std::string s_model_name_, - std::map> &map_wts_, - nvinfer1::INetworkDefinition* network_, - nvinfer1::ITensor* input_, - const int n_scale_); - -nvinfer1::ILayer * layer_conv(std::vector &trtWeights_, - const std::string s_layer_name_, - std::map>&vec_wts_,//conv-bn - nvinfer1::ITensor* input_, - nvinfer1::INetworkDefinition* network_, - const int n_filters_, - const int n_kernel_size_, - const int n_stride_ = 1, - const bool b_bias_ = false, - const int group_ = 1, - const bool b_padding_ = true); -std::vector dims2chw(const nvinfer1::Dims d); - - - -#endif diff --git a/src/Detector/tensorrt_yolo/yolo.cpp b/src/Detector/tensorrt_yolo/yolo.cpp deleted file mode 100644 index 6a20e270a..000000000 --- a/src/Detector/tensorrt_yolo/yolo.cpp +++ /dev/null @@ -1,1266 +0,0 @@ -#include "yolo.h" -#include -#include -#include -#include -#include -#include - -using namespace nvinfer1; -REGISTER_TENSORRT_PLUGIN(DetectPluginCreator); - -Yolo::Yolo( const NetworkInfo& networkInfo, const InferParams& 
inferParams) : - m_NetworkType(networkInfo.networkType), - m_ConfigFilePath(networkInfo.configFilePath), - m_WtsFilePath(networkInfo.wtsFilePath), - m_LabelsFilePath(networkInfo.labelsFilePath), - m_Precision(networkInfo.precision), - m_DeviceType(networkInfo.deviceType), - m_CalibImages(inferParams.calibImages), - m_CalibImagesFilePath(inferParams.calibImagesPath), - m_CalibTableFilePath(networkInfo.calibrationTablePath), - m_InputBlobName(networkInfo.inputBlobName), - m_InputH(0), - m_InputW(0), - m_InputC(0), - m_InputSize(0), - m_ProbThresh(inferParams.probThresh), - m_NMSThresh(inferParams.nmsThresh), - m_PrintPerfInfo(inferParams.printPerfInfo), - m_PrintPredictions(inferParams.printPredictionInfo), - m_BatchSize(inferParams.batchSize), - m_Logger(Logger()), - m_Network(nullptr), - m_Builder(nullptr), - m_ModelStream(nullptr), - m_Engine(nullptr), - m_Context(nullptr), - m_InputBindingIndex(-1), - m_CudaStream(nullptr), - m_PluginFactory(new PluginFactory), - m_TinyMaxpoolPaddingFormula(new YoloTinyMaxpoolPaddingFormula), - _n_yolo_ind(0) -{ - // m_ClassNames = loadListFromTextFile(m_LabelsFilePath); - - m_configBlocks = parseConfigFile(m_ConfigFilePath); - if (m_NetworkType == "yolov5") - { - parse_cfg_blocks_v5(m_configBlocks); - } - else - { - parseConfigBlocks(); - } - m_EnginePath = networkInfo.data_path + "-" + m_Precision + "-batch" + std::to_string(m_BatchSize) + ".engine"; - if (m_Precision == "kFLOAT") - { - if ("yolov5" == m_NetworkType) - { - - create_engine_yolov5(); - } - else - { - createYOLOEngine(); - } - } - else if (m_Precision == "kINT8") - { - Int8EntropyCalibrator calibrator(m_BatchSize, m_CalibImages, m_CalibImagesFilePath, - m_CalibTableFilePath, m_InputSize, m_InputH, m_InputW, - m_InputBlobName); - if ("yolov5" == m_NetworkType) - { - create_engine_yolov5(nvinfer1::DataType::kINT8, &calibrator); - } - else - { - createYOLOEngine(nvinfer1::DataType::kINT8, &calibrator); - } - } - else if (m_Precision == "kHALF") - { - if ("yolov5" == 
m_NetworkType) - { - create_engine_yolov5(nvinfer1::DataType::kHALF, nullptr); - } - else - { - createYOLOEngine(nvinfer1::DataType::kHALF, nullptr); - } - } - else - { - std::cout << "Unrecognized precision type " << m_Precision << std::endl; - assert(0); - } - - assert(m_PluginFactory != nullptr); - m_Engine = loadTRTEngine(m_EnginePath, m_PluginFactory, m_Logger); - assert(m_Engine != nullptr); - m_Context = m_Engine->createExecutionContext(); - assert(m_Context != nullptr); - m_InputBindingIndex = m_Engine->getBindingIndex(m_InputBlobName.c_str()); - assert(m_InputBindingIndex != -1); - assert(m_BatchSize <= static_cast(m_Engine->getMaxBatchSize())); - allocateBuffers(); - NV_CUDA_CHECK(cudaStreamCreate(&m_CudaStream)); - assert(verifyYoloEngine()); -} - -Yolo::~Yolo() -{ - for (auto& tensor : m_OutputTensors) NV_CUDA_CHECK(cudaFreeHost(tensor.hostBuffer)); - for (auto& deviceBuffer : m_DeviceBuffers) NV_CUDA_CHECK(cudaFree(deviceBuffer)); - NV_CUDA_CHECK(cudaStreamDestroy(m_CudaStream)); - if (m_Context) - { - m_Context->destroy(); - m_Context = nullptr; - } - - if (m_Engine) - { - m_Engine->destroy(); - m_Engine = nullptr; - } - - if (m_PluginFactory) - { - m_PluginFactory->destroy(); - m_PluginFactory = nullptr; - } - - m_TinyMaxpoolPaddingFormula.reset(); -} - -std::vector split_layer_index(const std::string &s_,const std::string &delimiter_) -{ - std::vector index; - std::string s = s_; - size_t pos = 0; - std::string token; - while ((pos = s.find(delimiter_)) != std::string::npos) - { - token = s.substr(0, pos); - index.push_back(std::stoi(trim(token))); - s.erase(0, pos + delimiter_.length()); - } - index.push_back(std::stoi(trim(s))); - return index; -} - -void Yolo::createYOLOEngine(const nvinfer1::DataType dataType, Int8EntropyCalibrator* calibrator) -{ - if (fileExists(m_EnginePath))return; - std::vector weights = loadWeights(m_WtsFilePath, m_NetworkType); - std::vector trtWeights; - int weightPtr = 0; - int channels = m_InputC; - m_Builder = 
nvinfer1::createInferBuilder(m_Logger); - nvinfer1::IBuilderConfig* config = m_Builder->createBuilderConfig(); - m_Network = m_Builder->createNetworkV2(0U); - if ((dataType == nvinfer1::DataType::kINT8 && !m_Builder->platformHasFastInt8()) - || (dataType == nvinfer1::DataType::kHALF && !m_Builder->platformHasFastFp16())) - { - std::cout << "Platform doesn't support this precision." << std::endl; - assert(0); - } - - nvinfer1::ITensor* data = m_Network->addInput( - m_InputBlobName.c_str(), nvinfer1::DataType::kFLOAT, - nvinfer1::DimsCHW{static_cast(m_InputC), static_cast(m_InputH), - static_cast(m_InputW)}); - assert(data != nullptr); - // Add elementwise layer to normalize pixel values 0-1 - nvinfer1::Dims divDims{ - 3, - {static_cast(m_InputC), static_cast(m_InputH), static_cast(m_InputW)}, - {nvinfer1::DimensionType::kCHANNEL, nvinfer1::DimensionType::kSPATIAL, - nvinfer1::DimensionType::kSPATIAL}}; - nvinfer1::Weights divWeights{nvinfer1::DataType::kFLOAT, nullptr, - static_cast(m_InputSize)}; - float* divWt = new float[m_InputSize]; - for (uint32_t w = 0; w < m_InputSize; ++w) divWt[w] = 255.0; - divWeights.values = divWt; - trtWeights.push_back(divWeights); - nvinfer1::IConstantLayer* constDivide = m_Network->addConstant(divDims, divWeights); - assert(constDivide != nullptr); - nvinfer1::IElementWiseLayer* elementDivide = m_Network->addElementWise( - *data, *constDivide->getOutput(0), nvinfer1::ElementWiseOperation::kDIV); - assert(elementDivide != nullptr); - - nvinfer1::ITensor* previous = elementDivide->getOutput(0); - std::vector tensorOutputs; - uint32_t outputTensorCount = 0; - - if (/*"yolov3" == m_NetworkType || */"yolov3-tiny" == m_NetworkType) - { - // Set the output dimensions formula for pooling layers - assert(m_TinyMaxpoolPaddingFormula && "Tiny maxpool padding formula not created"); - m_Network->setPoolingOutputDimensionsFormula(m_TinyMaxpoolPaddingFormula.get()); - } - - // build the network using the network API - for (uint32_t i = 0; i < 
m_configBlocks.size(); ++i) - { - // check if num. of channels is correct - assert(getNumChannels(previous) == channels); - std::string layerIndex = "(" + std::to_string(i) + ")"; - - if (m_configBlocks.at(i).at("type") == "net") - { - printLayerInfo("", "layer", " inp_size", " out_size", "weightPtr"); - } - else if (m_configBlocks.at(i).at("type") == "convolutional") - { - std::string inputVol = dimsToString(previous->getDimensions()); - nvinfer1::ILayer* out; - std::string layerType; - //check activation - std::string activation = ""; - if (m_configBlocks.at(i).find("activation") != m_configBlocks.at(i).end()) - { - activation = m_configBlocks[i]["activation"]; - } - // check if batch_norm enabled - if ((m_configBlocks.at(i).find("batch_normalize") != m_configBlocks.at(i).end()) && - ("leaky" == activation)) - { - out = netAddConvBNLeaky(i, m_configBlocks.at(i), weights, trtWeights, weightPtr, - channels, previous, m_Network); - layerType = "conv-bn-leaky"; - } - else if ((m_configBlocks.at(i).find("batch_normalize") != m_configBlocks.at(i).end()) && - ("mish" == activation)) - { - out = net_conv_bn_mish(i, m_configBlocks.at(i), weights, trtWeights, weightPtr, - channels, previous, m_Network); - layerType = "conv-bn-mish"; - } - else// if("linear" == activation) - { - out = netAddConvLinear(i, m_configBlocks.at(i), weights, trtWeights, weightPtr, - channels, previous, m_Network); - layerType = "conv-linear"; - } - previous = out->getOutput(0); - assert(previous != nullptr); - channels = getNumChannels(previous); - std::string outputVol = dimsToString(previous->getDimensions()); - tensorOutputs.push_back(out->getOutput(0)); - printLayerInfo(layerIndex, layerType, inputVol, outputVol, std::to_string(weightPtr)); - } - else if (m_configBlocks.at(i).at("type") == "shortcut") - { - assert(m_configBlocks.at(i).at("activation") == "linear"); - assert(m_configBlocks.at(i).find("from") != m_configBlocks.at(i).end()); - int from = stoi(m_configBlocks.at(i).at("from")); - - 
std::string inputVol = dimsToString(previous->getDimensions()); - // check if indexes are correct - assert((i - 2 >= 0) && (i - 2 < tensorOutputs.size())); - assert((i + from - 1 >= 0) && (i + from - 1 < tensorOutputs.size())); - assert(i + from - 1 < i - 2); - nvinfer1::IElementWiseLayer* ew - = m_Network->addElementWise(*tensorOutputs[i - 2], *tensorOutputs[i + from - 1], - nvinfer1::ElementWiseOperation::kSUM); - assert(ew != nullptr); - std::string ewLayerName = "shortcut_" + std::to_string(i); - ew->setName(ewLayerName.c_str()); - previous = ew->getOutput(0); - assert(previous != nullptr); - std::string outputVol = dimsToString(previous->getDimensions()); - tensorOutputs.push_back(ew->getOutput(0)); - printLayerInfo(layerIndex, "skip", inputVol, outputVol, " -"); - } - else if (m_configBlocks.at(i).at("type") == "yolo") - { - nvinfer1::Dims prevTensorDims = previous->getDimensions(); - // assert(prevTensorDims.d[1] == prevTensorDims.d[2]); - TensorInfo& curYoloTensor = m_OutputTensors.at(outputTensorCount); - curYoloTensor.gridSize = prevTensorDims.d[1]; - curYoloTensor.grid_h = prevTensorDims.d[1]; - curYoloTensor.grid_w = prevTensorDims.d[2]; - curYoloTensor.stride = m_InputW / curYoloTensor.gridSize; - curYoloTensor.stride_h = m_InputH / curYoloTensor.grid_h; - curYoloTensor.stride_w = m_InputW / curYoloTensor.grid_w; - m_OutputTensors.at(outputTensorCount).volume = curYoloTensor.grid_h - * curYoloTensor.grid_w - * (curYoloTensor.numBBoxes * (5 + curYoloTensor.numClasses)); - std::string layerName = "yolo_" + std::to_string(outputTensorCount); - curYoloTensor.blobName = layerName; - nvinfer1::IPlugin* yoloPlugin - = new YoloLayerV3(m_OutputTensors.at(outputTensorCount).numBBoxes, - m_OutputTensors.at(outputTensorCount).numClasses, - m_OutputTensors.at(outputTensorCount).grid_h, - m_OutputTensors.at(outputTensorCount).grid_w); - assert(yoloPlugin != nullptr); - nvinfer1::IPluginLayer* yolo = m_Network->addPlugin(&previous, 1, *yoloPlugin); - assert(yolo != 
nullptr); - yolo->setName(layerName.c_str()); - std::string inputVol = dimsToString(previous->getDimensions()); - previous = yolo->getOutput(0); - assert(previous != nullptr); - previous->setName(layerName.c_str()); - std::string outputVol = dimsToString(previous->getDimensions()); - m_Network->markOutput(*previous); - channels = getNumChannels(previous); - tensorOutputs.push_back(yolo->getOutput(0)); - printLayerInfo(layerIndex, "yolo", inputVol, outputVol, std::to_string(weightPtr)); - ++outputTensorCount; - } - else if (m_configBlocks.at(i).at("type") == "route") - { - size_t found = m_configBlocks.at(i).at("layers").find(","); - if (found != std::string::npos)//concate multi layers - { - std::vector vec_index = split_layer_index(m_configBlocks.at(i).at("layers"), ","); - for (auto &ind_layer:vec_index) - { - if (ind_layer < 0) - { - ind_layer = static_cast(tensorOutputs.size()) + ind_layer; - } - assert(ind_layer < static_cast(tensorOutputs.size()) && ind_layer >= 0); - } - nvinfer1::ITensor** concatInputs - = reinterpret_cast(malloc(sizeof(nvinfer1::ITensor*) * vec_index.size())); - for (size_t ind = 0; ind < vec_index.size(); ++ind) - { - concatInputs[ind] = tensorOutputs[vec_index[ind]]; - } - nvinfer1::IConcatenationLayer* concat - = m_Network->addConcatenation(concatInputs, static_cast(vec_index.size())); - assert(concat != nullptr); - std::string concatLayerName = "route_" + std::to_string(i - 1); - concat->setName(concatLayerName.c_str()); - // concatenate along the channel dimension - concat->setAxis(0); - previous = concat->getOutput(0); - assert(previous != nullptr); - //nvinfer1::Dims debug = previous->getDimensions(); - std::string outputVol = dimsToString(previous->getDimensions()); - int nums = 0; - for (auto &indx:vec_index) - { - nums += getNumChannels(tensorOutputs[indx]); - } - channels = nums; - tensorOutputs.push_back(concat->getOutput(0)); - printLayerInfo(layerIndex, "route", " -", outputVol,std::to_string(weightPtr)); - } - else //single 
layer - { - int idx = std::stoi(trim(m_configBlocks.at(i).at("layers"))); - if (idx < 0) - { - idx = static_cast(tensorOutputs.size()) + idx; - } - assert(idx < static_cast(tensorOutputs.size()) && idx >= 0); - - //route - if (m_configBlocks.at(i).find("groups") == m_configBlocks.at(i).end()) - { - previous = tensorOutputs[idx]; - assert(previous != nullptr); - std::string outputVol = dimsToString(previous->getDimensions()); - // set the output volume depth - channels = getNumChannels(tensorOutputs[idx]); - tensorOutputs.push_back(tensorOutputs[idx]); - printLayerInfo(layerIndex, "route", " -", outputVol, std::to_string(weightPtr)); - - } - //yolov4-tiny route split layer - else - { - if (m_configBlocks.at(i).find("group_id") == m_configBlocks.at(i).end()) - { - assert(0); - } - int chunk_idx = std::stoi(trim(m_configBlocks.at(i).at("group_id"))); - nvinfer1::ILayer* out = layer_split(i, tensorOutputs[idx], m_Network); - std::string inputVol = dimsToString(previous->getDimensions()); - previous = out->getOutput(chunk_idx); - assert(previous != nullptr); - channels = getNumChannels(previous); - std::string outputVol = dimsToString(previous->getDimensions()); - tensorOutputs.push_back(out->getOutput(chunk_idx)); - printLayerInfo(layerIndex,"chunk", inputVol, outputVol, std::to_string(weightPtr)); - } - } - } - else if (m_configBlocks.at(i).at("type") == "upsample") - { - std::string inputVol = dimsToString(previous->getDimensions()); - nvinfer1::ILayer* out = netAddUpsample(i - 1, m_configBlocks[i], weights, trtWeights, - channels, previous, m_Network); - previous = out->getOutput(0); - std::string outputVol = dimsToString(previous->getDimensions()); - tensorOutputs.push_back(out->getOutput(0)); - printLayerInfo(layerIndex, "upsample", inputVol, outputVol, " -"); - } - else if (m_configBlocks.at(i).at("type") == "maxpool") - { - // Add same padding layers - if (m_configBlocks.at(i).at("size") == "2" && m_configBlocks.at(i).at("stride") == "1") - { - 
m_TinyMaxpoolPaddingFormula->addSamePaddingLayer("maxpool_" + std::to_string(i)); - } - std::string inputVol = dimsToString(previous->getDimensions()); - nvinfer1::ILayer* out = netAddMaxpool(i, m_configBlocks.at(i), previous, m_Network); - previous = out->getOutput(0); - assert(previous != nullptr); - std::string outputVol = dimsToString(previous->getDimensions()); - tensorOutputs.push_back(out->getOutput(0)); - printLayerInfo(layerIndex, "maxpool", inputVol, outputVol, std::to_string(weightPtr)); - } - else - { - std::cout << "Unsupported layer type --> \"" << m_configBlocks.at(i).at("type") << "\"" - << std::endl; - assert(0); - } - } - - if (static_cast(weights.size()) != weightPtr) - { - std::cout << "Number of unused weights left : " << static_cast(weights.size()) - weightPtr << std::endl; - assert(0); - } - - // std::cout << "Output blob names :" << std::endl; - // for (auto& tensor : m_OutputTensors) std::cout << tensor.blobName << std::endl; - - // Create and cache the engine if not already present - if (fileExists(m_EnginePath)) - { - std::cout << "Using previously generated plan file located at " << m_EnginePath - << std::endl; - destroyNetworkUtils(trtWeights); - return; - } - - /*std::cout << "Unable to find cached TensorRT engine for network : " << m_NetworkType - << " precision : " << m_Precision << " and batch size :" << m_BatchSize << std::endl;*/ - - m_Builder->setMaxBatchSize(m_BatchSize); - //m_Builder->setMaxWorkspaceSize(1 << 20); - - config->setMaxWorkspaceSize(1 << 20); - if (dataType == nvinfer1::DataType::kINT8) - { - assert((calibrator != nullptr) && "Invalid calibrator for INT8 precision"); - // m_Builder->setInt8Mode(true); - config->setFlag(nvinfer1::BuilderFlag::kINT8); - // m_Builder->setInt8Calibrator(calibrator); - config->setInt8Calibrator(calibrator); - } - else if (dataType == nvinfer1::DataType::kHALF) - { - config->setFlag(nvinfer1::BuilderFlag::kFP16); - // m_Builder->setHalf2Mode(true); - } - - 
m_Builder->allowGPUFallback(true); - int nbLayers = m_Network->getNbLayers(); - int layersOnDLA = 0; - // std::cout << "Total number of layers: " << nbLayers << std::endl; - for (int i = 0; i < nbLayers; i++) - { - nvinfer1::ILayer* curLayer = m_Network->getLayer(i); - if (m_DeviceType == "kDLA" && m_Builder->canRunOnDLA(curLayer)) - { - m_Builder->setDeviceType(curLayer, nvinfer1::DeviceType::kDLA); - layersOnDLA++; - std::cout << "Set layer " << curLayer->getName() << " to run on DLA" << std::endl; - } - } - // std::cout << "Total number of layers on DLA: " << layersOnDLA << std::endl; - - // Build the engine - std::cout << "Building the TensorRT Engine..." << std::endl; - m_Engine = m_Builder->buildEngineWithConfig(*m_Network,*config); - assert(m_Engine != nullptr); - std::cout << "Building complete!" << std::endl; - - // Serialize the engine - writePlanFileToDisk(); - - // destroy - destroyNetworkUtils(trtWeights); -} - -int make_division(const float f_in_, const int n_divisor_) -{ - return ceil(f_in_ / n_divisor_)*n_divisor_; -} - -void parse_bottleneck_args(const std::string s_args_, int &n_out_ch_, bool &b_shourt_cut_) -{ - std::string s_args = s_args_; - while (!s_args.empty()) - { - auto npos = s_args.find_first_of(','); - if (npos != std::string::npos) - { - n_out_ch_ = std::stoi(trim(s_args.substr(0, npos))); - s_args.erase(0, npos + 1); - } - else - { - try - { - n_out_ch_ = std::stoi(trim(s_args.substr(0, npos))); - } - catch (const std::exception&) - { - - } - if ("False" == trim(s_args)) - { - b_shourt_cut_ = false; - } - else if ("True" == trim(s_args)) - { - b_shourt_cut_ = true; - } - break; - } - } -} - -void parse_spp_args(const std::string s_args_, int &n_filters_, std::vector &vec_k_) -{ - std::string s_args = s_args_; - vec_k_.clear(); - size_t pos = 0; - std::string token; - std::string delimiter = ","; - bool w = 0; - while ((pos = s_args.find(delimiter)) != std::string::npos) - { - token = s_args.substr(0, pos); - if (!w) - { - n_filters_ 
= std::stoi(triml(trim(token), "[")); - w = true; - } - else - { - vec_k_.push_back(std::stoi(triml(trim(token), "["))); - } - s_args.erase(0, pos + delimiter.length()); - } - vec_k_.push_back(std::stoi(triml(trim(s_args), "]"))); -} - -std::vector parse_str_list(const std::string s_args_) -{ - std::string s_args = s_args_; - std::vector vec_args; - while (!s_args.empty()) - { - auto npos = s_args.find_first_of(','); - if (npos != std::string::npos) - { - std::string v =trimr( triml(trim(s_args.substr(0, npos)),"'"),"'"); - vec_args.push_back(v); - s_args.erase(0, npos + 1); - } - else - { - std::string v =trimr( triml(trim(s_args.substr(0, npos)),"'"),"'"); - vec_args.push_back(v); - break; - } - } - return vec_args; -} - -void parse_upsample(const std::string s_args_, int &n_filters_) -{ - std::string s_args = s_args_; - size_t pos = 0; - std::string token; - std::string delimiter = ","; - while ((pos = s_args.find(delimiter)) != std::string::npos) - { - token = s_args.substr(0, pos); - try - { - n_filters_ = std::stoi(trim(token)); - } - catch (const std::exception&) - { - - } - s_args.erase(0, pos + delimiter.length()); - } -} - -float round_f(const float in_, const int precision_) -{ - float out; - std::stringstream ss; - ss << std::setprecision(precision_) << in_; - ss >> out; - return out; -} - -void Yolo::create_engine_yolov5(const nvinfer1::DataType dataType, - Int8EntropyCalibrator* calibrator ) -{ - if (fileExists(m_EnginePath))return; - std::map> model_wts; - load_weights_v5(m_WtsFilePath, model_wts); - std::vector trtWeights; - int channels = m_InputC; - m_Builder = nvinfer1::createInferBuilder(m_Logger); - nvinfer1::IBuilderConfig* config = m_Builder->createBuilderConfig(); - m_Network = m_Builder->createNetworkV2(0U); - if ((dataType == nvinfer1::DataType::kINT8 && !m_Builder->platformHasFastInt8()) - || (dataType == nvinfer1::DataType::kHALF && !m_Builder->platformHasFastFp16())) - { - std::cout << "Platform doesn't support this precision." 
<< std::endl; - assert(0); - } - nvinfer1::ITensor* data = m_Network->addInput( - m_InputBlobName.c_str(), - nvinfer1::DataType::kFLOAT, - nvinfer1::DimsCHW{ static_cast(m_InputC), static_cast(m_InputH), - static_cast(m_InputW) }); - assert(data != nullptr); - // Add elementwise layer to normalize pixel values 0-1 - nvinfer1::Dims divDims{ - 3, - { static_cast(m_InputC), static_cast(m_InputH), static_cast(m_InputW) }, - { nvinfer1::DimensionType::kCHANNEL, nvinfer1::DimensionType::kSPATIAL, - nvinfer1::DimensionType::kSPATIAL } }; - - nvinfer1::Weights divWeights{ nvinfer1::DataType::kFLOAT, - nullptr, - static_cast(m_InputSize) }; - float* divWt = new float[m_InputSize]; - for (uint32_t w = 0; w < m_InputSize; ++w) divWt[w] = 255.0; - divWeights.values = divWt; - trtWeights.push_back(divWeights); - nvinfer1::IConstantLayer* constDivide = m_Network->addConstant(divDims, divWeights); - assert(constDivide != nullptr); - nvinfer1::IElementWiseLayer* elementDivide = m_Network->addElementWise( - *data, *constDivide->getOutput(0), nvinfer1::ElementWiseOperation::kDIV); - assert(elementDivide != nullptr); - - nvinfer1::ITensor* previous = elementDivide->getOutput(0); - std::vector tensorOutputs; - int n_output = 3 * (_n_classes + 5); - for (uint32_t i = 0; i < m_configBlocks.size(); ++i) - { - assert(getNumChannels(previous) == channels); - std::string layerIndex = "(" + std::to_string(i) + ")"; - - if ("net" == m_configBlocks.at(i).at("type") ) - { - printLayerInfo("", "layer", " inp_size", " out_size",""); - } - else if ("Focus" == m_configBlocks.at(i).at("type")) - { - std::string inputVol = dimsToString(previous->getDimensions()); - std::vector args = parse_int_list(m_configBlocks[i]["args"]); - int filters = args[0]; - int kernel_size = args[1]; - filters = (n_output != filters) ? make_division(filters*_f_width_multiple, 8) : filters; - nvinfer1::ILayer* out = layer_focus(trtWeights, - "model." 
+ std::to_string(i - 1), - model_wts, - previous, - filters, - kernel_size, - trtWeights, - m_Network); - previous = out->getOutput(0); - assert(previous != nullptr); - channels = getNumChannels(previous); - std::string outputVol = dimsToString(previous->getDimensions()); - tensorOutputs.push_back(out->getOutput(0)); - printLayerInfo(layerIndex,"Focus", inputVol, outputVol, ""); - }//end focus - else if ("Conv" == m_configBlocks.at(i).at("type")) - { - std::string inputVol = dimsToString(previous->getDimensions()); - std::vector args = parse_int_list(m_configBlocks[i]["args"]); - int filters = args[0]; - int kernel_size = args[1]; - int stride = args[2]; - int n_out_channel = (n_output != filters) ? make_division(filters*_f_width_multiple, 8) : filters; - nvinfer1::ILayer * out = layer_conv_bn_act(trtWeights, - "model."+std::to_string(i-1), model_wts, previous, m_Network, n_out_channel, kernel_size, stride); - previous = out->getOutput(0); - assert(previous != nullptr); - channels = getNumChannels(previous); - std::string outputVol = dimsToString(previous->getDimensions()); - tensorOutputs.push_back(out->getOutput(0)); - printLayerInfo(layerIndex, "Conv", inputVol, outputVol, ""); - }//end Conv - else if ("BottleneckCSP" == m_configBlocks.at(i).at("type")) - { - std::string inputVol = dimsToString(previous->getDimensions()); - int filters = 0; - bool short_cut =true; - int number = std::stoi(m_configBlocks[i]["number"]); - parse_bottleneck_args(m_configBlocks[i]["args"], filters, short_cut); - int n_out_channel = (n_output != filters) ? make_division(filters*_f_width_multiple, 8) : filters; - int n_depth = (number > 1) ? (std::max(int(round(_f_depth_multiple *number)), 1)) : number; - std::string s_model_name = "model." 
+ std::to_string(i- 1); - auto out = layer_bottleneck_csp(trtWeights,s_model_name, model_wts, m_Network, previous, n_out_channel, n_depth, short_cut); - previous = out->getOutput(0); - assert(previous != nullptr); - channels = getNumChannels(previous); - std::string outputVol = dimsToString(previous->getDimensions()); - tensorOutputs.push_back(out->getOutput(0)); - printLayerInfo(layerIndex, "BottleneckCSP", inputVol, outputVol, ""); - }// bottleneckCSP - else if ("SPP" == m_configBlocks.at(i).at("type")) - { - std::string inputVol = dimsToString(previous->getDimensions()); - int filters = 0; - std::vector vec_k; - parse_spp_args(m_configBlocks[i]["args"], filters, vec_k); - int n_out_channel = (n_output != filters) ? make_division(filters*_f_width_multiple, 8) : filters; - std::string s_model_name = "model." + std::to_string(i- 1); - auto out = layer_spp(trtWeights, s_model_name, model_wts, m_Network, previous, n_out_channel, vec_k); - previous = out->getOutput(0); - assert(previous != nullptr); - channels = getNumChannels(previous); - std::string outputVol = dimsToString(previous->getDimensions()); - tensorOutputs.push_back(out->getOutput(0)); - printLayerInfo(layerIndex, "SPP", inputVol, outputVol, ""); - }//end SPP - else if ("nn.Upsample" == m_configBlocks.at(i).at("type")) - { - std::string inputVol = dimsToString(previous->getDimensions()); - int scale = 0; - parse_upsample(m_configBlocks[i]["args"], scale); - std::string s_model_name = "model." 
+ std::to_string(i - 1); - auto out = layer_upsample(s_model_name, model_wts, m_Network, previous, scale); - previous = out->getOutput(0); - assert(previous != nullptr); - channels = getNumChannels(previous); - std::string outputVol = dimsToString(previous->getDimensions()); - tensorOutputs.push_back(out->getOutput(0)); - printLayerInfo(layerIndex, "Upsample", inputVol, outputVol, ""); - }//end upsample - else if ("Concat" == m_configBlocks.at(i).at("type")) - { - std::string inputVol = dimsToString(previous->getDimensions()); - int n_dimension = std::stoi(m_configBlocks[i]["args"]); - std::vector vec_from = parse_int_list(m_configBlocks[i]["from"]); - for (auto &f:vec_from) - { - f = f < 0 ? (f + i-1) : f; - } - nvinfer1::ITensor** concat_tensor - = reinterpret_cast(malloc(sizeof(nvinfer1::ITensor*) * vec_from.size() )); - for (size_t j = 0; j < vec_from.size(); ++j) - { - concat_tensor[j] = tensorOutputs[vec_from[j]]; - } - nvinfer1::IConcatenationLayer* concat - =m_Network->addConcatenation(concat_tensor, static_cast(vec_from.size())); - concat->setAxis(n_dimension-1); - assert(concat != nullptr); - previous = concat->getOutput(0); - assert(previous != nullptr); - channels = getNumChannels(previous); - std::string outputVol = dimsToString(previous->getDimensions()); - tensorOutputs.push_back(concat->getOutput(0)); - printLayerInfo(layerIndex, "Concat", inputVol, outputVol, ""); - }//end concat - else if ("Detect" == m_configBlocks.at(i).at("type")) - { - std::string inputVol = dimsToString(previous->getDimensions()); - std::vector vec_from = parse_int_list(m_configBlocks[i]["from"]); - for (auto &f : vec_from) - { - f = f < 0 ? (f + i - 1) : f; - } - std::vector vec_args = parse_str_list(m_configBlocks[i]["args"]); - std::string s_model_name = "model." 
+ std::to_string(i - 1); - for (size_t ind_from = 0; ind_from < vec_from.size(); ++ind_from) - { - int n_filters = (5 + _n_classes) * 3; - int from = vec_from[ind_from]; - auto conv = layer_conv(trtWeights, s_model_name+".m."+std::to_string(ind_from), - model_wts, tensorOutputs[from], m_Network, n_filters,1,1,true); - - auto tensor_conv = conv->getOutput(0); - TensorInfo& curYoloTensor = m_OutputTensors.at(ind_from); - std::vector chw = dims2chw(tensor_conv->getDimensions()); - curYoloTensor.grid_h = chw[1]; - curYoloTensor.grid_w = chw[2]; - curYoloTensor.stride_h = m_InputH / curYoloTensor.grid_h; - curYoloTensor.stride_w = m_InputW / curYoloTensor.grid_w; - m_OutputTensors.at(ind_from).volume = curYoloTensor.grid_h - * curYoloTensor.grid_w - * (curYoloTensor.numBBoxes * (5 + curYoloTensor.numClasses)); - std::string layerName = "yolo_" + std::to_string(ind_from); - curYoloTensor.blobName = layerName; - /*auto creator = getPluginRegistry()->getPluginCreator("DETECT_TRT", "1.0"); - const nvinfer1::PluginFieldCollection* pluginData = creator->getFieldNames(); - nvinfer1::IPluginV2 *yoloPlugin = creator->createPlugin(("detect" + std::to_string(ind_from)).c_str(), pluginData);*/ - nvinfer1::IPluginV2 *yoloPlugin = new nvinfer1::Detect(curYoloTensor.numBBoxes, - curYoloTensor.numClasses, - curYoloTensor.grid_h, - curYoloTensor.grid_w); - assert(yoloPlugin != nullptr); - auto yolo = m_Network->addPluginV2(&tensor_conv, 1, *yoloPlugin); - assert(yolo != nullptr); - - yolo->setName(layerName.c_str()); - inputVol = dimsToString(tensorOutputs[from]->getDimensions()); - previous = yolo->getOutput(0); - assert(previous != nullptr); - previous->setName(layerName.c_str()); - std::string outputVol = dimsToString(previous->getDimensions()); - m_Network->markOutput(*yolo->getOutput(0)); - channels = getNumChannels(yolo->getOutput(0)); - tensorOutputs.push_back(yolo->getOutput(0)); - printLayerInfo(layerIndex, "detect"+std::to_string(ind_from), inputVol, outputVol, ""); - } - 
}//end detect - else - { - std::cout << "Unsupported layer type --> \"" << m_configBlocks.at(i).at("type") << "\"" - << std::endl; - assert(0); - } - } - if (fileExists(m_EnginePath)) - { - std::cout << "Using previously generated plan file located at " << m_EnginePath - << std::endl; - destroyNetworkUtils(trtWeights); - return; - } - - /*std::cout << "Unable to find cached TensorRT engine for network : " << m_NetworkType - << " precision : " << m_Precision << " and batch size :" << m_BatchSize << std::endl;*/ - - m_Builder->setMaxBatchSize(m_BatchSize); - config->setMaxWorkspaceSize(1<<20); - if (dataType == nvinfer1::DataType::kINT8) - { - assert((calibrator != nullptr) && "Invalid calibrator for INT8 precision"); - // m_Builder->setInt8Mode(true); - config->setFlag(nvinfer1::BuilderFlag::kINT8); - // m_Builder->setInt8Calibrator(calibrator); - config->setInt8Calibrator(calibrator); - } - else if (dataType == nvinfer1::DataType::kHALF) - { - config->setFlag(nvinfer1::BuilderFlag::kFP16); - // m_Builder->setHalf2Mode(true); - } - - m_Builder->allowGPUFallback(true); - int nbLayers = m_Network->getNbLayers(); - int layersOnDLA = 0; - // std::cout << "Total number of layers: " << nbLayers << std::endl; - for (int i = 0; i < nbLayers; i++) - { - nvinfer1::ILayer* curLayer = m_Network->getLayer(i); - if (m_DeviceType == "kDLA" && m_Builder->canRunOnDLA(curLayer)) - { - m_Builder->setDeviceType(curLayer, nvinfer1::DeviceType::kDLA); - layersOnDLA++; - std::cout << "Set layer " << curLayer->getName() << " to run on DLA" << std::endl; - } - } - // std::cout << "Total number of layers on DLA: " << layersOnDLA << std::endl; - - // Build the engine - std::cout << "Building the TensorRT Engine..." << std::endl; - m_Engine = m_Builder->buildEngineWithConfig(*m_Network, *config); - assert(m_Engine != nullptr); - std::cout << "Building complete!" 
<< std::endl; - - // Serialize the engine - writePlanFileToDisk(); - - // destroy - destroyNetworkUtils(trtWeights); -} - -void Yolo::load_weights_v5(const std::string s_weights_path_, - std::map> &vec_wts_) -{ - vec_wts_.clear(); - assert(fileExists(s_weights_path_)); - std::cout << "Loading pre-trained weights..." << std::endl; - std::ifstream file(s_weights_path_, std::ios_base::binary); - assert(file.good()); - std::string line; - while (std::getline(file,line)) - { - if(line.size()==0)continue; - std::stringstream iss(line); - std::string wts_name; - iss >> wts_name ; - std::vector weights; - uint32_t n_str; - while(iss >> std::hex >> n_str) - { - weights.push_back(reinterpret_cast(n_str)); - } - vec_wts_[wts_name] = weights; - } - std::cout << "Loading complete!" << std::endl; -} -void Yolo::doInference(const unsigned char* input, const uint32_t batchSize) -{ - //Timer timer; - assert(batchSize <= m_BatchSize && "Image batch size exceeds TRT engines batch size"); - NV_CUDA_CHECK(cudaMemcpyAsync(m_DeviceBuffers.at(m_InputBindingIndex), input, - batchSize * m_InputSize * sizeof(float), cudaMemcpyHostToDevice, - m_CudaStream)); - - m_Context->enqueue(batchSize, m_DeviceBuffers.data(), m_CudaStream, nullptr); - for (auto& tensor : m_OutputTensors) - { - NV_CUDA_CHECK(cudaMemcpyAsync(tensor.hostBuffer, m_DeviceBuffers.at(tensor.bindingIndex), - batchSize * tensor.volume * sizeof(float), - cudaMemcpyDeviceToHost, m_CudaStream)); - } - cudaStreamSynchronize(m_CudaStream); - //timer.out("inference"); -} - -std::vector Yolo::decodeDetections(const int& imageIdx, - const int& imageH, - const int& imageW) -{ - std::vector binfo; - for (auto& tensor : m_OutputTensors) - { - std::vector curBInfo = decodeTensor(imageIdx, imageH, imageW, tensor); - binfo.insert(binfo.end(), curBInfo.begin(), curBInfo.end()); - } - return binfo; -} - -std::vector> Yolo::parseConfigFile(const std::string cfgFilePath) -{ - assert(fileExists(cfgFilePath)); - std::ifstream file(cfgFilePath); - 
assert(file.good()); - std::string line; - std::vector> blocks; - std::map block; - - while (getline(file, line)) - { - if (line.size() == 0) continue; - if (line.front() == '#') continue; - line = trim(line); - if (line.front() == '[') - { - if (block.size() > 0) - { - blocks.push_back(block); - block.clear(); - } - std::string key = "type"; - std::string value = trim(line.substr(1, line.size() - 2)); - block.insert(std::pair(key, value)); - } - else - { - size_t cpos = line.find('='); - std::string key = trim(line.substr(0, cpos)); - std::string value = trim(line.substr(cpos + 1)); - block.insert(std::pair(key, value)); - } - } - blocks.push_back(block); - return blocks; -} - -void Yolo::parseConfigBlocks() -{ - for (auto block : m_configBlocks) - { - if (block.at("type") == "net") - { - assert((block.find("height") != block.end()) - && "Missing 'height' param in network cfg"); - assert((block.find("width") != block.end()) && "Missing 'width' param in network cfg"); - assert((block.find("channels") != block.end()) - && "Missing 'channels' param in network cfg"); - - m_InputH = std::stoul(block.at("height")); - m_InputW = std::stoul(block.at("width")); - m_InputC = std::stoul(block.at("channels")); - // assert(m_InputW == m_InputH); - m_InputSize = m_InputC * m_InputH * m_InputW; - } - else if ((block.at("type") == "region") || (block.at("type") == "yolo")) - { - assert((block.find("num") != block.end()) - && std::string("Missing 'num' param in " + block.at("type") + " layer").c_str()); - assert((block.find("classes") != block.end()) - && std::string("Missing 'classes' param in " + block.at("type") + " layer") - .c_str()); - assert((block.find("anchors") != block.end()) - && std::string("Missing 'anchors' param in " + block.at("type") + " layer") - .c_str()); - - TensorInfo outputTensor; - std::string anchorString = block.at("anchors"); - while (!anchorString.empty()) - { - size_t npos = anchorString.find_first_of(','); - if (npos != std::string::npos) - { - float 
anchor = std::stof(trim(anchorString.substr(0, npos))); - outputTensor.anchors.push_back(anchor); - anchorString.erase(0, npos + 1); - } - else - { - float anchor = std::stof(trim(anchorString)); - outputTensor.anchors.push_back(anchor); - break; - } - } - - if ((m_NetworkType == "yolov3") || - (m_NetworkType == "yolov3-tiny") || - (m_NetworkType == "yolov4") || - (m_NetworkType == "yolov4-tiny")) - { - assert((block.find("mask") != block.end()) - && std::string("Missing 'mask' param in " + block.at("type") + " layer") - .c_str()); - - std::string maskString = block.at("mask"); - while (!maskString.empty()) - { - size_t npos = maskString.find_first_of(','); - if (npos != std::string::npos) - { - uint32_t mask = std::stoul(trim(maskString.substr(0, npos))); - outputTensor.masks.push_back(mask); - maskString.erase(0, npos + 1); - } - else - { - uint32_t mask = std::stoul(trim(maskString)); - outputTensor.masks.push_back(mask); - break; - } - } - } - - outputTensor.numBBoxes = outputTensor.masks.size() > 0 - ? 
outputTensor.masks.size() - : std::stoul(trim(block.at("num"))); - outputTensor.numClasses = std::stoul(block.at("classes")); - if (m_ClassNames.empty()) - { - for (uint32_t i=0;i< outputTensor.numClasses;++i) - { - m_ClassNames.push_back(std::to_string(i)); - } - } - outputTensor.blobName = "yolo_" + std::to_string(_n_yolo_ind); - outputTensor.gridSize = (m_InputH / 32) * pow(2, _n_yolo_ind); - outputTensor.grid_h = (m_InputH / 32) * pow(2, _n_yolo_ind); - outputTensor.grid_w = (m_InputW / 32) * pow(2, _n_yolo_ind); - if (m_NetworkType == "yolov4")//pan - { - outputTensor.gridSize = (m_InputH / 32) * pow(2, 2-_n_yolo_ind); - outputTensor.grid_h = (m_InputH / 32) * pow(2, 2-_n_yolo_ind); - outputTensor.grid_w = (m_InputW / 32) * pow(2, 2-_n_yolo_ind); - } - outputTensor.stride = m_InputH / outputTensor.gridSize; - outputTensor.stride_h = m_InputH / outputTensor.grid_h; - outputTensor.stride_w = m_InputW / outputTensor.grid_w; - outputTensor.volume = outputTensor.grid_h* outputTensor.grid_w - *(outputTensor.numBBoxes*(5 + outputTensor.numClasses)); - m_OutputTensors.push_back(outputTensor); - _n_yolo_ind++; - } - } -} - -void Yolo::parse_cfg_blocks_v5(const std::vector> &vec_block_) -{ - std::vector vec_anchors; - for (const auto &block : vec_block_) - { - if ("net" == block.at("type")) - { - assert((block.find("height") != block.end()) - && "Missing 'height' param in network cfg"); - assert((block.find("width") != block.end()) && "Missing 'width' param in network cfg"); - assert((block.find("nc") != block.end()) - && "Missing 'nc' param in network cfg"); - assert((block.find("depth_multiple") != block.end()) - && "Missing 'depth_multiple' param in network cfg"); - assert((block.find("width_multiple") != block.end()) - && "Missing 'width_multiple' param in network cfg"); - assert((block.find("anchors") != block.end()) - && "Missing 'anchors' param in network cfg"); - assert((block.find("channels") != block.end()) - && "Missing 'channels' param in network cfg"); - - 
m_InputH = std::stoul(trim(block.at("height"))); - m_InputW = std::stoul(trim(block.at("width"))); - m_InputC = std::stoul(trim(block.at("channels"))); - m_BatchSize = std::stoi(trim(block.at("batch"))); - _f_depth_multiple = std::stof(trim(block.at("depth_multiple"))); - _f_width_multiple = std::stof(trim(block.at("width_multiple"))); - _n_classes = std::stoi(trim(block.at("nc"))); - m_InputSize = m_InputC * m_InputH * m_InputW; - std::string anchorString = block.at("anchors"); - while (!anchorString.empty()) - { - auto npos = anchorString.find_first_of(','); - if (npos != std::string::npos) - { - float anchor = std::stof(trim(anchorString.substr(0, npos))); - vec_anchors.push_back(anchor); - anchorString.erase(0, npos + 1); - } - else - { - float anchor = std::stof(trim(anchorString)); - vec_anchors.push_back(anchor); - break; - } - } - } - else if ("Detect" == block.at("type")) - { - assert((block.find("from") != block.end()) - && "Missing 'from' param in network cfg"); - std::string from = block.at("from"); - std::vector vec_from{}; - while (!from.empty()) - { - auto npos = from.find_first_of(","); - if (std::string::npos != npos) - { - vec_from.push_back(std::stoi(trim(from.substr(0, npos)))); - from.erase(0, npos + 1); - } - else - { - vec_from.push_back(std::stoi(trim(from))); - break; - } - } - - for (uint32_t i = 0; i < vec_from.size(); ++i) - { - TensorInfo outputTensor; - outputTensor.anchors = vec_anchors; - outputTensor.masks = std::vector{3*i,3*i+1,3*i+2}; - outputTensor.numBBoxes = static_cast(outputTensor.masks.size()); - outputTensor.numClasses = _n_classes; - outputTensor.blobName = "yolo_" + std::to_string(i); - outputTensor.grid_h = (m_InputH / 32) * pow(2 ,2-i); - outputTensor.grid_w = (m_InputW / 32) * pow(2 ,2-i); - outputTensor.stride_h = m_InputH / outputTensor.grid_h; - outputTensor.stride_w = m_InputW / outputTensor.grid_w; - outputTensor.volume = outputTensor.grid_h * outputTensor.grid_w - *(outputTensor.numBBoxes*(5 + 
outputTensor.numClasses)); - - m_OutputTensors.push_back(outputTensor); - - if (m_ClassNames.empty()) - { - for (uint32_t j = 0; j < outputTensor.numClasses; ++j) - { - m_ClassNames.push_back(std::to_string(j)); - } - } - } - - } - } -} -void Yolo::allocateBuffers() -{ - m_DeviceBuffers.resize(m_Engine->getNbBindings(), nullptr); - assert(m_InputBindingIndex != -1 && "Invalid input binding index"); - NV_CUDA_CHECK(cudaMalloc(&m_DeviceBuffers.at(m_InputBindingIndex), - m_BatchSize * m_InputSize * sizeof(float))); - - for (auto& tensor : m_OutputTensors) - { - tensor.bindingIndex = m_Engine->getBindingIndex(tensor.blobName.c_str()); - assert((tensor.bindingIndex != -1) && "Invalid output binding index"); - NV_CUDA_CHECK(cudaMalloc(&m_DeviceBuffers.at(tensor.bindingIndex), - m_BatchSize * tensor.volume * sizeof(float))); - NV_CUDA_CHECK( - cudaMallocHost(&tensor.hostBuffer, tensor.volume * m_BatchSize * sizeof(float))); - } -} - -bool Yolo::verifyYoloEngine() -{ - assert((m_Engine->getNbBindings() == (1 + m_OutputTensors.size()) - && "Binding info doesn't match between cfg and engine file \n")); - - for (auto tensor : m_OutputTensors) - { - assert(!strcmp(m_Engine->getBindingName(tensor.bindingIndex), tensor.blobName.c_str()) - && "Blobs names dont match between cfg and engine file \n"); - assert(get3DTensorVolume(m_Engine->getBindingDimensions(tensor.bindingIndex)) - == tensor.volume - && "Tensor volumes dont match between cfg and engine file \n"); - } - - assert(m_Engine->bindingIsInput(m_InputBindingIndex) && "Incorrect input binding index \n"); - assert(m_Engine->getBindingName(m_InputBindingIndex) == m_InputBlobName - && "Input blob name doesn't match between config and engine file"); - assert(get3DTensorVolume(m_Engine->getBindingDimensions(m_InputBindingIndex)) == m_InputSize); - return true; -} - -void Yolo::destroyNetworkUtils(std::vector& trtWeights) -{ - if (m_Network) m_Network->destroy(); - if (m_Engine) m_Engine->destroy(); - if (m_Builder) 
m_Builder->destroy(); - if (m_ModelStream) m_ModelStream->destroy(); - - // deallocate the weights - for (uint32_t i = 0; i < trtWeights.size(); ++i) - { - if (trtWeights[i].count > 0) free(const_cast(trtWeights[i].values)); - } -} - -void Yolo::writePlanFileToDisk() -{ - std::cout << "Serializing the TensorRT Engine..." << std::endl; - assert(m_Engine && "Invalid TensorRT Engine"); - m_ModelStream = m_Engine->serialize(); - assert(m_ModelStream && "Unable to serialize engine"); - assert(!m_EnginePath.empty() && "Enginepath is empty"); - - // write data to output file - std::stringstream gieModelStream; - gieModelStream.seekg(0, gieModelStream.beg); - gieModelStream.write(static_cast(m_ModelStream->data()), m_ModelStream->size()); - std::ofstream outFile; - outFile.open(m_EnginePath, std::ios::binary | std::ios::out); - outFile << gieModelStream.rdbuf(); - outFile.close(); - - std::cout << "Serialized plan file cached at location : " << m_EnginePath << std::endl; -} - diff --git a/src/Detector/tensorrt_yolo/yolo.h b/src/Detector/tensorrt_yolo/yolo.h deleted file mode 100644 index c77ff6f5e..000000000 --- a/src/Detector/tensorrt_yolo/yolo.h +++ /dev/null @@ -1,246 +0,0 @@ -/** -MIT License - -Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -* -*/ - -#ifndef _YOLO_H_ -#define _YOLO_H_ - -#include "calibrator.h" -#include "plugin_factory.h" -#include "trt_utils.h" - -#include "NvInfer.h" -#include "NvInferPlugin.h" -#include "NvInferRuntimeCommon.h" -#include "cuda_runtime_api.h" -#include -#include -#include -#include "class_timer.hpp" -#include "opencv2/opencv.hpp" -#include "detect.h" -//#include "logging.h" - -/** - * Holds all the file paths required to build a network. - */ -struct NetworkInfo -{ - std::string networkType; - std::string configFilePath; - std::string wtsFilePath; - std::string labelsFilePath; - std::string precision; - std::string deviceType; - std::string calibrationTablePath; - std::string enginePath; - std::string inputBlobName; - std::string data_path; -}; - -/** - * Holds information about runtime inference params. - */ -struct InferParams -{ - bool printPerfInfo = false; - bool printPredictionInfo = false; - std::string calibImages; - std::string calibImagesPath; - float probThresh = 0.5f; - float nmsThresh = 0.5f; - uint32_t batchSize = 1; -}; - -/** - * Holds information about an output tensor of the yolo network. 
- */ -struct TensorInfo -{ - std::string blobName; - uint32_t stride{0}; - uint32_t stride_h{0}; - uint32_t stride_w{0}; - uint32_t gridSize{0}; - uint32_t grid_h{ 0 }; - uint32_t grid_w{ 0 }; - uint32_t numClasses{0}; - uint32_t numBBoxes{0}; - uint64_t volume{0}; - std::vector masks; - std::vector anchors; - int bindingIndex{-1}; - float* hostBuffer{nullptr}; -}; - -class Yolo -{ -public: - std::string getNetworkType() const { return m_NetworkType; } - float getNMSThresh() const { return m_NMSThresh; } - std::string getClassName(const int& label) const { return m_ClassNames.at(label); } - int getClassId(const int& label) const { return m_ClassIds.at(label); } - uint32_t getInputH() const { return m_InputH; } - uint32_t getInputW() const { return m_InputW; } - uint32_t getNumClasses() const { return static_cast(m_ClassNames.size()); } - bool isPrintPredictions() const { return m_PrintPredictions; } - bool isPrintPerfInfo() const { return m_PrintPerfInfo; } - void doInference(const unsigned char* input, const uint32_t batchSize); - std::vector decodeDetections(const int& imageIdx, - const int& imageH, - const int& imageW); - - virtual ~Yolo(); - -protected: - Yolo( const NetworkInfo& networkInfo, const InferParams& inferParams); - std::string m_EnginePath; - const std::string m_NetworkType; - const std::string m_ConfigFilePath; - const std::string m_WtsFilePath; - const std::string m_LabelsFilePath; - const std::string m_Precision; - const std::string m_DeviceType; - const std::string m_CalibImages; - const std::string m_CalibImagesFilePath; - std::string m_CalibTableFilePath; - const std::string m_InputBlobName; - std::vector m_OutputTensors; - std::vector> m_configBlocks; - uint32_t m_InputH; - uint32_t m_InputW; - uint32_t m_InputC; - uint64_t m_InputSize; - uint32_t _n_classes = 0; - float _f_depth_multiple = 0; - float _f_width_multiple = 0; - const float m_ProbThresh; - const float m_NMSThresh; - std::vector m_ClassNames; - // Class ids for coco benchmarking 
- const std::vector m_ClassIds{ - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, - 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, - 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, - 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90}; - const bool m_PrintPerfInfo; - const bool m_PrintPredictions; - // TRT specific members - //Logger glogger; - uint32_t m_BatchSize = 1; - nvinfer1::INetworkDefinition* m_Network; - nvinfer1::IBuilder* m_Builder ; - nvinfer1::IHostMemory* m_ModelStream; - nvinfer1::ICudaEngine* m_Engine; - nvinfer1::IExecutionContext* m_Context; - std::vector m_DeviceBuffers; - int m_InputBindingIndex; - cudaStream_t m_CudaStream; - PluginFactory* m_PluginFactory; - std::unique_ptr m_TinyMaxpoolPaddingFormula; - - virtual std::vector decodeTensor(const int imageIdx, const int imageH, - const int imageW, const TensorInfo& tensor) - = 0; - - inline void addBBoxProposal(const float bx, const float by, const float bw, const float bh, - const uint32_t stride, const float scalingFactor, - const float /*xOffset*/, const float /*yOffset*/, - const int maxIndex, const float maxProb, - const uint32_t image_w, const uint32_t image_h, - std::vector& binfo) - { - BBoxInfo bbi; - bbi.box = convertBBoxNetRes(bx, by, bw, bh, stride, m_InputW, m_InputH); - if ((bbi.box.x1 > bbi.box.x2) || (bbi.box.y1 > bbi.box.y2)) - { - return; - } - convertBBoxImgRes(scalingFactor, m_InputW,m_InputH,image_w,image_h, bbi.box); - bbi.label = maxIndex; - bbi.prob = maxProb; - bbi.classId = getClassId(maxIndex); - binfo.push_back(bbi); - } - - BBox convert_bbox_res(const float& bx, const float& by, const float& bw, const float& bh, - const uint32_t& stride_h_, const uint32_t& stride_w_, const uint32_t& netW, const uint32_t& netH) - { - BBox b; - // Restore coordinates to network input resolution - float x = bx * stride_w_; - float y = by * stride_h_; - - b.x1 = x - bw / 
2; - b.x2 = x + bw / 2; - - b.y1 = y - bh / 2; - b.y2 = y + bh / 2; - - b.x1 = clamp(b.x1, 0.f, static_cast(netW)); - b.x2 = clamp(b.x2, 0.f, static_cast(netW)); - b.y1 = clamp(b.y1, 0.f, static_cast(netH)); - b.y2 = clamp(b.y2, 0.f, static_cast(netH)); - - return b; - } - inline void add_bbox_proposal(const float bx, const float by, const float bw, const float bh, - const uint32_t stride_h_, const uint32_t stride_w_, const int maxIndex, const float maxProb, - const uint32_t image_w, const uint32_t image_h, - std::vector& binfo) - { - BBoxInfo bbi; - bbi.box = convert_bbox_res(bx, by, bw, bh, stride_h_, stride_w_, m_InputW, m_InputH); - if ((bbi.box.x1 > bbi.box.x2) || (bbi.box.y1 > bbi.box.y2)) - { - return; - } - convertBBoxImgRes(0, m_InputW, m_InputH, image_w, image_h, bbi.box); - bbi.label = maxIndex; - bbi.prob = maxProb; - bbi.classId = getClassId(maxIndex); - binfo.push_back(bbi); - }; -private: - Logger m_Logger; - void createYOLOEngine(const nvinfer1::DataType dataType = nvinfer1::DataType::kFLOAT, - Int8EntropyCalibrator* calibrator = nullptr); - void create_engine_yolov5(const nvinfer1::DataType dataType = nvinfer1::DataType::kFLOAT, - Int8EntropyCalibrator* calibrator = nullptr); - std::vector> parseConfigFile(const std::string cfgFilePath); - void parseConfigBlocks(); - void parse_cfg_blocks_v5(const std::vector> &vec_block_); - void allocateBuffers(); - bool verifyYoloEngine(); - void destroyNetworkUtils(std::vector& trtWeights); - void writePlanFileToDisk(); - -private: - Timer _timer; - void load_weights_v5(const std::string s_weights_path_, std::map> &vec_wts_); - - int _n_yolo_ind = 0; -}; - -#endif // _YOLO_H_ diff --git a/src/Detector/tensorrt_yolo/yolo_config_parser.cpp b/src/Detector/tensorrt_yolo/yolo_config_parser.cpp deleted file mode 100644 index 4bdc152f8..000000000 --- a/src/Detector/tensorrt_yolo/yolo_config_parser.cpp +++ /dev/null @@ -1,206 +0,0 @@ -/** -MIT License - -Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -* -*/ - -#include "yolo_config_parser.h" - -#include -#include - -//DEFINE_string(network_type, "not-specified", -// "[REQUIRED] Type of network architecture. Choose from yolov2, yolov2-tiny, " -// "yolov3 and yolov3-tiny"); -//DEFINE_string(config_file_path, "not-specified", "[REQUIRED] Darknet cfg file"); -//DEFINE_string(wts_file_path, "not-specified", "[REQUIRED] Darknet weights file"); -//DEFINE_string(labels_file_path, "not-specified", "[REQUIRED] Object class labels file"); -//DEFINE_string(precision, "kFLOAT", -// "[OPTIONAL] Inference precision. Choose from kFLOAT, kHALF and kINT8."); -//DEFINE_string(deviceType, "kGPU", -// "[OPTIONAL] The device that this layer/network will execute on. Choose from kGPU and kDLA(only for kHALF)."); -//DEFINE_string(calibration_table_path, "not-specified", -// "[OPTIONAL] Path to pre-generated calibration table. 
If flag is not set, a new calib " -// "table --calibration.table will be generated"); -//DEFINE_string(engine_file_path, "not-specified", -// "[OPTIONAL] Path to pre-generated engine(PLAN) file. If flag is not set, a new " -// "engine --.engine will be generated"); -//DEFINE_string(input_blob_name, "data", -// "[OPTIONAL] Name of the input layer in the tensorRT engine file"); -//DEFINE_bool(print_perf_info, false, "[OPTIONAl] Print performance info on the console"); -//DEFINE_bool(print_prediction_info, false, "[OPTIONAL] Print detection info on the console"); -//DEFINE_string( -// test_images, "data/test_images.txt", -// "[REQUIRED] Text file containing absolute paths or filenames of all the images to be " -// "used for inference. If only filenames are provided, their corresponding source directory " -// "has to be provided through 'test_images_path' flag"); -//DEFINE_string(test_images_path, "not-specified", -// "[OPTIONAL] absolute source directory path of the list of images supplied through " -// "'test_images' flag"); -//DEFINE_string(calibration_images, "data/calibration_images.txt", -// "[OPTIONAL] Text file containing absolute paths or filenames of calibration images. " -// "Flag required if precision is kINT8 and there is not pre-generated calibration " -// "table. 
If only filenames are provided, their corresponding source directory has to " -// "be provided through 'calibration_images_path' flag"); -//DEFINE_string(calibration_images_path, "not-specified", -// "[OPTIONAL] absolute source directory path of the list of images supplied through " -// "'calibration_images' flag"); -//DEFINE_uint64(batch_size, 1, "[OPTIONAL] Batch size for the inference engine."); -//DEFINE_double(prob_thresh, 0.01, "[OPTIONAL] Probability threshold for detected objects"); -//DEFINE_double(nms_thresh, 0.5, "[OPTIONAL] IOU threshold for bounding box candidates"); -//DEFINE_bool(do_benchmark, false, -// "[OPTIONAL] Generate JSON file with detection info in coco benchmark format"); -//DEFINE_bool(save_detections, false, -// "[OPTIONAL] Flag to save images overlayed with objects detected."); -//DEFINE_bool(view_detections, false, -// "[OPTIONAL] Flag to view images overlayed with objects detected."); -//DEFINE_string(save_detections_path, "not-specified", -// "[OPTIONAL] Path where the images overlayed with bounding boxes are to be saved"); -//DEFINE_bool( -// decode, true, -// "[OPTIONAL] Decode the detections. This can be set to false if benchmarking network for " -// "throughput only"); -//DEFINE_uint64(seed, std::time(0), "[OPTIONAL] Seed for the random number generator"); -//DEFINE_bool(shuffle_test_set, false, -// "[OPTIONAL] Shuffle the test set images before running inference"); -// -static bool isFlagDefault(std::string flag) { return flag == "not-specified" ? 
true : false; } - -static bool networkTypeValidator(const char* flagName, std::string value) -{ - /*if (((FLAGS_network_type) == "yolov2") || ((FLAGS_network_type) == "yolov2-tiny") - || ((FLAGS_network_type) == "yolov3") || ((FLAGS_network_type) == "yolov3-tiny")) - return true; - - else - std::cout << "Invalid value for --" << flagName << ": " << value << std::endl; -*/ - return false; -} - -static bool precisionTypeValidator(const char* flagName, std::string value) -{ - /* if ((FLAGS_precision == "kFLOAT") || (FLAGS_precision == "kINT8") - || (FLAGS_precision == "kHALF")) - return true; - else - std::cout << "Invalid value for --" << flagName << ": " << value << std::endl;*/ - return false; -} - -static bool verifyRequiredFlags() -{ - /* assert(!isFlagDefault(FLAGS_network_type) - && "Type of network is required and is not specified."); - assert(!isFlagDefault(FLAGS_config_file_path) - && "Darknet cfg file path is required and not specified."); - assert(!isFlagDefault(FLAGS_wts_file_path) - && "Darknet weights file is required and not specified."); - assert(!isFlagDefault(FLAGS_labels_file_path) && "Lables file is required and not specified."); - assert((FLAGS_wts_file_path.find(".weights") != std::string::npos) - && "wts file not recognised. File needs to be of '.weights' format"); - assert((FLAGS_config_file_path.find(".cfg") != std::string::npos) - && "config file not recognised. File needs to be of '.cfg' format"); - if (!(networkTypeValidator("network_type", FLAGS_network_type) - && precisionTypeValidator("precision", FLAGS_precision))) - return false; - */ - return true; -} - -//void yoloConfigParserInit(int argc, char** argv) -//{ - /*gflags::ParseCommandLineFlags(&argc, &argv, false); - assert(verifyRequiredFlags()); - - FLAGS_calibration_images_path - = isFlagDefault(FLAGS_calibration_images_path) ? "" : FLAGS_calibration_images_path; - FLAGS_test_images_path = isFlagDefault(FLAGS_test_images_path) ? 
"" : FLAGS_test_images_path; - - if (isFlagDefault(FLAGS_engine_file_path)) - { - int npos = FLAGS_wts_file_path.find(".weights"); - assert(npos != std::string::npos - && "wts file file not recognised. File needs to be of '.weights' format"); - std::string dataPath = FLAGS_wts_file_path.substr(0, npos); - FLAGS_engine_file_path = dataPath + "-" + FLAGS_precision + "-" + FLAGS_deviceType + "-batch" - + std::to_string(FLAGS_batch_size) + ".engine"; - } - - if (isFlagDefault(FLAGS_calibration_table_path)) - { - int npos = FLAGS_wts_file_path.find(".weights"); - - assert(npos != std::string::npos - && "wts file file not recognised. File needs to be of '.weights' format"); - std::string dataPath = FLAGS_wts_file_path.substr(0, npos); - FLAGS_calibration_table_path = dataPath + "-calibration.table"; - }*/ -//} - -//NetworkInfo getYoloNetworkInfo() -//{ - /*return NetworkInfo{FLAGS_network_type, FLAGS_config_file_path, FLAGS_wts_file_path, - FLAGS_labels_file_path, FLAGS_precision, FLAGS_deviceType, - FLAGS_calibration_table_path, FLAGS_engine_file_path, FLAGS_input_blob_name};*/ -//} - -//InferParams getYoloInferParams() -//{ - - //return InferParams{FLAGS_print_perf_info, FLAGS_print_prediction_info, - // FLAGS_calibration_images, FLAGS_calibration_images_path, - // (float)FLAGS_prob_thresh, (float)FLAGS_nms_thresh}; -//} - -//uint64_t getSeed() { return FLAGS_seed; } -// -//std::string getNetworkType() { return FLAGS_network_type; } -// -//std::string getPrecision() { return FLAGS_precision; } -// -//std::string getTestImages() -//{ -// size_t extIndex = FLAGS_test_images.find_last_of(".txt"); -// assert(extIndex != std::string::npos -// && "test_images file not recognised. 
File needs to be of type '.txt' format"); -// return FLAGS_test_images; -//} -// -//std::string getTestImagesPath() { return FLAGS_test_images_path; } -// -//bool getDecode() { return FLAGS_decode; } -//bool getDoBenchmark() { return FLAGS_do_benchmark; } -//bool getViewDetections() { return FLAGS_view_detections; } -//bool getSaveDetections() -//{ -// if (FLAGS_save_detections) -// assert(!isFlagDefault(FLAGS_save_detections_path) -// && "save_detections path has to be set if save_detections is set to true"); -// return FLAGS_save_detections; -//} -// -//std::string getSaveDetectionsPath() { return FLAGS_save_detections_path; } -// -//uint32_t getBatchSize() { return FLAGS_batch_size; } -// -//bool getShuffleTestSet() { return FLAGS_shuffle_test_set; } diff --git a/src/Detector/tensorrt_yolo/yolo_config_parser.h b/src/Detector/tensorrt_yolo/yolo_config_parser.h deleted file mode 100644 index ed5086c6d..000000000 --- a/src/Detector/tensorrt_yolo/yolo_config_parser.h +++ /dev/null @@ -1,52 +0,0 @@ -/** -MIT License - -Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -* -*/ - -#ifndef _YOLO_CONFIG_PARSER_ -#define _YOLO_CONFIG_PARSER_ - -#include "yolo.h" - -#include -//#include - -// Init to be called at the very beginning to verify all config params are valid -//void yoloConfigParserInit(int argc, char** argv); - -//NetworkInfo getYoloNetworkInfo(); -//InferParams getYoloInferParams(); -//uint64_t getSeed(); -//std::string getNetworkType(); -//std::string getPrecision(); -//std::string getTestImages(); -//std::string getTestImagesPath(); -//bool getDecode(); -//bool getDoBenchmark(); -//bool getViewDetections(); -//bool getSaveDetections(); -//std::string getSaveDetectionsPath(); -//uint32_t getBatchSize(); -//bool getShuffleTestSet(); - -#endif //_YOLO_CONFIG_PARSER_ diff --git a/src/Detector/tensorrt_yolo/yoloplugin_lib.cpp b/src/Detector/tensorrt_yolo/yoloplugin_lib.cpp deleted file mode 100644 index af3d46c37..000000000 --- a/src/Detector/tensorrt_yolo/yoloplugin_lib.cpp +++ /dev/null @@ -1,212 +0,0 @@ -/** -MIT License - -Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -* -*/ - -#include "yoloplugin_lib.h" -#include "yolo_config_parser.h" -#include "yolov2.h" -#include "yolov3.h" - -#include -#include - -static void decodeBatchDetections(const YoloPluginCtx* ctx, std::vector& outputs) -{ - for (uint32_t p = 0; p < ctx->batchSize; ++p) - { - YoloPluginOutput* out = new YoloPluginOutput; - std::vector binfo = ctx->inferenceNetwork->decodeDetections( - p, ctx->initParams.processingHeight, ctx->initParams.processingWidth); - std::vector remaining; - /*std::vector remaining = nmsAllClasses( - ctx->inferenceNetwork->getNMSThresh(), binfo, ctx->inferenceNetwork->getNumClasses(),);*/ - out->numObjects = remaining.size(); - assert(out->numObjects <= MAX_OBJECTS_PER_FRAME); - for (uint32_t j = 0; j < remaining.size(); ++j) - { - BBoxInfo b = remaining.at(j); - YoloPluginObject obj; - obj.left = static_cast(b.box.x1); - obj.top = static_cast(b.box.y1); - obj.width = static_cast(b.box.x2 - b.box.x1); - obj.height = static_cast(b.box.y2 - b.box.y1); - strcpy(obj.label, ctx->inferenceNetwork->getClassName(b.label).c_str()); - out->object[j] = obj; - - if (ctx->inferParams.printPredictionInfo) - { - printPredictions(b, ctx->inferenceNetwork->getClassName(b.label)); - } - } - outputs.at(p) = out; - } -} - -static void dsPreProcessBatchInput(const std::vector& cvmats, cv::Mat& batchBlob, - const int& processingHeight, const int& processingWidth, - const int& inputH, const int& inputW) -{ - - std::vector batch_images( - cvmats.size(), cv::Mat(cv::Size(processingWidth, processingHeight), 
CV_8UC3)); - for (uint32_t i = 0; i < cvmats.size(); ++i) - { - cv::Mat imageResize, imageBorder, inputImage; - inputImage = *cvmats.at(i); - int maxBorder = std::max(inputImage.size().width, inputImage.size().height); - - assert((maxBorder - inputImage.size().height) % 2 == 0); - assert((maxBorder - inputImage.size().width) % 2 == 0); - - int yOffset = (maxBorder - inputImage.size().height) / 2; - int xOffset = (maxBorder - inputImage.size().width) / 2; - - // Letterbox and resize to maintain aspect ratio - cv::copyMakeBorder(inputImage, imageBorder, yOffset, yOffset, xOffset, xOffset, - cv::BORDER_CONSTANT, cv::Scalar(127.5, 127.5, 127.5)); - cv::resize(imageBorder, imageResize, cv::Size(inputW, inputH), 0, 0, cv::INTER_CUBIC); - batch_images.at(i) = imageResize; - } - - batchBlob = cv::dnn::blobFromImages(batch_images, 1.0, cv::Size(inputW, inputH), - cv::Scalar(0.0, 0.0, 0.0), false); -} - -YoloPluginCtx* YoloPluginCtxInit(YoloPluginInitParams* initParams, size_t batchSize) -{ - char** gArgV = new char*[2]; - gArgV[0] = new char[64]; - gArgV[1] = new char[512]; - strcpy(gArgV[0], "yolo_plugin_ctx"); - strcpy(gArgV[1], std::string("--flagfile=" + initParams->configFilePath).c_str()); -// yoloConfigParserInit(2, gArgV); - - YoloPluginCtx* ctx = new YoloPluginCtx; - ctx->initParams = *initParams; - ctx->batchSize = batchSize; - ctx->networkInfo;// = getYoloNetworkInfo(); - ctx->inferParams;// = getYoloInferParams(); - uint32_t configBatchSize = 0;// = getBatchSize(); - - // Check if config batchsize matches buffer batch size in the pipeline - if (ctx->batchSize != configBatchSize) - { - std::cerr - << "WARNING: Batchsize set in config file overriden by pipeline. New batchsize is " - << ctx->batchSize << std::endl; - auto npos = ctx->networkInfo.wtsFilePath.find(".weights"); - assert(npos != std::string::npos - && "wts file file not recognised. 
File needs to be of '.weights' format"); - std::string dataPath = ctx->networkInfo.wtsFilePath.substr(0, npos); - ctx->networkInfo.enginePath = dataPath + "-" + ctx->networkInfo.precision + "-batch" - + std::to_string(ctx->batchSize) + ".engine"; - } - - if ((ctx->networkInfo.networkType == "yolov2") - || (ctx->networkInfo.networkType == "yolov2-tiny")) - { - ctx->inferenceNetwork = new YoloV2(ctx->networkInfo, ctx->inferParams); - } - else if ((ctx->networkInfo.networkType == "yolov3") - || (ctx->networkInfo.networkType == "yolov3-tiny")) - { - ctx->inferenceNetwork = new YoloV3( ctx->networkInfo, ctx->inferParams); - } - else - { - std::cerr << "ERROR: Unrecognized network type " << ctx->networkInfo.networkType - << std::endl; - std::cerr << "Network Type has to be one among the following : yolov2, yolov2-tiny, yolov3 " - "and yolov3-tiny" - << std::endl; - return nullptr; - } - - delete[] gArgV; - return ctx; -} - -std::vector YoloPluginProcess(YoloPluginCtx* ctx, std::vector& cvmats) -{ - assert((cvmats.size() <= ctx->batchSize) && "Image batch size exceeds TRT engines batch size"); - std::vector outputs = std::vector(cvmats.size(), nullptr); - cv::Mat preprocessedImages; - std::chrono::duration preElapsed, inferElapsed, postElapsed; - std::chrono::time_point preStart, preEnd, inferStart, inferEnd, postStart, postEnd; - - if (cvmats.size() > 0) - { -// preStart = std::chrono::high_resolution_clock::now(); - dsPreProcessBatchInput(cvmats, preprocessedImages, ctx->initParams.processingWidth, - ctx->initParams.processingHeight, ctx->inferenceNetwork->getInputH(), - ctx->inferenceNetwork->getInputW()); -// preEnd = std::chrono::high_resolution_clock::now(); - -// inferStart = std::chrono::high_resolution_clock::now(); - ctx->inferenceNetwork->doInference(preprocessedImages.data, cvmats.size()); - // inferEnd = std::chrono::high_resolution_clock::now(); - -// postStart = std::chrono::high_resolution_clock::now(); - decodeBatchDetections(ctx, outputs); -// postEnd = 
std::chrono::high_resolution_clock::now(); - } - - // Perf calc - if (ctx->inferParams.printPerfInfo) - { - preElapsed - = ((preEnd - preStart) + (preEnd - preStart) / 1000000.0) - * 1000; - inferElapsed = ((inferEnd - inferStart) - + (inferEnd - inferStart) / 1000000.0) - * 1000; - postElapsed = ((postEnd - postStart) - + (postEnd - postStart) / 1000000.0) - * 1000; - - ctx->inferTime += inferElapsed.count(); - ctx->preTime += preElapsed.count(); - ctx->postTime += postElapsed.count(); - ctx->imageCount += cvmats.size(); - } - return outputs; -} - -void YoloPluginCtxDeinit(YoloPluginCtx* ctx) -{ - if (ctx->inferParams.printPerfInfo) - { - std::cout << "Yolo Plugin Perf Summary " << std::endl; - std::cout << "Batch Size : " << ctx->batchSize << std::endl; - std::cout << std::fixed << std::setprecision(4) - << "PreProcess : " << ctx->preTime / ctx->imageCount - << " ms Inference : " << ctx->inferTime / ctx->imageCount - << " ms PostProcess : " << ctx->postTime / ctx->imageCount << " ms Total : " - << (ctx->preTime + ctx->postTime + ctx->inferTime) / ctx->imageCount - << " ms per Image" << std::endl; - } - - delete ctx->inferenceNetwork; - delete ctx; -} diff --git a/src/Detector/tensorrt_yolo/yoloplugin_lib.h b/src/Detector/tensorrt_yolo/yoloplugin_lib.h deleted file mode 100644 index 32ef742bf..000000000 --- a/src/Detector/tensorrt_yolo/yoloplugin_lib.h +++ /dev/null @@ -1,97 +0,0 @@ -/** -MIT License - -Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
-* -*/ -#ifndef __YOLOPLUGIN_LIB__ -#define __YOLOPLUGIN_LIB__ - -//#include - -#include "calibrator.h" -#include "trt_utils.h" -#include "yolo.h" - -//#ifdef __cplusplus -//extern "C" { -//#endif - -#define MAX_OBJECTS_PER_FRAME 32 -typedef struct YoloPluginCtx YoloPluginCtx; -typedef struct YoloPluginOutput YoloPluginOutput; -// Init parameters structure as input, required for instantiating yoloplugin_lib -typedef struct -{ - // Width at which frame/object will be scaled - int processingWidth; - // height at which frame/object will be scaled - int processingHeight; - // Flag to indicate whether operating on crops of full frame - int fullFrame; - // Plugin config file - std::string configFilePath; -} YoloPluginInitParams; - -struct YoloPluginCtx -{ - YoloPluginInitParams initParams; - NetworkInfo networkInfo; - InferParams inferParams; - Yolo* inferenceNetwork; - - // perf vars - double inferTime = 0.0, preTime = 0.0, postTime = 0.0; - uint32_t batchSize = 0; - uint64_t imageCount = 0; -}; - -// Detected/Labelled object structure, stores bounding box info along with label -typedef struct -{ - int left; - int top; - int width; - int height; - char label[64]; -} YoloPluginObject; - -// Output data returned after processing -struct YoloPluginOutput -{ - int numObjects; - YoloPluginObject object[MAX_OBJECTS_PER_FRAME]; -}; - -// Initialize library context -YoloPluginCtx* YoloPluginCtxInit(YoloPluginInitParams* initParams, size_t batchSize); - -// Dequeue processed output -std::vector YoloPluginProcess(YoloPluginCtx* ctx, std::vector& cvmats); - -// Deinitialize library context -void YoloPluginCtxDeinit(YoloPluginCtx* ctx); - -//#ifdef __cplusplus -//} -//#endif - -#endif diff --git a/src/Detector/tensorrt_yolo/yolov2.cpp b/src/Detector/tensorrt_yolo/yolov2.cpp deleted file mode 100644 index 44dfa83a1..000000000 --- a/src/Detector/tensorrt_yolo/yolov2.cpp +++ /dev/null @@ -1,92 +0,0 @@ -/** -MIT License - -Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
-* -*/ - -#include "yolov2.h" -#include - -YoloV2::YoloV2(const NetworkInfo& networkInfo, - const InferParams& inferParams) : - Yolo(networkInfo, inferParams){} - -std::vector YoloV2::decodeTensor(const int imageIdx, const int imageH, const int imageW, - const TensorInfo& tensor) -{ - float scalingFactor - = std::min(static_cast(m_InputW) / imageW, static_cast(m_InputH) / imageH); - float xOffset = (m_InputW - scalingFactor * imageW) / 2; - float yOffset = (m_InputH - scalingFactor * imageH) / 2; - - float* detections = &tensor.hostBuffer[imageIdx * tensor.volume]; - - std::vector binfo; - for (uint32_t y = 0; y < tensor.gridSize; y++) - { - for (uint32_t x = 0; x < tensor.gridSize; x++) - { - for (uint32_t b = 0; b < tensor.numBBoxes; b++) - { - const float pw = tensor.anchors[2 * b]; - const float ph = tensor.anchors[2 * b + 1]; - const int numGridCells = tensor.gridSize * tensor.gridSize; - const int bbindex = y * tensor.gridSize + x; - const float bx - = x + detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 0)]; - const float by - = y + detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 1)]; - const float bw = pw - * exp(detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 2)]); - const float bh = ph - * exp(detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 3)]); - - const float objectness - = detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 4)]; - float maxProb = 0.0f; - int maxIndex = -1; - - for (uint32_t i = 0; i < tensor.numClasses; i++) - { - float prob - = detections[bbindex - + numGridCells * (b * (5 + tensor.numClasses) + (5 + i))]; - - if (prob > maxProb) - { - maxProb = prob; - maxIndex = i; - } - } - - maxProb = objectness * maxProb; - - if (maxProb > m_ProbThresh) - { - addBBoxProposal(bx, by, bw, bh, tensor.stride, scalingFactor, xOffset, yOffset, - maxIndex, maxProb,imageW,imageH, binfo); - } - } - } - } - return binfo; -} diff --git 
a/src/Detector/tensorrt_yolo/yolov2.h b/src/Detector/tensorrt_yolo/yolov2.h deleted file mode 100644 index 42d6b17e3..000000000 --- a/src/Detector/tensorrt_yolo/yolov2.h +++ /dev/null @@ -1,45 +0,0 @@ -/** -MIT License - -Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -* -*/ - -#ifndef _YOLO_V2_ -#define _YOLO_V2_ - -#include "yolo.h" - -#include -#include -#include - -class YoloV2 : public Yolo -{ -public: - YoloV2( const NetworkInfo& networkInfo, const InferParams& inferParams); - -private: - std::vector decodeTensor(const int imageIdx, const int imageH, const int imageW, - const TensorInfo& tensor) override; -}; - -#endif // _YOLO_V2_ diff --git a/src/Detector/tensorrt_yolo/yolov3.cpp b/src/Detector/tensorrt_yolo/yolov3.cpp deleted file mode 100644 index 1a1fb597f..000000000 --- a/src/Detector/tensorrt_yolo/yolov3.cpp +++ /dev/null @@ -1,93 +0,0 @@ -/** -MIT License - -Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
-* -*/ -#include "yolov3.h" - -YoloV3::YoloV3(const NetworkInfo& networkInfo, - const InferParams& inferParams) : - Yolo( networkInfo, inferParams){} - -std::vector YoloV3::decodeTensor(const int imageIdx, - const int imageH, - const int imageW, - const TensorInfo& tensor) -{ - /*float scalingFactor - = std::min(static_cast(m_InputW) / imageW, static_cast(m_InputH) / imageH); - float xOffset = (m_InputW - scalingFactor * imageW) / 2; - float yOffset = (m_InputH - scalingFactor * imageH) / 2;*/ - - const float* detections = &tensor.hostBuffer[imageIdx * tensor.volume]; - - std::vector binfo; - for (uint32_t y = 0; y < tensor.grid_h; ++y) - { - for (uint32_t x = 0; x < tensor.grid_w; ++x) - { - for (uint32_t b = 0; b < tensor.numBBoxes; ++b) - { - const float pw = tensor.anchors[tensor.masks[b] * 2]; - const float ph = tensor.anchors[tensor.masks[b] * 2 + 1]; - - const int numGridCells = tensor.grid_h * tensor.grid_w; - const int bbindex = y * tensor.grid_w+ x; - const float bx - = x + detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 0)]; - - const float by - = y + detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 1)]; - const float bw - = pw * detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 2)]; - const float bh - = ph * detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 3)]; - - const float objectness - = detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 4)]; - - float maxProb = 0.0f; - int maxIndex = -1; - - for (uint32_t i = 0; i < tensor.numClasses; ++i) - { - float prob - = (detections[bbindex - + numGridCells * (b * (5 + tensor.numClasses) + (5 + i))]); - - if (prob > maxProb) - { - maxProb = prob; - maxIndex = i; - } - } - maxProb = objectness * maxProb; - - if (maxProb > m_ProbThresh) - { - add_bbox_proposal(bx, by, bw, bh, tensor.stride_h, tensor.stride_w,maxIndex, maxProb, imageW, imageH, binfo); - } - } - } - } - return binfo; -} diff --git 
a/src/Detector/tensorrt_yolo/yolov3.h b/src/Detector/tensorrt_yolo/yolov3.h deleted file mode 100644 index 28fc037e2..000000000 --- a/src/Detector/tensorrt_yolo/yolov3.h +++ /dev/null @@ -1,45 +0,0 @@ -/** -MIT License - -Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
-* -*/ - -#ifndef _YOLO_V3_ -#define _YOLO_V3_ - -#include "yolo.h" - -#include -#include -#include - -class YoloV3 : public Yolo -{ -public: - YoloV3(const NetworkInfo& networkInfo, const InferParams& inferParams); - -private: - std::vector decodeTensor(const int imageIdx, const int imageH, const int imageW, - const TensorInfo& tensor) override; -}; - -#endif // _YOLO_V3_ diff --git a/src/Detector/tensorrt_yolo/yolov4.cpp b/src/Detector/tensorrt_yolo/yolov4.cpp deleted file mode 100644 index dffabc732..000000000 --- a/src/Detector/tensorrt_yolo/yolov4.cpp +++ /dev/null @@ -1,66 +0,0 @@ - -#include "yolov4.h" - -YoloV4::YoloV4( const NetworkInfo &network_info_, - const InferParams &infer_params_) : - Yolo(network_info_, infer_params_) {} - -std::vector YoloV4::decodeTensor(const int imageIdx, const int imageH, const int imageW, const TensorInfo& tensor) -{ - /*float scalingFactor - = std::min(static_cast(m_InputW) / imageW, static_cast(m_InputH) / imageH); - float xOffset = (m_InputW - scalingFactor * imageW) / 2; - float yOffset = (m_InputH - scalingFactor * imageH) / 2;*/ - - const float* detections = &tensor.hostBuffer[imageIdx * tensor.volume]; - - std::vector binfo; - for (uint32_t y = 0; y < tensor.grid_h; ++y) - { - for (uint32_t x = 0; x < tensor.grid_w; ++x) - { - for (uint32_t b = 0; b < tensor.numBBoxes; ++b) - { - const float pw = tensor.anchors[tensor.masks[b] * 2]; - const float ph = tensor.anchors[tensor.masks[b] * 2 + 1]; - - const int numGridCells = tensor.grid_h * tensor.grid_w; - const int bbindex = y * tensor.grid_w + x; - const float bx - = x + detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 0)]; - const float by - = y + detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 1)]; - const float bw - = pw * detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 2)]; - const float bh - = ph * detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 3)]; - - const float objectness - = 
detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 4)]; - - float maxProb = 0.0f; - int maxIndex = -1; - - for (uint32_t i = 0; i < tensor.numClasses; ++i) - { - float prob - = (detections[bbindex - + numGridCells * (b * (5 + tensor.numClasses) + (5 + i))]); - - if (prob > maxProb) - { - maxProb = prob; - maxIndex = i; - } - } - maxProb = objectness * maxProb; - - if (maxProb > m_ProbThresh) - { - add_bbox_proposal(bx, by, bw, bh, tensor.stride_h, tensor.stride_w,maxIndex, maxProb, imageW, imageH, binfo); - } - } - } - } - return binfo; -} diff --git a/src/Detector/tensorrt_yolo/yolov4.h b/src/Detector/tensorrt_yolo/yolov4.h deleted file mode 100644 index d900c04ca..000000000 --- a/src/Detector/tensorrt_yolo/yolov4.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef CLASS_YOLOV4_H_ -#define CLASS_YOLOV4_H_ -#include "yolo.h" -class YoloV4 :public Yolo -{ -public: - YoloV4( - const NetworkInfo &network_info_, - const InferParams &infer_params_); -private: - std::vector decodeTensor(const int imageIdx, - const int imageH, - const int imageW, - const TensorInfo& tensor) override; -}; - -#endif diff --git a/src/Detector/tensorrt_yolo/yolov5.cpp b/src/Detector/tensorrt_yolo/yolov5.cpp deleted file mode 100644 index 224a01750..000000000 --- a/src/Detector/tensorrt_yolo/yolov5.cpp +++ /dev/null @@ -1,67 +0,0 @@ - -#include "yolov5.h" - - -YoloV5::YoloV5( - const NetworkInfo &network_info_, - const InferParams &infer_params_) : - Yolo( network_info_, infer_params_) {} - - - - -std::vector YoloV5::decodeTensor(const int imageIdx, const int imageH, const int imageW, const TensorInfo& tensor) -{ - const float* detections = &tensor.hostBuffer[imageIdx * tensor.volume]; - - std::vector binfo; - for (uint32_t y = 0; y < tensor.grid_h; ++y) - { - for (uint32_t x = 0; x < tensor.grid_w; ++x) - { - for (uint32_t b = 0; b < tensor.numBBoxes; ++b) - { - const float pw = tensor.anchors[tensor.masks[b] * 2]; - const float ph = tensor.anchors[tensor.masks[b] * 2 + 1]; - - const 
int numGridCells = tensor.grid_h * tensor.grid_w; - const int bbindex = y * tensor.grid_w+ x; - const float bx - = x + detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 0)]; - - const float by - = y + detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 1)]; - const float bw - = pw * detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 2)]; - const float bh - = ph * detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 3)]; - - const float objectness - = detections[bbindex + numGridCells * (b * (5 + tensor.numClasses) + 4)]; - - float maxProb = 0.0f; - int maxIndex = -1; - - for (uint32_t i = 0; i < tensor.numClasses; ++i) - { - float prob - = (detections[bbindex - + numGridCells * (b * (5 + tensor.numClasses) + (5 + i))]); - - if (prob > maxProb) - { - maxProb = prob; - maxIndex = i; - } - } - maxProb = objectness * maxProb; - - if (maxProb > m_ProbThresh) - { - add_bbox_proposal(bx, by, bw, bh, tensor.stride_h, tensor.stride_w,maxIndex, maxProb, imageW, imageH, binfo); - } - } - } - } - return binfo; -} diff --git a/src/Detector/tensorrt_yolo/yolov5.h b/src/Detector/tensorrt_yolo/yolov5.h deleted file mode 100644 index db487de33..000000000 --- a/src/Detector/tensorrt_yolo/yolov5.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef CLASS_YOLOV5_H_ -#define CLASS_YOLOV5_H_ -#include "yolo.h" -class YoloV5 :public Yolo -{ -public: - YoloV5( - const NetworkInfo &network_info_, - const InferParams &infer_params_); - - BBox convert_bbox_res(const float& bx, const float& by, const float& bw, const float& bh, - const uint32_t& stride_h_, const uint32_t& stride_w_, const uint32_t& netW, const uint32_t& netH) - { - BBox b; - // Restore coordinates to network input resolution - float x = bx * stride_w_; - float y = by * stride_h_; - - b.x1 = x - bw / 2; - b.x2 = x + bw / 2; - - b.y1 = y - bh / 2; - b.y2 = y + bh / 2; - - b.x1 = clamp(b.x1, 0, static_cast(netW)); - b.x2 = clamp(b.x2, 0, static_cast(netW)); - b.y1 = clamp(b.y1, 
0, static_cast(netH)); - b.y2 = clamp(b.y2, 0, static_cast(netH)); - - return b; - } - - -private: - std::vector decodeTensor(const int imageIdx, - const int imageH, - const int imageW, - const TensorInfo& tensor) override; -}; - -#endif diff --git a/src/Detector/vibe_src/vibe.cpp b/src/Detector/vibe_src/vibe.cpp index 1c9fb6b24..39ab68bdc 100644 --- a/src/Detector/vibe_src/vibe.cpp +++ b/src/Detector/vibe_src/vibe.cpp @@ -21,7 +21,7 @@ namespace vibe } /// - cv::Vec VIBE::getRndNeighbor(int i, int j) + cv::Vec2i VIBE::getRndNeighbor(int i, int j) { int neighbor_count = (m_pixelNeighbor * 2 + 1) * (m_pixelNeighbor * 2 + 1); int rnd = m_rng[m_rngIdx = (m_rngIdx + 1) % RANDOM_BUFFER_SIZE] % neighbor_count; diff --git a/src/Detector/vibe_src/vibe.hpp b/src/Detector/vibe_src/vibe.hpp index 00b36ed5d..46b7b109f 100644 --- a/src/Detector/vibe_src/vibe.hpp +++ b/src/Detector/vibe_src/vibe.hpp @@ -37,7 +37,7 @@ class VIBE unsigned int m_rng[RANDOM_BUFFER_SIZE]; int m_rngIdx = 0; - cv::Vec getRndNeighbor(int i, int j); + cv::Vec2i getRndNeighbor(int i, int j); void init(const cv::Mat& img); }; } diff --git a/src/Tracker/BaseTracker.cpp b/src/Tracker/BaseTracker.cpp new file mode 100644 index 000000000..ec68b858f --- /dev/null +++ b/src/Tracker/BaseTracker.cpp @@ -0,0 +1,680 @@ +#include +#include "BaseTracker.h" +#include "byte_track/BYTETracker.h" +#include "ShortPathCalculator.h" +#include "EmbeddingsCalculator.hpp" +#include "track.h" + +/// +/// \brief The CTracker class +/// +class CTracker final : public BaseTracker +{ +public: + CTracker(const TrackerSettings& settings); + CTracker(const CTracker&) = delete; + CTracker(CTracker&&) = delete; + CTracker& operator=(const CTracker&) = delete; + CTracker& operator=(CTracker&&) = delete; + + ~CTracker(void) = default; + + void Update(const regions_t& regions, cv::UMat currFrame, time_point_t frameTime) override; + + void GetTracks(std::vector& tracks) const override; + void GetRemovedTracks(std::vector& trackIDs) const 
override; + +private: + TrackerSettings m_settings; + + tracks_t m_tracks; + time_point_t m_lastFrameTime; + + track_id_t m_nextTrackID; + std::vector m_removedObjects; + + cv::UMat m_prevFrame; + + std::unique_ptr m_SPCalculator; + std::map> m_embCalculators; + + void CreateDistaceMatrix(const regions_t& regions, const std::vector& regionEmbeddings, distMatrix_t& costMatrix, track_t maxPossibleCost, track_t& maxCost); + void UpdateTrackingState(const regions_t& regions, cv::UMat currFrame, time_point_t frameTime); + void CalcEmbeddins(std::vector& regionEmbeddings, const regions_t& regions, cv::UMat currFrame) const; + + track_t GetEllipseDist(const CTrack& trackRef, const CRegion& reg); +}; +// ---------------------------------------------------------------------- + +/// +/// \brief CTracker::CTracker +/// Manage tracks: create, remove, update. +/// \param settings +/// +CTracker::CTracker(const TrackerSettings& settings) + : m_settings(settings) +{ + m_SPCalculator.reset(); + SPSettings spSettings = { settings.m_distThres, 12 }; + switch (m_settings.m_matchType) + { + case tracking::MatchHungrian: + m_SPCalculator = std::make_unique(spSettings); + break; + case tracking::MatchLAPJV: + m_SPCalculator = std::make_unique(spSettings); + break; + } + assert(m_SPCalculator); + + for (const auto& embParam : settings.m_embeddings) + { + std::shared_ptr embCalc = std::make_shared(); + if (!embCalc->Initialize(embParam.m_embeddingCfgName, embParam.m_embeddingWeightsName, embParam.m_inputLayer)) + { + std::cerr << "EmbeddingsCalculator initialization error: " << embParam.m_embeddingCfgName << ", " << embParam.m_embeddingWeightsName << std::endl; + } + else + { + for (auto objType : embParam.m_objectTypes) + { + m_embCalculators.try_emplace((objtype_t)objType, embCalc); + } + } + } +} + +/// +/// \brief GetTracks +/// \return +/// +void CTracker::GetTracks(std::vector& tracks) const +{ + tracks.clear(); + + if (m_tracks.size() > tracks.capacity()) + 
tracks.reserve(m_tracks.size()); + for (const auto& track : m_tracks) + { + tracks.emplace_back(track->ConstructObject(m_lastFrameTime)); + } +} + +/// +/// \brief GetRemovedTracks +/// \return +/// +void CTracker::GetRemovedTracks(std::vector& trackIDs) const +{ + trackIDs.assign(std::begin(m_removedObjects), std::end(m_removedObjects)); +} + +/// +/// \brief CTracker::Update +/// \param regions +/// \param currFrame +/// \param fps +/// +void CTracker::Update(const regions_t& regions, cv::UMat currFrame, time_point_t frameTime) +{ + m_lastFrameTime = frameTime; + m_removedObjects.clear(); + + UpdateTrackingState(regions, currFrame, frameTime); + + currFrame.copyTo(m_prevFrame); +} + +#define DRAW_DBG_ASSIGNMENT 0 + +/// +/// \brief CTracker::UpdateTrackingState +/// \param regions +/// \param currFrame +/// \param fps +/// +void CTracker::UpdateTrackingState(const regions_t& regions, + cv::UMat currFrame, + time_point_t frameTime) +{ + const size_t colsTracks = m_tracks.size(); // Tracking objects + const size_t rowsRegions = regions.size(); // Detections or regions + + assignments_t assignmentT2R(colsTracks, -1); // Assignments: index - track, value - region + + std::vector regionEmbeddings; + CalcEmbeddins(regionEmbeddings, regions, currFrame); + +#if DRAW_DBG_ASSIGNMENT + std::cout << "CTracker::UpdateTrackingState: m_tracks = " << colsTracks << ", regions = " << rowsRegions << std::endl; + + int fontType = cv::FONT_HERSHEY_TRIPLEX; + double fontSize = (currFrame.cols < 1000) ? 
0.4 : 0.6; + cv::Scalar colorRegionEllow(100, 100, 100); + cv::Scalar colorMatchedAboveThreshRed(0, 0, 255); + cv::Scalar colorMatchedGreen(0, 255, 0); + cv::Scalar colorMatchedNearMargenta(255, 0, 255); + cv::Scalar colorNotMatchedNearWhite(255, 255, 255); + cv::Scalar colorUnknownBlue(255, 0, 0); + + auto DrawRRect = [](cv::Mat& img, const cv::RotatedRect& rr, const cv::Scalar& cl, int thikness) + { + cv::Point2f rectPoints[4]; + rr.points(rectPoints); + for (int i = 0; i < 4; ++i) + { + cv::line(img, rectPoints[i], rectPoints[(i + 1) % 4], cl, thikness, cv::LINE_4); + } + }; + + cv::Mat dbgAssignment = currFrame.getMat(cv::ACCESS_READ).clone(); + { +#if 0 + cv::Mat foreground(dbgAssignment.size(), CV_8UC1, cv::Scalar(0, 0, 100)); + for (const auto& track : m_tracks) + { + cv::ellipse(foreground, track->GetLastRect(), cv::Scalar(255, 255, 255), cv::FILLED); + } + + const int chans = dbgAssignment.channels(); + const int height = dbgAssignment.rows; +#pragma omp parallel for + for (int y = 0; y < height; ++y) + { + uchar* imgPtr = dbgAssignment.ptr(y); + const uchar* frgrndPtr = foreground.ptr(y); + for (int x = 0; x < dbgAssignment.cols; ++x) + { + for (int ci = chans - 1; ci < chans; ++ci) + { + imgPtr[ci] = cv::saturate_cast(imgPtr[ci] + frgrndPtr[0]); + } + imgPtr += chans; + ++frgrndPtr; + } + } +#endif + for (const auto& reg : regions) + { + DrawRRect(dbgAssignment, reg.m_rrect, colorRegionEllow, 2); + } + } +#endif + + if (!m_tracks.empty()) + { + // Distance matrix between all tracks to all regions +#if DRAW_DBG_ASSIGNMENT + std::cout << "CTracker::UpdateTrackingState: Distance matrix between all tracks to all regions" << std::endl; +#endif + distMatrix_t costMatrix(colsTracks * rowsRegions); + const track_t maxPossibleCost = std::max(static_cast(1.), static_cast(currFrame.cols * currFrame.rows)); + track_t maxCost = 0; + CreateDistaceMatrix(regions, regionEmbeddings, costMatrix, maxPossibleCost, maxCost); +#if DRAW_DBG_ASSIGNMENT + std::cout << 
"CTracker::UpdateTrackingState: maxPossibleCost = " << maxPossibleCost << ", maxCost = " << maxCost << std::endl; + std::cout << "costMatrix: " << cv::Mat_(rowsRegions, colsTracks, costMatrix.data()) << std::endl; +#endif + + // Solving assignment problem (shortest paths) +#if DRAW_DBG_ASSIGNMENT + std::cout << "CTracker::UpdateTrackingState: Solving assignment problem (shortest paths)" << std::endl; +#endif + m_SPCalculator->Solve(costMatrix, colsTracks, rowsRegions, assignmentT2R, maxCost); + +#if 0 + { + static size_t saveSolveNum = 0; + std::ofstream resCSV("mt_example" + std::to_string(saveSolveNum) + ".csv"); + for (size_t r = 0; r < rowsRegions; ++r) + { + for (size_t c = 0; c < colsTracks; ++c) + { + if (c == colsTracks - 1) + resCSV << std::fixed << std::setw(2) << std::setprecision(2) << costMatrix[c + r * colsTracks] << std::endl; + else + resCSV << std::fixed << std::setw(2) << std::setprecision(2) << costMatrix[c + r * colsTracks] << ", "; + } + } + std::ofstream resSol("mt_example" + std::to_string(saveSolveNum) + ".sol"); + resSol << maxCost << std::endl; + for (size_t r = 0; r < assignmentT2R.size(); ++r) + { + resSol << assignmentT2R[r] << std::endl; + } + ++saveSolveNum; + } +#endif + + // Clean assignment from pairs with large distance +#if DRAW_DBG_ASSIGNMENT + std::cout << "CTracker::UpdateTrackingState: Clean assignment from pairs with large distance" << std::endl; +#endif + for (size_t i = 0; i < assignmentT2R.size(); ++i) + { +#if DRAW_DBG_ASSIGNMENT + std::stringstream ss; + if (assignmentT2R[i] != -1) + { + ss << m_tracks[i]->GetID().ID2Str() << "-" << assignmentT2R[i] << ": " << std::fixed << std::setprecision(2) << costMatrix[i + assignmentT2R[i] * colsTracks]; + + if (costMatrix[i + assignmentT2R[i] * colsTracks] > m_settings.m_distThres) + { + ss << ">" << m_settings.m_distThres; + cv::line(dbgAssignment, m_tracks[i]->GetLastRect().center, regions[assignmentT2R[i]].m_rrect.center, colorMatchedAboveThreshRed, 2); + 
DrawRRect(dbgAssignment, m_tracks[i]->LastRegion().m_rrect, colorMatchedAboveThreshRed, 1); + } + else + { + ss << "<" << m_settings.m_distThres; + cv::line(dbgAssignment, m_tracks[i]->GetLastRect().center, regions[assignmentT2R[i]].m_rrect.center, colorMatchedGreen, 1); + DrawRRect(dbgAssignment, m_tracks[i]->LastRegion().m_rrect, colorMatchedGreen, 1); + } + + for (size_t ri = 0; ri < regions.size(); ++ri) + { + if (static_cast(ri) != assignmentT2R[i] && costMatrix[i + ri * colsTracks] < 1) + { + std::stringstream liness; + liness << std::fixed << std::setprecision(2) << costMatrix[i + ri * colsTracks]; + auto p1 = m_tracks[i]->GetLastRect().center; + auto p2 = regions[ri].m_rrect.center; + cv::line(dbgAssignment, p1, p2, colorMatchedNearMargenta, 1); + cv::putText(dbgAssignment, liness.str(), cv::Point((p1.x + p2.x) / 2, (p1.y + p2.y) / 2), fontType, fontSize, colorMatchedNearMargenta, 1, 8); + } + } + } + else + { + // If track have no assigned detect, then increment skipped frames counter. 
+ DrawRRect(dbgAssignment, m_tracks[i]->LastRegion().m_rrect, colorNotMatchedNearWhite, 1); + for (size_t ri = 0; ri < regions.size(); ++ri) + { + if (costMatrix[i + ri * colsTracks] < 1) + { + std::stringstream liness; + liness << std::fixed << std::setprecision(2) << costMatrix[i + ri * colsTracks]; + auto p1 = m_tracks[i]->GetLastRect().center; + auto p2 = regions[ri].m_rrect.center; + cv::line(dbgAssignment, p1, p2, colorNotMatchedNearWhite, 1); + cv::putText(dbgAssignment, liness.str(), cv::Point((p1.x + p2.x) / 2, (p1.y + p2.y) / 2), fontType, fontSize, colorNotMatchedNearWhite, 1, 8); + } + } + } + if (ss.str().length() > 0) + { + auto brect = m_tracks[i]->LastRegion().m_brect; + cv::putText(dbgAssignment, ss.str(), cv::Point(brect.x, brect.y), fontType, fontSize, colorUnknownBlue, 1, 8); + } +#endif + + if (assignmentT2R[i] != -1 && costMatrix[i + assignmentT2R[i] * colsTracks] > m_settings.m_distThres) + assignmentT2R[i] = -1; + } + + // If track didn't get detects long time, remove it +#if DRAW_DBG_ASSIGNMENT + std::cout << "CTracker::UpdateTrackingState: If track did not get detects long time, remove it" << std::endl; +#endif + for (size_t i = 0; i < m_tracks.size();) + { + if (m_tracks[i]->GetLostPeriod(frameTime) > m_settings.m_maximumAllowedLostTime || + m_tracks[i]->IsOutOfTheFrame() || + m_tracks[i]->IsStaticTimeout(frameTime, m_settings.m_maxStaticTime - m_settings.m_minStaticTime)) + { + m_removedObjects.push_back(m_tracks[i]->GetID()); +#if DRAW_DBG_ASSIGNMENT + std::cout << "Remove: " << m_tracks[i]->GetID().ID2Str() << ": lost = " << m_tracks[i]->GetLostPeriod(frameTime) << ", maximumAllowedLostTime = " << m_settings.m_maximumAllowedLostTime << ", out of frame " << m_tracks[i]->IsOutOfTheFrame() << std::endl; +#endif + m_tracks.erase(m_tracks.begin() + i); + assignmentT2R.erase(assignmentT2R.begin() + i); + } + else + { + ++i; + } + } + } + + // Search for unassigned detects and start new tracks for them +#if DRAW_DBG_ASSIGNMENT + std::cout << 
"CTracker::UpdateTrackingState: Search for unassigned detects and start new tracks for them" << std::endl; +#endif + for (size_t i = 0; i < regions.size(); ++i) + { +#if DRAW_DBG_ASSIGNMENT + std::cout << "CTracker::update: regions[" << i << "].m_rrect: " << regions[i].m_rrect.center << ", " << regions[i].m_rrect.angle << ", " << regions[i].m_rrect.size << std::endl; +#endif + if (std::find(assignmentT2R.begin(), assignmentT2R.end(), i) == assignmentT2R.end()) + { + if (regionEmbeddings.empty()) + m_tracks.push_back(std::make_unique(regions[i], + m_settings.m_kalmanType, + m_settings.m_dt, + m_settings.m_accelNoiseMag, + m_settings.m_useAcceleration, + m_nextTrackID, + m_settings.m_filterGoal, + m_settings.m_lostTrackType, + frameTime)); + else + m_tracks.push_back(std::make_unique(regions[i], + regionEmbeddings[i], + m_settings.m_kalmanType, + m_settings.m_dt, + m_settings.m_accelNoiseMag, + m_settings.m_useAcceleration, + m_nextTrackID, + m_settings.m_filterGoal, + m_settings.m_lostTrackType, + frameTime)); + m_nextTrackID = m_nextTrackID.NextID(); + } + } + + // Update Kalman Filters state +#if DRAW_DBG_ASSIGNMENT + std::cout << "CTracker::UpdateTrackingState: Update Kalman Filters state" << std::endl; +#endif + + const ptrdiff_t stop_i = static_cast(assignmentT2R.size()); +#pragma omp parallel for + for (ptrdiff_t i = 0; i < stop_i; ++i) + { + // If track updated less than one time, than filter state is not correct. + if (assignmentT2R[i] != -1) // If we have assigned detect, then update using its coordinates, + { + m_tracks[i]->ResetLostTime(frameTime); +#if DRAW_DBG_ASSIGNMENT + std::cout << "Update track " << i << " for " << assignmentT2R[i] << " region, regionEmbeddings.size = " << regionEmbeddings.size() << std::endl; +#endif + if (regionEmbeddings.empty()) + m_tracks[i]->Update(regions[assignmentT2R[i]], + true, m_settings.m_maxTraceLength, + m_prevFrame, currFrame, + m_settings.m_useAbandonedDetection ? 
m_settings.m_minStaticTime : 0, m_settings.m_maxSpeedForStatic, + frameTime); + else + m_tracks[i]->Update(regions[assignmentT2R[i]], regionEmbeddings[assignmentT2R[i]], + true, m_settings.m_maxTraceLength, + m_prevFrame, currFrame, + m_settings.m_useAbandonedDetection ? m_settings.m_minStaticTime : 0, m_settings.m_maxSpeedForStatic, + frameTime); + } + else // if not continue using predictions + { + m_tracks[i]->Update(CRegion(), false, m_settings.m_maxTraceLength, m_prevFrame, currFrame, 0, m_settings.m_maxSpeedForStatic, frameTime); + } + } + +#if DRAW_DBG_ASSIGNMENT + std::cout << "CTracker::UpdateTrackingState: show results" << std::endl; +#ifndef SILENT_WORK + cv::namedWindow("dbgAssignment", cv::WINDOW_NORMAL); + cv::imshow("dbgAssignment", dbgAssignment); + //cv::waitKey(1); +#endif +#endif + +} + +/// +/// \brief CTracker::CreateDistaceMatrix +/// \param regions +/// \param costMatrix +/// \param maxPossibleCost +/// \param maxCost +/// +void CTracker::CreateDistaceMatrix(const regions_t& regions, + const std::vector& regionEmbeddings, + distMatrix_t& costMatrix, + track_t maxPossibleCost, + track_t& maxCost) +{ + const size_t colsTracks = m_tracks.size(); // Tracking objects + maxCost = 0; + + for (size_t i = 0; i < colsTracks; ++i) + { + const auto& track = m_tracks[i]; + + // call kalman prediction fist + if (track->GetFilterGoal() == tracking::FilterGoal::FilterRect || + track->GetFilterGoal() == tracking::FilterGoal::FilterRRect) + track->KalmanPredictRect(); + else + track->KalmanPredictPoint(); + + constexpr bool DIST_LOGS = false; + + // Calc distance between track and regions + for (size_t j = 0; j < regions.size(); ++j) + { + const auto& reg = regions[j]; + + auto dist = maxPossibleCost; + if (m_settings.CheckType(m_tracks[i]->LastRegion().m_type, reg.m_type)) + { + dist = 0; + size_t ind = 0; + // Euclidean distance between centers + if (m_settings.m_distType[ind] > 0.0f && ind == tracking::DistCenters) + { +#if 1 + track_t ellipseDist = 
GetEllipseDist(*track, reg); + if (ellipseDist > 1) + dist += m_settings.m_distType[ind]; + else + dist += ellipseDist * m_settings.m_distType[ind]; +#else + dist += m_settings.m_distType[ind] * track->CalcDistCenter(reg); +#endif + if constexpr (DIST_LOGS) + { + std::cout << "DistCenters : " << m_settings.m_distType[ind] << ", dist = " << dist << "\n"; + //std::cout << "dist = " << dist << ", ed = " << ellipseDist << ", reg.m_rrect.center = " << reg.m_rrect.center << ", predictedArea: center = " << predictedArea.center << ", size = " << predictedArea.size << ", angle = " << predictedArea.angle << "\n"; + std::cout << "track id = " << m_tracks[i]->GetID().ID2Str() << " type = " << TypeConverter::Type2Str(m_tracks[i]->LastRegion().m_type) << " (" << m_tracks[i]->LastRegion().m_type << "), region id = " << j << ", type = " << TypeConverter::Type2Str(reg.m_type) << " (" << reg.m_type << ")" << std::endl; + } + } + ++ind; + + // Euclidean distance between bounding rectangles + if (m_settings.m_distType[ind] > 0.0f && ind == tracking::DistRects) + { +#if 1 + track_t ellipseDist = GetEllipseDist(*track, reg); + if (ellipseDist < 1) + { + track_t dw = track->WidthDist(reg); + track_t dh = track->HeightDist(reg); + dist += m_settings.m_distType[ind] * (1 - (1 - ellipseDist) * (dw + dh) * 0.5f); + } + else + { + dist += m_settings.m_distType[ind]; + } + + if constexpr (DIST_LOGS) + { + std::cout << "DistRects : " << m_settings.m_distType[ind] << ", dist = " << dist << "\n"; + track_t dw = track->WidthDist(reg); + track_t dh = track->HeightDist(reg); + std::cout << "dist = " << dist << ", ed = " << ellipseDist << ", dw = " << dw << ", dh = " << dh << "\n"; + std::cout << "track type = " << TypeConverter::Type2Str(m_tracks[i]->LastRegion().m_type) << " (" << m_tracks[i]->LastRegion().m_type << "), region type = " << TypeConverter::Type2Str(reg.m_type) << " (" << reg.m_type << ")\n"; + std::cout << "track = " << m_tracks[i]->LastRegion().m_brect << ", reg = " << reg.m_brect << 
", rrect = [" << reg.m_rrect.size << " from " << reg.m_rrect.center << ", " << reg.m_rrect.angle << "]" << std::endl; + } +#else + dist += m_settings.m_distType[ind] * track->CalcDistRect(reg); +#endif + } + ++ind; + + // Intersection over Union, IoU + if (m_settings.m_distType[ind] > 0.0f && ind == tracking::DistJaccard) + { + dist += m_settings.m_distType[ind] * track->CalcDistJaccard(reg); + if constexpr (DIST_LOGS) + std::cout << "DistJaccard : " << m_settings.m_distType[ind] << ", dist = " << dist << std::endl; + } + ++ind; + + // Bhatacharia distance between histograms + if (m_settings.m_distType[ind] > 0.0f && ind == tracking::DistHist) + { + dist += m_settings.m_distType[ind] * track->CalcDistHist(regionEmbeddings[j]); + if constexpr (DIST_LOGS) + std::cout << "DistHist : " << m_settings.m_distType[ind] << ", dist = " << dist << std::endl; + } + ++ind; + + // Cosine distance between embeddings + if (m_settings.m_distType[ind] > 0.0f && ind == tracking::DistFeatureCos) + { + if (reg.m_type == track->LastRegion().m_type) + { + auto resCos = track->CalcCosine(regionEmbeddings[j]); + if (resCos.second) + { + dist += m_settings.m_distType[ind] * resCos.first; + //std::cout << "CalcCosine: " << TypeConverter::Type2Str(track->LastRegion().m_type) << ", reg = " << reg.m_brect << ", track = " << track->LastRegion().m_brect << ": res = " << resCos.first << ", dist = " << dist << std::endl; + } + else + { + dist /= m_settings.m_distType[ind]; + //std::cout << "CalcCosine: " << TypeConverter::Type2Str(track->LastRegion().m_type) << ", reg = " << reg.m_brect << ", track = " << track->LastRegion().m_brect << ": res = 1, weight = " << m_settings.m_distType[ind] << ", dist = " << dist << std::endl; + } + } + if constexpr (DIST_LOGS) + std::cout << "DistFeatureCos : " << m_settings.m_distType[ind] << ", dist = " << dist << std::endl; + } + ++ind; + + // Mahalanobis + if (m_settings.m_distType[ind] > 0.0f && ind == tracking::DistMahalanobis) + dist += 
m_settings.m_distType[ind] * track->CalcMahalanobisDist(reg.m_rrect); + ++ind; + + assert(ind == tracking::DistsCount); + } + + costMatrix[i + j * colsTracks] = dist; + if constexpr (DIST_LOGS) + std::cout << "costMatrix[" << j << "][" << i << "] (or " << (i + j * colsTracks) << ") = " << dist << std::endl; + + if (dist < 0 || dist > maxPossibleCost) + { + assert(0); + exit(-1); + } + + if (dist > maxCost) + maxCost = dist; + } + } +} + +/// +/// \brief CTracker::CalcEmbeddins +/// \param regionEmbeddings +/// \param regions +/// \param currFrame +/// +void CTracker::CalcEmbeddins(std::vector& regionEmbeddings, const regions_t& regions, cv::UMat currFrame) const +{ + if (!regions.empty()) + { + regionEmbeddings.resize(regions.size()); + // Bhatacharia distance between histograms + if (m_settings.m_distType[tracking::DistHist] > 0.0f) + { + for (size_t j = 0; j < regions.size(); ++j) + { + int bins = 64; + std::vector histSize; + std::vector ranges; + std::vector channels; + + for (int i = 0, stop = currFrame.channels(); i < stop; ++i) + { + histSize.push_back(bins); + ranges.push_back(0); + ranges.push_back(255); + channels.push_back(i); + } + + std::vector regROI = { currFrame(regions[j].m_brect) }; + cv::calcHist(regROI, channels, cv::Mat(), regionEmbeddings[j].m_hist, histSize, ranges, false); + cv::normalize(regionEmbeddings[j].m_hist, regionEmbeddings[j].m_hist, 0, 1, cv::NORM_MINMAX, -1, cv::Mat()); + } + } + + // Cosine distance between embeddings + if (m_settings.m_distType[tracking::DistFeatureCos] > 0.0f) + { + for (size_t j = 0; j < regions.size(); ++j) + { + if (regionEmbeddings[j].m_embedding.empty()) + { + // std::cout << "Search embCalc for " << TypeConverter::Type2Str(regions[j].m_type) << ": "; + auto embCalc = m_embCalculators.find(regions[j].m_type); + if (embCalc != std::end(m_embCalculators)) + { + embCalc->second->Calc(currFrame, regions[j].m_brect, regionEmbeddings[j].m_embedding); + + //std::cout << "Founded! 
m_embedding = " << regionEmbeddings[j].m_embedding.size() << std::endl; + } + else + { + //std::cout << "Not found" << std::endl; + } + } + } + } + } +} + +/// +/// \brief CTracker::GetEllipseDist +/// \param trackRef +/// \param reg +/// \return +/// +track_t CTracker::GetEllipseDist(const CTrack& trackRef, const CRegion& reg) +{ + cv::Size_ minRadius; + + if (m_settings.m_minAreaRadiusPix <= 0) + { + minRadius.width = m_settings.m_minAreaRadiusK * trackRef.LastRegion().m_rrect.size.width; + minRadius.height = m_settings.m_minAreaRadiusK * trackRef.LastRegion().m_rrect.size.height; + } + else + { + minRadius.width = m_settings.m_minAreaRadiusPix; + minRadius.height = m_settings.m_minAreaRadiusPix; + } + + // Calc predicted area for track + cv::RotatedRect predictedArea = trackRef.CalcPredictionEllipse(minRadius); + + return trackRef.IsInsideArea(reg.m_rrect.center, predictedArea); +} + +/// +/// BaseTracker::CreateTracker +/// +std::unique_ptr BaseTracker::CreateTracker(const TrackerSettings& settings, float fps) +{ + switch (settings.m_tracker) + { + case tracking::UniversalTracker: + return std::make_unique(settings); + + case tracking::ByteTrack: + return std::make_unique((fps > 1.f) ? 
cvRound(fps) : 30, settings.m_byteTrackSettings.m_trackBuffer, + settings.m_byteTrackSettings.m_trackThresh, settings.m_byteTrackSettings.m_highThresh, settings.m_byteTrackSettings.m_matchThresh); + + default: + return std::make_unique(settings); + } +} diff --git a/src/Tracker/BaseTracker.h b/src/Tracker/BaseTracker.h new file mode 100644 index 000000000..7a43ed66d --- /dev/null +++ b/src/Tracker/BaseTracker.h @@ -0,0 +1,41 @@ +#pragma once + +#include +#include + +#include "defines.h" +#include "trajectory.h" +#include "TrackerSettings.h" + +/// +/// \brief The CTracker class +/// +class BaseTracker +{ +public: + BaseTracker() = default; + BaseTracker(const BaseTracker&) = delete; + BaseTracker(BaseTracker&&) = delete; + BaseTracker& operator=(const BaseTracker&) = delete; + BaseTracker& operator=(BaseTracker&&) = delete; + + virtual ~BaseTracker(void) = default; + + virtual void Update(const regions_t& regions, cv::UMat currFrame, time_point_t frameTime) = 0; + virtual void UpdateMat(const regions_t& regions, cv::Mat currFrame, time_point_t frameTime) + { + cv::UMat frame = currFrame.getUMat(cv::ACCESS_READ); + Update(regions, frame, frameTime); + } + + virtual void GetTracks(std::vector& tracks) const = 0; + virtual std::vector GetTracksCopy() const + { + std::vector tracks; + GetTracks(tracks); + return tracks; + } + virtual void GetRemovedTracks(std::vector& trackIDs) const = 0; + + static std::unique_ptr CreateTracker(const TrackerSettings& settings, float fps); +}; diff --git a/src/Tracker/CMakeLists.txt b/src/Tracker/CMakeLists.txt index 9d05000d4..fbb355a8b 100644 --- a/src/Tracker/CMakeLists.txt +++ b/src/Tracker/CMakeLists.txt @@ -2,148 +2,42 @@ cmake_minimum_required(VERSION 3.9) project(mtracking) -set(main_sources ../common/nms.h ../common/defines.h ../common/object_types.h ../common/object_types.cpp) - - set(tracker_sources - Ctracker.cpp - Ctracker.h +set(main_sources + ../mtracking/nms.h + ../mtracking/defines.h + ../mtracking/object_types.h + 
../mtracking/object_types.cpp) + +set(tracker_sources + BaseTracker.cpp + track.cpp + Kalman.cpp + TrackerSettings.cpp ShortPathCalculator.cpp + HungarianAlg/HungarianAlg.cpp + LAPJV_algorithm/lap.cpp + byte_track/BYTETracker.cpp + byte_track/KalmanFilter.cpp + byte_track/lapjv.cpp + byte_track/STrack.cpp) + +set(tracker_headers + BaseTracker.h ShortPathCalculator.h - track.cpp track.h - Kalman.cpp + trajectory.h Kalman.h - - HungarianAlg/HungarianAlg.cpp + TrackerSettings.h HungarianAlg/HungarianAlg.h - - VOTTracker.hpp + LAPJV_algorithm/lap.h EmbeddingsCalculator.hpp - dat/dat_tracker.cpp - dat/dat_tracker.hpp -) - -if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm|ARM|aarch64|AARCH64") + byte_track/BYTETracker.h + byte_track/KalmanFilter.h + byte_track/lapjv.h + byte_track/STrack.h) -else() - set(tracker_sources ${tracker_sources} - staple/fhog.cpp - staple/fhog.h - staple/sse.hpp - staple/staple_tracker.cpp - staple/staple_tracker.hpp - - ldes/correlation.cpp - ldes/correlation.h - ldes/fft_functions.cpp - ldes/fft_functions.h - ldes/fhog.cpp - ldes/fhog.hpp - ldes/hann.cpp - ldes/hann.h - ldes/ldes_tracker.cpp - ldes/ldes_tracker.h - ) - add_definitions(-DUSE_STAPLE_TRACKER) -endif() - - set(graph_source - graph/tokenise.cpp -# graph/script.cpp -# graph/rings.cpp - graph/mytree.cpp - graph/mygraph.cpp - graph/mwbmatching.cpp - graph/mincut.cpp - graph/gport.cpp -# graph/gml2nestedsql.cpp -# graph/gml2dot.cpp -# graph/components.cpp - graph/fheap.c - ) - - set(graph_header - graph/tokenise.h - graph/mytree.h - graph/mygraph.h - graph/mwbmatching.h - graph/mincut.h - graph/gport.h - graph/gdefs.h - graph/fheap.h - ) - - set(gtl_source - graph/GTL/src/bellman_ford.cpp - graph/GTL/src/bfs.cpp - graph/GTL/src/biconnectivity.cpp - graph/GTL/src/bid_dijkstra.cpp - graph/GTL/src/components.cpp - graph/GTL/src/debug.cpp - graph/GTL/src/dfs.cpp - graph/GTL/src/dijkstra.cpp - graph/GTL/src/edge.cpp - graph/GTL/src/embedding.cpp - graph/GTL/src/fm_partition.cpp - 
graph/GTL/src/gml_parser.cpp - graph/GTL/src/gml_scanner.cpp - graph/GTL/src/graph.cpp - graph/GTL/src/maxflow_ff.cpp - graph/GTL/src/maxflow_pp.cpp - graph/GTL/src/maxflow_sap.cpp - graph/GTL/src/min_tree.cpp - graph/GTL/src/node.cpp - graph/GTL/src/planarity.cpp - graph/GTL/src/pq_node.cpp - graph/GTL/src/pq_tree.cpp - graph/GTL/src/ratio_cut_partition.cpp - graph/GTL/src/st_number.cpp - graph/GTL/src/topsort.cpp - ) - - set(gtl_header - graph/GTL/include/GTL/algorithm.h - graph/GTL/include/GTL/bellman_ford.h - graph/GTL/include/GTL/bfs.h - graph/GTL/include/GTL/biconnectivity.h - graph/GTL/include/GTL/bid_dijkstra.h - graph/GTL/include/GTL/bin_heap.h - graph/GTL/include/GTL/components.h - graph/GTL/include/GTL/debug.h - graph/GTL/include/GTL/dfs.h - graph/GTL/include/GTL/dijkstra.h - graph/GTL/include/GTL/edge.h - graph/GTL/include/GTL/edge_data.h - graph/GTL/include/GTL/edge_map.h - graph/GTL/include/GTL/embedding.h - graph/GTL/include/GTL/fm_partition.h - graph/GTL/include/GTL/gml_parser.h - graph/GTL/include/GTL/gml_scanner.h - graph/GTL/include/GTL/graph.h - graph/GTL/include/GTL/maxflow_ff.h - graph/GTL/include/GTL/maxflow_pp.h - graph/GTL/include/GTL/maxflow_sap.h - graph/GTL/include/GTL/min_tree.h - graph/GTL/include/GTL/ne_map.h - graph/GTL/include/GTL/node.h - graph/GTL/include/GTL/node_data.h - graph/GTL/include/GTL/node_map.h - graph/GTL/include/GTL/planarity.h - graph/GTL/include/GTL/pq_node.h - graph/GTL/include/GTL/pq_tree.h - graph/GTL/include/GTL/ratio_cut_partition.h - graph/GTL/include/GTL/st_number.h - graph/GTL/include/GTL/symlist.h - graph/GTL/include/GTL/topsort.h - graph/GTL/include/GTL/version.h - graph/GTL/include/GTL/GTL.h - ) - - SOURCE_GROUP("Src" FILES ${main_sources}) - SOURCE_GROUP("Tracker" FILES ${tracker_sources}) - - SOURCE_GROUP("Tracker/graph" FILES ${graph_source} ${graph_header}) - SOURCE_GROUP("Tracker/GTL" FILES ${gtl_source} ${gtl_header}) +SOURCE_GROUP("Src" FILES ${main_sources}) +SOURCE_GROUP("Tracker" FILES 
${tracker_sources} ${tracker_headers}) include(CheckIncludeFileCXX) check_include_file_cxx(opencv2/bgsegm.hpp HAVE_OPENCV_CONTRIB) @@ -172,37 +66,43 @@ else() remove_definitions(-DUSE_OCV_KCF) endif(USE_OCV_KCF) +option(USE_OCV_EMBEDDINGS "Should use the embeddings from opencv_dnn + OpenVINO?" ON) +if(USE_OCV_EMBEDDINGS) + add_definitions(-DUSE_OCV_EMBEDDINGS) +else() + remove_definitions(-DUSE_OCV_EMBEDDINGS) +endif(USE_OCV_EMBEDDINGS) + + include_directories(${PROJECT_SOURCE_DIR}) include_directories(${PROJECT_SOURCE_DIR}/../src) -include_directories(${PROJECT_SOURCE_DIR}/../common) -include_directories(${PROJECT_SOURCE_DIR}/graph) -include_directories(${PROJECT_SOURCE_DIR}/graph/GTL/include) - +include_directories(${PROJECT_SOURCE_DIR}/../mtracking) +include_directories(${PROJECT_SOURCE_DIR}/../../thirdparty) if (CMAKE_COMPILER_IS_GNUCXX) - add_library(${PROJECT_NAME} SHARED + add_library(${PROJECT_NAME} SHARED ${main_sources} - ${tracker_sources} - ${graph_source} ${graph_header} - ${gtl_source} ${gtl_header}) + ${tracker_sources}) + set(LIBS + ${OpenCV_LIBS} + inih) else(CMAKE_COMPILER_IS_GNUCXX) add_library(${PROJECT_NAME} ${main_sources} - ${tracker_sources} - ${graph_source} ${graph_header} - ${gtl_source} ${gtl_header}) + ${tracker_sources}) + set(LIBS + # ${OpenCV_LIBS} + inih) + target_include_directories(${PROJECT_NAME} PRIVATE ${OpenCV_INCLUDE_DIRS}) endif() -if (CMAKE_COMPILER_IS_GNUCXX) -set(LIBS - ${OpenCV_LIBS} -# iconv -) -else(CMAKE_COMPILER_IS_GNUCXX) -set(LIBS - ${OpenCV_LIBS} -) -endif() +target_link_libraries(${PROJECT_NAME} PRIVATE ${LIBS}) -target_link_libraries(${PROJECT_NAME} ${LIBS}) +set_target_properties(${PROJECT_NAME} PROPERTIES PUBLIC_HEADER "${tracker_headers};../mtracking/defines.h;../mtracking/object_types.h") +install(TARGETS ${PROJECT_NAME} + EXPORT MTTrackingExports + LIBRARY DESTINATION lib + RUNTIME DESTINATION bin + PUBLIC_HEADER DESTINATION include/${PROJECT_NAME}) +set_target_properties(${PROJECT_NAME} PROPERTIES FOLDER 
"libs") diff --git a/src/Tracker/Ctracker.cpp b/src/Tracker/Ctracker.cpp deleted file mode 100644 index 5c11f5ade..000000000 --- a/src/Tracker/Ctracker.cpp +++ /dev/null @@ -1,305 +0,0 @@ -#include "Ctracker.h" - -/// -/// \brief CTracker::CTracker -/// Tracker. Manage tracks. Create, remove, update. -/// \param settings -/// -CTracker::CTracker(const TrackerSettings& settings) - : - m_settings(settings), - m_nextTrackID(0) -{ - ShortPathCalculator* spcalc = nullptr; - SPSettings spSettings = { settings.m_distThres, 12 }; - switch (m_settings.m_matchType) - { - case tracking::MatchHungrian: - spcalc = new SPHungrian(spSettings); - break; - case tracking::MatchBipart: - spcalc = new SPBipart(spSettings); - break; - } - assert(spcalc != nullptr); - m_SPCalculator = std::unique_ptr(spcalc); - - for (const auto& embParam : settings.m_embeddings) - { - std::shared_ptr embCalc = std::make_shared(); - if (!embCalc->Initialize(embParam.m_embeddingCfgName, embParam.m_embeddingWeightsName, embParam.m_inputLayer)) - { - std::cerr << "EmbeddingsCalculator initialization error: " << embParam.m_embeddingCfgName << ", " << embParam.m_embeddingWeightsName << std::endl; - } - else - { - for (auto objType : embParam.m_objectTypes) - { - m_embCalculators.try_emplace((objtype_t)objType, embCalc); - } - } - } -} - -/// -/// \brief CTracker::~CTracker -/// -CTracker::~CTracker(void) -{ -} - -/// -/// \brief CTracker::Update -/// \param regions -/// \param currFrame -/// \param fps -/// -void CTracker::Update(const regions_t& regions, - cv::UMat currFrame, - float fps) -{ - UpdateTrackingState(regions, currFrame, fps); - - currFrame.copyTo(m_prevFrame); -} - -/// -/// \brief CTracker::UpdateTrackingState -/// \param regions -/// \param currFrame -/// \param fps -/// -void CTracker::UpdateTrackingState(const regions_t& regions, - cv::UMat currFrame, - float fps) -{ - const size_t N = m_tracks.size(); // Tracking objects - const size_t M = regions.size(); // Detections or regions - - 
assignments_t assignment(N, -1); // Assignments regions -> tracks - - std::vector regionEmbeddings; - - if (!m_tracks.empty()) - { - // Distance matrix between all tracks to all regions - distMatrix_t costMatrix(N * M); - const track_t maxPossibleCost = static_cast(currFrame.cols * currFrame.rows); - track_t maxCost = 0; - CreateDistaceMatrix(regions, regionEmbeddings, costMatrix, maxPossibleCost, maxCost, currFrame); - - // Solving assignment problem (shortest paths) - m_SPCalculator->Solve(costMatrix, N, M, assignment, maxCost); - - // clean assignment from pairs with large distance - for (size_t i = 0; i < assignment.size(); i++) - { - if (assignment[i] != -1) - { - if (costMatrix[i + assignment[i] * N] > m_settings.m_distThres) - { - assignment[i] = -1; - m_tracks[i]->SkippedFrames()++; - } - } - else - { - // If track have no assigned detect, then increment skipped frames counter. - m_tracks[i]->SkippedFrames()++; - } - } - - // If track didn't get detects long time, remove it. - for (size_t i = 0; i < m_tracks.size();) - { - if (m_tracks[i]->SkippedFrames() > m_settings.m_maximumAllowedSkippedFrames || - m_tracks[i]->IsOutOfTheFrame() || - m_tracks[i]->IsStaticTimeout(cvRound(fps * (m_settings.m_maxStaticTime - m_settings.m_minStaticTime)))) - { - m_tracks.erase(m_tracks.begin() + i); - assignment.erase(assignment.begin() + i); - } - else - { - ++i; - } - } - } - - // Search for unassigned detects and start new tracks for them. 
- for (size_t i = 0; i < regions.size(); ++i) - { - if (find(assignment.begin(), assignment.end(), i) == assignment.end()) - { - if (regionEmbeddings.empty()) - m_tracks.push_back(std::make_unique(regions[i], - m_settings.m_kalmanType, - m_settings.m_dt, - m_settings.m_accelNoiseMag, - m_settings.m_useAcceleration, - m_nextTrackID++, - m_settings.m_filterGoal == tracking::FilterRect, - m_settings.m_lostTrackType)); - else - m_tracks.push_back(std::make_unique(regions[i], - regionEmbeddings[i], - m_settings.m_kalmanType, - m_settings.m_dt, - m_settings.m_accelNoiseMag, - m_settings.m_useAcceleration, - m_nextTrackID++, - m_settings.m_filterGoal == tracking::FilterRect, - m_settings.m_lostTrackType)); - } - } - - // Update Kalman Filters state - const ptrdiff_t stop_i = static_cast(assignment.size()); -#pragma omp parallel for - for (ptrdiff_t i = 0; i < stop_i; ++i) - { - // If track updated less than one time, than filter state is not correct. - if (assignment[i] != -1) // If we have assigned detect, then update using its coordinates, - { - m_tracks[i]->SkippedFrames() = 0; - if (regionEmbeddings.empty()) - m_tracks[i]->Update(regions[assignment[i]], - true, m_settings.m_maxTraceLength, - m_prevFrame, currFrame, - m_settings.m_useAbandonedDetection ? cvRound(m_settings.m_minStaticTime * fps) : 0, m_settings.m_maxSpeedForStatic); - else - m_tracks[i]->Update(regions[assignment[i]], regionEmbeddings[assignment[i]], - true, m_settings.m_maxTraceLength, - m_prevFrame, currFrame, - m_settings.m_useAbandonedDetection ? 
cvRound(m_settings.m_minStaticTime * fps) : 0, m_settings.m_maxSpeedForStatic); - } - else // if not continue using predictions - { - m_tracks[i]->Update(CRegion(), false, m_settings.m_maxTraceLength, m_prevFrame, currFrame, 0, m_settings.m_maxSpeedForStatic); - } - } -} - -/// -/// \brief CTracker::CreateDistaceMatrix -/// \param regions -/// \param costMatrix -/// \param maxPossibleCost -/// \param maxCost -/// -void CTracker::CreateDistaceMatrix(const regions_t& regions, - std::vector& regionEmbeddings, - distMatrix_t& costMatrix, - track_t maxPossibleCost, - track_t& maxCost, - cv::UMat currFrame) -{ - const size_t N = m_tracks.size(); // Tracking objects - maxCost = 0; - - for (size_t i = 0; i < N; ++i) - { - const auto& track = m_tracks[i]; - - // Calc predicted area for track - cv::Size_ minRadius; - if (m_settings.m_minAreaRadiusPix < 0) - { - minRadius.width = m_settings.m_minAreaRadiusK * track->LastRegion().m_rrect.size.width; - minRadius.height = m_settings.m_minAreaRadiusK * track->LastRegion().m_rrect.size.height; - } - else - { - minRadius.width = m_settings.m_minAreaRadiusPix; - minRadius.height = m_settings.m_minAreaRadiusPix; - } - cv::RotatedRect predictedArea = track->CalcPredictionEllipse(minRadius); - - // Calc distance between track and regions - for (size_t j = 0; j < regions.size(); ++j) - { - const auto& reg = regions[j]; - - auto dist = maxPossibleCost; - if (m_settings.CheckType(m_tracks[i]->LastRegion().m_type, reg.m_type)) - { - dist = 0; - size_t ind = 0; - // Euclidean distance between centers - if (m_settings.m_distType[ind] > 0.0f && ind == tracking::DistCenters) - { -#if 1 - track_t ellipseDist = track->IsInsideArea(reg.m_rrect.center, predictedArea); - if (ellipseDist > 1) - dist += m_settings.m_distType[ind]; - else - dist += ellipseDist * m_settings.m_distType[ind]; -#else - dist += m_settings.m_distType[ind] * track->CalcDistCenter(reg); -#endif - } - ++ind; - - // Euclidean distance between bounding rectangles - if 
(m_settings.m_distType[ind] > 0.0f && ind == tracking::DistRects) - { -#if 1 - track_t ellipseDist = track->IsInsideArea(reg.m_rrect.center, predictedArea); - if (ellipseDist < 1) - { - track_t dw = track->WidthDist(reg); - track_t dh = track->HeightDist(reg); - dist += m_settings.m_distType[ind] * (1 - (1 - ellipseDist) * (dw + dh) * 0.5f); - } - else - { - dist += m_settings.m_distType[ind]; - } - //std::cout << "dist = " << dist << ", ed = " << ellipseDist << ", dw = " << dw << ", dh = " << dh << std::endl; -#else - dist += m_settings.m_distType[ind] * track->CalcDistRect(reg); -#endif - } - ++ind; - - // Intersection over Union, IoU - if (m_settings.m_distType[ind] > 0.0f && ind == tracking::DistJaccard) - dist += m_settings.m_distType[ind] * track->CalcDistJaccard(reg); - ++ind; - - // Bhatacharia distance between histograms - if (m_settings.m_distType[ind] > 0.0f && ind == tracking::DistHist) - { - if (regionEmbeddings.empty()) - regionEmbeddings.resize(regions.size()); - dist += m_settings.m_distType[ind] * track->CalcDistHist(reg, regionEmbeddings[j], currFrame); - } - ++ind; - - // Cosine distance between embeddings - if (m_settings.m_distType[ind] > 0.0f && ind == tracking::DistFeatureCos) - { - if (regionEmbeddings.empty()) - regionEmbeddings.resize(regions.size()); - if (regionEmbeddings[j].m_embedding.empty()) - { - auto embCalc = m_embCalculators.find(reg.m_type); - if (embCalc != std::end(m_embCalculators)) - { - embCalc->second->Calc(currFrame, reg.m_brect, regionEmbeddings[j].m_embedding); - regionEmbeddings[j].m_embDot = regionEmbeddings[j].m_embedding.dot(regionEmbeddings[j].m_embedding); - } - if (reg.m_type == track->LastRegion().m_type) - dist += m_settings.m_distType[ind] * track->CalcCosine(regionEmbeddings[j], currFrame); - } - } - ++ind; - assert(ind == tracking::DistsCount); - } - - costMatrix[i + j * N] = dist; - if (dist > maxCost) - maxCost = dist; - } - } -} diff --git a/src/Tracker/EmbeddingsCalculator.hpp 
b/src/Tracker/EmbeddingsCalculator.hpp index 32005fdb3..b285903f1 100644 --- a/src/Tracker/EmbeddingsCalculator.hpp +++ b/src/Tracker/EmbeddingsCalculator.hpp @@ -12,30 +12,100 @@ class EmbeddingsCalculator /// bool Initialize(const std::string& cfgName, const std::string& weightsName, const cv::Size& inputLayer) { - m_inputLayer = inputLayer; +#ifdef USE_OCV_EMBEDDINGS + m_inputLayer = inputLayer; #if 1 - m_net = cv::dnn::readNet(weightsName, cfgName); + m_net = cv::dnn::readNet(weightsName); #else - m_net = cv::dnn::readNetFromTensorflow(weightsName, cfgName); + m_net = cv::dnn::readNetFromTorch(weightsName); #endif + + std::cout << "Re-id model " << weightsName << " loaded: " << (!m_net.empty()) << std::endl; + if (!m_net.empty()) { - m_net.setPreferableBackend(cv::dnn::DNN_BACKEND_INFERENCE_ENGINE); - m_net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU); +#if (((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR >= 2)) || (CV_VERSION_MAJOR > 4)) + std::map dictTargets; + dictTargets[cv::dnn::DNN_TARGET_CPU] = "DNN_TARGET_CPU"; + dictTargets[cv::dnn::DNN_TARGET_OPENCL] = "DNN_TARGET_OPENCL"; + dictTargets[cv::dnn::DNN_TARGET_OPENCL_FP16] = "DNN_TARGET_OPENCL_FP16"; + dictTargets[cv::dnn::DNN_TARGET_MYRIAD] = "DNN_TARGET_MYRIAD"; + dictTargets[cv::dnn::DNN_TARGET_CUDA] = "DNN_TARGET_CUDA"; + dictTargets[cv::dnn::DNN_TARGET_CUDA_FP16] = "DNN_TARGET_CUDA_FP16"; +#if (CV_VERSION_MAJOR > 4) + dictTargets[cv::dnn::DNN_TARGET_HDDL] = "DNN_TARGET_HDDL"; + dictTargets[cv::dnn::DNN_TARGET_NPU] = "DNN_TARGET_NPU"; + dictTargets[cv::dnn::DNN_TARGET_CPU_FP16] = "DNN_TARGET_CPU_FP16"; +#endif + + std::map dictBackends; + dictBackends[cv::dnn::DNN_BACKEND_DEFAULT] = "DNN_BACKEND_DEFAULT"; + dictBackends[cv::dnn::DNN_BACKEND_INFERENCE_ENGINE] = "DNN_BACKEND_INFERENCE_ENGINE"; + dictBackends[cv::dnn::DNN_BACKEND_OPENCV] = "DNN_BACKEND_OPENCV"; + dictBackends[cv::dnn::DNN_BACKEND_VKCOM] = "DNN_BACKEND_VKCOM"; + dictBackends[cv::dnn::DNN_BACKEND_CUDA] = "DNN_BACKEND_CUDA"; +#if 
(CV_VERSION_MAJOR > 4) + dictBackends[cv::dnn::DNN_BACKEND_WEBNN] = "DNN_BACKEND_WEBNN"; + dictBackends[cv::dnn::DNN_BACKEND_TIMVX] = "DNN_BACKEND_TIMVX"; + dictBackends[cv::dnn::DNN_BACKEND_CANN] = "DNN_BACKEND_CANN"; +#endif + dictBackends[1000000] = "DNN_BACKEND_INFERENCE_ENGINE_NGRAPH"; + dictBackends[1000000 + 1] = "DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019"; + + std::cout << "Avaible pairs for Target - backend:" << std::endl; + std::vector> pairs = cv::dnn::getAvailableBackends(); + for (auto p : pairs) + { + std::cout << dictBackends[p.first] << " (" << p.first << ") - " << dictTargets[p.second] << " (" << p.second << ")" << std::endl; + + if (p.first == cv::dnn::DNN_BACKEND_CUDA) + { + //m_net.setPreferableTarget(p.second); + //m_net.setPreferableBackend(p.first); + //std::cout << "Set!" << std::endl; + } + } +#endif + + auto outNames = m_net.getUnconnectedOutLayersNames(); + auto outLayers = m_net.getUnconnectedOutLayers(); + auto outLayerType = m_net.getLayer(outLayers[0])->type; + +#if (CV_VERSION_MAJOR < 5) + std::vector outputs; + std::vector internals; + m_net.getLayerShapes(cv::dnn::MatShape(), 0, outputs, internals); +#else + std::vector outputs; + std::vector internals; + m_net.getLayerShapes(cv::MatShape(), CV_32F, 0, outputs, internals); +#endif + std::cout << "REID: getLayerShapes: outputs (" << outputs.size() << ") = " << (outputs.size() > 0 ? outputs[0].size() : 0) << ", internals (" << internals.size() << ") = " << (internals.size() > 0 ? internals[0].size() : 0) << std::endl; + if (outputs.size() && outputs[0].size() > 3) + std::cout << "outputs = [" << outputs[0][0] << ", " << outputs[0][1] << ", " << outputs[0][2] << ", " << outputs[0][3] << "], internals = [" << internals[0][0] << ", " << internals[0][1] << ", " << internals[0][2] << ", " << internals[0][3] << "]" << std::endl; } return !m_net.empty(); - } +#else + std::cerr << "EmbeddingsCalculator was disabled in CMAKE! Check SetDistances params." 
<< std::endl; + return false; +#endif + } /// - bool IsInitialized() const - { - return !m_net.empty(); + bool IsInitialized() const + { +#ifdef USE_OCV_EMBEDDINGS + return !m_net.empty(); +#else + return false; +#endif } /// void Calc(const cv::UMat& img, cv::Rect rect, cv::Mat& embedding) - { + { +#ifdef USE_OCV_EMBEDDINGS auto Clamp = [](int& v, int& size, int hi) -> int { int res = 0; @@ -62,16 +132,21 @@ class EmbeddingsCalculator Clamp(rect.x, rect.width, img.cols); Clamp(rect.y, rect.height, img.rows); - cv::UMat obj; - cv::resize(img(rect), obj, m_inputLayer, 0., 0., cv::INTER_LINEAR); - cv::Mat blob = cv::dnn::blobFromImage(obj, 1.0, cv::Size(), cv::Scalar(), false, false); + cv::Mat obj; + cv::resize(img(rect), obj, m_inputLayer, 0., 0., cv::INTER_CUBIC); + cv::Mat blob = cv::dnn::blobFromImage(obj, 1.0 / 255.0, cv::Size(), cv::Scalar(), false, false, CV_32F); m_net.setInput(blob); - embedding = m_net.forward(); //std::cout << "embedding: " << embedding.size() << ", chans = " << embedding.channels() << std::endl; + cv::normalize(m_net.forward(), embedding); +#else + std::cerr << "EmbeddingsCalculator was disabled in CMAKE! Check SetDistances params." 
<< std::endl; +#endif } private: - cv::dnn::Net m_net; - cv::Size m_inputLayer{128, 256}; +#ifdef USE_OCV_EMBEDDINGS + cv::dnn::Net m_net; + cv::Size m_inputLayer{ 128, 256 }; +#endif }; diff --git a/src/Tracker/HungarianAlg/HungarianAlg.cpp b/src/Tracker/HungarianAlg/HungarianAlg.cpp index 2626e2a06..195f2acba 100644 --- a/src/Tracker/HungarianAlg/HungarianAlg.cpp +++ b/src/Tracker/HungarianAlg/HungarianAlg.cpp @@ -4,152 +4,164 @@ // -------------------------------------------------------------------------- // // -------------------------------------------------------------------------- -track_t AssignmentProblemSolver::Solve( - const distMatrix_t& distMatrixIn, - size_t nOfRows, - size_t nOfColumns, - std::vector& assignment, - TMethod Method - ) +track_t AssignmentProblemSolver::Solve(const distMatrix_t& distMatrixIn, + size_t nOfRows, + size_t nOfColumns, + std::vector& assignment, + TMethod Method) { - assignment.resize(nOfRows, -1); + assignment.resize(nOfRows, -1); - track_t cost = 0; + track_t cost = 0; - switch (Method) - { - case optimal: - assignmentoptimal(assignment, cost, distMatrixIn, nOfRows, nOfColumns); - break; + switch (Method) + { + case optimal: + assignmentoptimal(assignment, cost, distMatrixIn, nOfRows, nOfColumns); + break; - case many_forbidden_assignments: - assignmentsuboptimal1(assignment, cost, distMatrixIn, nOfRows, nOfColumns); - break; + case many_forbidden_assignments: + assignmentsuboptimal1(assignment, cost, distMatrixIn, nOfRows, nOfColumns); + break; - case without_forbidden_assignments: - assignmentsuboptimal2(assignment, cost, distMatrixIn, nOfRows, nOfColumns); - break; - } + case without_forbidden_assignments: + assignmentsuboptimal2(assignment, cost, distMatrixIn, nOfRows, nOfColumns); + break; + } - return cost; + return cost; } // -------------------------------------------------------------------------- // Computes the optimal assignment (minimum overall costs) using Munkres algorithm. 
// -------------------------------------------------------------------------- void AssignmentProblemSolver::assignmentoptimal(assignments_t& assignment, track_t& cost, const distMatrix_t& distMatrixIn, size_t nOfRows, size_t nOfColumns) { - // Generate distance cv::Matrix and check cv::Matrix elements positiveness + if constexpr (HUNGARIAN_LOGS) + std::cout << "assignmentoptimal: Generate distance cv::Matrix and check cv::Matrix elements positiveness, assignment = " << assignment.size() << ", cost = " << cost << ", distMatrixIn = " << distMatrixIn.size() << ", nOfRows = " << nOfRows << ", nOfColumns = " << nOfColumns << std::endl; - // Total elements number + if constexpr (HUNGARIAN_LOGS) + std::cout << "assignmentoptimal: Total elements number" << std::endl; const size_t nOfElements = nOfRows * nOfColumns; - // Memory allocation + if constexpr (HUNGARIAN_LOGS) + std::cout << "assignmentoptimal: Memory allocation" << std::endl; m_distMatrix.assign(std::begin(distMatrixIn), std::end(distMatrixIn)); const track_t* distMatrixEnd = m_distMatrix.data() + nOfElements; - // Memory allocation - bool* coveredColumns = (bool*)calloc(nOfColumns, sizeof(bool)); - bool* coveredRows = (bool*)calloc(nOfRows, sizeof(bool)); - bool* starMatrix = (bool*)calloc(nOfElements, sizeof(bool)); - bool* primeMatrix = (bool*)calloc(nOfElements, sizeof(bool)); - bool* newStarMatrix = (bool*)calloc(nOfElements, sizeof(bool)); /* used in step4 */ - - /* preliminary steps */ - if (nOfRows <= nOfColumns) - { + if constexpr (HUNGARIAN_LOGS) + std::cout << "assignmentoptimal: Memory allocation" << std::endl; + bool* coveredColumns = (bool*)calloc(nOfColumns, sizeof(bool)); + bool* coveredRows = (bool*)calloc(nOfRows, sizeof(bool)); + bool* starMatrix = (bool*)calloc(nOfElements, sizeof(bool)); + bool* primeMatrix = (bool*)calloc(nOfElements, sizeof(bool)); + bool* newStarMatrix = (bool*)calloc(nOfElements, sizeof(bool)); // used in step4 + + if constexpr (HUNGARIAN_LOGS) + std::cout << 
"assignmentoptimal: preliminary steps" << std::endl; + if (nOfRows <= nOfColumns) + { for (size_t row = 0; row < nOfRows; ++row) - { - /* find the smallest element in the row */ + { + if constexpr (HUNGARIAN_LOGS) + std::cout << "assignmentoptimal: find the smallest element in the row" << std::endl; track_t* distMatrixTemp = m_distMatrix.data() + row; track_t minValue = *distMatrixTemp; - distMatrixTemp += nOfRows; - while (distMatrixTemp < distMatrixEnd) - { - track_t value = *distMatrixTemp; - if (value < minValue) - minValue = value; - - distMatrixTemp += nOfRows; - } - /* subtract the smallest element from each element of the row */ + distMatrixTemp += nOfRows; + while (distMatrixTemp < distMatrixEnd) + { + track_t value = *distMatrixTemp; + if (value < minValue) + minValue = value; + + distMatrixTemp += nOfRows; + } + if constexpr (HUNGARIAN_LOGS) + std::cout << "assignmentoptimal: subtract the smallest element from each element of the row" << std::endl; distMatrixTemp = m_distMatrix.data() + row; - while (distMatrixTemp < distMatrixEnd) - { - *distMatrixTemp -= minValue; - distMatrixTemp += nOfRows; - } - } - /* Steps 1 and 2a */ + while (distMatrixTemp < distMatrixEnd) + { + *distMatrixTemp -= minValue; + distMatrixTemp += nOfRows; + } + } + if constexpr (HUNGARIAN_LOGS) + std::cout << "assignmentoptimal: Steps 1 and 2a" << std::endl; for (size_t row = 0; row < nOfRows; ++row) - { + { for (size_t col = 0; col < nOfColumns; ++col) - { + { if (m_distMatrix[row + nOfRows*col] == 0) - { - if (!coveredColumns[col]) - { - starMatrix[row + nOfRows * col] = true; - coveredColumns[col] = true; - break; - } - } - } - } - } - else /* if(nOfRows > nOfColumns) */ - { + { + if (!coveredColumns[col]) + { + starMatrix[row + nOfRows * col] = true; + coveredColumns[col] = true; + break; + } + } + } + } + } + else // if(nOfRows > nOfColumns) + { for (size_t col = 0; col < nOfColumns; ++col) - { - /* find the smallest element in the column */ + { + if constexpr (HUNGARIAN_LOGS) 
+ std::cout << "assignmentoptimal: find the smallest element in the column" << std::endl; track_t* distMatrixTemp = m_distMatrix.data() + nOfRows*col; - track_t* columnEnd = distMatrixTemp + nOfRows; - track_t minValue = *distMatrixTemp++; - while (distMatrixTemp < columnEnd) - { - track_t value = *distMatrixTemp++; - if (value < minValue) - minValue = value; - } - /* subtract the smallest element from each element of the column */ + track_t* columnEnd = distMatrixTemp + nOfRows; + track_t minValue = *distMatrixTemp++; + while (distMatrixTemp < columnEnd) + { + track_t value = *distMatrixTemp++; + if (value < minValue) + minValue = value; + } + if constexpr (HUNGARIAN_LOGS) + std::cout << "assignmentoptimal: subtract the smallest element from each element of the column" << std::endl; distMatrixTemp = m_distMatrix.data() + nOfRows*col; - while (distMatrixTemp < columnEnd) - { - *distMatrixTemp++ -= minValue; - } - } - /* Steps 1 and 2a */ + while (distMatrixTemp < columnEnd) + { + *distMatrixTemp++ -= minValue; + } + } + if constexpr (HUNGARIAN_LOGS) + std::cout << "assignmentoptimal: Steps 1 and 2a" << std::endl; for (size_t col = 0; col < nOfColumns; ++col) - { + { for (size_t row = 0; row < nOfRows; ++row) - { + { if (m_distMatrix[row + nOfRows*col] == 0) - { - if (!coveredRows[row]) - { - starMatrix[row + nOfRows*col] = true; - coveredColumns[col] = true; - coveredRows[row] = true; - break; - } - } - } - } + { + if (!coveredRows[row]) + { + starMatrix[row + nOfRows*col] = true; + coveredColumns[col] = true; + coveredRows[row] = true; + break; + } + } + } + } for (size_t row = 0; row < nOfRows; ++row) - { - coveredRows[row] = false; - } - } - /* move to step 2b */ + { + coveredRows[row] = false; + } + } + if constexpr (HUNGARIAN_LOGS) + std::cout << "assignmentoptimal: move to step 2b" << std::endl; step2b(assignment, starMatrix, newStarMatrix, primeMatrix, coveredColumns, coveredRows, nOfRows, nOfColumns, (nOfRows <= nOfColumns) ? 
nOfRows : nOfColumns); - /* compute cost and remove invalid assignments */ - computeassignmentcost(assignment, cost, distMatrixIn, nOfRows); - /* free allocated memory */ - free(coveredColumns); - free(coveredRows); - free(starMatrix); - free(primeMatrix); - free(newStarMatrix); - return; + if constexpr (HUNGARIAN_LOGS) + std::cout << "assignmentoptimal: compute cost and remove invalid assignments" << std::endl; + computeassignmentcost(assignment, cost, distMatrixIn, nOfRows); + if constexpr (HUNGARIAN_LOGS) + std::cout << "assignmentoptimal: free allocated memory" << std::endl; + free(coveredColumns); + free(coveredRows); + free(starMatrix); + free(primeMatrix); + free(newStarMatrix); + return; } // -------------------------------------------------------------------------- // @@ -157,16 +169,16 @@ void AssignmentProblemSolver::assignmentoptimal(assignments_t& assignment, track void AssignmentProblemSolver::buildassignmentvector(assignments_t& assignment, bool *starMatrix, size_t nOfRows, size_t nOfColumns) { for (size_t row = 0; row < nOfRows; ++row) - { + { for (size_t col = 0; col < nOfColumns; ++col) - { - if (starMatrix[row + nOfRows * col]) - { - assignment[row] = static_cast(col); - break; - } - } - } + { + if (starMatrix[row + nOfRows * col]) + { + assignment[row] = static_cast(col); + break; + } + } + } } // -------------------------------------------------------------------------- // @@ -174,11 +186,11 @@ void AssignmentProblemSolver::buildassignmentvector(assignments_t& assignment, b void AssignmentProblemSolver::computeassignmentcost(const assignments_t& assignment, track_t& cost, const distMatrix_t& distMatrixIn, size_t nOfRows) { for (size_t row = 0; row < nOfRows; ++row) - { - const int col = assignment[row]; - if (col >= 0) - cost += distMatrixIn[row + nOfRows * col]; - } + { + const int col = assignment[row]; + if (col >= 0) + cost += distMatrixIn[row + nOfRows * col]; + } } // 
-------------------------------------------------------------------------- @@ -187,7 +199,8 @@ void AssignmentProblemSolver::computeassignmentcost(const assignments_t& assignm void AssignmentProblemSolver::step2a(assignments_t& assignment, bool *starMatrix, bool *newStarMatrix, bool *primeMatrix, bool *coveredColumns, bool *coveredRows, size_t nOfRows, size_t nOfColumns, size_t minDim) { bool *starMatrixTemp, *columnEnd; - /* cover every column containing a starred zero */ + if constexpr (HUNGARIAN_LOGS) + std::cout << "step2a: cover every column containing a starred zero" << std::endl; for (size_t col = 0; col < nOfColumns; ++col) { starMatrixTemp = starMatrix + nOfRows * col; @@ -201,7 +214,8 @@ void AssignmentProblemSolver::step2a(assignments_t& assignment, bool *starMatrix } } } - /* move to step 3 */ + if constexpr (HUNGARIAN_LOGS) + std::cout << "step2a: move to step 3" << std::endl; step2b(assignment, starMatrix, newStarMatrix, primeMatrix, coveredColumns, coveredRows, nOfRows, nOfColumns, minDim); } @@ -210,15 +224,16 @@ void AssignmentProblemSolver::step2a(assignments_t& assignment, bool *starMatrix // -------------------------------------------------------------------------- void AssignmentProblemSolver::step2b(assignments_t& assignment, bool *starMatrix, bool *newStarMatrix, bool *primeMatrix, bool *coveredColumns, bool *coveredRows, size_t nOfRows, size_t nOfColumns, size_t minDim) { - /* count covered columns */ + if constexpr (HUNGARIAN_LOGS) + std::cout << "step2b: count covered columns" << std::endl; size_t nOfCoveredColumns = 0; for (size_t col = 0; col < nOfColumns; ++col) - { - if (coveredColumns[col]) - nOfCoveredColumns++; - } + { + if (coveredColumns[col]) + nOfCoveredColumns++; + } if (nOfCoveredColumns == minDim) // algorithm finished - buildassignmentvector(assignment, starMatrix, nOfRows, nOfColumns); + buildassignmentvector(assignment, starMatrix, nOfRows, nOfColumns); else // move to step 3 step3_5(assignment, starMatrix, newStarMatrix, 
primeMatrix, coveredColumns, coveredRows, nOfRows, nOfColumns, minDim); } @@ -228,88 +243,97 @@ void AssignmentProblemSolver::step2b(assignments_t& assignment, bool *starMatrix // -------------------------------------------------------------------------- void AssignmentProblemSolver::step3_5(assignments_t& assignment, bool *starMatrix, bool *newStarMatrix, bool *primeMatrix, bool *coveredColumns, bool *coveredRows, size_t nOfRows, size_t nOfColumns, size_t minDim) { - for (;;) - { - /* step 3 */ - bool zerosFound = true; - while (zerosFound) - { - zerosFound = false; + for (;;) + { + if constexpr (HUNGARIAN_LOGS) + std::cout << "step3_5: step 3" << std::endl; + bool zerosFound = true; + while (zerosFound) + { + zerosFound = false; for (size_t col = 0; col < nOfColumns; ++col) - { - if (!coveredColumns[col]) - { + { + if (!coveredColumns[col]) + { for (size_t row = 0; row < nOfRows; ++row) - { + { if ((!coveredRows[row]) && (m_distMatrix[row + nOfRows*col] == 0)) - { - /* prime zero */ - primeMatrix[row + nOfRows*col] = true; - /* find starred zero in current row */ - size_t starCol = 0; + { + if constexpr (HUNGARIAN_LOGS) + std::cout << "step3_5: prime zero" << std::endl; + primeMatrix[row + nOfRows*col] = true; + if constexpr (HUNGARIAN_LOGS) + std::cout << "step3_5: find starred zero in current row" << std::endl; + size_t starCol = 0; for (; starCol < nOfColumns; ++starCol) - { - if (starMatrix[row + nOfRows * starCol]) - break; - } - if (starCol == nOfColumns) /* no starred zero found */ - { - /* move to step 4 */ + { + if (starMatrix[row + nOfRows * starCol]) + break; + } + if constexpr (HUNGARIAN_LOGS) + std::cout << "step3_5: starCol = " << starCol << ", nOfColumns = " << nOfColumns << std::endl; + if (starCol == nOfColumns) // no starred zero found + { + if constexpr (HUNGARIAN_LOGS) + std::cout << "step3_5: move to step 4" << std::endl; step4(assignment, starMatrix, newStarMatrix, primeMatrix, coveredColumns, coveredRows, nOfRows, nOfColumns, minDim, row, 
col); - return; - } - else - { - coveredRows[row] = true; - coveredColumns[starCol] = false; - zerosFound = true; - break; - } - } - } - } - } - } - /* step 5 */ + return; + } + else + { + coveredRows[row] = true; + coveredColumns[starCol] = false; + zerosFound = true; + break; + } + } + } + } + } + } + if constexpr (HUNGARIAN_LOGS) + std::cout << "step3_5: step 5" << std::endl; track_t h = std::numeric_limits::max(); for (size_t row = 0; row < nOfRows; ++row) - { - if (!coveredRows[row]) - { + { + if (!coveredRows[row]) + { for (size_t col = 0; col < nOfColumns; ++col) - { - if (!coveredColumns[col]) - { + { + if (!coveredColumns[col]) + { const track_t value = m_distMatrix[row + nOfRows*col]; - if (value < h) - h = value; - } - } - } - } - /* add h to each covered row */ + if (value < h) + h = value; + } + } + } + } + if constexpr (HUNGARIAN_LOGS) + std::cout << "step3_5: add h to each covered row, h = " << h << std::endl; for (size_t row = 0; row < nOfRows; ++row) - { - if (coveredRows[row]) - { + { + if (coveredRows[row]) + { for (size_t col = 0; col < nOfColumns; ++col) - { + { m_distMatrix[row + nOfRows*col] += h; - } - } - } - /* subtract h from each uncovered column */ + } + } + } + if constexpr (HUNGARIAN_LOGS) + std::cout << "step3_5: subtract h from each uncovered column" << std::endl; for (size_t col = 0; col < nOfColumns; ++col) - { - if (!coveredColumns[col]) - { + { + if (!coveredColumns[col]) + { for (size_t row = 0; row < nOfRows; ++row) - { + { m_distMatrix[row + nOfRows*col] -= h; - } - } - } - } + } + } + } + } } // -------------------------------------------------------------------------- @@ -317,56 +341,60 @@ void AssignmentProblemSolver::step3_5(assignments_t& assignment, bool *starMatri // -------------------------------------------------------------------------- void AssignmentProblemSolver::step4(assignments_t& assignment, bool *starMatrix, bool *newStarMatrix, bool *primeMatrix, bool *coveredColumns, bool *coveredRows, size_t nOfRows, 
size_t nOfColumns, size_t minDim, size_t row, size_t col) { - const size_t nOfElements = nOfRows * nOfColumns; - /* generate temporary copy of starMatrix */ + const size_t nOfElements = nOfRows * nOfColumns; + if constexpr (HUNGARIAN_LOGS) + std::cout << "step4: generate temporary copy of starMatrix" << std::endl; for (size_t n = 0; n < nOfElements; ++n) - { - newStarMatrix[n] = starMatrix[n]; - } - /* star current zero */ - newStarMatrix[row + nOfRows*col] = true; - /* find starred zero in current column */ - size_t starCol = col; - size_t starRow = 0; + { + newStarMatrix[n] = starMatrix[n]; + } + if constexpr (HUNGARIAN_LOGS) + std::cout << "step4: star current zero" << std::endl; + newStarMatrix[row + nOfRows*col] = true; + if constexpr (HUNGARIAN_LOGS) + std::cout << "step4: find starred zero in current column" << std::endl; + size_t starCol = col; + size_t starRow = 0; for (; starRow < nOfRows; ++starRow) - { - if (starMatrix[starRow + nOfRows * starCol]) - break; - } - while (starRow < nOfRows) - { - /* unstar the starred zero */ - newStarMatrix[starRow + nOfRows*starCol] = false; - /* find primed zero in current row */ - size_t primeRow = starRow; - size_t primeCol = 0; + { + if (starMatrix[starRow + nOfRows * starCol]) + break; + } + while (starRow < nOfRows) + { + // unstar the starred zero + newStarMatrix[starRow + nOfRows*starCol] = false; + // find primed zero in current row + size_t primeRow = starRow; + size_t primeCol = 0; for (; primeCol < nOfColumns; ++primeCol) - { - if (primeMatrix[primeRow + nOfRows * primeCol]) - break; - } - /* star the primed zero */ - newStarMatrix[primeRow + nOfRows*primeCol] = true; - /* find starred zero in current column */ - starCol = primeCol; + { + if (primeMatrix[primeRow + nOfRows * primeCol]) + break; + } + // star the primed zero + newStarMatrix[primeRow + nOfRows*primeCol] = true; + // find starred zero in current column + starCol = primeCol; for (starRow = 0; starRow < nOfRows; ++starRow) - { - if 
(starMatrix[starRow + nOfRows * starCol]) - break; - } - } - /* use temporary copy as new starMatrix */ - /* delete all primes, uncover all rows */ + { + if (starMatrix[starRow + nOfRows * starCol]) + break; + } + } + // use temporary copy as new starMatrix + // delete all primes, uncover all rows for (size_t n = 0; n < nOfElements; ++n) - { - primeMatrix[n] = false; - starMatrix[n] = newStarMatrix[n]; - } + { + primeMatrix[n] = false; + starMatrix[n] = newStarMatrix[n]; + } for (size_t n = 0; n < nOfRows; ++n) - { - coveredRows[n] = false; - } - /* move to step 2a */ + { + coveredRows[n] = false; + } + if constexpr (HUNGARIAN_LOGS) + std::cout << "move to step 2a" << std::endl; step2a(assignment, starMatrix, newStarMatrix, primeMatrix, coveredColumns, coveredRows, nOfRows, nOfColumns, minDim); } @@ -375,281 +403,292 @@ void AssignmentProblemSolver::step4(assignments_t& assignment, bool *starMatrix, // -------------------------------------------------------------------------- void AssignmentProblemSolver::assignmentsuboptimal2(assignments_t& assignment, track_t& cost, const distMatrix_t& distMatrixIn, size_t nOfRows, size_t nOfColumns) { - /* make working copy of distance Matrix */ + if constexpr (HUNGARIAN_LOGS) + std::cout << "make working copy of distance Matrix" << std::endl; m_distMatrix.assign(std::begin(distMatrixIn), std::end(distMatrixIn)); - /* recursively search for the minimum element and do the assignment */ - for (;;) - { - /* find minimum distance observation-to-track pair */ - track_t minValue = std::numeric_limits::max(); - size_t tmpRow = 0; - size_t tmpCol = 0; + if constexpr (HUNGARIAN_LOGS) + std::cout << "recursively search for the minimum element and do the assignment" << std::endl; + for (;;) + { + // find minimum distance observation-to-track pair + track_t minValue = std::numeric_limits::max(); + size_t tmpRow = 0; + size_t tmpCol = 0; for (size_t row = 0; row < nOfRows; ++row) - { + { for (size_t col = 0; col < nOfColumns; ++col) - { + { 
const track_t value = m_distMatrix[row + nOfRows*col]; - if (value != std::numeric_limits::max() && (value < minValue)) - { - minValue = value; - tmpRow = row; - tmpCol = col; - } - } - } - - if (minValue != std::numeric_limits::max()) - { - assignment[tmpRow] = static_cast(tmpCol); - cost += minValue; + if (value != std::numeric_limits::max() && (value < minValue)) + { + minValue = value; + tmpRow = row; + tmpCol = col; + } + } + } + + if (minValue != std::numeric_limits::max()) + { + assignment[tmpRow] = static_cast(tmpCol); + cost += minValue; for (size_t n = 0; n < nOfRows; ++n) - { + { m_distMatrix[n + nOfRows*tmpCol] = std::numeric_limits::max(); - } + } for (size_t n = 0; n < nOfColumns; ++n) - { + { m_distMatrix[tmpRow + nOfRows*n] = std::numeric_limits::max(); - } - } - else - { - break; - } - } + } + } + else + { + break; + } + } } // -------------------------------------------------------------------------- // Computes a suboptimal solution. Good for cases with many forbidden assignments. 
// -------------------------------------------------------------------------- void AssignmentProblemSolver::assignmentsuboptimal1(assignments_t& assignment, track_t& cost, const distMatrix_t& distMatrixIn, size_t nOfRows, size_t nOfColumns) { - /* make working copy of distance Matrix */ + if constexpr (HUNGARIAN_LOGS) + std::cout << "assignmentsuboptimal1: make working copy of distance Matrix" << std::endl; m_distMatrix.assign(std::begin(distMatrixIn), std::end(distMatrixIn)); - /* allocate memory */ - int* nOfValidObservations = (int *)calloc(nOfRows, sizeof(int)); - int* nOfValidTracks = (int *)calloc(nOfColumns, sizeof(int)); + if constexpr (HUNGARIAN_LOGS) + std::cout << "assignmentsuboptimal1: allocate memory" << std::endl; + int* nOfValidObservations = (int *)calloc(nOfRows, sizeof(int)); + int* nOfValidTracks = (int *)calloc(nOfColumns, sizeof(int)); - /* compute number of validations */ - bool infiniteValueFound = false; - bool finiteValueFound = false; + if constexpr (HUNGARIAN_LOGS) + std::cout << "assignmentsuboptimal1: compute number of validations" << std::endl; + bool infiniteValueFound = false; + bool finiteValueFound = false; for (size_t row = 0; row < nOfRows; ++row) - { + { for (size_t col = 0; col < nOfColumns; ++col) - { + { if (m_distMatrix[row + nOfRows*col] != std::numeric_limits::max()) - { - nOfValidTracks[col] += 1; - nOfValidObservations[row] += 1; - finiteValueFound = true; - } - else - { - infiniteValueFound = true; - } - } - } - - if (infiniteValueFound) - { - if (!finiteValueFound) - { - /* free allocated memory */ + { + nOfValidTracks[col] += 1; + nOfValidObservations[row] += 1; + finiteValueFound = true; + } + else + { + infiniteValueFound = true; + } + } + } + + if (infiniteValueFound) + { + if (!finiteValueFound) + { + if constexpr (HUNGARIAN_LOGS) + std::cout << "assignmentsuboptimal1: free allocated memory" << std::endl; free(nOfValidObservations); free(nOfValidTracks); - return; - } - bool repeatSteps = true; + return; + } + 
bool repeatSteps = true; - while (repeatSteps) - { - repeatSteps = false; + while (repeatSteps) + { + repeatSteps = false; - /* step 1: reject assignments of multiply validated tracks to singly validated observations */ + if constexpr (HUNGARIAN_LOGS) + std::cout << "assignmentsuboptimal1: step 1: reject assignments of multiply validated tracks to singly validated observation" << std::endl; for (size_t col = 0; col < nOfColumns; ++col) - { - bool singleValidationFound = false; + { + bool singleValidationFound = false; for (size_t row = 0; row < nOfRows; ++row) - { + { if (m_distMatrix[row + nOfRows * col] != std::numeric_limits::max() && (nOfValidObservations[row] == 1)) - { - singleValidationFound = true; - break; - } - } - if (singleValidationFound) - { + { + singleValidationFound = true; + break; + } + } + if (singleValidationFound) + { for (size_t nestedRow = 0; nestedRow < nOfRows; ++nestedRow) if ((nOfValidObservations[nestedRow] > 1) && m_distMatrix[nestedRow + nOfRows * col] != std::numeric_limits::max()) - { + { m_distMatrix[nestedRow + nOfRows * col] = std::numeric_limits::max(); - nOfValidObservations[nestedRow] -= 1; - nOfValidTracks[col] -= 1; - repeatSteps = true; - } - } - } - - /* step 2: reject assignments of multiply validated observations to singly validated tracks */ - if (nOfColumns > 1) - { + nOfValidObservations[nestedRow] -= 1; + nOfValidTracks[col] -= 1; + repeatSteps = true; + } + } + } + + if constexpr (HUNGARIAN_LOGS) + std::cout << "assignmentsuboptimal1: step 2: reject assignments of multiply validated observations to singly validated tracks" << std::endl; + if (nOfColumns > 1) + { for (size_t row = 0; row < nOfRows; ++row) - { - bool singleValidationFound = false; + { + bool singleValidationFound = false; for (size_t col = 0; col < nOfColumns; ++col) - { + { if (m_distMatrix[row + nOfRows*col] != std::numeric_limits::max() && (nOfValidTracks[col] == 1)) - { - singleValidationFound = true; - break; - } - } - - if 
(singleValidationFound) - { + { + singleValidationFound = true; + break; + } + } + + if (singleValidationFound) + { for (size_t col = 0; col < nOfColumns; ++col) - { + { if ((nOfValidTracks[col] > 1) && m_distMatrix[row + nOfRows*col] != std::numeric_limits::max()) - { + { m_distMatrix[row + nOfRows*col] = std::numeric_limits::max(); - nOfValidObservations[row] -= 1; - nOfValidTracks[col] -= 1; - repeatSteps = true; - } - } - } - } - } - } /* while(repeatSteps) */ - - /* for each multiply validated track that validates only with singly validated */ - /* observations, choose the observation with minimum distance */ + nOfValidObservations[row] -= 1; + nOfValidTracks[col] -= 1; + repeatSteps = true; + } + } + } + } + } + } // while(repeatSteps) + + if constexpr (HUNGARIAN_LOGS) + std::cout << "assignmentsuboptimal1: for each multiply validated track that validates only with singly validated observations, choose the observation with minimum distance" << std::endl; for (size_t row = 0; row < nOfRows; ++row) - { - if (nOfValidObservations[row] > 1) - { - bool allSinglyValidated = true; - track_t minValue = std::numeric_limits::max(); - size_t tmpCol = 0; + { + if (nOfValidObservations[row] > 1) + { + bool allSinglyValidated = true; + track_t minValue = std::numeric_limits::max(); + size_t tmpCol = 0; for (size_t col = 0; col < nOfColumns; ++col) - { + { const track_t value = m_distMatrix[row + nOfRows*col]; - if (value != std::numeric_limits::max()) - { - if (nOfValidTracks[col] > 1) - { - allSinglyValidated = false; - break; - } - else if ((nOfValidTracks[col] == 1) && (value < minValue)) - { - tmpCol = col; - minValue = value; - } - } - } - - if (allSinglyValidated) - { - assignment[row] = static_cast(tmpCol); - cost += minValue; + if (value != std::numeric_limits::max()) + { + if (nOfValidTracks[col] > 1) + { + allSinglyValidated = false; + break; + } + else if ((nOfValidTracks[col] == 1) && (value < minValue)) + { + tmpCol = col; + minValue = value; + } + } + } + + 
if (allSinglyValidated) + { + assignment[row] = static_cast(tmpCol); + cost += minValue; for (size_t n = 0; n < nOfRows; ++n) - { + { m_distMatrix[n + nOfRows*tmpCol] = std::numeric_limits::max(); - } + } for (size_t n = 0; n < nOfColumns; ++n) - { + { m_distMatrix[row + nOfRows*n] = std::numeric_limits::max(); - } - } - } - } + } + } + } + } - // for each multiply validated observation that validates only with singly validated track, choose the track with minimum distance + if constexpr (HUNGARIAN_LOGS) + std::cout << "assignmentsuboptimal1: for each multiply validated observation that validates only with singly validated track, choose the track with minimum distance" << std::endl; for (size_t col = 0; col < nOfColumns; ++col) - { - if (nOfValidTracks[col] > 1) - { - bool allSinglyValidated = true; - track_t minValue = std::numeric_limits::max(); - size_t tmpRow = 0; + { + if (nOfValidTracks[col] > 1) + { + bool allSinglyValidated = true; + track_t minValue = std::numeric_limits::max(); + size_t tmpRow = 0; for (size_t row = 0; row < nOfRows; ++row) - { + { const track_t value = m_distMatrix[row + nOfRows*col]; - if (value != std::numeric_limits::max()) - { - if (nOfValidObservations[row] > 1) - { - allSinglyValidated = false; - break; - } - else if ((nOfValidObservations[row] == 1) && (value < minValue)) - { - tmpRow = row; - minValue = value; - } - } - } - - if (allSinglyValidated) - { - assignment[tmpRow] = static_cast(col); - cost += minValue; + if (value != std::numeric_limits::max()) + { + if (nOfValidObservations[row] > 1) + { + allSinglyValidated = false; + break; + } + else if ((nOfValidObservations[row] == 1) && (value < minValue)) + { + tmpRow = row; + minValue = value; + } + } + } + + if (allSinglyValidated) + { + assignment[tmpRow] = static_cast(col); + cost += minValue; for (size_t n = 0; n < nOfRows; ++n) - { + { m_distMatrix[n + nOfRows*col] = std::numeric_limits::max(); - } + } for (size_t n = 0; n < nOfColumns; ++n) - { + { m_distMatrix[tmpRow + 
nOfRows*n] = std::numeric_limits::max(); - } - } - } - } - } /* if(infiniteValueFound) */ - - - /* now, recursively search for the minimum element and do the assignment */ - for (;;) - { - /* find minimum distance observation-to-track pair */ - track_t minValue = std::numeric_limits::max(); - size_t tmpRow = 0; - size_t tmpCol = 0; + } + } + } + } + } // if(infiniteValueFound) + + + if constexpr (HUNGARIAN_LOGS) + std::cout << "assignmentsuboptimal1: now, recursively search for the minimum element and do the assignment" << std::endl; + for (;;) + { + // find minimum distance observation-to-track pair + track_t minValue = std::numeric_limits::max(); + size_t tmpRow = 0; + size_t tmpCol = 0; for (size_t row = 0; row < nOfRows; ++row) - { + { for (size_t col = 0; col < nOfColumns; ++col) - { + { const track_t value = m_distMatrix[row + nOfRows*col]; - if (value != std::numeric_limits::max() && (value < minValue)) - { - minValue = value; - tmpRow = row; - tmpCol = col; - } - } - } - - if (minValue != std::numeric_limits::max()) - { - assignment[tmpRow] = static_cast(tmpCol); - cost += minValue; + if (value != std::numeric_limits::max() && (value < minValue)) + { + minValue = value; + tmpRow = row; + tmpCol = col; + } + } + } + + if (minValue != std::numeric_limits::max()) + { + assignment[tmpRow] = static_cast(tmpCol); + cost += minValue; for (size_t n = 0; n < nOfRows; ++n) - { + { m_distMatrix[n + nOfRows*tmpCol] = std::numeric_limits::max(); - } + } for (size_t n = 0; n < nOfColumns; ++n) - { + { m_distMatrix[tmpRow + nOfRows*n] = std::numeric_limits::max(); - } - } - else - { - break; - } - } - - /* free allocated memory */ - free(nOfValidObservations); - free(nOfValidTracks); + } + } + else + { + break; + } + } + + if constexpr (HUNGARIAN_LOGS) + std::cout << "assignmentsuboptimal1: free allocated memory" << std::endl; + free(nOfValidObservations); + free(nOfValidTracks); } diff --git a/src/Tracker/HungarianAlg/HungarianAlg.h 
b/src/Tracker/HungarianAlg/HungarianAlg.h index 770e51076..7a27becc3 100644 --- a/src/Tracker/HungarianAlg/HungarianAlg.h +++ b/src/Tracker/HungarianAlg/HungarianAlg.h @@ -38,4 +38,6 @@ class AssignmentProblemSolver void assignmentsuboptimal2(assignments_t& assignment, track_t& cost, const distMatrix_t& distMatrixIn, size_t nOfRows, size_t nOfColumns); std::vector m_distMatrix; + + static constexpr bool HUNGARIAN_LOGS = false; }; diff --git a/src/Tracker/Kalman.cpp b/src/Tracker/Kalman.cpp index 258865416..4fc2ed50b 100644 --- a/src/Tracker/Kalman.cpp +++ b/src/Tracker/Kalman.cpp @@ -10,13 +10,17 @@ namespace kalman = cv::detail::tracking; #endif #endif -//--------------------------------------------------------------------------- -TKalmanFilter::TKalmanFilter( - tracking::KalmanType type, - bool useAcceleration, - track_t deltaTime, // time increment (lower values makes target more "massive") - track_t accelNoiseMag - ) +/// +/// \brief TKalmanFilter::TKalmanFilter +/// \param type +/// \param useAcceleration +/// \param deltaTime +/// \param accelNoiseMag +/// +TKalmanFilter::TKalmanFilter(tracking::KalmanType type, + bool useAcceleration, + track_t deltaTime, // time increment (lower values makes target more "massive") + track_t accelNoiseMag) : m_accelNoiseMag(accelNoiseMag), m_deltaTime(deltaTime), @@ -28,7 +32,11 @@ TKalmanFilter::TKalmanFilter( m_deltaStep = (m_deltaTimeMax - m_deltaTimeMin) / m_deltaStepsCount; } -//--------------------------------------------------------------------------- +/// +/// \brief TKalmanFilter::CreateLinear +/// \param xy0 +/// \param xyv0 +/// void TKalmanFilter::CreateLinear(Point_t xy0, Point_t xyv0) { // We don't know acceleration, so, assume it to process noise. 
@@ -72,12 +80,16 @@ void TKalmanFilter::CreateLinear(Point_t xy0, Point_t xyv0) cv::setIdentity(m_linearKalman.errorCovPost, cv::Scalar::all(.1)); - m_initialPoints.reserve(MIN_INIT_VALS); + m_initialPoints.reserve(MIN_INIT_VALS); m_initialized = true; } -//--------------------------------------------------------------------------- +/// +/// \brief TKalmanFilter::CreateLinear +/// \param rect0 +/// \param rectv0 +/// void TKalmanFilter::CreateLinear(cv::Rect_ rect0, Point_t rectv0) { // We don't know acceleration, so, assume it to process noise. @@ -92,7 +104,7 @@ void TKalmanFilter::CreateLinear(cv::Rect_ rect0, Point_t rectv0) 1, 0, 0, 0, m_deltaTime, 0, 0, 0, 0, 1, 0, 0, 0, m_deltaTime, 0, 0, 0, 0, 1, 0, 0, 0, m_deltaTime, 0, - 0, 0, 0, 1, 0, 0, 0, m_deltaTime, + 0, 0, 0, 1, 0, 0, 0, m_deltaTime / 10.f, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, @@ -138,55 +150,76 @@ void TKalmanFilter::CreateLinear(cv::Rect_ rect0, Point_t rectv0) cv::setIdentity(m_linearKalman.errorCovPost, cv::Scalar::all(.1)); - m_initialRects.reserve(MIN_INIT_VALS); + m_initialRects.reserve(MIN_INIT_VALS); m_initialized = true; } -//--------------------------------------------------------------------------- -void TKalmanFilter::CreateLinearAcceleration(Point_t xy0, Point_t xyv0) +/// +/// \brief TKalmanFilter::CreateLinear +/// \param rrect0 +/// \param rrectv0 +/// +void TKalmanFilter::CreateLinear(cv::RotatedRect rrect0, Point_t rrectv0) { - // 6 state variables, 2 measurements - m_linearKalman.init(6, 2, 0, El_t); - // Transition cv::Matrix - const track_t dt = m_deltaTime; - const track_t dt2 = 0.5f * m_deltaTime * m_deltaTime; - m_linearKalman.transitionMatrix = (cv::Mat_(6, 6) << - 1, 0, dt, 0, dt2, 0, - 0, 1, 0, dt, 0, dt2, - 0, 0, 1, 0, dt, 0, - 0, 0, 0, 1, 0, dt, - 0, 0, 0, 0, 1, 0, - 0, 0, 0, 0, 0, 1); + // We don't know acceleration, so, assume it to process noise. 
+ // But we can guess, the range of acceleration values thich can be achieved by tracked object. + // Process noise. (standard deviation of acceleration: m/s^2) + // shows, woh much target can accelerate. + // 10 state variables (x, y, vx, vy, width, height, vw, vh, angle, vangle), 5 measurements (x, y, width, height, angle) + m_linearKalman.init(10, 5, 0, El_t); + // Transition cv::Matrix + m_linearKalman.transitionMatrix = (cv::Mat_(10, 10) << + 1, 0, 0, 0, 0, m_deltaTime, 0, 0, 0, 0, + 0, 1, 0, 0, 0, 0, m_deltaTime, 0, 0, 0, + 0, 0, 1, 0, 0, 0, 0, m_deltaTime, 0, 0, + 0, 0, 0, 1, 0, 0, 0, 0, m_deltaTime, 0, + 0, 0, 0, 0, 1, 0, 0, 0, 0, m_deltaTime, + 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1); // init... - m_lastPointResult = xy0; - m_linearKalman.statePre.at(0) = xy0.x; // x - m_linearKalman.statePre.at(1) = xy0.y; // y - m_linearKalman.statePre.at(2) = xyv0.x; // vx - m_linearKalman.statePre.at(3) = xyv0.y; // vy - m_linearKalman.statePre.at(4) = 0; // ax - m_linearKalman.statePre.at(5) = 0; // ay - - m_linearKalman.statePost.at(0) = xy0.x; - m_linearKalman.statePost.at(1) = xy0.y; - m_linearKalman.statePost.at(2) = xyv0.x; - m_linearKalman.statePost.at(3) = xyv0.y; - m_linearKalman.statePost.at(4) = 0; - m_linearKalman.statePost.at(5) = 0; + m_linearKalman.statePre.at(0) = rrect0.center.x; // x + m_linearKalman.statePre.at(1) = rrect0.center.y; // y + m_linearKalman.statePre.at(2) = rrect0.size.width; // width + m_linearKalman.statePre.at(3) = rrect0.size.height; // height + m_linearKalman.statePre.at(4) = rrect0.angle; // angle + m_linearKalman.statePre.at(5) = rrectv0.x; // dx + m_linearKalman.statePre.at(6) = rrectv0.y; // dy + m_linearKalman.statePre.at(7) = 0; // dw + m_linearKalman.statePre.at(8) = 0; // dh + m_linearKalman.statePre.at(9) = 0; // da + + m_linearKalman.statePost.at(0) = rrect0.center.x; + m_linearKalman.statePost.at(1) = 
rrect0.center.y; + m_linearKalman.statePost.at(2) = rrect0.size.width; + m_linearKalman.statePost.at(3) = rrect0.size.height; + m_linearKalman.statePost.at(4) = rrect0.angle; + m_linearKalman.statePost.at(5) = rrectv0.x; + m_linearKalman.statePost.at(6) = rrectv0.y; + m_linearKalman.statePost.at(7) = 0; + m_linearKalman.statePost.at(8) = 0; + m_linearKalman.statePost.at(9) = 0; cv::setIdentity(m_linearKalman.measurementMatrix); track_t n1 = pow(m_deltaTime, 4.f) / 4.f; track_t n2 = pow(m_deltaTime, 3.f) / 2.f; track_t n3 = pow(m_deltaTime, 2.f); - m_linearKalman.processNoiseCov = (cv::Mat_(6, 6) << - n1, 0, n2, 0, n2, 0, - 0, n1, 0, n2, 0, n2, - n2, 0, n3, 0, n3, 0, - 0, n2, 0, n3, 0, n3, - 0, 0, n2, 0, n3, 0, - 0, 0, 0, n2, 0, n3); + m_linearKalman.processNoiseCov = (cv::Mat_(10, 10) << + n1, 0, 0, 0, 0, n2, 0, 0, 0, 0, + 0, n1, 0, 0, 0, 0, n2, 0, 0, 0, + 0, 0, n1, 0, 0, 0, 0, n2, 0, 0, + 0, 0, 0, n1, 0, 0, 0, 0, n2, 0, + 0, 0, 0, 0, n1, 0, 0, 0, 0, n2, + n2, 0, 0, 0, 0, n3, 0, 0, 0, 0, + 0, n2, 0, 0, 0, 0, n3, 0, 0, 0, + 0, 0, n2, 0, 0, 0, 0, n3, 0, 0, + 0, 0, 0, n2, 0, 0, 0, 0, n3, 0, + 0, 0, 0, 0, n2, 0, 0, 0, 0, n3); m_linearKalman.processNoiseCov *= m_accelNoiseMag; @@ -194,93 +227,168 @@ void TKalmanFilter::CreateLinearAcceleration(Point_t xy0, Point_t xyv0) cv::setIdentity(m_linearKalman.errorCovPost, cv::Scalar::all(.1)); - m_initialPoints.reserve(MIN_INIT_VALS); + m_initialRects.reserve(MIN_INIT_VALS); m_initialized = true; } -//--------------------------------------------------------------------------- +/// +/// \brief TKalmanFilter::CreateLinearAcceleration +/// \param xy0 +/// \param xyv0 +/// +void TKalmanFilter::CreateLinearAcceleration(Point_t xy0, Point_t xyv0) +{ + // 6 state variables, 2 measurements + m_linearKalman.init(6, 2, 0, El_t); + // Transition cv::Matrix + const track_t dt = m_deltaTime; + const track_t dt2 = 0.5f * m_deltaTime * m_deltaTime; + m_linearKalman.transitionMatrix = (cv::Mat_(6, 6) << + 1, 0, dt, 0, dt2, 0, + 0, 1, 0, dt, 0, 
dt2, + 0, 0, 1, 0, dt, 0, + 0, 0, 0, 1, 0, dt, + 0, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 1); + + // init... + m_lastPointResult = xy0; + m_linearKalman.statePre.at(0) = xy0.x; // x + m_linearKalman.statePre.at(1) = xy0.y; // y + m_linearKalman.statePre.at(2) = xyv0.x; // vx + m_linearKalman.statePre.at(3) = xyv0.y; // vy + m_linearKalman.statePre.at(4) = 0; // ax + m_linearKalman.statePre.at(5) = 0; // ay + + m_linearKalman.statePost.at(0) = xy0.x; + m_linearKalman.statePost.at(1) = xy0.y; + m_linearKalman.statePost.at(2) = xyv0.x; + m_linearKalman.statePost.at(3) = xyv0.y; + m_linearKalman.statePost.at(4) = 0; + m_linearKalman.statePost.at(5) = 0; + + cv::setIdentity(m_linearKalman.measurementMatrix); + + track_t n1 = pow(m_deltaTime, 4.f) / 4.f; + track_t n2 = pow(m_deltaTime, 3.f) / 2.f; + track_t n3 = pow(m_deltaTime, 2.f); + m_linearKalman.processNoiseCov = (cv::Mat_(6, 6) << + n1, 0, n2, 0, n2, 0, + 0, n1, 0, n2, 0, n2, + n2, 0, n3, 0, n3, 0, + 0, n2, 0, n3, 0, n3, + 0, 0, n2, 0, n3, 0, + 0, 0, 0, n2, 0, n3); + + m_linearKalman.processNoiseCov *= m_accelNoiseMag; + + cv::setIdentity(m_linearKalman.measurementNoiseCov, cv::Scalar::all(0.1)); + + cv::setIdentity(m_linearKalman.errorCovPost, cv::Scalar::all(.1)); + + m_initialPoints.reserve(MIN_INIT_VALS); + + m_initialized = true; +} + +/// +/// \brief TKalmanFilter::CreateLinearAcceleration +/// \param rect0 +/// \param rectv0 +/// void TKalmanFilter::CreateLinearAcceleration(cv::Rect_ rect0, Point_t rectv0) { - // 12 state variables (x, y, vx, vy, ax, ay, width, height, vw, vh, aw, ah), 4 measurements (x, y, width, height) - m_linearKalman.init(12, 4, 0, El_t); - // Transition cv::Matrix - const track_t dt = m_deltaTime; - const track_t dt2 = 0.5f * m_deltaTime * m_deltaTime; - m_linearKalman.transitionMatrix = (cv::Mat_(12, 12) << - 1, 0, 0, 0, dt, 0, 0, 0, dt2, 0, dt2, 0, - 0, 1, 0, 0, 0, dt, 0, 0, 0, dt2, 0, dt2, - 0, 0, 1, 0, 0, 0, dt, 0, 0, 0, dt2, 0, - 0, 0, 0, 1, 0, 0, 0, dt, 0, 0, 0, dt2, - 0, 0, 0, 0, 1, 
0, 0, 0, dt, 0, 0, 0, - 0, 0, 0, 0, 0, 1, 0, 0, 0, dt, 0, 0, - 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, dt, 0, - 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, dt, - 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1); + // 12 state variables (x, y, vx, vy, ax, ay, width, height, vw, vh, aw, ah), 4 measurements (x, y, width, height) + m_linearKalman.init(12, 4, 0, El_t); + // Transition cv::Matrix + const track_t dt = m_deltaTime; + const track_t dt2 = 0.5f * m_deltaTime * m_deltaTime; + m_linearKalman.transitionMatrix = (cv::Mat_(12, 12) << + 1, 0, 0, 0, dt, 0, 0, 0, dt2, 0, dt2, 0, + 0, 1, 0, 0, 0, dt, 0, 0, 0, dt2, 0, dt2, + 0, 0, 1, 0, 0, 0, dt, 0, 0, 0, dt2, 0, + 0, 0, 0, 1, 0, 0, 0, dt, 0, 0, 0, dt2, + 0, 0, 0, 0, 1, 0, 0, 0, dt, 0, 0, 0, + 0, 0, 0, 0, 0, 1, 0, 0, 0, dt, 0, 0, + 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, dt, 0, + 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, dt, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1); - // init... 
- m_linearKalman.statePre.at(0) = rect0.x; // x - m_linearKalman.statePre.at(1) = rect0.y; // y - m_linearKalman.statePre.at(2) = rect0.width; // width - m_linearKalman.statePre.at(3) = rect0.height; // height - m_linearKalman.statePre.at(4) = rectv0.x; // dx - m_linearKalman.statePre.at(5) = rectv0.y; // dy - m_linearKalman.statePre.at(6) = 0; // dw - m_linearKalman.statePre.at(7) = 0; // dh - m_linearKalman.statePre.at(8) = 0; // ax - m_linearKalman.statePre.at(9) = 0; // ay - m_linearKalman.statePre.at(10) = 0; // aw - m_linearKalman.statePre.at(11) = 0; // ah - - m_linearKalman.statePost.at(0) = rect0.x; - m_linearKalman.statePost.at(1) = rect0.y; - m_linearKalman.statePost.at(2) = rect0.width; - m_linearKalman.statePost.at(3) = rect0.height; - m_linearKalman.statePost.at(4) = rectv0.x; - m_linearKalman.statePost.at(5) = rectv0.y; - m_linearKalman.statePost.at(6) = 0; - m_linearKalman.statePost.at(7) = 0; - m_linearKalman.statePost.at(8) = 0; - m_linearKalman.statePost.at(9) = 0; - m_linearKalman.statePost.at(10) = 0; - m_linearKalman.statePost.at(11) = 0; + // init... 
+ m_linearKalman.statePre.at(0) = rect0.x; // x + m_linearKalman.statePre.at(1) = rect0.y; // y + m_linearKalman.statePre.at(2) = rect0.width; // width + m_linearKalman.statePre.at(3) = rect0.height; // height + m_linearKalman.statePre.at(4) = rectv0.x; // dx + m_linearKalman.statePre.at(5) = rectv0.y; // dy + m_linearKalman.statePre.at(6) = 0; // dw + m_linearKalman.statePre.at(7) = 0; // dh + m_linearKalman.statePre.at(8) = 0; // ax + m_linearKalman.statePre.at(9) = 0; // ay + m_linearKalman.statePre.at(10) = 0; // aw + m_linearKalman.statePre.at(11) = 0; // ah - cv::setIdentity(m_linearKalman.measurementMatrix); + m_linearKalman.statePost.at(0) = rect0.x; + m_linearKalman.statePost.at(1) = rect0.y; + m_linearKalman.statePost.at(2) = rect0.width; + m_linearKalman.statePost.at(3) = rect0.height; + m_linearKalman.statePost.at(4) = rectv0.x; + m_linearKalman.statePost.at(5) = rectv0.y; + m_linearKalman.statePost.at(6) = 0; + m_linearKalman.statePost.at(7) = 0; + m_linearKalman.statePost.at(8) = 0; + m_linearKalman.statePost.at(9) = 0; + m_linearKalman.statePost.at(10) = 0; + m_linearKalman.statePost.at(11) = 0; - track_t n1 = pow(m_deltaTime, 4.f) / 4.f; - track_t n2 = pow(m_deltaTime, 3.f) / 2.f; - track_t n3 = pow(m_deltaTime, 2.f); - m_linearKalman.processNoiseCov = (cv::Mat_(12, 12) << - n1, 0, 0, 0, n2, 0, 0, 0, n2, 0, n2, 0, - 0, n1, 0, 0, 0, n2, 0, 0, 0, n2, 0, n2, - 0, 0, n1, 0, 0, 0, n2, 0, 0, 0, n2, 0, - 0, 0, 0, n1, 0, 0, 0, n2, 0, 0, 0, n2, - n2, 0, 0, 0, n3, 0, 0, 0, n3, 0, n3, 0, - 0, n2, 0, 0, 0, n3, 0, 0, 0, n3, 0, n3, - 0, 0, n2, 0, 0, 0, n3, 0, 0, 0, n3, 0, - 0, 0, 0, n2, 0, 0, 0, n3, 0, 0, 0, n3, - n2, 0, 0, 0, n3, 0, 0, 0, n3, 0, 0, 0, - 0, n2, 0, 0, 0, n3, 0, 0, 0, n3, 0, 0, - 0, 0, n2, 0, 0, 0, n3, 0, 0, 0, n3, 0, - 0, 0, 0, n2, 0, 0, 0, n3, 0, 0, 0, n3); + cv::setIdentity(m_linearKalman.measurementMatrix); - m_linearKalman.processNoiseCov *= m_accelNoiseMag; + track_t n1 = pow(m_deltaTime, 4.f) / 4.f; + track_t n2 = pow(m_deltaTime, 3.f) / 
2.f; + track_t n3 = pow(m_deltaTime, 2.f); + m_linearKalman.processNoiseCov = (cv::Mat_(12, 12) << + n1, 0, 0, 0, n2, 0, 0, 0, n2, 0, n2, 0, + 0, n1, 0, 0, 0, n2, 0, 0, 0, n2, 0, n2, + 0, 0, n1, 0, 0, 0, n2, 0, 0, 0, n2, 0, + 0, 0, 0, n1, 0, 0, 0, n2, 0, 0, 0, n2, + n2, 0, 0, 0, n3, 0, 0, 0, n3, 0, n3, 0, + 0, n2, 0, 0, 0, n3, 0, 0, 0, n3, 0, n3, + 0, 0, n2, 0, 0, 0, n3, 0, 0, 0, n3, 0, + 0, 0, 0, n2, 0, 0, 0, n3, 0, 0, 0, n3, + n2, 0, 0, 0, n3, 0, 0, 0, n3, 0, 0, 0, + 0, n2, 0, 0, 0, n3, 0, 0, 0, n3, 0, 0, + 0, 0, n2, 0, 0, 0, n3, 0, 0, 0, n3, 0, + 0, 0, 0, n2, 0, 0, 0, n3, 0, 0, 0, n3); - cv::setIdentity(m_linearKalman.measurementNoiseCov, cv::Scalar::all(0.1)); + m_linearKalman.processNoiseCov *= m_accelNoiseMag; - cv::setIdentity(m_linearKalman.errorCovPost, cv::Scalar::all(.1)); + cv::setIdentity(m_linearKalman.measurementNoiseCov, cv::Scalar::all(0.1)); - m_initialRects.reserve(MIN_INIT_VALS); + cv::setIdentity(m_linearKalman.errorCovPost, cv::Scalar::all(.1)); - m_initialized = true; + m_initialRects.reserve(MIN_INIT_VALS); + + m_initialized = true; +} + +/// +/// \brief TKalmanFilter::CreateLinearAcceleration +/// \param rect0 +/// \param rectv0 +/// +void TKalmanFilter::CreateLinearAcceleration(cv::RotatedRect /*rrect0*/, Point_t /*rrectv0*/) +{ + // TODO + assert(0); } #ifdef USE_OCV_UKF //--------------------------------------------------------------------------- -class AcceleratedModel: public kalman::UkfSystemModel +class AcceleratedModel final : public kalman::UkfSystemModel { public: AcceleratedModel(track_t deltaTime, bool rectModel) @@ -540,7 +648,10 @@ void TKalmanFilter::CreateAugmentedUnscented(cv::Rect_ rect0, Point_t r } #endif -//--------------------------------------------------------------------------- +/// +/// \brief TKalmanFilter::GetPointPrediction +/// \return +/// Point_t TKalmanFilter::GetPointPrediction() { if (m_initialized) @@ -569,7 +680,12 @@ Point_t TKalmanFilter::GetPointPrediction() return m_lastPointResult; } 
-//--------------------------------------------------------------------------- +/// +/// \brief TKalmanFilter::Update +/// \param pt +/// \param dataCorrect +/// \return +/// Point_t TKalmanFilter::Update(Point_t pt, bool dataCorrect) { if (!m_initialized) @@ -582,7 +698,7 @@ Point_t TKalmanFilter::Update(Point_t pt, bool dataCorrect) m_lastPointResult = pt; } } - if (m_initialPoints.size() == MIN_INIT_VALS) + if (m_initialPoints.size() >= MIN_INIT_VALS) { track_t kx = 0; track_t bx = 0; @@ -689,7 +805,10 @@ Point_t TKalmanFilter::Update(Point_t pt, bool dataCorrect) return m_lastPointResult; } -//--------------------------------------------------------------------------- +/// +/// \brief TKalmanFilter::GetRectPrediction +/// \return +/// cv::Rect TKalmanFilter::GetRectPrediction() { if (m_initialized) @@ -718,7 +837,12 @@ cv::Rect TKalmanFilter::GetRectPrediction() return cv::Rect(static_cast(m_lastRectResult.x), static_cast(m_lastRectResult.y), static_cast(m_lastRectResult.width), static_cast(m_lastRectResult.height)); } -//--------------------------------------------------------------------------- +/// +/// \brief TKalmanFilter::Update +/// \param rect +/// \param dataCorrect +/// \return +/// cv::Rect TKalmanFilter::Update(cv::Rect rect, bool dataCorrect) { if (!m_initialized) @@ -874,7 +998,211 @@ cv::Rect TKalmanFilter::Update(cv::Rect rect, bool dataCorrect) return cv::Rect(static_cast(m_lastRectResult.x), static_cast(m_lastRectResult.y), static_cast(m_lastRectResult.width), static_cast(m_lastRectResult.height)); } -//--------------------------------------------------------------------------- +/// +/// \brief TKalmanFilter::GetRRectPrediction +/// \return +/// +cv::RotatedRect TKalmanFilter::GetRRectPrediction() +{ + if (m_initialized) + { + cv::Mat prediction; + + switch (m_type) + { + case tracking::KalmanLinear: + prediction = m_linearKalman.predict(); + break; + + case tracking::KalmanUnscented: + case tracking::KalmanAugmentedUnscented: +#ifdef 
USE_OCV_UKF + prediction = m_uncsentedKalman->predict(); +#else + prediction = m_linearKalman.predict(); + std::cerr << "UnscentedKalmanFilter was disabled in CMAKE! Set KalmanLinear in constructor." << std::endl; +#endif + break; + } + + m_lastRRectResult.center.x = prediction.at(0); //update using measurements + m_lastRRectResult.center.y = prediction.at(1); + m_lastRRectResult.size.width = prediction.at(2); + m_lastRRectResult.size.height = prediction.at(3); + m_lastRRectResult.angle = prediction.at(4); + } + return m_lastRRectResult; +} + +/// +/// \brief TKalmanFilter::Update +/// \param rrect +/// \param dataCorrect +/// \return +/// +cv::RotatedRect TKalmanFilter::Update(cv::RotatedRect rrect, bool dataCorrect) +{ + if (!m_initialized) + { + if (m_initialRRects.size() < MIN_INIT_VALS) + { + if (dataCorrect) + { + m_initialRRects.push_back(rrect); + m_lastRRectResult = rrect; + } + } + if (m_initialRRects.size() == MIN_INIT_VALS) + { + std::vector initialPoints; + Point_t averageSize(0, 0); + track_t averageAngle = 0; + for (const auto& rr : m_initialRRects) + { + initialPoints.emplace_back(static_cast(rr.center.x), static_cast(rr.center.y)); + averageSize.x += rr.size.width; + averageSize.y += rr.size.height; + averageAngle += rr.angle; + } + averageSize.x /= MIN_INIT_VALS; + averageSize.y /= MIN_INIT_VALS; + averageAngle /= MIN_INIT_VALS; + + track_t kx = 0; + track_t bx = 0; + track_t ky = 0; + track_t by = 0; + get_lin_regress_params(initialPoints, 0, MIN_INIT_VALS, kx, bx, ky, by); + cv::RotatedRect rrect0(cv::Point2f(kx * (MIN_INIT_VALS - 1) + bx, ky * (MIN_INIT_VALS - 1) + by), averageSize, averageAngle); + Point_t rrectv0(kx, ky); + + switch (m_type) + { + case tracking::KalmanLinear: + if (m_useAcceleration) + CreateLinearAcceleration(rrect0, rrectv0); + else + CreateLinear(rrect0, rrectv0); + break; + + case tracking::KalmanUnscented: +#ifdef USE_OCV_UKF + assert(0); + //TODO: CreateUnscented(rrect0, rrectv0); +#else + if (m_useAcceleration) + 
CreateLinearAcceleration(rrect0, rrectv0); + else + CreateLinear(rrect0, rrectv0); + std::cerr << "UnscentedKalmanFilter was disabled in CMAKE! Set KalmanLinear in constructor." << std::endl; +#endif + break; + + case tracking::KalmanAugmentedUnscented: +#ifdef USE_OCV_UKF + assert(0); + // TODO: CreateAugmentedUnscented(rrect0, rrectv0); +#else + if (m_useAcceleration) + CreateLinearAcceleration(rrect0, rrectv0); + else + CreateLinear(rrect0, rrectv0); + std::cerr << "AugmentedUnscentedKalmanFilter was disabled in CMAKE! Set KalmanLinear in constructor." << std::endl; +#endif + break; + } + } + } + + if (m_initialized) + { + cv::Mat measurement(5, 1, Mat_t(1)); + if (!dataCorrect) + { + measurement.at(0) = m_lastRRectResult.center.x; // update using prediction + measurement.at(1) = m_lastRRectResult.center.y; + measurement.at(2) = m_lastRRectResult.size.width; + measurement.at(3) = m_lastRRectResult.size.height; + measurement.at(4) = m_lastRRectResult.angle; + } + else + { + measurement.at(0) = static_cast(rrect.center.x); // update using measurements + measurement.at(1) = static_cast(rrect.center.y); + measurement.at(2) = static_cast(rrect.size.width); + measurement.at(3) = static_cast(rrect.size.height); + measurement.at(4) = static_cast(rrect.angle); + } + // Correction + cv::Mat estimated; + switch (m_type) + { + case tracking::KalmanLinear: + { + estimated = m_linearKalman.correct(measurement); + + m_lastRRectResult.center.x = estimated.at(0); //update using measurements + m_lastRRectResult.center.y = estimated.at(1); + m_lastRRectResult.size.width = estimated.at(2); + m_lastRRectResult.size.height = estimated.at(3); + m_lastRRectResult.angle = estimated.at(4); + + // Inertia correction + if (!m_useAcceleration) + { + track_t currDist = sqrtf(sqr(estimated.at(0) - rrect.center.x) + sqr(estimated.at(1) - rrect.center.y) + + sqr(estimated.at(2) - rrect.size.width) + sqr(estimated.at(3) - rrect.size.height)); + if (currDist > m_lastDist) + m_deltaTime = 
std::min(m_deltaTime + m_deltaStep, m_deltaTimeMax); + else + m_deltaTime = std::max(m_deltaTime - m_deltaStep, m_deltaTimeMin); + + m_lastDist = currDist; + + m_linearKalman.transitionMatrix.at(0, 5) = m_deltaTime; + m_linearKalman.transitionMatrix.at(1, 6) = m_deltaTime; + m_linearKalman.transitionMatrix.at(2, 7) = m_deltaTime; + m_linearKalman.transitionMatrix.at(3, 8) = m_deltaTime; + m_linearKalman.transitionMatrix.at(4, 9) = m_deltaTime; + } + break; + } + + case tracking::KalmanUnscented: + case tracking::KalmanAugmentedUnscented: +#ifdef USE_OCV_UKF + estimated = m_uncsentedKalman->correct(measurement); + + m_lastRRectResult.center.x = estimated.at(0); //update using measurements + m_lastRRectResult.center.y = estimated.at(1); + m_lastRRectResult.size.width = estimated.at(6); + m_lastRRectResult.size.height = estimated.at(7); + m_lastRRectResult.angle = estimated.at(9); +#else + estimated = m_linearKalman.correct(measurement); + + m_lastRRectResult.center.x = estimated.at(0); //update using measurements + m_lastRRectResult.center.y = estimated.at(1); + m_lastRRectResult.size.width = estimated.at(2); + m_lastRRectResult.size.height = estimated.at(3); + m_lastRRectResult.angle = estimated.at(4); + std::cerr << "UnscentedKalmanFilter was disabled in CMAKE! Set KalmanLinear in constructor." 
<< std::endl; +#endif + break; + } + } + else + { + if (dataCorrect) + m_lastRRectResult = rrect; + } + return m_lastRRectResult; +} + +/// +/// \brief TKalmanFilter::GetVelocity +/// \return +/// cv::Vec TKalmanFilter::GetVelocity() const { cv::Vec res(0, 0); @@ -893,6 +1221,7 @@ cv::Vec TKalmanFilter::GetVelocity() const indX = 4; indY = 5; } + //std::cout << "indX = " << indX << ", indY = " << indY << std::endl; res[0] = m_linearKalman.statePre.at(indX); res[1] = m_linearKalman.statePre.at(indY); } @@ -913,3 +1242,30 @@ cv::Vec TKalmanFilter::GetVelocity() const } return res; } + +//--------------------------------------------------------------------------- +void TKalmanFilter::GetPtStateAndResCov(cv::Mat& covar, cv::Mat& state) const +{ + if (m_initialized) + { + switch (m_type) + { + case tracking::KalmanLinear: + { + state = m_linearKalman.statePost.clone(); + covar = m_linearKalman.processNoiseCov.clone(); + break; + } + + case tracking::KalmanUnscented: + case tracking::KalmanAugmentedUnscented: +#ifdef USE_OCV_UKF + state = m_uncsentedKalman->getState(); +#else + std::cerr << "UnscentedKalmanFilter was disabled in CMAKE! Set KalmanLinear in constructor." 
<< std::endl; +#endif + break; + } + } +} + diff --git a/src/Tracker/Kalman.h b/src/Tracker/Kalman.h index e9e95f8c1..6864f783d 100644 --- a/src/Tracker/Kalman.h +++ b/src/Tracker/Kalman.h @@ -26,7 +26,12 @@ class TKalmanFilter cv::Rect GetRectPrediction(); cv::Rect Update(cv::Rect rect, bool dataCorrect); - cv::Vec GetVelocity() const; + cv::RotatedRect GetRRectPrediction(); + cv::RotatedRect Update(cv::RotatedRect rrect, bool dataCorrect); + + cv::Vec GetVelocity() const; + + void GetPtStateAndResCov(cv::Mat& covar, cv::Mat& state) const; private: cv::KalmanFilter m_linearKalman; @@ -38,10 +43,12 @@ class TKalmanFilter #endif #endif - static constexpr size_t MIN_INIT_VALS = 4; + static constexpr size_t MIN_INIT_VALS = 2; std::vector m_initialPoints; std::vector m_initialRects; + std::vector m_initialRRects; + cv::RotatedRect m_lastRRectResult; cv::Rect_ m_lastRectResult; cv::Rect_ m_lastRect; Point_t m_lastPointResult; @@ -59,11 +66,13 @@ class TKalmanFilter // Constant velocity model void CreateLinear(Point_t xy0, Point_t xyv0); void CreateLinear(cv::Rect_ rect0, Point_t rectv0); + void CreateLinear(cv::RotatedRect rrect0, Point_t rrectv0); // Constant acceleration model // https://www.mathworks.com/help/driving/ug/linear-kalman-filters.html void CreateLinearAcceleration(Point_t xy0, Point_t xyv0); void CreateLinearAcceleration(cv::Rect_ rect0, Point_t rectv0); + void CreateLinearAcceleration(cv::RotatedRect rrect0, Point_t rrectv0); #ifdef USE_OCV_UKF void CreateUnscented(Point_t xy0, Point_t xyv0); @@ -73,58 +82,3 @@ class TKalmanFilter #endif }; -//--------------------------------------------------------------------------- -/// -/// \brief sqr -/// \param val -/// \return -/// -template inline -T sqr(T val) -{ - return val * val; -} - -/// -/// \brief get_lin_regress_params -/// \param in_data -/// \param start_pos -/// \param in_data_size -/// \param kx -/// \param bx -/// \param ky -/// \param by -/// -template -void get_lin_regress_params( - const CONT& 
in_data, - size_t start_pos, - size_t in_data_size, - T& kx, T& bx, T& ky, T& by) -{ - T m1(0.), m2(0.); - T m3_x(0.), m4_x(0.); - T m3_y(0.), m4_y(0.); - - const T el_count = static_cast(in_data_size - start_pos); - for (size_t i = start_pos; i < in_data_size; ++i) - { - m1 += i; - m2 += sqr(i); - - m3_x += in_data[i].x; - m4_x += i * in_data[i].x; - - m3_y += in_data[i].y; - m4_y += i * in_data[i].y; - } - T det_1 = 1 / (el_count * m2 - sqr(m1)); - - m1 *= -1; - - kx = det_1 * (m1 * m3_x + el_count * m4_x); - bx = det_1 * (m2 * m3_x + m1 * m4_x); - - ky = det_1 * (m1 * m3_y + el_count * m4_y); - by = det_1 * (m2 * m3_y + m1 * m4_y); -} diff --git a/src/Tracker/LAPJV_algorithm/lap.cpp b/src/Tracker/LAPJV_algorithm/lap.cpp new file mode 100644 index 000000000..594ab0391 --- /dev/null +++ b/src/Tracker/LAPJV_algorithm/lap.cpp @@ -0,0 +1,265 @@ +/************************************************************************ +* +* lap.cpp + version 1.0 - 4 September 1996 + author: Roy Jonker @ MagicLogic Optimization Inc. + e-mail: roy_jonker@magiclogic.com + + Code for Linear Assignment Problem, according to + + "A Shortest Augmenting Path Algorithm for Dense and Sparse Linear + Assignment Problems," Computing 38, 325-340, 1987 + + by + + R. Jonker and A. Volgenant, University of Amsterdam. 
+ +* + CHANGED 2016-05-13 by Yong Yang(yongyanglink@gmail.com) in column reduction part according to + matlab version of LAPJV algorithm(Copyright (c) 2010, Yi Cao All rights reserved)-- + https://www.mathworks.com/matlabcentral/fileexchange/26836-lapjv-jonker-volgenant-algorithm-for-linear-assignment-problem-v3-0: +* +*************************************************************************/ + +#include +#include "lap.h" + + +/*This function is the jv shortest augmenting path algorithm to solve the assignment problem*/ +cost lap(const std::vector>& assigncost, + std::vector& rowsol, + std::vector& colsol, + std::vector& u, + std::vector& v) + +// input: +// assigncost - cost matrix + +// output: +// rowsol - column assigned to row in solution +// colsol - row assigned to column in solution +// u - dual variables, row reduction numbers +// v - dual variables, column reduction numbers + +{ + int dimRows = assigncost.size(); + int dimCols = assigncost[0].size(); + bool unassignedfound = false; + row numfree = 0; + col j2 = 0, endofpath = 0, last = 0; + cost min = std::numeric_limits::max(); + + std::vector freeunassigned(dimRows); // list of unassigned rows. + std::vector collist(dimCols); // list of columns to be scanned in various ways. + std::vector matches(dimRows, 0); // counts how many times a row could be assigned. + std::vector d(dimCols); // 'cost-distance' in augmenting path calculation. + std::vector pred(dimCols); // row-predecessor of column in augmenting/alternating path. + + // COLUMN REDUCTION + for (col j = dimCols; j--;) // reverse order gives better results. + { + // find minimum cost over rows. + min = assigncost[0][j]; + row imin = 0; + for (row i = 1; i < dimRows; i++) + if (assigncost[i][j] < min) { + min = assigncost[i][j]; + imin = i; + } + v[j] = min; + if (++matches[imin] == 1) { + // init assignment if minimum row assigned for first time. 
+ rowsol[imin] = j; + colsol[j] = imin; + } else if (v[j] < v[rowsol[imin]]) { + int j1 = rowsol[imin]; + rowsol[imin] = j; + colsol[j] = imin; + colsol[j1] = -1; + } else + colsol[j] = -1; // row already assigned, column not assigned. + } + + // REDUCTION TRANSFER + for (row i = 0; i < dimRows; i++) + if (matches[i] == 0) // fill list of unassigned 'free' rows. + freeunassigned[numfree++] = i; + else if (matches[i] == 1) // transfer reduction from rows that are assigned once. + { + col j1 = rowsol[i]; + min = std::numeric_limits::max(); + for (col j = 0; j < dimCols; j++) + if (j != j1) + if (assigncost[i][j] - v[j] < min) min = assigncost[i][j] - v[j]; + v[j1] = v[j1] - min; + } + + // AUGMENTING ROW REDUCTION + int loopcnt = 0; // do-loop to be done twice. + do { + loopcnt++; + + // scan all free rows. + // in some cases, a free row may be replaced with another one to be scanned next. + row k = 0; + row prvnumfree = numfree; + numfree = 0; // start list of rows still free after augmenting row reduction. + while (k < prvnumfree) { + row i = freeunassigned[k]; + k++; + + // find minimum and second minimum reduced cost over columns. + cost umin = assigncost[i][0] - v[0]; + col j1 = 0; + cost usubmin = std::numeric_limits::max(); + for (col j = 1; j < dimCols; j++) { + cost h = assigncost[i][j] - v[j]; + if (h < usubmin) + if (h >= umin) { + usubmin = h; + j2 = j; + } else { + usubmin = umin; + umin = h; + j2 = j1; + j1 = j; + } + } + + row i0 = colsol[j1]; + if (umin < usubmin) + // change the reduction of the minimum column to increase the minimum + // reduced cost in the row to the subminimum. + v[j1] = v[j1] - (usubmin - umin); + else // minimum and subminimum equal. + if (i0 > -1) // minimum column j1 is assigned. + { + // swap columns j1 and j2, as j2 may be unassigned. + j1 = j2; + i0 = colsol[j2]; + } + + // (re-)assign i to j1, possibly de-assigning an i0. + rowsol[i] = j1; + colsol[j1] = i; + + if (i0 > -1) // minimum column j1 assigned earlier. 
+ if (umin < usubmin) + // put in current k, and go back to that k. + // continue augmenting path i - j1 with i0. + freeunassigned[--k] = i0; + else + // no further augmenting reduction possible. + // store i0 in list of free rows for next phase. + freeunassigned[numfree++] = i0; + } + } while (loopcnt < 2); // repeat once. + + // AUGMENT SOLUTION for each free row. + for (row f = 0; f < numfree; f++) { + row freerow = freeunassigned[f]; // start row of augmenting path. + + // Dijkstra shortest path algorithm. + // runs until unassigned column added to shortest path tree. + for (col j = dimCols; j--;) { + d[j] = assigncost[freerow][j] - v[j]; + pred[j] = freerow; + collist[j] = j; // init column list. + } + + col low = 0; // columns in 0..low-1 are ready, now none. + col up = 0; // columns in low..up-1 are to be scanned for current minimum, now none. + // columns in up..dim-1 are to be considered later to find new minimum, + // at this stage the list simply contains all columns + unassignedfound = false; + do { + if (up == low) // no more columns to be scanned for current minimum. + { + last = low - 1; + + // scan columns for up..dim-1 to find all indices for which new minimum occurs. + // store these indices between low..up-1 (increasing up). + min = d[collist[up++]]; + for (row k = up; k < dimRows; k++) { + col j = collist[k]; + cost h = d[j]; + if (h <= min) { + if (h < min) // new minimum. + { + up = low; // restart list at index low. + min = h; + } + // new index with same minimum, put on undex up, and extend list. + collist[k] = collist[up]; + collist[up++] = j; + } + } + // check if any of the minimum columns happens to be unassigned. + // if so, we have an augmenting path right away. + for (row k = low; k < up; k++) + if (colsol[collist[k]] < 0) { + endofpath = collist[k]; + unassignedfound = true; + break; + } + } + + if (!unassignedfound) { + // update 'distances' between freerow and all unscanned columns, via next scanned + // column. 
+ col j1 = collist[low]; + low++; + row i = colsol[j1]; + cost h = assigncost[i][j1] - v[j1] - min; + + for (row k = up; k < dimRows; k++) { + col j = collist[k]; + cost v2 = assigncost[i][j] - v[j] - h; + if (v2 < d[j]) { + pred[j] = i; + if (v2 == min) // new column found at same minimum value + if (colsol[j] < 0) { + // if unassigned, shortest augmenting path is complete. + endofpath = j; + unassignedfound = true; + break; + } + // else add to list to be scanned right away. + else { + collist[k] = collist[up]; + collist[up++] = j; + } + d[j] = v2; + } + } + } + } while (!unassignedfound); + + // update column prices. + for (row k = last + 1; k--;) { + col j1 = collist[k]; + v[j1] = v[j1] + d[j1] - min; + } + + // reset row and column assignments along the alternating path. + row i = 0; + do { + i = pred[endofpath]; + colsol[endofpath] = i; + col j1 = endofpath; + endofpath = rowsol[i]; + rowsol[i] = j1; + } while (i != freerow); + } + + // calculate optimal cost. + cost lapcost = 0; + // for (i = 0; i < dim; i++) + for (row i = dimRows; i--;) { + col j = rowsol[i]; + u[i] = assigncost[i][j] - v[j]; + lapcost = lapcost + assigncost[i][j]; + } + + return lapcost; +} diff --git a/src/Tracker/LAPJV_algorithm/lap.h b/src/Tracker/LAPJV_algorithm/lap.h new file mode 100644 index 000000000..3a11410ad --- /dev/null +++ b/src/Tracker/LAPJV_algorithm/lap.h @@ -0,0 +1,31 @@ +/************************************************************************ +* +* lap.h + version 1.0 - 21 june 1996 + author Roy Jonker, MagicLogic Optimization Inc. 
+ + header file for LAP +* + pyLAPJV by Harold Cooper (hbc@mit.edu) + 2004-08-13: + -- fixed Jonker's function declarations to actually use row, col, + and cost types + -- row, col, and cost now based on basic types +* +**************************************************************************/ + +#include + +/*************** TYPES *******************/ + +typedef int row; +typedef int col; +typedef double cost; + +/*************** FUNCTIONS *******************/ + +extern cost lap(const std::vector>& assigncost, + std::vector& rowsol, + std::vector& colsol, + std::vector& u, + std::vector& v); diff --git a/src/Tracker/ShortPathCalculator.cpp b/src/Tracker/ShortPathCalculator.cpp index 39d09de82..0d2a9a4c8 100644 --- a/src/Tracker/ShortPathCalculator.cpp +++ b/src/Tracker/ShortPathCalculator.cpp @@ -1,64 +1,65 @@ #include "ShortPathCalculator.h" -#include -#include "mygraph.h" -#include "mwbmatching.h" -#include "tokenise.h" +#include "LAPJV_algorithm/lap.h" /// -/// \brief SPBipart::Solve -/// \param costMatrix -/// \param N -/// \param M -/// \param assignment -/// \param maxCost -/// -void SPBipart::Solve(const distMatrix_t& costMatrix, size_t N, size_t M, assignments_t& assignment, track_t maxCost) +void SPLAPJV::Solve(const distMatrix_t& costMatrix, size_t colsTracks, size_t rowsRegions, assignments_t& assignmentT2R, track_t /*maxCost*/) { - MyGraph G; - G.make_directed(); + //std::cout << "SPLAPJV::Solve: colsTracks = " << colsTracks << ", rowsRegions = " << rowsRegions << std::endl; - std::vector nodes(N + M); + if (!colsTracks || !rowsRegions) + return; - for (size_t i = 0; i < nodes.size(); ++i) - { - nodes[i] = G.new_node(); - } + bool swithReg2Track = (rowsRegions > colsTracks); // For this algorithm rows <= cols - GTL::edge_map weights(G, 100); - for (size_t i = 0; i < N; i++) - { - bool hasZeroEdge = false; + size_t dimRows = swithReg2Track ? 
colsTracks : rowsRegions; // Set the dimension of matrix to 10, dim is the problem size + size_t dimCols = swithReg2Track ? rowsRegions : colsTracks; + std::vector> costMat; // A matrix to store all the costs from vertex i to vertex j + std::vector rowsol(dimRows, -1); // An array to store column indexes assigned to row in solution + std::vector colsol(dimCols, -1); // An array to store row indexes assigned to column in solution + std::vector u(dimRows); // u - dual variables, row reduction numbers + std::vector v(dimCols); // v - dual variables, column reduction numbers - for (size_t j = 0; j < M; j++) + costMat.resize(dimRows); + for (size_t i = 0; i < dimRows; i++) + { + costMat[i].resize(dimCols); + for (size_t j = 0; j < dimCols; ++j) { - track_t currCost = costMatrix[i + j * N]; - - GTL::edge e = G.new_edge(nodes[i], nodes[N + j]); + costMat[i][j] = swithReg2Track ? costMatrix[j * colsTracks + i] : costMatrix[i * colsTracks + j]; - if (currCost < m_settings.m_distThres) - { - int weight = static_cast(maxCost - currCost + 1); - G.set_edge_weight(e, weight); - weights[e] = weight; - } - else - { - if (!hasZeroEdge) - { - G.set_edge_weight(e, 0); - weights[e] = 0; - } - hasZeroEdge = true; - } + //std::cout << std::fixed << std::setw(2) << std::setprecision(2) << costMat[i][j] << " "; } + //std::cout << std::endl; } + //std::cout << "Cost matrix created" << std::endl; + cost totalCost = lap(costMat, rowsol, colsol, u, v); // Use lap algorithm to calculate the minimum total cost + //std::cout << "totalCost = " << totalCost << std::endl; + + //for (size_t i = 0; i < rowsol.size(); ++i) + //{ + // std::cout << "row[" << i << "]: " << rowsol[i] << ", u = " << u[i] << std::endl; + //} + //for (size_t i = 0; i < colsol.size(); ++i) + //{ + // std::cout << "col[" << i << "]: " << colsol[i] << ", u = " << v[i] << std::endl; + //} - GTL::edges_t L = MAX_WEIGHT_BIPARTITE_MATCHING(G, weights); - for (GTL::edges_t::iterator it = L.begin(); it != L.end(); ++it) + + if 
(swithReg2Track) { - GTL::node a = it->source(); - GTL::node b = it->target(); - assignment[b.id()] = static_cast(a.id() - N); + for (size_t i = 0; i < colsol.size(); ++i) + { + if (colsol[i] >= 0) + assignmentT2R[colsol[i]] = static_cast(i); + } + } + else + { + for (size_t i = 0; i < colsol.size(); ++i) + { + if (colsol[i] >= 0) + assignmentT2R[i] = colsol[i]; + } } } diff --git a/src/Tracker/ShortPathCalculator.h b/src/Tracker/ShortPathCalculator.h index 22094c851..6ae90b06f 100644 --- a/src/Tracker/ShortPathCalculator.h +++ b/src/Tracker/ShortPathCalculator.h @@ -32,17 +32,19 @@ class ShortPathCalculator /// /// \brief The SPHungrian class /// -class SPHungrian : public ShortPathCalculator +class SPHungrian final : public ShortPathCalculator { public: SPHungrian(const SPSettings& settings) : ShortPathCalculator(settings) { + //std::cout << "SPHungrian" << std::endl; } - void Solve(const distMatrix_t& costMatrix, size_t N, size_t M, assignments_t& assignment, track_t /*maxCost*/) + void Solve(const distMatrix_t& costMatrix, size_t colsTracks, size_t rowsRegions, assignments_t& assignmentT2R, track_t /*maxCost*/) override { - m_solver.Solve(costMatrix, N, M, assignment, AssignmentProblemSolver::optimal); + //std::cout << "SPHungrian::Solve" << std::endl; + m_solver.Solve(costMatrix, colsTracks, rowsRegions, assignmentT2R, AssignmentProblemSolver::optimal); } private: @@ -52,13 +54,29 @@ class SPHungrian : public ShortPathCalculator /// /// \brief The SPBipart class /// -class SPBipart : public ShortPathCalculator +class SPBipart final : public ShortPathCalculator { public: SPBipart(const SPSettings& settings) : ShortPathCalculator(settings) { + //std::cout << "SPBipart" << std::endl; } - void Solve(const distMatrix_t& costMatrix, size_t N, size_t M, assignments_t& assignment, track_t maxCost); + void Solve(const distMatrix_t& costMatrix, size_t colsTracks, size_t rowsRegions, assignments_t& assignmentT2R, track_t maxCost) override; +}; + +/// +/// \brief The 
SPLAPJV class +/// +class SPLAPJV final : public ShortPathCalculator +{ +public: + SPLAPJV(const SPSettings& settings) + : ShortPathCalculator(settings) + { + //std::cout << "SPLAPJV" << std::endl; + } + + void Solve(const distMatrix_t& costMatrix, size_t colsTracks, size_t rowsRegions, assignments_t& assignmentT2R, track_t /*maxCost*/) override; }; diff --git a/src/Tracker/TrackerSettings.cpp b/src/Tracker/TrackerSettings.cpp new file mode 100644 index 000000000..e06f2311d --- /dev/null +++ b/src/Tracker/TrackerSettings.cpp @@ -0,0 +1,96 @@ +#include +#include "TrackerSettings.h" +#include + +/// +/// \brief CarsCounting::ParseTrackerSettings +/// +bool ParseTrackerSettings(const std::string& settingsFile, TrackerSettings& trackerSettings) +{ + bool res = false; + + std::cout << "ParseTrackerSettings: " << settingsFile << " ..." << std::endl; + + INIReader reader(settingsFile); + + if (reader.ParseError() >= 0) + { + std::cout << "ParseTrackerSettings - readed" << std::endl; + + trackerSettings = TrackerSettings(); + + // Read tracking settings + auto trackerType = reader.GetInteger("tracking", "tracker_type", -1); + if (trackerType == (int)tracking::ByteTrack) + trackerSettings.m_tracker = tracking::ByteTrack; + else + trackerSettings.m_tracker = tracking::UniversalTracker; + + auto distType = reader.GetInteger("tracking", "distance_type", -1); + if (distType >= 0 && distType < (int)tracking::DistsCount) + trackerSettings.SetDistance((tracking::DistType)distType); + + auto kalmanType = reader.GetInteger("tracking", "kalman_type", -1); + if (kalmanType >= 0 && kalmanType < (int)tracking::KalmanCount) + trackerSettings.m_kalmanType = (tracking::KalmanType)kalmanType; + + auto filterGoal = reader.GetInteger("tracking", "filter_goal", -1); + if (filterGoal >= 0 && filterGoal < (int)tracking::FiltersCount) + trackerSettings.m_filterGoal = (tracking::FilterGoal)filterGoal; + + auto lostTrackType = reader.GetInteger("tracking", "lost_track_type", -1); + if 
(lostTrackType >= 0 && lostTrackType < (int)tracking::SingleTracksCount) + trackerSettings.m_lostTrackType = (tracking::LostTrackType)lostTrackType; + + auto matchType = reader.GetInteger("tracking", "match_type", -1); + if (matchType >= 0 && matchType < (int)tracking::MatchCount) + trackerSettings.m_matchType = (tracking::MatchType)matchType; + + trackerSettings.m_useAcceleration = reader.GetInteger("tracking", "use_aceleration", 0) != 0; // Use constant acceleration motion model + trackerSettings.m_dt = static_cast(reader.GetReal("tracking", "delta_time", 0.4)); // Delta time for Kalman filter + trackerSettings.m_accelNoiseMag = static_cast(reader.GetReal("tracking", "accel_noise", 0.2)); // Accel noise magnitude for Kalman filter + trackerSettings.m_distThres = static_cast(reader.GetReal("tracking", "dist_thresh", 0.8)); // Distance threshold between region and object on two frames + trackerSettings.m_minAreaRadiusPix = static_cast(reader.GetReal("tracking", "min_area_radius_pix", -1.)); + trackerSettings.m_minAreaRadiusK = static_cast(reader.GetReal("tracking", "min_area_radius_k", 0.8)); + trackerSettings.m_maximumAllowedLostTime = reader.GetReal("tracking", "max_lost_time", 1.); // Maximum lost time in seconds + trackerSettings.m_maxTraceLength = reader.GetReal("tracking", "max_trace_len", 2.); // Maximum trace length in seconds + trackerSettings.m_useAbandonedDetection = reader.GetInteger("tracking", "detect_abandoned", 0) != 0; + trackerSettings.m_minStaticTime = reader.GetInteger("tracking", "min_static_time", 5); + trackerSettings.m_maxStaticTime = reader.GetInteger("tracking", "max_static_time", 25); + trackerSettings.m_maxSpeedForStatic = static_cast(reader.GetReal("tracking", "max_speed_for_static", 0.5)); + + trackerSettings.m_byteTrackSettings.m_trackBuffer = reader.GetInteger("tracking", "bytetrack_track_buffer", 30); + trackerSettings.m_byteTrackSettings.m_trackThresh = static_cast(reader.GetReal("tracking", "bytetrack_track_thresh", 0.5)); + 
trackerSettings.m_byteTrackSettings.m_highThresh = static_cast(reader.GetReal("tracking", "bytetrack_high_thresh", 0.5)); + trackerSettings.m_byteTrackSettings.m_matchThresh = static_cast(reader.GetReal("tracking", "bytetrack_match_thresh", 0.8)); + + // Read detection settings + trackerSettings.m_nnWeights = reader.GetString("detection", "nn_weights", "data/yolov4-tiny_best.weights"); + trackerSettings.m_nnConfig = reader.GetString("detection", "nn_config", "data/yolov4-tiny.cfg"); + trackerSettings.m_classNames = reader.GetString("detection", "class_names", "data/traffic.names"); + trackerSettings.m_confidenceThreshold = static_cast(reader.GetReal("detection", "confidence_threshold", 0.5)); + trackerSettings.m_maxCropRatio = static_cast(reader.GetReal("detection", "max_crop_ratio", -1)); + trackerSettings.m_maxBatch = reader.GetInteger("detection", "max_batch", 1); + trackerSettings.m_gpuId = reader.GetInteger("detection", "gpu_id", 0); + trackerSettings.m_netType = reader.GetString("detection", "net_type", "YOLOV4"); + trackerSettings.m_inferencePrecision = reader.GetString("detection", "inference_precision", "FP16"); + trackerSettings.m_detectorBackend = reader.GetInteger("detection", "detector_backend", (int)tracking::Detectors::DNN_OCV); + trackerSettings.m_dnnTarget = reader.GetString("detection", "ocv_dnn_target", "DNN_TARGET_CPU"); + trackerSettings.m_dnnBackend = reader.GetString("detection", "ocv_dnn_backend", "DNN_BACKEND_OPENCV"); + trackerSettings.m_maxVideoMemory = reader.GetInteger("detection", "video_memory", 0); + trackerSettings.m_inputSize.width = reader.GetInteger("detection", "input_width", 0); + trackerSettings.m_inputSize.height = reader.GetInteger("detection", "input_height", 0); + + std::stringstream whiteList{ reader.GetString("detection", "white_list", "") }; + trackerSettings.m_whiteList.clear(); + std::string wname; + while (std::getline(whiteList, wname, ';')) + { + trackerSettings.m_whiteList.push_back(wname); + } + + res = true; + } 
+ std::cout << "ParseTrackerSettings: " << res << std::endl; + return res; +} diff --git a/src/Tracker/Ctracker.h b/src/Tracker/TrackerSettings.h similarity index 50% rename from src/Tracker/Ctracker.h rename to src/Tracker/TrackerSettings.h index e06f7d18b..8e68b4bfe 100644 --- a/src/Tracker/Ctracker.h +++ b/src/Tracker/TrackerSettings.h @@ -1,30 +1,28 @@ #pragma once -#include #include -#include #include -#include #include -#include -#include #include "defines.h" -#include "track.h" -#include "ShortPathCalculator.h" -#include "EmbeddingsCalculator.hpp" -// ---------------------------------------------------------------------- /// /// \brief The TrackerSettings struct /// struct TrackerSettings { + /// + /// Tracker settings + /// + /// + + tracking::TrackerTemplate m_tracker = tracking::UniversalTracker; + tracking::KalmanType m_kalmanType = tracking::KalmanLinear; tracking::FilterGoal m_filterGoal = tracking::FilterCenter; tracking::LostTrackType m_lostTrackType = tracking::TrackKCF; // Used if m_filterGoal == tracking::FilterRect - tracking::MatchType m_matchType = tracking::MatchHungrian; + tracking::MatchType m_matchType = tracking::MatchLAPJV; - std::array m_distType; + std::array m_distType; /// /// \brief m_dt @@ -38,11 +36,11 @@ struct TrackerSettings /// track_t m_accelNoiseMag = 0.1f; - /// - /// \brief m_useAcceleration - /// Constant velocity or constant acceleration motion model - /// - bool m_useAcceleration = false; + /// + /// \brief m_useAcceleration + /// Constant velocity or constant acceleration motion model + /// + bool m_useAcceleration = false; /// /// \brief m_distThres @@ -56,23 +54,23 @@ struct TrackerSettings /// track_t m_minAreaRadiusPix = 20.f; - /// - /// \brief m_minAreaRadius - /// Minimal area radius in ration for object size. Used if m_minAreaRadiusPix < 0 - /// - track_t m_minAreaRadiusK = 0.5f; + /// + /// \brief m_minAreaRadius + /// Minimal area radius in ration for object size. 
Used if m_minAreaRadiusPix < 0 + /// + track_t m_minAreaRadiusK = 0.5f; /// - /// \brief m_maximumAllowedSkippedFrames - /// If the object don't assignment more than this frames then it will be removed + /// \brief m_maximumAllowedLostTime + /// If the object don't assignment more than this time in seconds then it will be removed /// - size_t m_maximumAllowedSkippedFrames = 25; + double m_maximumAllowedLostTime = 1.; /// /// \brief m_maxTraceLength - /// The maximum trajectory length + /// The maximum trajectory length in seconds /// - size_t m_maxTraceLength = 50; + double m_maxTraceLength = 2.f; /// /// \brief m_useAbandonedDetection @@ -92,16 +90,101 @@ struct TrackerSettings int m_maxStaticTime = 25; /// /// \brief m_maxSpeedForStatic - /// Speed in pixels + /// Speed in meters /// If speed of object is more that this value than object is non static /// - int m_maxSpeedForStatic = 10; + track_t m_maxSpeedForStatic = 0.5f; + + /// + /// \brief m_nearTypes + /// Object types that can be matched while tracking + /// + std::map> m_nearTypes; /// - /// \brief m_nearTypes - /// Object types that can be matched while tracking + /// \brief struct ByteTrackSettings + /// Settings only for m_tracker = tracking::ByteTrack + /// + struct ByteTrackSettings + { + int m_trackBuffer = 30; + float m_trackThresh = 0.5f; + float m_highThresh = 0.5f; + float m_matchThresh = 0.8f; + }; + ByteTrackSettings m_byteTrackSettings; + + + /// + /// Detector settings + /// + + /// + std::string m_nnWeights = "data/yolov4-tiny_best.weights"; + + /// + std::string m_nnConfig = "data/yolov4-tiny.cfg"; + + /// + std::string m_classNames = "data/traffic.names"; + /// - std::map> m_nearTypes; + std::deque m_whiteList; + + /// + float m_confidenceThreshold = 0.5f; + + /// + float m_maxCropRatio = -1.f; + + /// + int m_maxBatch = 1; + + /// + int m_gpuId = 0; + + /// + /// \brief Neural network input size + cv::Size m_inputSize{ 0, 0 }; + + /// + /// YOLOV2 + /// YOLOV3 + /// YOLOV4 + /// 
YOLOV4_TINY + /// YOLOV5 + std::string m_netType = "YOLOV4"; + + /// + /// INT8 + /// FP16 + /// FP32 + std::string m_inferencePrecision = "FP16"; + + // opencv_dnn = 6 + // tensorrt = 5 + int m_detectorBackend = 5; + + // DNN_TARGET_CPU + // DNN_TARGET_OPENCL + // DNN_TARGET_OPENCL_FP16 + // DNN_TARGET_MYRIAD + // DNN_TARGET_CUDA + // DNN_TARGET_CUDA_FP16 + std::string m_dnnTarget = "DNN_TARGET_CPU"; + + // DNN_BACKEND_DEFAULT + // DNN_BACKEND_HALIDE + // DNN_BACKEND_INFERENCE_ENGINE + // DNN_BACKEND_OPENCV + // DNN_BACKEND_VKCOM + // DNN_BACKEND_CUDA + // DNN_BACKEND_INFERENCE_ENGINE_NGRAPH + // DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019 + std::string m_dnnBackend = "DNN_BACKEND_OPENCV"; + + // For TensorRT optimization, bytes + size_t m_maxVideoMemory = 0; /// struct EmbeddingParams @@ -121,10 +204,10 @@ struct TrackerSettings cv::Size m_inputLayer{128, 256}; /// - std::vector m_objectTypes; + std::vector m_objectTypes; EmbeddingParams(const std::string& embeddingCfgName, const std::string& embeddingWeightsName, - const cv::Size& inputLayer, const std::vector& objectTypes) + const cv::Size& inputLayer, const std::vector& objectTypes) : m_embeddingCfgName(embeddingCfgName), m_embeddingWeightsName(embeddingWeightsName), m_inputLayer(inputLayer), @@ -139,11 +222,12 @@ struct TrackerSettings /// TrackerSettings() { - m_distType[tracking::DistCenters] = 0.0f; - m_distType[tracking::DistRects] = 0.0f; - m_distType[tracking::DistJaccard] = 0.5f; - m_distType[tracking::DistHist] = 0.5f; - m_distType[tracking::DistFeatureCos] = 0.0f; + m_distType[tracking::DistCenters] = static_cast(0.0); + m_distType[tracking::DistRects] = static_cast(0.0); + m_distType[tracking::DistJaccard] = static_cast(0.5); + m_distType[tracking::DistHist] = static_cast(0.5); + m_distType[tracking::DistFeatureCos] = static_cast(0.0); + m_distType[tracking::DistMahalanobis] = static_cast(0.0); assert(CheckDistance()); } @@ -151,10 +235,10 @@ struct TrackerSettings /// bool CheckDistance() const { 
- track_t sum = std::accumulate(m_distType.begin(), m_distType.end(), 0.0f); - track_t maxOne = std::max(1.0f, std::fabs(sum)); + track_t sum = std::accumulate(m_distType.begin(), m_distType.end(), static_cast(0.0)); + track_t maxOne = std::max(static_cast(1.0), std::fabs(sum)); //std::cout << "CheckDistance: " << sum << " - " << (std::numeric_limits::epsilon() * maxOne) << ", " << std::fabs(sum - 1.0f) << std::endl; - return std::fabs(sum - 1.0f) <= std::numeric_limits::epsilon() * maxOne; + return std::fabs(sum - static_cast(1.0)) <= std::numeric_limits::epsilon() * maxOne; } /// @@ -180,7 +264,7 @@ struct TrackerSettings } /// - void AddNearTypes(ObjectTypes type1, ObjectTypes type2, bool sym) + void AddNearTypes(objtype_t type1, objtype_t type2, bool sym) { auto AddOne = [&](objtype_t type1, objtype_t type2) { @@ -190,9 +274,9 @@ struct TrackerSettings else it->second.insert(type2); }; - AddOne((objtype_t)type1, (objtype_t)type2); + AddOne(type1, type2); if (sym) - AddOne((objtype_t)type2, (objtype_t)type1); + AddOne(type2, type1); } /// @@ -203,88 +287,11 @@ struct TrackerSettings { auto it = m_nearTypes.find(type1); if (it != std::end(m_nearTypes)) - { res = it->second.find(type2) != std::end(it->second); - } } return res; } }; /// -/// \brief The CTracker class -/// -class CTracker -{ -public: - CTracker(const TrackerSettings& settings); - CTracker(const CTracker&) = delete; - CTracker(CTracker&&) = delete; - CTracker& operator=(const CTracker&) = delete; - CTracker& operator=(CTracker&&) = delete; - - ~CTracker(void); - - void Update(const regions_t& regions, cv::UMat currFrame, float fps); - - /// - /// \brief CanGrayFrameToTrack - /// \return - /// - bool CanGrayFrameToTrack() const - { - bool needColor = (m_settings.m_lostTrackType == tracking::LostTrackType::TrackGOTURN) || - (m_settings.m_lostTrackType == tracking::LostTrackType::TrackDAT) || - (m_settings.m_lostTrackType == tracking::LostTrackType::TrackSTAPLE) || - (m_settings.m_lostTrackType == 
tracking::LostTrackType::TrackLDES); - return !needColor; - } - - /// - /// \brief CanColorFrameToTrack - /// \return - /// - bool CanColorFrameToTrack() const - { - return true; - } - - /// - /// \brief GetTracksCount - /// \return - /// - size_t GetTracksCount() const - { - return m_tracks.size(); - } - /// - /// \brief GetTracks - /// \return - /// - void GetTracks(std::vector& tracks) const - { - tracks.clear(); - - if (m_tracks.size() > tracks.capacity()) - tracks.reserve(m_tracks.size()); - for (const auto& track : m_tracks) - { - tracks.emplace_back(track->ConstructObject()); - } - } - -private: - TrackerSettings m_settings; - - tracks_t m_tracks; - - size_t m_nextTrackID; - - cv::UMat m_prevFrame; - - std::unique_ptr m_SPCalculator; - std::map> m_embCalculators; - - void CreateDistaceMatrix(const regions_t& regions, std::vector& regionEmbeddings, distMatrix_t& costMatrix, track_t maxPossibleCost, track_t& maxCost, cv::UMat currFrame); - void UpdateTrackingState(const regions_t& regions, cv::UMat currFrame, float fps); -}; +bool ParseTrackerSettings(const std::string& settingsFile, TrackerSettings& trackerSettings); diff --git a/src/Tracker/VOTTracker.hpp b/src/Tracker/VOTTracker.hpp deleted file mode 100644 index 6f64ad355..000000000 --- a/src/Tracker/VOTTracker.hpp +++ /dev/null @@ -1,15 +0,0 @@ -#pragma once - -/// -/// \brief The VOTTracker class -/// -class VOTTracker -{ -public: - VOTTracker() = default; - virtual ~VOTTracker() = default; - - virtual void Initialize(const cv::Mat &im, cv::Rect region) = 0; - virtual cv::RotatedRect Update(const cv::Mat &im, float& confidence) = 0; - virtual void Train(const cv::Mat &im, bool first) = 0; -}; diff --git a/src/Tracker/byte_track/BYTETracker.cpp b/src/Tracker/byte_track/BYTETracker.cpp new file mode 100644 index 000000000..144490b1c --- /dev/null +++ b/src/Tracker/byte_track/BYTETracker.cpp @@ -0,0 +1,576 @@ +#include "BYTETracker.h" + +#include + +#include "defines.h" +#include "trajectory.h" +#include 
"TrackerSettings.h" + +/// +byte_track::BYTETracker::BYTETracker(const int& frame_rate, + const int& track_buffer, + const float& track_thresh, + const float& high_thresh, + const float& match_thresh) : + track_thresh_(track_thresh), + high_thresh_(high_thresh), + match_thresh_(match_thresh), + max_time_lost_(static_cast(frame_rate / 30.0 * track_buffer)), + frame_id_(0), + track_id_count_(0) +{ +} + +/// +void byte_track::BYTETracker::GetTracks(std::vector& tracks) const +{ + tracks.clear(); + + if (output_stracks_.size() > tracks.capacity()) + tracks.reserve(output_stracks_.size()); + for (const auto& track : output_stracks_) + { + std::chrono::duration period = m_lastFrameTime - m_lastFrameTime; + cv::RotatedRect rr(track->getRect().tl(), cv::Point2f(static_cast(track->getRect().x + track->getRect().width), static_cast(track->getRect().y)), track->getRect().br()); + TrackingObject to(rr, track->getTrackId(), track->getTrace(), false, cvRound(period.count()), false, + track->getType(), track->getScore(), track->getVelocity()); + + tracks.emplace_back(to); + } +} + +/// +void byte_track::BYTETracker::GetRemovedTracks(std::vector& trackIDs) const +{ + if (removed_stracks_.size() > trackIDs.capacity()) + trackIDs.reserve(removed_stracks_.size()); + for (const auto& remTrack : removed_stracks_) + { + trackIDs.emplace_back(remTrack->getTrackId()); + } +} + +/// +void byte_track::BYTETracker::Update(const regions_t& regions, cv::UMat /*currFrame*/, time_point_t frameTime) +{ + m_lastFrameTime = frameTime; + + ////////////////// Step 1: Get detections ////////////////// + frame_id_++; + + // Create new STracks using the result of object detection + std::vector det_stracks; + std::vector det_low_stracks; + + for (const auto ®ion : regions) + { + const auto strack = std::make_shared(region.m_brect, region.m_confidence, region.m_type, frameTime); + if (region.m_confidence >= track_thresh_) + det_stracks.push_back(strack); + else + det_low_stracks.push_back(strack); + } + + 
// Create lists of existing STrack + std::vector active_stracks; + std::vector non_active_stracks; + std::vector strack_pool; + + for (const auto& tracked_strack : tracked_stracks_) + { + if (!tracked_strack->isActivated()) + non_active_stracks.push_back(tracked_strack); + else + active_stracks.push_back(tracked_strack); + } + + strack_pool = jointStracks(active_stracks, lost_stracks_); + + // Predict current pose by KF + for (auto &strack : strack_pool) + { + strack->predict(); + } + + ////////////////// Step 2: First association, with IoU ////////////////// + std::vector current_tracked_stracks; + std::vector remain_tracked_stracks; + std::vector remain_det_stracks; + std::vector refind_stracks; + + { + std::vector> matches_idx; + std::vector unmatch_detection_idx, unmatch_track_idx; + + const auto dists = calcIouDistance(strack_pool, det_stracks); + linearAssignment(dists, strack_pool.size(), det_stracks.size(), match_thresh_, + matches_idx, unmatch_track_idx, unmatch_detection_idx); + + for (const auto &match_idx : matches_idx) + { + const auto track = strack_pool[match_idx[0]]; + const auto det = det_stracks[match_idx[1]]; + if (track->getSTrackState() == STrackState::Tracked) + { + track->update(*det, frame_id_, frameTime); + current_tracked_stracks.push_back(track); + } + else + { + track->reActivate(*det, frame_id_, -1, frameTime); + refind_stracks.push_back(track); + } + } + + for (const auto &unmatch_idx : unmatch_detection_idx) + { + remain_det_stracks.push_back(det_stracks[unmatch_idx]); + } + + for (const auto &unmatch_idx : unmatch_track_idx) + { + if (strack_pool[unmatch_idx]->getSTrackState() == STrackState::Tracked) + remain_tracked_stracks.push_back(strack_pool[unmatch_idx]); + } + } + + ////////////////// Step 3: Second association, using low score dets ////////////////// + std::vector current_lost_stracks; + + { + std::vector> matches_idx; + std::vector unmatch_track_idx, unmatch_detection_idx; + + const auto dists = 
calcIouDistance(remain_tracked_stracks, det_low_stracks); + linearAssignment(dists, remain_tracked_stracks.size(), det_low_stracks.size(), 0.5, + matches_idx, unmatch_track_idx, unmatch_detection_idx); + + for (const auto &match_idx : matches_idx) + { + const auto track = remain_tracked_stracks[match_idx[0]]; + const auto det = det_low_stracks[match_idx[1]]; + if (track->getSTrackState() == STrackState::Tracked) + { + track->update(*det, frame_id_, frameTime); + current_tracked_stracks.push_back(track); + } + else + { + track->reActivate(*det, frame_id_, -1, frameTime); + refind_stracks.push_back(track); + } + } + + for (const auto &unmatch_track : unmatch_track_idx) + { + const auto track = remain_tracked_stracks[unmatch_track]; + if (track->getSTrackState() != STrackState::Lost) + { + track->markAsLost(); + current_lost_stracks.push_back(track); + } + } + } + + ////////////////// Step 4: Init new stracks ////////////////// + std::vector current_removed_stracks; + + { + std::vector unmatch_detection_idx; + std::vector unmatch_unconfirmed_idx; + std::vector> matches_idx; + + // Deal with unconfirmed tracks, usually tracks with only one beginning frame + const auto dists = calcIouDistance(non_active_stracks, remain_det_stracks); + linearAssignment(dists, non_active_stracks.size(), remain_det_stracks.size(), 0.7, + matches_idx, unmatch_unconfirmed_idx, unmatch_detection_idx); + + for (const auto &match_idx : matches_idx) + { + non_active_stracks[match_idx[0]]->update(*remain_det_stracks[match_idx[1]], frame_id_, frameTime); + current_tracked_stracks.push_back(non_active_stracks[match_idx[0]]); + } + + for (const auto &unmatch_idx : unmatch_unconfirmed_idx) + { + const auto track = non_active_stracks[unmatch_idx]; + track->markAsRemoved(); + current_removed_stracks.push_back(track); + } + + // Add new stracks + for (const auto &unmatch_idx : unmatch_detection_idx) + { + const auto track = remain_det_stracks[unmatch_idx]; + if (track->getScore() < high_thresh_) + 
continue; + + track_id_count_++; + track->activate(frame_id_, track_id_count_, frameTime); + current_tracked_stracks.push_back(track); + } + } + + ////////////////// Step 5: Update state ////////////////// + for (const auto &lost_strack : lost_stracks_) + { + if (frame_id_ - lost_strack->getFrameId() > max_time_lost_) + { + lost_strack->markAsRemoved(); + current_removed_stracks.push_back(lost_strack); + } + } + + tracked_stracks_ = jointStracks(current_tracked_stracks, refind_stracks); + lost_stracks_ = subStracks(jointStracks(subStracks(lost_stracks_, tracked_stracks_), current_lost_stracks), removed_stracks_); + removed_stracks_ = jointStracks(removed_stracks_, current_removed_stracks); + + std::vector tracked_stracks_out, lost_stracks_out; + removeDuplicateStracks(tracked_stracks_, lost_stracks_, tracked_stracks_out, lost_stracks_out); + tracked_stracks_ = tracked_stracks_out; + lost_stracks_ = lost_stracks_out; + + + output_stracks_.clear(); + for (const auto &track : tracked_stracks_) + { + if (track->isActivated()) + output_stracks_.push_back(track); + } +} + +/// +std::vector byte_track::BYTETracker::jointStracks(const std::vector &a_tlist, + const std::vector &b_tlist) const +{ + std::map exists; + std::vector res; + for (size_t i = 0; i < a_tlist.size(); i++) + { + exists.emplace(a_tlist[i]->getTrackId(), 1); + res.push_back(a_tlist[i]); + } + for (size_t i = 0; i < b_tlist.size(); i++) + { + const size_t &tid = b_tlist[i]->getTrackId(); + if (!exists[tid] || exists.count(tid) == 0) + { + exists[tid] = 1; + res.push_back(b_tlist[i]); + } + } + return res; +} + +/// +std::vector byte_track::BYTETracker::subStracks(const std::vector &a_tlist, + const std::vector &b_tlist) const +{ + std::map stracks; + for (size_t i = 0; i < a_tlist.size(); i++) + { + stracks.emplace(a_tlist[i]->getTrackId(), a_tlist[i]); + } + + for (size_t i = 0; i < b_tlist.size(); i++) + { + const size_t&tid = b_tlist[i]->getTrackId(); + if (stracks.count(tid) != 0) + 
stracks.erase(tid); + } + + std::vector res; + std::map::iterator it; + for (it = stracks.begin(); it != stracks.end(); ++it) + { + res.push_back(it->second); + } + + return res; +} + +/// +void byte_track::BYTETracker::removeDuplicateStracks(const std::vector &a_stracks, + const std::vector &b_stracks, + std::vector &a_res, + std::vector &b_res) const +{ + const auto ious = calcIouDistance(a_stracks, b_stracks); + + std::vector> overlapping_combinations; + for (size_t i = 0; i < ious.size(); i++) + { + for (size_t j = 0; j < ious[i].size(); j++) + { + if (ious[i][j] < 0.15) + overlapping_combinations.emplace_back(i, j); + } + } + + std::vector a_overlapping(a_stracks.size(), false), b_overlapping(b_stracks.size(), false); + for (const auto &[a_idx, b_idx] : overlapping_combinations) + { + const size_t timep = a_stracks[a_idx]->getFrameId() - a_stracks[a_idx]->getStartFrameId(); + const size_t timeq = b_stracks[b_idx]->getFrameId() - b_stracks[b_idx]->getStartFrameId(); + if (timep > timeq) + b_overlapping[b_idx] = true; + else + a_overlapping[a_idx] = true; + } + + for (size_t ai = 0; ai < a_stracks.size(); ai++) + { + if (!a_overlapping[ai]) + a_res.push_back(a_stracks[ai]); + } + + for (size_t bi = 0; bi < b_stracks.size(); bi++) + { + if (!b_overlapping[bi]) + b_res.push_back(b_stracks[bi]); + } +} + +/// +void byte_track::BYTETracker::linearAssignment(const std::vector> &cost_matrix, + const size_t &cost_matrix_size, + const size_t &cost_matrix_size_size, + const float &thresh, + std::vector> &matches, + std::vector &a_unmatched, + std::vector &b_unmatched) const +{ + if (cost_matrix.size() == 0) + { + for (size_t i = 0; i < cost_matrix_size; i++) + { + a_unmatched.push_back(i); + } + for (size_t i = 0; i < cost_matrix_size_size; i++) + { + b_unmatched.push_back(i); + } + return; + } + + std::vector rowsol; + std::vector colsol; + execLapjv(cost_matrix, rowsol, colsol, true, thresh); + for (size_t i = 0; i < rowsol.size(); i++) + { + if (rowsol[i] >= 0) + 
matches.push_back({ (int)i, rowsol[i] }); + else + a_unmatched.push_back(i); + } + + for (size_t i = 0; i < colsol.size(); i++) + { + if (colsol[i] < 0) + b_unmatched.push_back(i); + } +} + +/// +std::vector> byte_track::BYTETracker::calcIous(const std::vector &a_rect, + const std::vector &b_rect) const +{ + std::vector> ious; + if (a_rect.size() * b_rect.size() == 0) + return ious; + + ious.resize(a_rect.size()); + for (size_t i = 0; i < ious.size(); i++) + { + ious[i].resize(b_rect.size()); + } + + auto calcIoU = [](const cv::Rect2f& r1, const cv::Rect2f& r2) + { + const float box_area = (r2.width + 1) * (r2.height + 1); + const float iw = std::min(r1.x + r1.width, r2.x + r2.width) - std::max(r1.x, r2.x) + 1; + float iou = 0; + if (iw > 0) + { + const float ih = std::min(r1.y + r1.height, r2.y + r2.height) - std::max(r1.y, r2.y) + 1; + if (ih > 0) + { + const float ua = (r1.width + 1) * (r1.height + 1) + box_area - iw * ih; + iou = iw * ih / ua; + } + } + return iou; + }; + + for (size_t bi = 0; bi < b_rect.size(); bi++) + { + for (size_t ai = 0; ai < a_rect.size(); ai++) + { + ious[ai][bi] = calcIoU(b_rect[bi], a_rect[ai]); + } + } + return ious; +} + +/// +std::vector > byte_track::BYTETracker::calcIouDistance(const std::vector &a_tracks, + const std::vector &b_tracks) const +{ + std::vector a_rects, b_rects; + for (size_t i = 0; i < a_tracks.size(); i++) + { + a_rects.push_back(a_tracks[i]->getRect()); + } + + for (size_t i = 0; i < b_tracks.size(); i++) + { + b_rects.push_back(b_tracks[i]->getRect()); + } + + const auto ious = calcIous(a_rects, b_rects); + + std::vector> cost_matrix; + for (size_t i = 0; i < ious.size(); i++) + { + std::vector iou; + for (size_t j = 0; j < ious[i].size(); j++) + { + iou.push_back(1 - ious[i][j]); + } + cost_matrix.push_back(iou); + } + + return cost_matrix; +} + +/// +double byte_track::BYTETracker::execLapjv(const std::vector> &cost, + std::vector &rowsol, + std::vector &colsol, + bool extend_cost, + float cost_limit, + bool 
return_cost) const +{ + std::vector > cost_c; + cost_c.assign(cost.begin(), cost.end()); + + std::vector > cost_c_extended; + + size_t n_rows = cost.size(); + size_t n_cols = cost[0].size(); + rowsol.resize(n_rows); + colsol.resize(n_cols); + + size_t n = 0; + if (n_rows == n_cols) + { + n = n_rows; + } + else + { + if (!extend_cost) + throw std::runtime_error("The `extend_cost` variable should set True"); + } + + if (extend_cost || cost_limit < std::numeric_limits::max()) + { + n = n_rows + n_cols; + cost_c_extended.resize(n); + for (size_t i = 0; i < cost_c_extended.size(); i++) + cost_c_extended[i].resize(n); + + if (cost_limit < std::numeric_limits::max()) + { + for (size_t i = 0; i < cost_c_extended.size(); i++) + { + for (size_t j = 0; j < cost_c_extended[i].size(); j++) + { + cost_c_extended[i][j] = cost_limit / 2.0f; + } + } + } + else + { + float cost_max = -1; + for (size_t i = 0; i < cost_c.size(); i++) + { + for (size_t j = 0; j < cost_c[i].size(); j++) + { + if (cost_c[i][j] > cost_max) + cost_max = cost_c[i][j]; + } + } + for (size_t i = 0; i < cost_c_extended.size(); i++) + { + for (size_t j = 0; j < cost_c_extended[i].size(); j++) + { + cost_c_extended[i][j] = cost_max + 1; + } + } + } + + for (size_t i = n_rows; i < cost_c_extended.size(); i++) + { + for (size_t j = n_cols; j < cost_c_extended[i].size(); j++) + { + cost_c_extended[i][j] = 0; + } + } + for (size_t i = 0; i < n_rows; i++) + { + for (size_t j = 0; j < n_cols; j++) + { + cost_c_extended[i][j] = cost_c[i][j]; + } + } + + cost_c.clear(); + cost_c.assign(cost_c_extended.begin(), cost_c_extended.end()); + } + + std::vector x_c(n, -1); + std::vector y_c(n, 0); + + int ret = lapjv_internal(n, cost_c, x_c, y_c); + if (ret != 0) + throw std::runtime_error("The result of lapjv_internal() is invalid."); + + double opt = 0.0; + + if (n != n_rows) + { + for (size_t i = 0; i < n; i++) + { + if (x_c[i] >= n_cols) + x_c[i] = -1; + if (y_c[i] >= n_rows) + y_c[i] = -1; + } + for (size_t i = 0; i < 
n_rows; i++) + { + rowsol[i] = x_c[i]; + } + for (size_t i = 0; i < n_cols; i++) + { + colsol[i] = y_c[i]; + } + + if (return_cost) + { + for (size_t i = 0; i < rowsol.size(); i++) + { + if (rowsol[i] != -1) + opt += cost_c[i][rowsol[i]]; + } + } + } + else if (return_cost) + { + for (size_t i = 0; i < rowsol.size(); i++) + { + opt += cost_c[i][rowsol[i]]; + } + } + + return opt; +} diff --git a/src/Tracker/byte_track/BYTETracker.h b/src/Tracker/byte_track/BYTETracker.h new file mode 100644 index 000000000..95146a7bc --- /dev/null +++ b/src/Tracker/byte_track/BYTETracker.h @@ -0,0 +1,75 @@ +#pragma once + +#include "BaseTracker.h" + +#include "STrack.h" +#include "lapjv.h" + +namespace byte_track +{ +class BYTETracker final : public BaseTracker +{ +public: + using STrackPtr = std::shared_ptr; + + BYTETracker(const int& frame_rate, // 30 + const int& track_buffer, // 30 + const float& track_thresh, // 0.5f + const float& high_thresh, // 0.5f + const float& match_thresh); // 0.8f + ~BYTETracker() = default; + + void Update(const regions_t& regions, cv::UMat currFrame, time_point_t frameTime) override; + + void GetTracks(std::vector& tracks) const override; + void GetRemovedTracks(std::vector& trackIDs) const override; + +private: + std::vector jointStracks(const std::vector &a_tlist, + const std::vector &b_tlist) const; + + std::vector subStracks(const std::vector &a_tlist, + const std::vector &b_tlist) const; + + void removeDuplicateStracks(const std::vector &a_stracks, + const std::vector &b_stracks, + std::vector &a_res, + std::vector &b_res) const; + + void linearAssignment(const std::vector> &cost_matrix, + const size_t &cost_matrix_size, + const size_t &cost_matrix_size_size, + const float &thresh, + std::vector> &matches, + std::vector &b_unmatched, + std::vector &a_unmatched) const; + + std::vector> calcIouDistance(const std::vector &a_tracks, + const std::vector &b_tracks) const; + + std::vector> calcIous(const std::vector &a_rect, + const std::vector 
&b_rect) const; + + double execLapjv(const std::vector > &cost, + std::vector &rowsol, + std::vector &colsol, + bool extend_cost = false, + float cost_limit = std::numeric_limits::max(), + bool return_cost = true) const; + +private: + const float track_thresh_ = 0.5f; + const float high_thresh_ = 0.6f; + const float match_thresh_ = 0.8f; + const size_t max_time_lost_ = 30; + + time_point_t m_lastFrameTime; + size_t frame_id_ = 0; + size_t track_id_count_ = 0; + + std::vector tracked_stracks_; + std::vector lost_stracks_; + std::vector removed_stracks_; + std::vector output_stracks_; +}; +} \ No newline at end of file diff --git a/src/Tracker/byte_track/KalmanFilter.cpp b/src/Tracker/byte_track/KalmanFilter.cpp new file mode 100644 index 000000000..3591eb05d --- /dev/null +++ b/src/Tracker/byte_track/KalmanFilter.cpp @@ -0,0 +1,108 @@ +#include "KalmanFilter.h" + +namespace byte_track +{ + +KalmanFilter::KalmanFilter(const float& std_weight_position, + const float& std_weight_velocity) : + std_weight_position_(std_weight_position), + std_weight_velocity_(std_weight_velocity) +{ + constexpr size_t ndim = 4; + constexpr float dt = 1.0f; + + motion_mat_ = cv::Matx::eye(); + update_mat_ = cv::Matx::eye(); + + for (size_t i = 0; i < ndim; i++) + { + motion_mat_(i, ndim + i) = dt; + } +} + +void KalmanFilter::initiate(StateMean& mean, StateCov& covariance, const DetectBox& measurement) +{ + for (int i = 0; i < 4; i++) + { + mean(0, i) = measurement(0, i); + mean(0, i + 4) = 0.0f; + } + + StateMean std( + 2 * std_weight_position_ * measurement(3), + 2 * std_weight_position_ * measurement(3), + 1e-2f, + 2 * std_weight_position_ * measurement(3), + 10 * std_weight_velocity_ * measurement(3), + 10 * std_weight_velocity_ * measurement(3), + 1e-5f, + 10 * std_weight_velocity_ * measurement(3)); + + covariance = StateCov::zeros(); + for (int i = 0; i < 8; i++) + { + covariance(i, i) = std(i) * std(i); + } +} + +void KalmanFilter::predict(StateMean& mean, StateCov& covariance) +{ 
+ StateMean std( + std_weight_position_ * mean(3), + std_weight_position_ * mean(3), + 1e-2f, + std_weight_position_ * mean(3), + std_weight_velocity_ * mean(3), + std_weight_velocity_ * mean(3), + 1e-5f, + std_weight_velocity_ * mean(3)); + + StateCov motion_cov = StateCov::zeros(); + for (int i = 0; i < 8; i++) + { + motion_cov(i, i) = std(i) * std(i); + } + + StateMean new_mean = mean * motion_mat_.t(); + mean = new_mean; + + covariance = motion_mat_ * covariance * motion_mat_.t() + motion_cov; +} + +void KalmanFilter::update(StateMean& mean, StateCov& covariance, const DetectBox& measurement) +{ + StateHMean projected_mean; + StateHCov projected_cov; + project(projected_mean, projected_cov, mean, covariance); + + cv::Matx B = (covariance * update_mat_.t()).t(); + + cv::Matx kalman_gain; + cv::solve(projected_cov, B, kalman_gain, cv::DECOMP_CHOLESKY); + + StateHMean innovation = measurement - projected_mean; + + StateMean tmp = innovation * kalman_gain; + mean = mean + tmp; + covariance = covariance - kalman_gain.t() * projected_cov * kalman_gain; +} + +void KalmanFilter::project(StateHMean& projected_mean, StateHCov& projected_covariance, + const StateMean& mean, const StateCov& covariance) +{ + DetectBox std( + std_weight_position_ * mean(3), + std_weight_position_ * mean(3), + 1e-1f, + std_weight_position_ * mean(3)); + + projected_mean = (update_mat_ * mean.t()).t(); + projected_covariance = update_mat_ * covariance * update_mat_.t(); + + for (int i = 0; i < 4; i++) + { + projected_covariance(i, i) += std(i) * std(i); + } +} + +} diff --git a/src/Tracker/byte_track/KalmanFilter.h b/src/Tracker/byte_track/KalmanFilter.h new file mode 100644 index 000000000..79dc6cc84 --- /dev/null +++ b/src/Tracker/byte_track/KalmanFilter.h @@ -0,0 +1,34 @@ +#pragma once + +#include + +namespace byte_track +{ + +class KalmanFilter +{ +public: + using DetectBox = cv::Matx; + using StateMean = cv::Matx; + using StateCov = cv::Matx; + using StateHMean = cv::Matx; + using 
StateHCov = cv::Matx; + + KalmanFilter(const float& std_weight_position = 1.0f / 20, + const float& std_weight_velocity = 1.0f / 160); + + void initiate(StateMean& mean, StateCov& covariance, const DetectBox& measurement); + void predict(StateMean& mean, StateCov& covariance); + void update(StateMean& mean, StateCov& covariance, const DetectBox& measurement); + +private: + float std_weight_position_; + float std_weight_velocity_; + + cv::Matx motion_mat_; + cv::Matx update_mat_; + + void project(StateHMean& projected_mean, StateHCov& projected_covariance, + const StateMean& mean, const StateCov& covariance); +}; +} diff --git a/src/Tracker/byte_track/STrack.cpp b/src/Tracker/byte_track/STrack.cpp new file mode 100644 index 000000000..728b4e6e2 --- /dev/null +++ b/src/Tracker/byte_track/STrack.cpp @@ -0,0 +1,167 @@ +#include "STrack.h" + +#include + +byte_track::STrack::STrack(const cv::Rect2f& rect, const float& score, objtype_t type, time_point_t currTime) : + kalman_filter_(), + mean_(), + covariance_(), + type_(type), + rect_(rect), + state_(STrackState::New), + is_activated_(false), + score_(score), + track_id_(0), + frame_id_(0), + start_frame_id_(0), + tracklet_len_(0) +{ + Point_t pt(rect.x + rect.width / 2.f, rect.y + rect.height); + trace_.push_back(pt, pt, currTime); +} + +const cv::Rect2f& byte_track::STrack::getRect() const +{ + return rect_; +} + +const byte_track::STrackState& byte_track::STrack::getSTrackState() const +{ + return state_; +} + +const bool& byte_track::STrack::isActivated() const +{ + return is_activated_; +} +const float& byte_track::STrack::getScore() const +{ + return score_; +} + +const size_t& byte_track::STrack::getTrackId() const +{ + return track_id_; +} + +const size_t& byte_track::STrack::getFrameId() const +{ + return frame_id_; +} + +const size_t& byte_track::STrack::getStartFrameId() const +{ + return start_frame_id_; +} + +const size_t& byte_track::STrack::getTrackletLength() const +{ + return tracklet_len_; +} + 
+objtype_t byte_track::STrack::getType() const +{ + return type_; +} + +const Trace& byte_track::STrack::getTrace() const +{ + return trace_; +} + +cv::Vec byte_track::STrack::getVelocity() const +{ + return cv::Vec(mean_(4), mean_(5)); +} + +byte_track::KalmanFilter::DetectBox GetXyah(const cv::Rect2f& rect) +{ + return byte_track::KalmanFilter::DetectBox( + rect.x + rect.width / 2.f, + rect.y + rect.height / 2.f, + rect.width / rect.height, + rect.height + ); +} + +void byte_track::STrack::activate(const size_t& frame_id, const size_t& track_id, time_point_t currTime) +{ + kalman_filter_.initiate(mean_, covariance_, GetXyah(rect_)); + + updateRect(); + + state_ = STrackState::Tracked; + if (frame_id == 1) + is_activated_ = true; + + track_id_ = track_id; + frame_id_ = frame_id; + start_frame_id_ = frame_id; + tracklet_len_ = 0; + + Point_t pt_pr(rect_.x + rect_.width / 2.f, rect_.y + rect_.height); + trace_.push_back(pt_pr, currTime); +} + +void byte_track::STrack::reActivate(const STrack &new_track, const size_t &frame_id, const int &new_track_id, time_point_t currTime) +{ + kalman_filter_.update(mean_, covariance_, GetXyah(new_track.getRect())); + + updateRect(); + + state_ = STrackState::Tracked; + is_activated_ = true; + score_ = new_track.getScore(); + if (0 <= new_track_id) + track_id_ = new_track_id; + + frame_id_ = frame_id; + tracklet_len_ = 0; + + Point_t pt_pr(rect_.x + rect_.width / 2.f, rect_.y + rect_.height); + Point_t pt_raw(new_track.getRect().x + new_track.getRect().width / 2.f, new_track.getRect().y + new_track.getRect().height); + trace_.push_back(pt_pr, pt_raw, currTime); +} + +void byte_track::STrack::predict() +{ + if (state_ != STrackState::Tracked) + mean_(7) = 0; + + kalman_filter_.predict(mean_, covariance_); +} + +void byte_track::STrack::update(const STrack &new_track, const size_t &frame_id, time_point_t currTime) +{ + kalman_filter_.update(mean_, covariance_, GetXyah(new_track.getRect())); + + updateRect(); + + state_ = 
STrackState::Tracked; + is_activated_ = true; + score_ = new_track.getScore(); + frame_id_ = frame_id; + tracklet_len_++; + + Point_t pt_pr(rect_.x + rect_.width / 2.f, rect_.y + rect_.height); + Point_t pt_raw(new_track.getRect().x + new_track.getRect().width / 2.f, new_track.getRect().y + new_track.getRect().height); + trace_.push_back(pt_pr, pt_raw, currTime); +} + +void byte_track::STrack::markAsLost() +{ + state_ = STrackState::Lost; +} + +void byte_track::STrack::markAsRemoved() +{ + state_ = STrackState::Removed; +} + +void byte_track::STrack::updateRect() +{ + rect_.width = mean_(2) * mean_(3); + rect_.height = mean_(3); + rect_.x = mean_(0) - rect_.width / 2.f; + rect_.y = mean_(1) - rect_.height / 2.f; +} diff --git a/src/Tracker/byte_track/STrack.h b/src/Tracker/byte_track/STrack.h new file mode 100644 index 000000000..65607f784 --- /dev/null +++ b/src/Tracker/byte_track/STrack.h @@ -0,0 +1,66 @@ +#pragma once + +#include +#include + +#include "KalmanFilter.h" +#include "trajectory.h" + +namespace byte_track +{ +enum class STrackState { + New = 0, + Tracked = 1, + Lost = 2, + Removed = 3, +}; + +class STrack +{ +public: + STrack(const cv::Rect2f& rect, const float& score, objtype_t type, time_point_t currTime); + ~STrack() = default; + + const cv::Rect2f& getRect() const; + const STrackState& getSTrackState() const; + + const bool& isActivated() const; + const float& getScore() const; + const size_t& getTrackId() const; + const size_t& getFrameId() const; + const size_t& getStartFrameId() const; + const size_t& getTrackletLength() const; + objtype_t getType() const; + const Trace& getTrace() const; + cv::Vec getVelocity() const; + + void activate(const size_t& frame_id, const size_t& track_id, time_point_t currTime); + void reActivate(const STrack &new_track, const size_t &frame_id, const int &new_track_id, time_point_t currTime); // new_track_id = -1 + + void predict(); + void update(const STrack &new_track, const size_t &frame_id, time_point_t 
currTime); + + void markAsLost(); + void markAsRemoved(); + +private: + KalmanFilter kalman_filter_; + KalmanFilter::StateMean mean_; + KalmanFilter::StateCov covariance_; + + objtype_t type_ = bad_type; + cv::Rect2f rect_; + STrackState state_{ STrackState::New }; + + bool is_activated_ = false; + float score_ = 0.f; + size_t track_id_ = 0; + size_t frame_id_ = 0; + size_t start_frame_id_ = 0; + size_t tracklet_len_ = 0; + + Trace trace_; + + void updateRect(); +}; +} \ No newline at end of file diff --git a/src/Tracker/byte_track/lapjv.cpp b/src/Tracker/byte_track/lapjv.cpp new file mode 100644 index 000000000..358be4ae6 --- /dev/null +++ b/src/Tracker/byte_track/lapjv.cpp @@ -0,0 +1,315 @@ +#include +#include +#include +#include +#include + +#include "lapjv.h" + +namespace +{ + constexpr size_t LARGE = std::numeric_limits::max(); + + enum class fp_t { + FP_1 = 1, + FP_2 = 2, + FP_DYNAMIC = 3, + }; + + /** Column-reduction and reduction transfer for a dense cost matrix. + */ + int _ccrrt_dense(const size_t n, const std::vector>& cost, + std::vector& free_rows, std::vector& x, std::vector& y, std::vector& v) + { + for (size_t i = 0; i < n; i++) + { + for (size_t j = 0; j < n; j++) + { + const lapjv_t c = cost[i][j]; + if (c < v[j]) { + v[j] = c; + y[j] = i; + } + } + } + + std::vector unique(n, true); + { + int j = n; + do { + j--; + const int i = y[j]; + if (x[i] < 0) { + x[i] = j; + } + else { + unique[i] = false; + y[j] = -1; + } + } while (j > 0); + } + int n_free_rows = 0; + for (size_t i = 0; i < n; i++) + { + if (x[i] < 0) { + free_rows[n_free_rows++] = i; + } + else if (unique[i]) { + const int j = x[i]; + lapjv_t min = LARGE; + for (size_t j2 = 0; j2 < n; j2++) + { + if (j2 == (size_t)j) + continue; + + const lapjv_t c = cost[i][j2] - v[j2]; + if (c < min) + min = c; + } + v[j] -= min; + } + } + return n_free_rows; + } + + + /** Augmenting row reduction for a dense cost matrix. 
+ */ + int _carr_dense( + const size_t n, const std::vector>& cost, + const size_t n_free_rows, + std::vector& free_rows, std::vector& x, std::vector& y, std::vector& v) + { + size_t current = 0; + int new_free_rows = 0; + size_t rr_cnt = 0; + while (current < n_free_rows) + { + rr_cnt++; + const int free_i = free_rows[current++]; + int j1 = 0; + lapjv_t v1 = cost[free_i][0] - v[0]; + int j2 = -1; + lapjv_t v2 = LARGE; + for (size_t j = 1; j < n; j++) { + const lapjv_t c = cost[free_i][j] - v[j]; + if (c < v2) + { + if (c >= v1) { + v2 = c; + j2 = j; + } + else { + v2 = v1; + v1 = c; + j2 = j1; + j1 = j; + } + } + } + int i0 = y[j1]; + lapjv_t v1_new = v[j1] - (v2 - v1); + bool v1_lowers = v1_new < v[j1]; + if (rr_cnt < current * n) + { + if (v1_lowers) { + v[j1] = v1_new; + } + else if (i0 >= 0 && j2 >= 0) { + j1 = j2; + i0 = y[j2]; + } + if (i0 >= 0) + { + if (v1_lowers) + free_rows[--current] = i0; + else + free_rows[new_free_rows++] = i0; + } + } + else { + if (i0 >= 0) + free_rows[new_free_rows++] = i0; + } + x[free_i] = j1; + y[j1] = free_i; + } + return new_free_rows; + } + + + /** Find columns with minimum d[j] and put them on the SCAN list. + */ + size_t _find_dense(const size_t n, size_t lo, const std::vector& d, std::vector& cols) + { + size_t hi = lo + 1; + lapjv_t mind = d[cols[lo]]; + for (size_t k = hi; k < n; k++) + { + int j = cols[k]; + if (d[j] <= mind) { + if (d[j] < mind) { + hi = lo; + mind = d[j]; + } + cols[k] = cols[hi]; + cols[hi++] = j; + } + } + return hi; + } + + + // Scan all columns in TODO starting from arbitrary column in SCAN + // and try to decrease d of the TODO columns using the SCAN column. 
+ int _scan_dense(const size_t n, const std::vector>& cost, + size_t* plo, size_t* phi, + std::vector& d, std::vector& cols, std::vector& pred, + const std::vector& y, const std::vector& v) + { + size_t lo = *plo; + size_t hi = *phi; + + while (lo != hi) + { + int j = cols[lo++]; + const int i = y[j]; + const lapjv_t mind = d[j]; + lapjv_t h = cost[i][j] - v[j] - mind; + // For all columns in TODO + for (size_t k = hi; k < n; k++) + { + j = cols[k]; + lapjv_t cred_ij = cost[i][j] - v[j] - h; + if (cred_ij < d[j]) + { + d[j] = cred_ij; + pred[j] = i; + if (cred_ij == mind) + { + if (y[j] < 0) + return j; + + cols[k] = cols[hi]; + cols[hi++] = j; + } + } + } + } + *plo = lo; + *phi = hi; + return -1; + } + + + /** Single iteration of modified Dijkstra shortest path algorithm as explained in the JV paper. + * + * This is a dense matrix version. + * + * \return The closest free column index. + */ + int find_path_dense( + const size_t n, const std::vector>& cost, + const int start_i, + std::vector& y, std::vector& v, + std::vector& pred) + { + size_t lo = 0, hi = 0; + int final_j = -1; + size_t n_ready = 0; + + std::vector cols(n); + std::vector d(n); + + for (size_t i = 0; i < n; i++) + { + cols[i] = i; + pred[i] = start_i; + d[i] = cost[start_i][i] - v[i]; + } + + while (final_j == -1) + { + // No columns left on the SCAN list. + if (lo == hi) + { + n_ready = lo; + hi = _find_dense(n, lo, d, cols); + for (size_t k = lo; k < hi; k++) + { + const int j = cols[k]; + if (y[j] < 0) + final_j = j; + } + } + if (final_j == -1) + final_j = _scan_dense(n, cost, &lo, &hi, d, cols, pred, y, v); + } + + { + const lapjv_t mind = d[cols[lo]]; + for (size_t k = 0; k < n_ready; k++) { + const int j = cols[k]; + v[j] += d[j] - mind; + } + } + + return final_j; + } + + + /** Augment for a dense cost matrix. 
+ */ + int _ca_dense( + const size_t n, const std::vector>& cost, + const size_t n_free_rows, + std::vector& free_rows, std::vector& x, std::vector& y, std::vector& v) + { + std::vector pred(n); + + for (size_t pfree_i = 0; pfree_i < n_free_rows; ++pfree_i) + { + int i = -1; + size_t k = 0; + + int j = find_path_dense(n, cost, free_rows[pfree_i], y, v, pred); + if (j < 0) + throw std::runtime_error("Error occured in _ca_dense(): j < 0"); + + if (j >= static_cast(n)) + throw std::runtime_error("Error occured in _ca_dense(): j >= n"); + + while (i != free_rows[pfree_i]) + { + i = pred[j]; + y[j] = i; + std::swap(j, x[i]); + ++k; + if (k >= n) + throw std::runtime_error("Error occured in _ca_dense(): k >= n"); + } + } + return 0; + } +} + +/** Solve dense sparse LAP. */ +int byte_track::lapjv_internal( + const size_t n, const std::vector>& cost, + std::vector& x, std::vector& y) +{ + std::vector free_rows(n); + std::vector v(n, LARGE); + + int ret = _ccrrt_dense(n, cost, free_rows, x, y, v); + int i = 0; + while (ret > 0 && i < 2) + { + ret = _carr_dense(n, cost, ret, free_rows, x, y, v); + ++i; + } + if (ret > 0) + ret = _ca_dense(n, cost, ret, free_rows, x, y, v); + + return ret; +} diff --git a/src/Tracker/byte_track/lapjv.h b/src/Tracker/byte_track/lapjv.h new file mode 100644 index 000000000..55dad2e05 --- /dev/null +++ b/src/Tracker/byte_track/lapjv.h @@ -0,0 +1,11 @@ +#pragma once + +#include +#include + +typedef float lapjv_t; + +namespace byte_track +{ +int lapjv_internal(const size_t n, const std::vector>& cost, std::vector& x, std::vector& y); +} \ No newline at end of file diff --git a/src/Tracker/dat/dat_tracker.cpp b/src/Tracker/dat/dat_tracker.cpp deleted file mode 100644 index d4c33b480..000000000 --- a/src/Tracker/dat/dat_tracker.cpp +++ /dev/null @@ -1,784 +0,0 @@ -#include -#include "dat_tracker.hpp" - -/// -/// \brief DAT_TRACKER::DAT_TRACKER -/// -DAT_TRACKER::DAT_TRACKER() -{ - cfg = default_parameters_dat(cfg); -} - -/// -/// \brief 
DAT_TRACKER::~DAT_TRACKER -/// -DAT_TRACKER::~DAT_TRACKER() -{ - -} - -/// -/// \brief DAT_TRACKER::tracker_dat_initialize -/// \param I -/// \param region -/// -void DAT_TRACKER::Initialize(const cv::Mat &im, cv::Rect region) -{ - double cx = region.x + double(region.width - 1) / 2.0; - double cy = region.y + double(region.height - 1) / 2.0; - double w = region.width; - double h = region.height; - - cv::Point target_pos(round(cx),round(cy)); - cv::Size target_sz(round(w),round(h)); - - scale_factor_ = std::min(1.0, round(10.0 * double(cfg.img_scale_target_diagonal) / cv::norm(cv::Point(target_sz.width,target_sz.height))) / 10.0); - target_pos.x = target_pos.x * scale_factor_; target_pos.y = target_pos.y * scale_factor_; - target_sz.width = target_sz.width * scale_factor_; target_sz.height = target_sz.height * scale_factor_; - - cv::Mat img; - cv::resize(im, img, cv::Size(), scale_factor_, scale_factor_); - switch (cfg.color_space) { - case 1: //1rgb - if (img.channels() == 1) - { - cv::cvtColor(img, img, CV_GRAY2BGR); - } - break; - case 2: //2lab - cv::cvtColor(img, img, CV_BGR2Lab); - break; - case 3: //3hsv - cv::cvtColor(img, img, CV_BGR2HSV); - break; - case 4: //4gray - if (img.channels() == 3) - { - cv::cvtColor(img, img, CV_BGR2GRAY); - } - break; - default: - std::cout << "int_variable does not equal any of the above cases" << std::endl; - } - cv::Size surr_sz(floor(cfg.surr_win_factor * target_sz.width), - floor(cfg.surr_win_factor * target_sz.height)); - cv::Rect surr_rect = pos2rect(target_pos, surr_sz, img.size()); - cv::Rect obj_rect_surr = pos2rect(target_pos, target_sz, img.size()); - obj_rect_surr.x -= surr_rect.x; - obj_rect_surr.y -= surr_rect.y; - cv::Mat surr_win = getSubwindow(img, target_pos, surr_sz); - cv::Mat prob_map; - getForegroundBackgroundProbs(surr_win, obj_rect_surr, cfg.num_bins, cfg.bin_mapping, prob_lut_, prob_map); - - prob_lut_distractor_ = prob_lut_.clone(); - prob_lut_masked_ = prob_lut_.clone(); - adaptive_threshold_ = 
getAdaptiveThreshold(prob_map, obj_rect_surr); - - target_pos_history_.push_back(cv::Point(target_pos.x / scale_factor_, target_pos.y / scale_factor_)); - target_sz_history_.push_back(cv::Size(target_sz.width / scale_factor_, target_sz.height / scale_factor_)); -} - -/// -/// \brief DAT_TRACKER::tracker_dat_update -/// \param I -/// \param confidence -/// \return -/// -cv::RotatedRect DAT_TRACKER::Update(const cv::Mat &im, float& confidence) -{ - confidence = 0; - - cv::Mat img_preprocessed; - cv::resize(im, img_preprocessed, cv::Size(), scale_factor_, scale_factor_); - cv::Mat img; - switch (cfg.color_space) { - case 1://1rgb - if (img_preprocessed.channels() == 1) - { - cv::cvtColor(img_preprocessed, img, CV_GRAY2BGR); - } - else - { - img_preprocessed.copyTo(img); - } - break; - case 2://2lab - cv::cvtColor(img_preprocessed, img, CV_BGR2Lab); - break; - case 3://3hsv - cv::cvtColor(img_preprocessed, img, CV_BGR2HSV); - break; - case 4://4gray - if (img_preprocessed.channels() == 3) - { - cv::cvtColor(img_preprocessed, img, CV_BGR2GRAY); - } - break; - default: - std::cout << "int_variable does not equal any of the above cases" << std::endl; - } - cv::Point prev_pos = target_pos_history_.back(); - cv::Size prev_sz = target_sz_history_.back(); - - if (cfg.motion_estimation_history_size > 0) - prev_pos = prev_pos + getMotionPrediction(target_pos_history_, cfg.motion_estimation_history_size); - - cv::Point2f target_pos(prev_pos.x*scale_factor_, prev_pos.y*scale_factor_); - cv::Size target_sz(prev_sz.width*scale_factor_, prev_sz.height*scale_factor_); - - cv::Size search_sz; - search_sz.width = floor(target_sz.width + cfg.search_win_padding*std::max(target_sz.width, target_sz.height)); - search_sz.height = floor(target_sz.height + cfg.search_win_padding*std::max(target_sz.width, target_sz.height)); - cv::Rect search_rect = pos2rect(target_pos, search_sz); - cv::Mat search_win, padded_search_win; - getSubwindowMasked(img, target_pos, search_sz, search_win, 
padded_search_win); - - // Apply probability LUT - cv::Mat pm_search = getForegroundProb(search_win, prob_lut_, cfg.bin_mapping); - cv::Mat pm_search_dist; - if (cfg.distractor_aware) { - pm_search_dist = getForegroundProb(search_win, prob_lut_distractor_, cfg.bin_mapping); - pm_search = (pm_search + pm_search_dist)/2.; - } - pm_search.setTo(0, padded_search_win); - - // Cosine / Hanning window - cv::Mat cos_win = CalculateHann(search_sz); - - std::vector hypotheses; - std::vector vote_scores; - std::vector dist_scores; - getNMSRects(pm_search, target_sz, cfg.nms_scale, cfg.nms_overlap, - cfg.nms_score_factor, cos_win, cfg.nms_include_center_vote, - hypotheses, vote_scores, dist_scores); - - std::vector candidate_centers; - std::vector candidate_scores; - for (size_t i = 0; i < hypotheses.size(); ++i) { - candidate_centers.push_back(cv::Point2f(float(hypotheses[i].x) + float(hypotheses[i].width) / 2., - float(hypotheses[i].y) + float(hypotheses[i].height) / 2.)); - candidate_scores.push_back(vote_scores[i] * dist_scores[i]); - } - auto maxEl = std::max_element(candidate_scores.begin(), candidate_scores.end()); - size_t best_candidate = maxEl - candidate_scores.begin(); - confidence = *maxEl; - - target_pos = candidate_centers[best_candidate]; - - std::vector distractors; - std::vector distractor_overlap; - if (hypotheses.size() > 1) { - distractors.clear(); - distractor_overlap.clear(); - cv::Rect target_rect = pos2rect(target_pos, target_sz, pm_search.size()); - for (size_t i = 0; i < hypotheses.size(); ++i){ - if (i != best_candidate) { - distractors.push_back(hypotheses[i]); - distractor_overlap.push_back(intersectionOverUnion(target_rect, distractors.back())); - } - } - } else { - distractors.clear(); - distractor_overlap.clear(); - } - - // Localization visualization - if (cfg.show_figures) - { - cv::Mat pm_search_color; - pm_search.convertTo(pm_search_color,CV_8UC1,255); - applyColorMap(pm_search_color, pm_search_color, cv::COLORMAP_JET); - for (size_t i = 0; 
i < hypotheses.size(); ++i){ - cv::rectangle(pm_search_color, hypotheses[i], cv::Scalar(0, 255, 255 * (i != best_candidate)), 2); - } -#ifndef SILENT_WORK - //cv::imshow("Search Window", pm_search_color); - //cv::waitKey(1); -#endif - } - - // Appearance update - // Get current target position within full(possibly downscaled) image coorinates - cv::Point2f target_pos_img; - target_pos_img.x = target_pos.x + search_rect.x; - target_pos_img.y = target_pos.y + search_rect.y; - if (cfg.prob_lut_update_rate > 0) { - // Extract surrounding region - cv::Size surr_sz; - surr_sz.width = floor(cfg.surr_win_factor * target_sz.width); - surr_sz.height = floor(cfg.surr_win_factor * target_sz.height); - cv::Rect surr_rect = pos2rect(target_pos_img, surr_sz, img.size()); - cv::Rect obj_rect_surr = pos2rect(target_pos_img, target_sz, img.size()); - obj_rect_surr.x -= surr_rect.x; - obj_rect_surr.y -= surr_rect.y; - - cv::Mat surr_win = getSubwindow(img, target_pos_img, surr_sz); - - cv::Mat prob_lut_bg; - getForegroundBackgroundProbs(surr_win, obj_rect_surr, cfg.num_bins, prob_lut_bg); - - cv::Mat prob_map; - if (cfg.distractor_aware) { - // Handle distractors - if (distractors.size() > 1) { - cv::Rect obj_rect = pos2rect(target_pos, target_sz, search_win.size()); - cv::Mat prob_lut_dist = getForegroundDistractorProbs(search_win, obj_rect, distractors, cfg.num_bins); - - prob_lut_distractor_ = (1 - cfg.prob_lut_update_rate) * prob_lut_distractor_ + cfg.prob_lut_update_rate * prob_lut_dist; - } - else { - // If there are no distractors, trigger decay of distractor LUT - prob_lut_distractor_ = (1 - cfg.prob_lut_update_rate) * prob_lut_distractor_ + cfg.prob_lut_update_rate * prob_lut_bg; - } - - // Only update if distractors are not overlapping too much - if (distractors.empty() || (*max_element(distractor_overlap.begin(), distractor_overlap.end()) < 0.1)) { - prob_lut_ = (1 - cfg.prob_lut_update_rate) * prob_lut_ + cfg.prob_lut_update_rate * prob_lut_bg; - } - - prob_map = 
getForegroundProb(surr_win, prob_lut_, cfg.bin_mapping); - cv::Mat dist_map = getForegroundProb(surr_win, prob_lut_distractor_, cfg.bin_mapping); - prob_map = .5 * prob_map + .5 * dist_map; - } - else { // No distractor - awareness - prob_lut_ = (1 - cfg.prob_lut_update_rate) * prob_lut_ + cfg.prob_lut_update_rate * prob_lut_bg; - prob_map = getForegroundProb(surr_win, prob_lut_, cfg.bin_mapping); - } - // Update adaptive threshold - adaptive_threshold_ = getAdaptiveThreshold(prob_map, obj_rect_surr); - } - - // Store current location - target_pos.x = target_pos.x + search_rect.x ; - target_pos.y = target_pos.y + search_rect.y; - cv::Point target_pos_original; - cv::Size target_sz_original; - target_pos_original.x = target_pos.x / scale_factor_; - target_pos_original.y = target_pos.y / scale_factor_; - target_sz_original.width = target_sz.width / scale_factor_; - target_sz_original.height = target_sz.height / scale_factor_; - - target_pos_history_.push_back(target_pos_original); - target_sz_history_.push_back(target_sz_original); - - // Report current location - cv::Rect location = pos2rect(target_pos_history_.back(), target_sz_history_.back(), im.size()); - - // Adapt image scale factor - scale_factor_ = std::min(1.0, round(10.0 * double(cfg.img_scale_target_diagonal) / cv::norm(cv::Point(target_sz_original.width, target_sz_original.height))) / 10.0); - - return cv::RotatedRect(cv::Point2f(location.x + 0.5f * location.width, location.y + 0.5f * location.height), - cv::Size2f(location.width, location.height), 0.f); -} - -/// -/// \brief DAT_TRACKER::Train -/// \param im -/// \param first -/// -void DAT_TRACKER::Train(const cv::Mat &/*im*/, bool /*first*/) -{ - -} - -/// -/// \brief DAT_TRACKER::getNMSRects -/// \param prob_map -/// \param obj_sz -/// \param scale -/// \param overlap -/// \param score_frac -/// \param dist_map -/// \param include_inner -/// \param top_rects -/// \param top_vote_scores -/// \param top_dist_scores -/// -void 
DAT_TRACKER::getNMSRects(cv::Mat prob_map, cv::Size obj_sz, double scale, - double overlap, double score_frac, cv::Mat dist_map, bool include_inner, - std::vector &top_rects, std::vector &top_vote_scores, std::vector &top_dist_scores){ - int height = prob_map.rows; - int width = prob_map.cols; - cv::Size rect_sz(floor(obj_sz.width * scale), floor(obj_sz.height * scale)); - - int stepx = std::max(1, int(round(rect_sz.width * (1.0 - overlap)))); - int stepy = std::max(1, int(round(rect_sz.height * (1.0 - overlap)))); - - std::vector posx, posy; - for (int i = 0; i <= (width -1 - rect_sz.width); i += stepx) - { - posx.push_back(i); - } - for (int i = 0; i <= (height -1 - rect_sz.height); i += stepy) - { - posy.push_back(i); - } - cv::Mat xgv(posx); cv::Mat ygv(posy); cv::Mat x; cv::Mat y; - cv::repeat(xgv.reshape(1, 1), ygv.total(), 1, x); - cv::repeat(ygv.reshape(1, 1).t(), 1, xgv.total(), y); - - cv::Mat r = x + rect_sz.width;; - cv::Mat b = y + rect_sz.height; - r.setTo(width-1, r > (width-1)); - b.setTo(height-1, b > (height-1)); - - std::vector boxes; - int n = x.rows*x.cols; - int *p_x = x.ptr(0); - int *p_y = y.ptr(0); - int *p_r = r.ptr(0); - int *p_b = b.ptr(0); - for (int i = 0; i < n; ++i) - boxes.push_back(cv::Rect(p_x[i], p_y[i], p_r[i] - p_x[i], p_b[i] - p_y[i])); - - std::vector boxes_inner; - int o_x = round(std::max(1.0, rect_sz.width*0.2)); - int o_y = round(std::max(1.0, rect_sz.height*0.2)); - if (include_inner) { - for (int i = 0; i < n; ++i) - boxes_inner.push_back(cv::Rect(p_x[i] + o_x, p_y[i] + o_y, p_r[i] - p_x[i] - 2 * o_x, p_b[i] - p_y[i] - 2 * o_y)); - } - - // Linear indices - cv::Mat l = x; - cv::Mat t = y; - std::vectorbl, br, tl, tr; - - int *p_l = l.ptr(0); - int *p_t = t.ptr(0); - for (int i = 0; i < n; ++i){ - bl.push_back(cv::Point(p_l[i], p_b[i])); - br.push_back(cv::Point(p_r[i], p_b[i])); - tl.push_back(cv::Point(p_l[i], p_t[i])); - tr.push_back(cv::Point(p_r[i], p_t[i])); - } - cv::Size rect_sz_inner; - std::vectorbl_inner, 
br_inner, tl_inner, tr_inner; - if (include_inner){ - rect_sz_inner.width = rect_sz.width - 2 * o_x; - rect_sz_inner.height = rect_sz.height - 2 *o_y; - - for (int i = 0; i < n; ++i){ - bl_inner.push_back(cv::Point(p_l[i]+o_x, p_b[i]-o_y)); - br_inner.push_back(cv::Point(p_r[i]-o_x, p_b[i]-o_y)); - tl_inner.push_back(cv::Point(p_l[i]+o_x, p_t[i]+o_y)); - tr_inner.push_back(cv::Point(p_r[i]-o_x, p_t[i]+o_y)); - } - } - - cv::Mat intProbMap; - cv::integral(prob_map, intProbMap); - cv::Mat intDistMap; - cv::integral(dist_map, intDistMap); - - std::vector v_scores(n, 0); - std::vector d_scores(n, 0); - for (size_t i = 0; i < bl.size(); ++i){ - v_scores[i] = intProbMap.at(br[i]) - intProbMap.at(bl[i]) - intProbMap.at(tr[i]) + intProbMap.at(tl[i]); - d_scores[i] = intDistMap.at(br[i]) - intDistMap.at(bl[i]) - intDistMap.at(tr[i]) + intDistMap.at(tl[i]); - } - std::vector scores_inner(n, 0); - if (include_inner){ - for (size_t i = 0; i < bl.size(); ++i){ - scores_inner[i] = intProbMap.at(br_inner[i]) - intProbMap.at(bl_inner[i]) - intProbMap.at(tr_inner[i]) + intProbMap.at(tl_inner[i]); - v_scores[i] = v_scores[i] / double(rect_sz.area()) + scores_inner[i] / double(rect_sz_inner.area()); - } - } - - top_rects.clear();; - top_vote_scores.clear(); - top_dist_scores.clear(); - int midx = max_element(v_scores.begin(), v_scores.end()) - v_scores.begin(); - double ms = v_scores[midx]; - - double best_score = ms; - - while (ms > score_frac * best_score){ - prob_map(boxes[midx]) = cv::Scalar(0.0); - top_rects.push_back(boxes[midx]); - top_vote_scores.push_back(v_scores[midx]); - top_dist_scores.push_back(d_scores[midx]); - boxes.erase(boxes.begin() + midx); - if (include_inner) - boxes_inner.erase(boxes_inner.begin() + midx); - - bl.erase(bl.begin() + midx); - br.erase(br.begin() + midx); - tl.erase(tl.begin() + midx); - tr.erase(tr.begin() + midx); - if (include_inner){ - bl_inner.erase(bl_inner.begin() + midx); - br_inner.erase(br_inner.begin() + midx); - 
tl_inner.erase(tl_inner.begin() + midx); - tr_inner.erase(tr_inner.begin() + midx); - } - - cv::integral(prob_map, intProbMap); - cv::integral(dist_map, intDistMap); - - v_scores.resize(bl.size(), 0); - d_scores.resize(bl.size(), 0); - for (size_t i = 0; i < bl.size(); ++i){ - v_scores[i] = intProbMap.at(br[i]) - intProbMap.at(bl[i]) - intProbMap.at(tr[i]) + intProbMap.at(tl[i]); - d_scores[i] = intDistMap.at(br[i]) - intDistMap.at(bl[i]) - intDistMap.at(tr[i]) + intDistMap.at(tl[i]); - } - scores_inner.resize(bl.size(), 0); - if (include_inner){ - for (size_t i = 0; i < bl.size(); ++i){ - scores_inner[i] = intProbMap.at(br_inner[i]) - intProbMap.at(bl_inner[i]) - intProbMap.at(tr_inner[i]) + intProbMap.at(tl_inner[i]); - v_scores[i] = v_scores[i] / (rect_sz.area()) + scores_inner[i] / (rect_sz_inner.area()); - } - } - midx = max_element(v_scores.begin(), v_scores.end()) - v_scores.begin(); - ms = v_scores[midx]; - } -} - -/// -/// \brief DAT_TRACKER::intersectionOverUnion -/// \param target_rect -/// \param candidates -/// \return -/// -double DAT_TRACKER::intersectionOverUnion(cv::Rect target_rect, cv::Rect candidates) { - return double((target_rect & candidates).area()) / double(target_rect.area() + candidates.area() - (target_rect & candidates).area()); -} - -/// -/// \brief DAT_TRACKER::getForegroundDistractorProbs -/// \param frame -/// \param obj_rect -/// \param distractors -/// \param num_bins -/// \return -/// -cv::Mat DAT_TRACKER::getForegroundDistractorProbs(cv::Mat frame, cv::Rect obj_rect, std::vector distractors, int num_bins) { - int imgCount = 1; - int dims = 3; - const int sizes[] = { num_bins, num_bins, num_bins }; - const int channels[] = { 0, 1, 2 }; - float rRange[] = { 0, 256 }; - float gRange[] = { 0, 256 }; - float bRange[] = { 0, 256 }; - const float *ranges[] = { rRange, gRange, bRange }; - - cv::Mat Md(frame.size(), CV_8UC1, cv::Scalar(0)); - cv::Mat Mo(frame.size(), CV_8UC1, cv::Scalar(0)); - for (size_t i = 0; i < distractors.size(); 
++i) { - Mo(distractors[i]) = true; - } - Mo(obj_rect) = true; - - cv::Mat obj_hist, distr_hist; - cv::calcHist(&frame, imgCount, channels, Md, distr_hist, dims, sizes, ranges); - cv::calcHist(&frame, imgCount, channels, Mo, obj_hist, dims, sizes, ranges); - cv::Mat prob_lut = (obj_hist*distractors.size() + 1) / (distr_hist + obj_hist*distractors.size() + 2); - return prob_lut; -} - -/// -/// \brief DAT_TRACKER::CalculateHann -/// \param sz -/// \return -/// -cv::Mat DAT_TRACKER::CalculateHann(cv::Size sz) { - cv::Mat temp1(cv::Size(sz.width, 1), CV_32FC1); - cv::Mat temp2(cv::Size(sz.height, 1), CV_32FC1); - float *p1 = temp1.ptr(0); - float *p2 = temp2.ptr(0); - for (int i = 0; i < sz.width; ++i) - p1[i] = 0.5*(1 - cos(CV_2PI*i / (sz.width - 1))); - for (int i = 0; i < sz.height; ++i) - p2[i] = 0.5*(1 - cos(CV_2PI*i / (sz.height - 1))); - return temp2.t()*temp1; -} - -/// -/// \brief DAT_TRACKER::getForegroundProb -/// \param frame -/// \param prob_lut -/// \param bin_mapping -/// \return -/// -cv::Mat DAT_TRACKER::getForegroundProb(cv::Mat frame, cv::Mat prob_lut, cv::Mat bin_mapping){ - cv::Mat frame_bin; - cv::Mat prob_map(frame.size(), CV_32FC1); - cv::LUT(frame, bin_mapping, frame_bin); - float *p_prob_map = prob_map.ptr(0); - cv::MatIterator_ it, end; - for (it = frame_bin.begin(), end = frame_bin.end(); it != end; ++it) - { - *p_prob_map++ = prob_lut.at((*it)[0], (*it)[1], (*it)[2]); - } - return prob_map; -} - -/// -/// \brief DAT_TRACKER::getSubwindowMasked -/// \param im -/// \param pos -/// \param sz -/// \param out -/// \param mask -/// -void DAT_TRACKER::getSubwindowMasked(cv::Mat im, cv::Point pos, cv::Size sz, cv::Mat &out, cv::Mat &mask){ - - int xs_1 = floor(pos.x) + 1 - floor(double(sz.width) / 2.); - //int xs_2 = floor(pos.x) + sz.width - floor(double(sz.width) / 2.); - int ys_1 = floor(pos.y) + 1 - floor(double(sz.height) / 2.); - //int ys_2 = floor(pos.y) + sz.height - floor(double(sz.height) / 2.); - - out = getSubwindow(im, pos, sz); - - 
cv::Rect bbox(xs_1, ys_1, sz.width, sz.height); - bbox = bbox&cv::Rect(0, 0, im.cols - 1, im.rows - 1); - bbox.x = bbox.x - xs_1; - bbox.y = bbox.y - ys_1; - mask = cv::Mat(sz, CV_8UC1,cv::Scalar(1)); - mask(bbox) = cv::Scalar(0.0); -} - -/// -/// \brief DAT_TRACKER::getMotionPrediction -/// \param values -/// \param maxNumFrames -/// \return -/// -cv::Point DAT_TRACKER::getMotionPrediction(std::vectorvalues, int maxNumFrames){ - cv::Point2f pred(0, 0); - if (values.size() < 3){ - pred.x = 0; pred.y = 0; - } - else { - maxNumFrames = maxNumFrames + 2; - double A1 = 0.8; - double A2 = -1; - - std::vector V; - for (size_t i = std::max(0, int(int(values.size()) - maxNumFrames)); i < values.size(); ++i) - V.push_back(values[i]); - - std::vector P; - for (size_t i = 2; i < V.size(); ++i){ - P.push_back(cv::Point2f(A1*(V[i].x - V[i - 2].x) + A2*(V[i - 1].x - V[i - 2].x), - A1*(V[i].y - V[i - 2].y) + A2*(V[i - 1].y - V[i - 2].y))); - } - for (size_t i = 0; i < P.size(); ++i){ - pred.x += P[i].x; - pred.y += P[i].y; - } - pred.x = pred.x / P.size(); - pred.y = pred.y / P.size(); - } - return pred; -} - -/// -/// \brief DAT_TRACKER::getForegroundBackgroundProbs -/// \param frame -/// \param obj_rect -/// \param num_bins -/// \param bin_mapping -/// \param prob_lut -/// \param prob_map -/// -void DAT_TRACKER::getForegroundBackgroundProbs(cv::Mat frame, cv::Rect obj_rect, int num_bins, cv::Mat bin_mapping, cv::Mat &prob_lut, cv::Mat &prob_map) { - int imgCount = 1; - const int channels[] = { 0, 1, 2 }; - cv::Mat mask = cv::Mat(); - int dims = 3; - const int sizes[] = { num_bins, num_bins, num_bins }; - float bRange[] = { 0, 256 }; - float gRange[] = { 0, 256 }; - float rRange[] = { 0, 256 }; - const float *ranges[] = { bRange, gRange, rRange }; - - cv::Mat surr_hist, obj_hist; - cv::calcHist(&frame, imgCount, channels, mask, surr_hist, dims, sizes, ranges); - - int obj_col = round(obj_rect.x); - int obj_row = round(obj_rect.y); - int obj_width = round(obj_rect.width); - int 
obj_height = round(obj_rect.height); - - if ((obj_col + obj_width) > (frame.cols - 1)) - obj_width = (frame.cols - 1) - obj_col; - if ((obj_row + obj_height) > (frame.rows-1)) - obj_height = (frame.rows-1) - obj_row; - - cv::Mat obj_win; - cv::Rect obj_region(std::max(0, obj_col), std::max(0, obj_row), - obj_col + obj_width + 1 - std::max(0, obj_col), obj_row + obj_height + 1 - std::max(0, obj_row)); - obj_win = frame(obj_region); - cv::calcHist(&obj_win, imgCount, channels, mask, obj_hist, dims, sizes, ranges); - prob_lut = (obj_hist + 1.) / (surr_hist + 2.); - - prob_map = cv::Mat(frame.size(), CV_32FC1); - cv::Mat frame_bin; - cv::LUT(frame, bin_mapping, frame_bin); - - float *p_prob_map = prob_map.ptr(0); - cv::MatIterator_ it, end; - for (it = frame_bin.begin(), end = frame_bin.end(); it != end; ++it) - { - *p_prob_map++ = prob_lut.at((*it)[0], (*it)[1], (*it)[2]); - } -} - -/// -/// \brief DAT_TRACKER::getForegroundBackgroundProbs -/// \param frame -/// \param obj_rect -/// \param num_bins -/// \param prob_lut -/// -void DAT_TRACKER::getForegroundBackgroundProbs(cv::Mat frame, cv::Rect obj_rect, int num_bins, cv::Mat &prob_lut) { - int imgCount = 1; - const int channels[] = { 0, 1, 2 }; - cv::Mat mask = cv::Mat(); - int dims = 3; - const int sizes[] = { num_bins, num_bins, num_bins }; - float bRange[] = { 0, 256 }; - float gRange[] = { 0, 256 }; - float rRange[] = { 0, 256 }; - const float *ranges[] = { bRange, gRange, rRange }; - - cv::Mat surr_hist, obj_hist; - cv::calcHist(&frame, imgCount, channels, mask, surr_hist, dims, sizes, ranges); - - int obj_col = round(obj_rect.x); - int obj_row = round(obj_rect.y); - int obj_width = round(obj_rect.width); - int obj_height = round(obj_rect.height); - - if ((obj_col + obj_width) > (frame.cols - 1)) - obj_width = (frame.cols - 1) - obj_col; - if ((obj_row + obj_height) > (frame.rows - 1)) - obj_height = (frame.rows - 1) - obj_row; - - cv::Mat obj_win; - frame(cv::Rect(std::max(0, obj_col), std::max(0, obj_row), 
obj_width + 1, obj_height + 1)).copyTo(obj_win); - cv::calcHist(&obj_win, imgCount, channels, mask, obj_hist, dims, sizes, ranges); - prob_lut = (obj_hist + 1) / (surr_hist + 2); -} - -/// -/// \brief DAT_TRACKER::getAdaptiveThreshold -/// \param prob_map -/// \param obj_coords -/// \return -/// -double DAT_TRACKER::getAdaptiveThreshold(cv::Mat prob_map, cv::Rect obj_coords){ - obj_coords.width++; obj_coords.width = std::min(prob_map.cols - obj_coords.x, obj_coords.width); - obj_coords.height++; obj_coords.height = std::min(prob_map.rows - obj_coords.y, obj_coords.height); - cv::Mat obj_prob_map = prob_map(obj_coords); - int bins = 21; - float range[] = { -0.025, 1.025 }; - const float* histRange = { range }; - bool uniform = true; bool accumulate = false; - - cv::Mat H_obj, H_dist; - /// Compute the histograms: - cv::calcHist(&obj_prob_map, 1, 0, cv::Mat(), H_obj, 1, &bins, &histRange, uniform, accumulate); - - H_obj = H_obj / cv::sum(H_obj)[0]; - cv::Mat cum_H_obj = H_obj.clone(); - for (int i = 1; i < cum_H_obj.rows; ++i) - cum_H_obj.at(i, 0) += cum_H_obj.at(i-1, 0); - - cv::calcHist(&prob_map, 1, 0, cv::Mat(), H_dist, 1, &bins, &histRange, uniform, accumulate); - H_dist = H_dist - H_obj; - H_dist = H_dist / cv::sum(H_dist)[0]; - cv::Mat cum_H_dist = H_dist.clone(); - for (int i = 1; i < cum_H_dist.rows; ++i) - cum_H_dist.at(i, 0) += cum_H_dist.at(i - 1, 0); - - cv::Mat k(cum_H_obj.size(), cum_H_obj.type(), cv::Scalar(0.0)); - for (int i = 0; i < (k.rows-1); ++i) - k.at(i, 0) = cum_H_obj.at(i + 1, 0) - cum_H_obj.at(i, 0); - cv::Mat cum_H_obj_lt = (cum_H_obj < (1 - cum_H_dist)); - cum_H_obj_lt.convertTo(cum_H_obj_lt, CV_32FC1, 1.0/255); - cv::Mat x = abs(cum_H_obj - (1 - cum_H_dist)) + cum_H_obj_lt + (1 - k); - float xmin = 100; - int min_index = 0; - for (int i = 0; i < x.rows; ++i) { - if (xmin > x.at(i, 0)) - { - xmin = x.at(i, 0); - min_index = i; - } - } - //Final threshold result should lie between 0.4 and 0.7 to be not too restrictive - double threshold = 
std::max(.4, std::min(.7, cfg.adapt_thresh_prob_bins[min_index])); - return threshold; -} - -/// -/// \brief DAT_TRACKER::pos2rect -/// \param obj_center -/// \param obj_size -/// \param win_size -/// \return -/// -cv::Rect DAT_TRACKER::pos2rect(cv::Point obj_center, cv::Size obj_size, cv::Size win_size){ - cv::Rect rect(round(obj_center.x - obj_size.width / 2), round(obj_center.y - obj_size.height / 2), obj_size.width, obj_size.height); - cv::Rect border(0, 0, win_size.width - 1, win_size.height - 1); - return rect&border; -} - -/// -/// \brief DAT_TRACKER::pos2rect -/// \param obj_center -/// \param obj_size -/// \return -/// -cv::Rect DAT_TRACKER::pos2rect(cv::Point obj_center, cv::Size obj_size){ - cv::Rect rect(round(obj_center.x - obj_size.width / 2), round(obj_center.y - obj_size.height / 2), obj_size.width, obj_size.height); - return rect; -} - -/// -/// \brief DAT_TRACKER::default_parameters_dat -/// \param cfg -/// \return -/// -dat_cfg DAT_TRACKER::default_parameters_dat(dat_cfg cfg){ - for (double i = 0; i <= 20; i++) - cfg.adapt_thresh_prob_bins.push_back(i*0.05); - - cv::Mat lookUpTable(1, 256, CV_8U); - uchar* p = lookUpTable.data; - for (int i = 0; i < 256; ++i) - p[i] = uchar(i / (256 / cfg.num_bins)); - cfg.bin_mapping = lookUpTable; - return cfg; -} - -/// -/// \brief DAT_TRACKER::getSubwindow -/// \param frame -/// \param centerCoor -/// \param sz -/// \return -/// -cv::Mat DAT_TRACKER::getSubwindow(const cv::Mat &frame, cv::Point centerCoor, cv::Size sz) { - cv::Mat subWindow; - cv::Point lefttop(std::min(frame.cols - 1, std::max(-sz.width + 1, centerCoor.x - cvFloor(float(sz.width) / 2.0) + 1)), - std::min(frame.rows - 1, std::max(-sz.height + 1, centerCoor.y - cvFloor(float(sz.height) / 2.0) + 1))); - cv::Point rightbottom(lefttop.x + sz.width - 1, lefttop.y + sz.height - 1); - - cv::Rect border(-std::min(lefttop.x, 0), -std::min(lefttop.y, 0), - std::max(rightbottom.x - frame.cols + 1, 0), std::max(rightbottom.y - frame.rows + 1, 0)); - 
cv::Point lefttopLimit(std::max(lefttop.x, 0), std::max(lefttop.y, 0)); - cv::Point rightbottomLimit(std::min(rightbottom.x, frame.cols - 1), std::min(rightbottom.y, frame.rows - 1)); - - rightbottomLimit.x += 1; - rightbottomLimit.y += 1; - cv::Rect roiRect(lefttopLimit, rightbottomLimit); - - frame(roiRect).copyTo(subWindow); - - if (border != cv::Rect(0, 0, 0, 0)) - cv::copyMakeBorder(subWindow, subWindow, border.y, border.height, border.x, border.width, cv::BORDER_REPLICATE); - return subWindow; -} diff --git a/src/Tracker/dat/dat_tracker.hpp b/src/Tracker/dat/dat_tracker.hpp deleted file mode 100644 index 7b7e9ad38..000000000 --- a/src/Tracker/dat/dat_tracker.hpp +++ /dev/null @@ -1,92 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "../VOTTracker.hpp" - -/// -/// \brief The dat_cfg struct -/// -struct dat_cfg -{ - bool show_figures = false; - int img_scale_target_diagonal = 75; - double search_win_padding = 2; - double surr_win_factor = 1.9; - int color_space = 1; //1rgb 2lab 3hsv 4gray - int num_bins = 16; - cv::Mat bin_mapping; //getBinMapping(cfg.num_bins); - double prob_lut_update_rate = 0.05; - bool distractor_aware = true; - std::vector adapt_thresh_prob_bins; // 0:0.05 : 1; - int motion_estimation_history_size = 5; - - int nms_scale = 1; - double nms_overlap = 0.9; - double nms_score_factor = 0.5; - bool nms_include_center_vote = true; -}; - -/// -/// \brief The DAT_TRACKER class -/// -class DAT_TRACKER : public VOTTracker -{ -public: - DAT_TRACKER(); - ~DAT_TRACKER(); - - void Initialize(const cv::Mat &im, cv::Rect region); - cv::RotatedRect Update(const cv::Mat &im, float& confidence); - void Train(const cv::Mat &im, bool first); - -protected: - void getNMSRects(cv::Mat prob_map, cv::Size obj_sz, double scale, - double overlap, double score_frac, cv::Mat dist_map, bool include_inner, - std::vector &top_rects, std::vector &top_vote_scores, std::vector &top_dist_scores); - 
- void getForegroundBackgroundProbs(cv::Mat frame, cv::Rect obj_rect, int num_bins, cv::Mat bin_mapping, cv::Mat &prob_lut, cv::Mat &prob_map); - - void getForegroundBackgroundProbs(cv::Mat frame, cv::Rect obj_rect, int num_bins, cv::Mat &prob_lut); - - cv::Mat getForegroundDistractorProbs(cv::Mat frame, cv::Rect obj_rect, std::vector distractors, int num_bins); - - double getAdaptiveThreshold(cv::Mat prob_map, cv::Rect obj_rect_surr); - - cv::Mat getForegroundProb(cv::Mat frame, cv::Mat prob_lut, cv::Mat bin_mapping); - - cv::Mat CalculateHann(cv::Size sz); - - double intersectionOverUnion(cv::Rect target_rect, cv::Rect candidates); - - void getSubwindowMasked(cv::Mat im, cv::Point pos, cv::Size sz, cv::Mat &out, cv::Mat &mask); - - cv::Point getMotionPrediction(std::vectorvalues, int maxNumFrames); - - cv::Rect pos2rect(cv::Point obj_center, cv::Size obj_size, cv::Size win_size); - - cv::Rect pos2rect(cv::Point obj_center, cv::Size obj_size); - - cv::Mat getSubwindow(const cv::Mat &frame, cv::Point centerCoor, cv::Size sz); - - dat_cfg default_parameters_dat(dat_cfg cfg); - -private: - dat_cfg cfg; - double scale_factor_; - cv::Mat prob_lut_; - cv::Mat prob_lut_distractor_; - cv::Mat prob_lut_masked_; - double adaptive_threshold_; - std::vectortarget_pos_history_; - std::vectortarget_sz_history_; -}; diff --git a/src/Tracker/graph/GTL/include/GTL/GTL.h b/src/Tracker/graph/GTL/include/GTL/GTL.h deleted file mode 100644 index 5bd2fc9a9..000000000 --- a/src/Tracker/graph/GTL/include/GTL/GTL.h +++ /dev/null @@ -1,143 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// GTL.h - Internal header: DO NO USE IT DIRECTLY !!! 
-// -//========================================================================== -// $Id: GTL.h,v 1.29 2008/02/03 18:17:08 chris Exp $ - -#ifndef GTL_GTL_H -#define GTL_GTL_H - -#include - -//-------------------------------------------------------------------------- -// Generic iteration over container elements -//-------------------------------------------------------------------------- -// -// elem: loop variable -// cont: container to iterate over -// iter_t: iterator type -// iter: prefix for begin() and end() -// -// contains a hack for Microsoft Visual C++ 5.0, because code like -// -// for(int i=0; i<10; ++i) { ... do something ... } -// for(int i=0; i<10; ++i) { ... do something again ... } -// -// is illegal with Microsoft Extensions enabled, but without Microsoft -// Extensions, the Microsoft STL does not work :-(. -// So we code the line number (__LINE__) into our loop variables. - -#define GTL_CONCAT(x, y) x##y -#define GTL_FORALL_VAR(y) GTL_CONCAT(GTL_FORALL_VAR, y) - -#define GTL_FORALL(elem, cont, iter_t, iter) \ -if ((cont).iter##begin() != (cont).iter##end()) \ - (elem) = *((cont).iter##begin()); \ -for (iter_t GTL_FORALL_VAR(__LINE__) = (cont).iter##begin(); \ - GTL_FORALL_VAR(__LINE__) != (cont).iter##end(); \ - (elem) = (++GTL_FORALL_VAR(__LINE__)) == \ - (cont).iter##end() ? (elem) : *GTL_FORALL_VAR(__LINE__)) - -//-------------------------------------------------------------------------- -// Configuration for GCC >= 2.8.0 -//-------------------------------------------------------------------------- - -// -// Using namespaces is the default; may be unset by one of the -// following configurations. -// - -#define __GTL_USE_NAMESPACES - -#ifdef __GNUC__ - -# define __GTL_GCC - -# if __GNUC__ == 2 && __GNUC_MINOR__ >= 8 - -# undef __GTL_USE_NAMESPACES - -# elif __GNUC__ < 3 - -# error "Need at least version 2.8.0 of GCC to compile GTL." 
- -# endif - -// -// 2/3/2008 chris: -// -// Enable comparison of iterators in debug mode -// - -# if __GNUC__ >= 4 -# undef _GLIBCXX_DEBUG -# endif -#endif - -//-------------------------------------------------------------------------- -// Configuration for Microsoft Visual C++ 5.0 -//-------------------------------------------------------------------------- - -#ifdef _MSC_VER -/* -# if _MSC_VER >= 1400 // Visual Studio 2005 - -# define _HAS_ITERATOR_DEBUGGING 0 -# define _CRT_SECURE_NO_DEPRECATE 1 -# define _SECURE_SCL 0 - -# endif -*/ -# if _MSC_VER >= 1100 - -# define __GTL_USE_NAMESPACES -# define __GTL_MSVCC - -# pragma warning( disable : 4786 ) -# pragma warning( disable : 4251 ) - -# define GTL_EXTERN -# else - -# error "Need at least version 5.0 of MS Visual C++ to compile GTL." - -# endif -#else - -# define GTL_EXTERN - -#endif - -//-------------------------------------------------------------------------- -// Namespaces -//-------------------------------------------------------------------------- - -#ifdef __GTL_USE_NAMESPACES - -# define __GTL_BEGIN_NAMESPACE namespace GTL { -# define __GTL_END_NAMESPACE } - -#else - -# define __GTL_BEGIN_NAMESPACE -# define __GTL_END_NAMESPACE - -#endif - -//-------------------------------------------------------------------------- -// Temporary hack until Graphlet (i.e. 
gcc) supports Namespaces -//-------------------------------------------------------------------------- - -#ifdef __GTL_USE_NAMESPACES - -namespace GTL {} - -#endif // __GTL_USE_NAMESPACES - -#endif // GTL_GTL_H - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/include/GTL/algorithm.h b/src/Tracker/graph/GTL/include/GTL/algorithm.h deleted file mode 100644 index f38fb9e5c..000000000 --- a/src/Tracker/graph/GTL/include/GTL/algorithm.h +++ /dev/null @@ -1,94 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// algorithm.h -// -//========================================================================== -// $Id: algorithm.h,v 1.14 2003/03/24 15:58:54 raitner Exp $ - -#ifndef GTL_ALGORITHM_H -#define GTL_ALGORITHM_H - -#include -#include - -__GTL_BEGIN_NAMESPACE - -/** - * $Date: 2003/03/24 15:58:54 $ - * $Revision: 1.14 $ - * - * @brief Abstract baseclass for all algoritm-classes. - */ -class GTL_EXTERN algorithm { -public: - /** - * @var algorithm::GTL_OK - * Used as (positive) return value of algorithm::check and - * algorithm::run. - */ - - /** - * @var algorithm::GTL_ERROR - * Used as (negative) return value of algorithm::check and - * algorithm::run. - */ - - /** - * @brief Return values for algorithm::check and algorithm::run - */ - enum { - GTL_OK = 1, - GTL_ERROR = 0 - }; - - /** - * @brief Creates an algorithm object. - */ - algorithm () { }; - - /** - * @brief Destroys the algorithm object. - */ - virtual ~algorithm () { }; - - /** - * @brief Applies %algorithm to %graph g. 
- * - * @param g %graph - * @retval algorithm::GTL_OK on success - * @retval algorithm::GTL_ERROR otherwise - */ - virtual int run (GTL::graph& g) = 0; - - /** - * @brief Checks whether all preconditions are satisfied. - * - * @em Please @em note: It is - * definitly required (and #run relies on it), - * that this method was called in advance. - * - * @param g %graph - * @retval algorithm::GTL_OK if %algorithm can be applied - * @retval algorithm::GTL_ERROR otherwise. - */ - virtual int check (GTL::graph& g) = 0; - - /** - * @brief Resets %algorithm - * - * Prepares the %algorithm to be applied to - * another %graph. @em Please @em note: The options an - * %algorithm may support do @em not get reset by - * this. It is just to reset internally used datastructures. - */ - virtual void reset () = 0; -}; - -__GTL_END_NAMESPACE - -#endif // GTL_ALGORITHM_H - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/include/GTL/bellman_ford.h b/src/Tracker/graph/GTL/include/GTL/bellman_ford.h deleted file mode 100644 index a6cd6c915..000000000 --- a/src/Tracker/graph/GTL/include/GTL/bellman_ford.h +++ /dev/null @@ -1,237 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// bellman_ford.h -// -//========================================================================== -// $Id: bellman_ford.h,v 1.5 2003/03/24 15:58:54 raitner Exp $ - -#ifndef GTL_BELLMAN_FORD_H -#define GTL_BELLMAN_FORD_H - -#include -#include -#include -#include - -__GTL_BEGIN_NAMESPACE - - -/** - * $Date: 2003/03/24 15:58:54 $ - * $Revision: 1.5 $ - * - * @brief Bellman Ford %algorithm. - * - * Implementation of the single source shortest path due to - * Bellman and Ford. 
Unlike Dijkstra's SSSP %algorithm this one - * allows negative edge weights, as long as there are no cycles - * with negative weight. If there are negative cycles this - * implementation finds them. - */ - -class GTL_EXTERN bellman_ford : public algorithm -{ -public: - - /** - * @brief Constructor. - */ - bellman_ford(); - - /** - * @brief Destructor. - */ - virtual ~bellman_ford(); - - /** - * @brief Checks whether the preconditions for Bellman Ford - * are satisfied. - * - * The Precondition are that the weights of the edges - * have been set and that the graph has at least one node. - * - * @param G graph. - * @retval algorithm::GTL_OK if %algorithm can be applied - * @retval algorithm::GTL_ERROR otherwise. - */ - int check (GTL::graph& G); - - int run (GTL::graph& G); - - /** - * @brief Resets the algorithm. - * - * The weights are not reset. You can apply this algorithms - * twice without setting the weights for the second call. - */ - void reset (); - - /** - * @brief Sets source. - * - * The default source is the invalid %node (GTL::node::node()), - * in this case an arbitrary %node is chosen and stored when - * this algorithm is run. - * - * @param n source. - */ - void source (const node& n) {s = n;} - - /** - * @brief Returns source. - * - * @return source. - */ - node source () const {return s;} - - /** - * @brief Sets weights of the edges. - * - * This method @b must be called before run. - * - * @param w weights of the edges. - */ - void weights (const edge_map& weight) {w = weight; vars_set = true; } - - /** - * @brief Enables or disables the storing of predecessors. - * - * If enabled for every %node the predecessor on the shortest - * path from will be stored. - * - * @param set if true predecessors will be stored. - * @sa bellman_ford::predecessor_node, - * bellman_ford::predecessor_edge - */ - void store_preds (bool set); - - /** - * @brief Returns whether the storing of predecessors is enabled. 
- * - * @retval true iff the storing of predecessors is enabled. - * - * @sa bellman_ford::predecessor_node, - * bellman_ford::predecessor_edge - */ - bool store_preds () const {return preds != 0;} - - /** - * @brief Returns whether is reachable from source. - * - * @param n node - */ - bool reached (const node& n) const {return !inf[n];} - - /** - * @brief Returns the distance from source to @a n - * - * @param n node - */ - double distance (const node& n) const {return d[n];} - - /** - * @brief edge to predecessor of %node @a n on the shortest - * path from source - * - * If @a n is a root or wasn't reached the return value is - * the invalid %edge edge::edge(). - * - * @em Please @em note that this requires that this option - * was enabled during last run. - * - * @param n node. - * @return predecessor of @a n. - * @sa bellman_ford::store_preds - */ - edge predecessor_edge (const node& n) const - {assert (preds); return (*preds)[n];} - - /** - * @brief predecessor of %node @a n on the shortest - * path from source - * - * If @a n is a root or wasn't reached the return value is - * the invalid %node node::node(). - * - * @em Please @em note that this requires that this option - * was enabled during last run. - * - * @param n node. - * @return predecessor of @a n. - * @sa bellman_ford::store_preds - */ - node predecessor_node (const node& n) const - {edge e = predecessor_edge(n); return e == edge() ? node() : e.opposite(n); } - - /** - * @brief Returns whether there is a cycle with negative - * weight. - */ - bool negative_cycle() const - {return cycle;} - -private: - - - /** - * @brief Main method for Bellman Ford - * - * @param e edge to be relaxed - */ - void relax (const edge& e, bool dir); - - /** - * @brief Stores source. - * - * @sa bellman_ford::source. - */ - node s; - - /** - * @brief Stores the weights of the edges. - * - * @sa bellman_ford::weights. - */ - edge_map w; - - /** - * @brief Indicates whether weights were set. 
- * - * @sa bellman_ford::weights. - */ - bool vars_set = false; - - /** - * @brief distance from source s. - * - * @sa bellman_ford::distance. - */ - node_map d; - - /** - * @brief Indicates whether the node has distance infinity - * - * @sa bellman_ford::distance. - */ - node_map inf; - - /** - * @brief Stores father of each %node (if enabled) - * - * @sa bellman_ford::store_preds - */ - node_map* preds; - - /** - * @brief Indicates whether there is a cycle with negative - * weight - * - * @sa bellman_ford::negative_cycle. - */ - bool cycle = 0; -}; - -__GTL_END_NAMESPACE - -#endif // GTL_BELLMAN_FORD_H diff --git a/src/Tracker/graph/GTL/include/GTL/bfs.h b/src/Tracker/graph/GTL/include/GTL/bfs.h deleted file mode 100644 index ea61f031f..000000000 --- a/src/Tracker/graph/GTL/include/GTL/bfs.h +++ /dev/null @@ -1,585 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// bfs.h -// -//========================================================================== -// $Id: bfs.h,v 1.14 2003/03/24 15:58:54 raitner Exp $ - -#ifndef GTL_BFS_H -#define GTL_BFS_H - -#include -#include -#include - -#include - -__GTL_BEGIN_NAMESPACE - -/** - * $Date: 2003/03/24 15:58:54 $ - * $Revision: 1.14 $ - * - * @brief Breadth-First-Search (BFS) %algorithm. - * - * Encapsulates the BFS %algorithm together with all data - * produced by it. There are a few parameters, which on the one - * hand influence the behaviour of BFS (e.g. bfs::start_node) and - * on the other hand toggle the storing of extra information, - * such as the level-number of each %node. In detail these are: - * - bfs::start_node - * (default: an arbitrary %node will be chosen) - * - bfs::scan_whole_graph states whether BFS will be - * continued in the unused part of the %graph, if not all - * nodes were touched at the end of BFS started at the start-%node. 
- * (default: disabled) - * - bfs::calc_level toggle storing of level-numbers for each - * %node, i.e. its distance from the start-%node. - * (default: disabled) - * - bfs::store_preds toggle storing the predecessor of each - * %node, i.e. the father in the BFS-tree. (default: disabled) - * - bfs::store_non_tree_edges toggle storing of all non_tree_edges - * (tree_edges are always stored) in a list and thus enable or disable - * iteration through all non_tree_edges. - * (default: disabled) - * - * @em Please @em note that the %algorithm always starts with the - * given start-%node (if none was given, the first %node is chosen - * and stored, thus after BFS the root of the tree is always - * accesible via bfs::start_node) and continues until no more - * unused nodes are reachable from already used ones. Thus if the - * %graph isn't connected not @em all nodes will be reached. If - * bfs::scan_whole_graph isn't set the BFS stops here. If it is - * set, the BFS will be continued with the next unused %node and - * so on until all nodes were used. - * - * For further customization a few virtual functions, so called - * handler, are called at crucial stages of the %algorithm. In - * this basic implementation all of these handler are empty. So - * if one wants to add only a few lines of code (e.g. some new - * numbering) he is likely to take this class as base-class and - * override the handler where neccessary. In detail these are - * (please look at the source code to see where they are called): - * - bfs::init_handler - * - bfs::end_handler - * - bfs::popped_node_handler - * - bfs::finished_node_handler - * - bfs::unused_node_handler - * - bfs::used_node_handler - * - bfs::new_start_handler - * - * @em Please @em note: We do @em not claim that the set of - * handlers provided is sufficient in any way. So if you believe - * that some new handler is needed urgently please let us know. - * - * There is a lot of information stored during BFS (e.g. 
nodes in - * bfs-order, list of non-tree edges). Some of it can be obtained directly - * by using the corresponding member-function (e.g. bfs::bfs_num), - * but all information that can be thought of as a list (e.g. nodes in - * bfs-order) can be accessed through iterators. In detail these are (of - * course depending on what options are chosen!): - * - bfs::bfs_iterator - * - bfs::tree_edges_iterator - * - bfs::non_tree_edges_iterator - * - bfs::roots_iterator - */ -class GTL_EXTERN bfs : public algorithm -{ -public: - - /** - * @brief Constructor. - */ - bfs (); - - /** - * @brief Destructor. - */ - virtual ~bfs (); - - int run (GTL::graph& G); - - /** - * @brief Checks whether the preconditions for BFS are satisfied. - * - * Currently there aren't any restricitions for the BFS %algorithm. - * - * @param G graph. - * @retval algorithm::GTL_OK if %algorithm can be applied - * @retval algorithm::GTL_ERROR otherwise. - */ - virtual int check (GTL::graph& /*G*/) { return GTL_OK; } - - virtual void reset (); - - //----------------------------------------------------------------------- - // Parameters - //----------------------------------------------------------------------- - - /** - * @brief Sets start-%node for BFS. - * - * The default start-%node is the invalid %node (GTL::node::node()), - * in this case an arbitrary %node is chosen and stored when - * BFS is run. - * - * @param n start-%node. - */ - void start_node (const node& n) {start = n;} - - /** - * @brief Returns start-%node for BFS. - * - * @return start-%node. - */ - node start_node () const {return start;} - - /** - * @brief Enables or disables scanning of the whole %graph. - * - * If enabled and the BFS started at the given start-%node - * stops without having touched all nodes, it will be - * continued with the next unused %node, and so on until all - * nodes were used. This makes sure that for every %node - * bfs::bfs_num is defined. 
- * - * If this feature is disabled, you are able to check what - * nodes can be reached, when starting a BFS at the - * start-%node, because for those not reached bfs::bfs_num - * will be 0. - * - * @param set if true enable scanning the whole %graph. - * @sa bfs::roots_begin, bfs::roots_end - */ - void scan_whole_graph (bool set) {whole_graph = set;} - - /** - * @brief Returns whether the whole graph will be scanned. - * - * @retval true iff the whole graph will be scanned. - * @sa bfs::roots_begin, bfs::roots_end - */ - bool scan_whole_graph () const {return whole_graph;} - - /** - * @brief Enables or disables the calculation of level-numbers for each - * %node. - * - * If enabled each %node gets a level-number, i.e. its - * distance from the start-%node. - * - * @param set if true level-number will be calculated. - * @sa bfs::level - */ - void calc_level (bool set); - - /** - * @brief Returns whether level-numbers will be calculated. - * - * @retval true iff level-numbers will be calculated. - * @sa bfs::level - */ - bool calc_level () const {return level_number != 0;} - - /** - * @brief Enables or disables the storing of non-tree-edges. - * - * If enabled all non-tree-edges will be stored in - * the order they occured. - * - * @param set if true non-tree-edges will be stored. - * @sa bfs::non_tree_edges_begin, bfs::non_tree_edges_end - */ - void store_non_tree_edges (bool set); - - /** - * @brief Returns whether the storing of non-tree-edges is - * enabled. - * - * @retval true iff the storing of non-tree-edges is enabled. - * @sa bfs::non_tree_edges_begin, bfs::non_tree_edges_end - */ - bool store_non_tree_edges () const {return non_tree != 0;} - - - /** - * @brief Enables or disables the storing of predecessors. - * - * If enabled for every %node the predecessor in the BFS-forest - * will be stored. - * - * @param set if true predecessors will be stored. 
- * @sa bfs::father - */ - void store_preds (bool set); - - /** - * @brief Returns whether the storing of predecessors is enabled. - * - * @retval true iff the storing of predecessors is enabled. - * @sa bfs::father - */ - bool store_preds () const {return preds != 0;} - - /** - * @brief Checks whether %node @a n was reached in BFS. - * - * @param n %node. - * @retval true iff @a n was reached. - */ - bool reached (const node& n) const - {return bfs_number[n] != 0;} - - /** - * @brief BFS-number of @a n. - * - * @em Please @em note that BFS-number 0 means that this %node wasn't - * reached. - * - * @param n %node. - * @return BFS-number of @a n. - */ - int bfs_num (const node& n) const - {return bfs_number[n];} - - /** - * @brief BFS-number of @a n. - * - * @em Please @em note that BFS-number 0 means that this %node wasn't - * reached. - * - * @param n %node. - * @return BFS-number of @a n. - */ - int operator[] (const node& n) const - {return bfs_number[n];} - - /** - * @brief Level-number of %node @a n. - * - * @em Please @em note that this requires that this option - * was enabled during last run. - * - * @param n node. - * @return level-number of @a n. - * @sa bfs::calc_level - */ - int level (const node& n) const - {assert (level_number); return (*level_number)[n];} - - /** - * @brief Father of %node @a n in BFS-forest. - * - * If @a n is a root in the forest or wasn't reached the - * return value is the invalid %node node::node(). - * - * @em Please @em note that this requires that this option - * was enabled during last run. - * - * @param n node. - * @return Father of @a n. - * @sa bfs::store_preds - */ - node father (const node& n) const - {assert (preds); return (*preds)[n];} - - /** - * @brief Iterator for tree-edges. - */ - typedef edges_t::const_iterator tree_edges_iterator; - - /** - * @brief Iterate through all tree-edges of last BFS. - * - * @em Please @em note that this edges not always form a - * tree. 
In case the %graph is not (strongly) connected and - * the whole graph was scanned, they form a forest. - * - * @return Start for iteration through all tree-edges. - */ - tree_edges_iterator tree_edges_begin () const - {return tree.begin();} - - /** - * @brief End-iterator for iteration through all tree-edges - * picked of last BFS. - * - * @return End for iteration through all tree-edges. - */ - tree_edges_iterator tree_edges_end () const - {return tree.end();} - - /** - * @brief Iterator for nodes in BFS-order. - */ - typedef nodes_t::const_iterator bfs_iterator; - - /** - * @brief Iterate through all (reached) nodes in BFS-Order. - * - * @return Start for iteration through all nodes in BFS-order. - */ - bfs_iterator begin () const - {return bfs_order.begin();} - - /** - * @brief End-iterator for iteration through all (reached) - * nodes in BFS-Order. - * - * @return End for iteration through all (reached) nodes - */ - bfs_iterator end () const - {return bfs_order.end();} - - /** - * @brief Iterator for non-tree-edges. - */ - typedef edges_t::const_iterator non_tree_edges_iterator; - - /** - * @brief Iterate through all non-tree-edges (if enabled). - * - * @return Start for iteration through all non-tree-edges. - * @sa bfs::store_non_tree_edges - */ - non_tree_edges_iterator non_tree_edges_begin () const - {assert (non_tree); return non_tree->begin(); } - - /** - * @brief End-iterator for iteration through all - * non-tree-edges (if enabled). - * - * @return End for iteration through all non-tree-edges. - * @sa bfs::store_non_tree_edges - */ - non_tree_edges_iterator non_tree_edges_end () const - {assert (non_tree); return non_tree->end(); } - - /** - * @brief Iterator for roots of trees in BFS-forest. - */ - typedef std::list::const_iterator roots_iterator; - - /** - * @brief Iterator pointing towards the first root in the - * BFS-forest. - * - * @em Please @em note that instead of pointing directly - * towards the %node (i.e. 
@c *it is of type @c node) - * the iterator points towards a bfs-iterator, which - * represents the root (i.e. @c *it is of type - * @c bfs_iterator). - * - * Using this technique makes it possible not only to obtain - * all the roots in the forest, but also the whole trees - * associated with each one. This can be achieved because a - * @c root_iterator specifies the exact position of the root - * in the BFS-ordering and by definition of BFS all the - * descendents of the root, i.e. the whole tree below, will - * come later in BFS, such that by incrementing the @c - * bfs_iterator a @c roots_iterator refers to, one can - * traverse the whole tree with this given root. - * - * Of course if the root isn't the last %node in the - * BFS-forest all following trees also will be traversed. But - * since the first %node of such a tree, that will be - * discovered, is its root, the successor of the @c - * roots_iterator can be used as end-iterator. - * - * @return Start for iteration through all roots in BFS-forest. - * @sa bfs::scan_whole_graph - */ - roots_iterator roots_begin () const - {return roots.begin();} - - /** - * @brief Iterator pointing to the end of all roots. - * - * @return End for iteration through all roots in BFS-forest. - * @sa bfs::scan_whole_graph - */ - roots_iterator roots_end () const - {return roots.end();} - - /** - * @brief Number of nodes reached in last BFS. - * - * @return Number of reached nodes. - * @sa bfs::scan_whole_graph - */ - int number_of_reached_nodes () const - {return reached_nodes;} - - //----------------------------------------------------------------------- - // Handler - //----------------------------------------------------------------------- - - /** - * @brief Called at the start of BFS. - * - * @param G %graph for which BFS was invoked. - */ - virtual void init_handler (GTL::graph& /*G*/) { }; - - /** - * @brief Called right before the end of BFS. - * - * @param G %graph for which BFS was invoked. 
- */ - virtual void end_handler (GTL::graph& /*G*/) { }; - - /** - * @brief Called after the %node @a n was taken out of the queue. - * - * @param G %graph for which BFS was invoked. - * @param n %node taken out of the queue. - */ - virtual void popped_node_handler (GTL::graph& /*G*/, GTL::node& /*n*/) { }; - - /** - * @brief Called when finished with the %node @a n. - - * A %node is finished after all its neighbors have been - * visited. - * - * @param G %graph for which BFS was invoked. - * @param n finished %node. - */ - virtual void finished_node_handler (GTL::graph& /*G*/, GTL::node& /*n*/) { }; - - /** - * @brief Called when an unused %node @a n was discovered. - * - * This means that the actual %node's @a f neighbor @a n was - * not previously discovered. - * - * @param G %graph for which BFS was invoked. - * @param n unused %node. - * @param f actual %node. - */ - virtual void unused_node_handler (GTL::graph& /*G*/, GTL::node& /*n*/, GTL::node& /*f*/) { }; - - /** - * @brief Called when an used %node @a n was found. - * - * This means that the actual %node's (@a f) neighbor @a n - * has already been discovered. - * - * @param G %graph for which BFS was invoked. - * @param n used %node. - * @param f actual %node. - */ - virtual void used_node_handler (GTL::graph& /*G*/, GTL::node& /*n*/, GTL::node& /*f*/) { }; - - /** - * @brief Called when BFS is started with start-%node - * @a n. - - * This is particularly useful when BFS was invoked with the - * @c scan_whole_graph option. - * - * @param G %graph for which BFS was invoked. - * @param n start-%node. 
- * @sa bfs::scan_whole_graph - */ - virtual void new_start_handler (GTL::graph& /*G*/, GTL::node& /*n*/) { }; - -private: - - void bfs_sub (GTL::graph&, const node&, GTL::edge_map*); - -protected: - - //----------------------------------------------------------------------- - // Data - //----------------------------------------------------------------------- - - /** - * @brief BFS number that will be assigned next. - */ - int act_bfs_num; - - /** - * @brief queue used in BFS. - */ - std::deque qu; - - /** - * @brief List of nodes in BFS-order - * - * @sa bfs::begin, bfs::end - */ - nodes_t bfs_order; - - /** - * @brief List of all edges of the BFS-tree - * - * @sa bfs::tree_edges_begin, bfs::tree_edges_end - */ - edges_t tree; - - /** - * @brief Stores BFS-number of nodes. - */ - node_map bfs_number; - - /** - * @brief Number of nodes reached so far. - */ - int reached_nodes; - - /** - * @brief List of all roots of the BFS-tree - * - * @sa bfs::roots_begin, bfs::roots_end - */ - std::list roots; - - //----------------------------------------------------------------------- - // Optional - //----------------------------------------------------------------------- - - /** - * @brief Stores whether whole %graph will be scanned. - * - * @sa bfs::scan_whole_graph - */ - bool whole_graph; - - /** - * @brief Stores start %node. 
- * - * @sa bfs:start_node - */ - node start; - - /** - * @brief Stores level number of each %node (if enabled) - * - * @sa bfs::calc_level - */ - node_map* level_number; - - /** - * @brief List of non-tree edges (if enabled) - * - * @sa bfs::store_non_tree_edges - */ - edges_t* non_tree; - - /** - * @brief Stores father of each %node (if enabled) - * - * @sa bfs::store_preds - */ - node_map* preds; -}; - -__GTL_END_NAMESPACE - -#endif // GTL_BFS_H - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/include/GTL/biconnectivity.h b/src/Tracker/graph/GTL/include/GTL/biconnectivity.h deleted file mode 100644 index fcb98b0a8..000000000 --- a/src/Tracker/graph/GTL/include/GTL/biconnectivity.h +++ /dev/null @@ -1,328 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// biconnectivity.h -// -//========================================================================== -// $Id: biconnectivity.h,v 1.18 2003/03/26 13:37:14 raitner Exp $ - -#ifndef GTL_BICONNECTIVITY_H -#define GTL_BICONNECTIVITY_H - -#include -#include - -#include -#include - -__GTL_BEGIN_NAMESPACE - -/** - * $Date: 2003/03/26 13:37:14 $ - * $Revision: 1.18 $ - * - * @brief Biconnectivity-test and low-numbers. - * - * Obviously there is a close relationship between DFS and the testing of - * biconnectivity. Thus this test takes advantage of the possibility to - * add pieces of code to the DFS-class in order to calculate the - * low-numbers. - * - * As default no biconnected components will be stored and no edges - * will be added to make the graph biconnected. The test will run on the - * whole graph, even if it is not connected. 
- */ - -class GTL_EXTERN biconnectivity : public dfs -{ -public: - /** - * @brief Creates biconnectivity algorithm object. - * - * @see dfs::dfs - */ - biconnectivity (); - - /** - * @brief Destroys biconnectivity algorithm object. - * - * @see dfs::~dfs - */ - virtual ~biconnectivity () {} - - /** - * @brief Checks whether the algorithm can be applied. - * - * Necessary preconditions: - * - G is undirected. - * - storing of predecessors is enabled. - * - DFS may be applied - * - * @param G graph. - * @return algorithm::GTL_OK if binconnectivity-test can - * be applied to @a G. - * @sa dfs::scan_whole_graph, dfs::store_preds - */ - virtual int check (GTL::graph& G); - - virtual void reset (); - - /** - * @brief low-number. - * - * @param n node. - * @return low-number of n. - */ - int low_number (const node& n) const - {return low_num[n];} - - /** - * @brief Biconnectivity-test. - * - * @return true iff graph is biconnected. - */ - bool is_biconnected () const - {return num_of_components == 1;} - - /** - * @brief Returns whether the storing of components is enabled. - * - * @return true iff storing of components is enabled. - * @sa biconnectivity::components_begin, biconnectivity::components_end - */ - bool store_components () const - { return store_comp; } - - /** - * @brief Enables or disables the storing of biconnected components. - * - * If this feature is enabled, the whole graph will be scanned - * in order to get all the biconnected components even if the graph - * isn't connected. By default this feature is disabled. - * - * @param set if true each biconnected component will be stored. - * @sa biconnectivity::components_begin, biconnectivity::components_end - */ - void store_components (bool set) - { store_comp = set; if (set) scan_whole_graph (set); } - - /** - * @brief If enabled edges will be added to the graph in order to make it - * biconnected, if cutpoints are discovered. 
- * - * The list of added edges can be accessed via additional_begin and - * additional_end. - * - * @param set if true additional edges will we inserted - * to make the graph biconnected. - * @sa biconnectivity::additional_begin, biconnectivity::additional_end - */ - void make_biconnected (bool set) - { add_edges = set; if (set) scan_whole_graph (set); } - - /** - * @brief Returns whether addition of edges neccessary to make graph - * biconnected is enabled. - * - * @return true iff addition edges is enabled. - * @sa biconnectivity::additional_begin, biconnectivity::additional_end - */ - bool make_biconnected () const - { return add_edges; } - - /** - * @brief Begin of edges added to make graph biconnected. - * - * @return begin of additional edges - * @sa biconnectivity::make_biconnected - */ - edges_t::iterator additional_begin() - { return additional.begin (); } - - /** - * @brief End of edges added to make graph biconnected - * - * @return end of additional edges - * @sa biconnectivity::make_biconnected - */ - edges_t::iterator additional_end() - { return additional.end (); } - - /** - * @internal - */ - typedef nodes_t::iterator cutpoint_iterator; - - /** - * @brief Start iteration over all cutpoints found. - * - * A cutpoints is a node whose removal will disconnect the graph, - * thus a graph with no cutpoints is biconnected and vice versa. - * - * @return iterator to first cutpoint. - * @sa biconnectivity::cut_points_end - */ - cutpoint_iterator cut_points_begin () - { return cut_points.begin(); } - - /** - * @brief End of iteration over all cutpoints. - * - * @return one-past-the-end iterator. - * @sa biconnectivity::cut_points_begin - */ - cutpoint_iterator cut_points_end () - { return cut_points.end(); } - - - /** - * @internal - */ - typedef std::list >::iterator component_iterator; - - /** - * @brief Start iteration over all biconnected components (if enabled during - * last call to run). 
- * - * Components are represented as a pair consisting of - * a list of nodes and a list of edges, - * i.e. if it is of type component_iterator - * then *it is of type - * pair<list<node>,list<edge> >. - * - * @return iterator to first component - * @sa biconnectivity::store_components - */ - component_iterator components_begin () - { return components.begin(); } - - - /** - * @brief End of iteration over all biconnected components. - * - * @return end of iteration over biconnected components - * @sa biconnectivity::store_components - */ - component_iterator components_end () - { return components.end(); } - - /** - * @brief Number von biconnected components detected during the last run. - * - * @return number of biconnected components. - */ - int number_of_components () const - {return num_of_components; } - - //----------------------------------------------------------------------- - // Handler used to extend dfs to biconnectivity - //----------------------------------------------------------------------- - /** - * @internal - */ - virtual void init_handler (GTL::graph&); - - /** - * @internal - */ - virtual void entry_handler (GTL::graph&, GTL::node&, GTL::node&); - - /** - * @internal - */ - virtual void before_recursive_call_handler (GTL::graph&, GTL::edge&, GTL::node&); - - /** - * @internal - */ - virtual void after_recursive_call_handler (GTL::graph&, GTL::edge&, GTL::node&); - - /** - * @internal - */ - virtual void old_adj_node_handler (GTL::graph&, GTL::edge&, GTL::node&); - - /** - * @internal - */ - virtual void new_start_handler (GTL::graph&, GTL::node&); - - /** - * @internal - */ - virtual void leave_handler (GTL::graph&, GTL::node&, GTL::node&); - - /** - * @internal - */ - virtual void end_handler (GTL::graph&); - - -protected: - /** - * @internal - */ - edges_t self_loops; - - /** - * @internal - */ - node_map in_component; - - /** - * @internal - */ - node_map low_num; - /** - * @internal - */ - int num_of_components; - /** - * @internal - */ - 
bool store_comp; - /** - * @internal - */ - bool add_edges; - /** - * @internal - */ - node last; - /** - * @internal - */ - std::stack node_stack; - /** - * @internal - */ - std::stack edge_stack; - /** - * @internal - */ - std::list > components; - /** - * @internal - */ - nodes_t cut_points; - /** - * @internal - */ - node_map cut_count; - /** - * @internal - */ - edges_t additional; - /** - * @internal - */ - node_map first_child; -}; - -__GTL_END_NAMESPACE - -#endif // GTL_BICONNECTIVITY_H - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/include/GTL/bid_dijkstra.h b/src/Tracker/graph/GTL/include/GTL/bid_dijkstra.h deleted file mode 100644 index bba5a6811..000000000 --- a/src/Tracker/graph/GTL/include/GTL/bid_dijkstra.h +++ /dev/null @@ -1,387 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// bid_dijkstra.h -// -//========================================================================== -// $Id: bid_dijkstra.h,v 1.3 2003/03/24 15:58:54 raitner Exp $ - -#ifndef GTL_BID_DIJKSTRA_H -#define GTL_BID_DIJKSTRA_H - -#include -#include -#include -#include -#include - -__GTL_BEGIN_NAMESPACE - -/** - * $Date: 2003/03/24 15:58:54 $ - * $Revision: 1.3 $ - * - * @brief Dijkstra's Algorithm for computing a shortest path from a single - * source to a single target. - * - * This class implements Dijkstra's algorithm in a bidirectional manner for - * computing a shortest path from a single source to a single target in - * \f$\mathcal{O}((|V| + |E|) log |V|)\f$ worst case. 
- * - * @sa dijkstra - * @sa bellman_ford - * - * @author Christian Bachmaier chris@infosun.fmi.uni-passau.de - */ -class GTL_EXTERN bid_dijkstra : public algorithm -{ -public: - /** - * @brief Iterator type for traversing %nodes on one shortest path. - */ - typedef nodes_t::const_iterator shortest_path_node_iterator; - - /** - * @brief Iterator type for traversing %edges on one shortest path. - */ - typedef edges_t::const_iterator shortest_path_edge_iterator; - - /** - * @internal - */ - enum node_color {white, grey, black}; - - /** - * @brief Default constructor. - * - * Enables only the calculation of shortest paths. - * - * @sa algorithm::algorithm - */ - bid_dijkstra(); - - /** - * @brief Destructor. - * - * @sa algorithm::~algorithm - */ - virtual ~bid_dijkstra(); - - /** - * @brief Sets source and target %node. - * - * Must be executed every time before check and run of this %algorithm. - * - * @param s source %node - * @param t target %node - */ - void source_target(const node& s, const node& t); - - /** - * @brief Sets weights of the edges. - * - * This method @b must be called before check and run. - * - * @param weight weights of the %edges - */ - void weights(const edge_map& weight); - - /** - * @brief Enables or disables the storing of the shortest path. - * - * If enabled for every %node and edge on the shortest path from source - * to target will be stored. - * - * @param set true if path should be stored - * - * @sa dijkstra::predecessor_node - * @sa dijkstra::predecessor_edge - */ - void store_path(bool set); - - /** - * @brief Checks whether the preconditions for bidirectional Dijkstra are - * satisfied. - * - * The Precondition are that the weights of the edges have been set and - * that the graph has at least one %node. Additionally all %edge weights - * must be \f$\ge 0\f$ and and source and target %nodes must be found in - * @p G. 
- * - * @param G graph - * - * @retval algorithm::GTL_OK if %algorithm can be applied - * @retval algorithm::GTL_ERROR otherwise - * - * @sa dijkstra::source - * @sa dijkstra::weigths - * @sa algorithm::check - */ - virtual int check(GTL::graph& G); - - /** - * @brief Runs shortest path algorithm on @p G. - * - * This should return always algorithm::GTL_OK. The return value only - * tracks errors that might occur. - * Afterwards the result of the test can be accessed via access methods. - * - * @param G graph - * - * @retval algorithm::GTL_OK on success - * @retval algorithm::GTL_ERROR otherwise - * - * @sa algorithm::run - */ - int run(GTL::graph& G); - - /** - * @brief Returns source %node. - * - * @return source %node - */ - node source() const; - - /** - * @brief Returns target %node if set, node::node() else. - * - * @return target %node - */ - node target() const; - - /** - * @brief Returns whether the storing of the shortest path is enabled. - * - * @return @c true iff the storing of path is enabled. - * - * @sa dijkstra::predecessor - */ - bool store_path() const; - - /** - * @brief Returns whether target is reachable from source. - * - * @return @c true iff target was reached from source - */ - bool reached() const; - - /** - * @brief Returns the distance from source %node to target %node. - * - * @return distance if target is bid_dijkstra::reached, -1.0 - * else - */ - double distance() const; - - /** - * @brief Returns an iterator to the beginning (to the source %node) of - * the shortest %node path to target %node. - * - * @return beginning %node iterator of the shortest path - * - * @sa bid_dijkstra::store_path - * - * @note The method requires that path calculation option was - * enabled during last run. - */ - shortest_path_node_iterator shortest_path_nodes_begin(); - - /** - * @brief Returns an iterator one after the end (one after target - * %node) of the shortest %node path to target %node. 
- * - * @return shortest path end %node iterator - * - * @sa bid_dijkstra::store_path - * - * @note The method requires that path calculation option was - * enabled during last run. - */ - shortest_path_node_iterator shortest_path_nodes_end(); - - /** - * @brief Returns an iterator to the beginning %edge of the shortest - * %edge path to target %node. - * - * @sa bid_dijkstra::store_path - * - * @return beginning %edge iterator of the shortest path - * - * @note The method requires that path calculation option was - * enabled during last run. - */ - shortest_path_edge_iterator shortest_path_edges_begin(); - - /** - * @brief Returns an iterator one after the end of a shortest %edge path - * to target %node. - * - * @sa bid_dijkstra::store_path - * - * @return shortest path end %edge iterator - * - * @note The method requires that predecessor calculation option was - * enabled during last run. - */ - shortest_path_edge_iterator shortest_path_edges_end(); - - /** - * @brief Resets Dijkstra's bidirectional algorithm. - * - * It prepares the algorithm to be applied again, possibly to another - * graph. - * - * @note The weights are not reset. You can apply this algorithms - * - * @sa algorithm::reset - */ - virtual void reset(); -private: - /** - * @internal - * Stores source. - * - * @sa bid_dijkstra::source. - */ - node s; - - /** - * @internal - * Stores target. - * - * @sa bid_dijkstra::source. - */ - node t; - - /** - * @internal - * Indicates whether weights were set. - * - * @sa bid_dijkstra::weights. - */ - bool weights_set; - - /** - * @internal - * Indicates whether predecessors should be computed. - * - * @sa bid_dijkstra::store_preds. - */ - bool path_set; - - /** - * @internal - * Stores the weights of the %edges. - * - * @sa bid_dijkstra::weights. - */ - edge_map weight; - - /** - * @internal - * Stores distance between @s and @t. - * (default: -1.0) - */ - double dist; - - /** - * @internal - * Stores if @a t can be reached from @s. 
- * (default: false) - */ - bool reached_t; - - /** - * @internal - * Stores predecessor of each %node in shortest path. - * (default: edge() (if enabled)) - */ - node_map pred; - - /** - * @internal - * Stores successor of each %node in shortest path tree. - * (default: edge() (if enabled)) - */ - node_map succ; - - /** - * @internal - * Indicates the current %node status. - * (default: black) - */ - node_map source_mark; - - /** - * @internal - * Indicates the current %node status. - * (default: black) - */ - node_map target_mark; - - /** - * @internal - * Distance from source @a s. - * (default: -1.0) - */ - node_map source_dist; - - /** - * @internal - * Distance to target @a t. - * (default: -1.0) - */ - node_map target_dist; - - /** - * @internal - * Stores for target %node @a t a list of nodes on the shortest path - * from source @a s to it. - * (default: empty) - * - * @sa dijkstra::shortest_path_nodes_begin - * @sa dijkstra::shortest_path_nodes_end - */ - nodes_t shortest_path_node_list; - - /** - * @internal - * Stores for target %node @a t a list of edges on the shortest path - * from source @a s to it. - * (default: empty) - * - * @sa dijkstra::shortest_path_edges_begin - * @sa dijkstra::shortest_path_edges_end - */ - edges_t shortest_path_edge_list; - - /** - * @internal - * Prepares the %algorithm to be applied once again. - */ - void reset_algorithm(); - - /** - * @internal - * Inits data structure. - */ - void init(GTL::graph& G); - - /** - * @internal - * Fills ordered lists @a shortest_path_node_list and @a - * shortest_path_edge_list with nodes respective edges of shortest path - * from @a s to @a t. Calculates distance. 
- * - * @param n first white node of the two directions - */ - void fill_node_edge_lists(const node& n); -}; - -__GTL_END_NAMESPACE - -#endif // GTL_BID_DIJKSTRA_H - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/include/GTL/bin_heap.h b/src/Tracker/graph/GTL/include/GTL/bin_heap.h deleted file mode 100644 index 4ea90ac62..000000000 --- a/src/Tracker/graph/GTL/include/GTL/bin_heap.h +++ /dev/null @@ -1,425 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// bin_heap.h -// -//========================================================================== -// $Id: bin_heap.h,v 1.10 2003/01/07 07:01:05 chris Exp $ - -#ifndef GTL_BIN_HEAP_H -#define GTL_BIN_HEAP_H - -#include - -#include -#include -#include - -__GTL_BEGIN_NAMESPACE - -/** - * @internal - * Node type of container. - */ -template -class heap_node -{ -public: - /** - * @internal - * Default constructor. - */ - heap_node() - { - } - - /** - * @internal - */ - heap_node(const T& n) : data(n) - { - } - - /** - * @internal - * Data member. - */ - T data; - - /** - * @internal - * Position in container. - */ - int pos = 0; -}; - - -/** - * @brief Binary heap. - * - * @author Christian Bachmaier chris@infosun.fmi.uni-passau.de - */ -template -class bin_heap -{ -public: - /** - * @brief Creates empty binary heap. - * - * @param prd binary predicate to compare two Ts - */ - bin_heap(const Pred& prd); - - /** - * @brief Creates empty binary heap. - * - * @param prd binary predicate to compare two @c Ts - * @param est_size estimated maximal size of heap - */ - bin_heap(const Pred& prd, const int est_size); - - /** - * @brief Copy constructor. 
- * - * @param bh binary heap to copy - */ - bin_heap(const bin_heap& bh); - - /** - * @brief Assigns @c bh to this binary heap. - * - * All elements in this heap will be deleted. The predicate of this heap - * must be physically the same as the one of @p bh. - * - * @param bh binary heap - * - * @return this heap - */ - bin_heap& operator=(const bin_heap& bh); - - /** - * @brief Destructor. - */ - ~bin_heap(); - - /** - * @brief Inserts @p ins in heap. - * - * @param ins data element to be inserted - */ - void push(const T& ins); - - /** - * @brief Removes the element on top of the heap. - */ - void pop(); - - /** - * @brief Returns a reference to the element at the top of the heap. - * - * @return top element of the heap - */ - const T& top() const; - - /** - * @brief Reconstructs heap condition after changing key value of @p - * cha externally. - * - * @param cha element with changed key value - * - * @note @c changeKey doesn't operate if @p cha is a primitive data - * structure, because it represents its key value itself, or if one - * object is stored more than once in the data structure. - * - * @sa dijkstra - */ - void changeKey(const T& cha); - - /** - * @brief Checks if heap is empty. - * - * @return @c true iff empty - */ - bool is_empty() const; - - /** - * @internal - * Makes heap empty. - */ - void clear(); -private: - /** - * @internal - * Binary predicate to compare two T's. - */ - const Pred& prd; - - /** - * @internal - * Next free position in @a container. - */ - int size; - - /** - * @internal - * Estimated maximum size of @a container. Initially set to estimated - * size of user in constructor #bin_heap. - */ - int capacity; - - /** - * @internal - * Data container. - */ - std::vector* > container; - - /** - * @internal - * Mapping between data member T and its heap_node. - */ - std::map* > heap_node_map; - - /** - * @internal - * Reconstructs heap condition with bubbling up heap_node @p n. 
- */ - void bubble_up(heap_node* const n); - - /** - * @internal - * Reconstructs heap condition with bubbling down heap_node @p n. - */ - void bubble_down(heap_node* const n); -#ifdef _DEBUG -public: - /** - * @internal - * Prints @a container for debug purposes. - */ - void print_data_container(); -#endif // _DEBUG -}; - -// Implementation Begin - -template -bin_heap::bin_heap(const Pred& prd) : - prd(prd), size(0), capacity(50) -{ - container.resize(capacity); -} - - -template -bin_heap::bin_heap(const Pred& prd, const int est_size) : - prd(prd), size(0), capacity(50) -{ - if (est_size > 50) - { - capacity = est_size; - } - container.resize(capacity); -} - - -template -bin_heap::bin_heap(const bin_heap& bh) : - prd(bh.prd), size(bh.size), capacity(bh.capacity) -{ - container.resize(capacity); - for (int i = 0; i < size; ++i) - { - container[i] = new heap_node(bh.container[i]->data); - } -} - - -template -bin_heap& bin_heap::operator=(const bin_heap& bh) -{ - if (this != &bh) // no self assignment - { - assert(&prd == &(bh.prd)); - clear(); - size = bh.size; - capacity = bh.capacity; - container.resize(capacity); - for (int i = 0; i < size; ++i) - { - container[i] = new heap_node(bh.container[i]->data); - } - } - return *this; -} - - -template -bin_heap::~bin_heap() -{ - clear(); -} - - -template -void bin_heap::push(const T& ins) -{ - if (size == capacity) - { - // dynamic memory allocation - capacity *= 2; - container.resize(capacity); - } - heap_node* n = new heap_node(ins); - n->pos = size; - container[size] = n; - heap_node_map[ins] = n; - ++size; - bubble_up(n); -} - - -template -void bin_heap::pop() -{ - assert(size > 0); - // save smallest element for return (ensured by heap condition) - heap_node_map.erase(container[0]->data); - delete container[0]; - // replace by last element in array and decrease heap "size" - if (size > 1) - { - container[0] = container[--size]; - container[0]->pos = 0; - // reorder heap to ensure heap conditions - 
bubble_down(container[0]); - } - else - { - size = 0; - } -} - - -template -const T& bin_heap::top() const -{ - return container[0]->data; -} - - -template -void bin_heap::changeKey(const T& cha) -{ - int pos = heap_node_map[cha]->pos; - heap_node* n = container[pos]; - if (pos != 0) - { - heap_node* father = container[(pos - 1) / 2]; - if (prd(n->data, father->data)) - { - bubble_up(n); - return; - } - } - bubble_down(n); -} - - -template -bool bin_heap::is_empty() const -{ - // empty if if first free index is 0 - return size == 0; -} - - -template -void bin_heap::clear() -{ - for (int i = 0; i < size; ++i) - { - delete container[i]; - } - size = 0; - heap_node_map.clear(); -} - - -template -void bin_heap::bubble_up(heap_node* const n) -{ - int pos = n->pos; - // if we are not already at top AND the parent in heap is more - while ((pos != 0) && - (prd(n->data, container[(pos - 1) / 2]->data))) - { - // move father down - container[pos] = container[(pos - 1) / 2]; - container[pos]->pos = pos; - // increment k to parent index - pos = (pos - 1) / 2; - } - // place value in its highest position in heap - container[pos] = n; - container[pos]->pos = pos; -} - - -template -void bin_heap::bubble_down(heap_node* const n) -{ - int pos = n->pos; - int j = 0; - while (pos < size / 2) - { - j = 2 * pos + 1; - // if right child is smaller than left child get right child - if ((j < size - 1) && - (prd(container[j + 1]->data, container[j]->data))) - { - ++j; - } - // if element is less or equal than its child leave it here - if (!prd(container[j]->data, n->data)) - { - break; - } - // else move its child up - container[pos] = container[j]; - container[pos]->pos = pos; - // repeat for new position - pos = j; - } - // place element into position, where heap condition is fulfilled - container[pos] = n; - container[pos]->pos = pos; -} - -#ifdef _DEBUG -template -void bin_heap::print_data_container() -{ - if (size == 0) - { - cout << "empty"; - } - else - { - for (int pos = 0; pos < 
size; ++pos) - { - cout << container[pos]->data << " "; - } - } - cout << endl; -} -#endif // _DEBUG - -// Implementation End - -__GTL_END_NAMESPACE - -#endif // GTL_BIN_HEAP_H - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/include/GTL/components.h b/src/Tracker/graph/GTL/include/GTL/components.h deleted file mode 100644 index f1a07d36e..000000000 --- a/src/Tracker/graph/GTL/include/GTL/components.h +++ /dev/null @@ -1,133 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// components.h -// -//========================================================================== -// $Id: components.h,v 1.5 2003/04/03 11:44:42 raitner Exp $ - -#ifndef GTL_COMPONENTS_H -#define GTL_COMPONENTS_H - -#include -#include - -#include - -__GTL_BEGIN_NAMESPACE -/** - * @brief Connected components algorithm - */ -class GTL_EXTERN components : public dfs -{ -public: - /** - * @brief Creates connected components algorithm object. - * - * @sa dfs::dfs - */ - components (); - - /** - * @brief Destroys connected components algorithm object. - * - * @sa dfs::~dfs - */ - virtual ~components () {} - - /** - * @brief Checks whether the connected components algorithm can be applied - * - * Necessary preconditions: - * - G is undirected. - * - scanning of whole graph is enabled. - * - DFS may be applied - * - * @param G graph. - * @return algorithm::GTL_OK if connected components can be computed for G. - * @sa dfs::scan_whole_graph - */ - virtual int check (GTL::graph& G); - - virtual void reset (); - - /** - * @internal - */ - typedef std::list >::iterator component_iterator; - - /** - * @brief Start iteration over all components (if enabled during - * last call to run). 
- - * Components are represented as a pair consisting of - * a list of nodes and a list of edges, - * i.e. if @c it is of type @c component_iterator - * then @c *it is of type - * @c pair<list<node>,list<edge> >. - * - * @return iterator to first component - */ - component_iterator components_begin () - { return comp.begin(); } - - - /** - * @brief End of iteration over all components. - * - * @return end of iteration over biconnected components - * @sa biconnectivity::store_components - */ - component_iterator components_end () - { return comp.end(); } - - /** - * @brief Number of components detected during the last run. - * - * @return number of components. - */ - int number_of_components () const - {return num_of_components; } - - //----------------------------------------------------------------------- - // Handler used to extend dfs to biconnectivity - //----------------------------------------------------------------------- - /** - * @internal - */ - virtual void before_recursive_call_handler (GTL::graph&, GTL::edge&, GTL::node&); - - /** - * @internal - */ - virtual void old_adj_node_handler (GTL::graph&, GTL::edge&, GTL::node&); - - /** - * @internal - */ - virtual void new_start_handler (GTL::graph&, GTL::node&); - - -protected: - - /** - * @internal - */ - int num_of_components; - /** - * @internal - */ - std::list > comp; - /** - * @internal - */ - component_iterator li; -}; - -__GTL_END_NAMESPACE - -#endif // GTL_BICONNECTIVITY_H - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/include/GTL/debug.h b/src/Tracker/graph/GTL/include/GTL/debug.h deleted file mode 100644 index ea0ae6696..000000000 --- a/src/Tracker/graph/GTL/include/GTL/debug.h +++ /dev/null @@ -1,48 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ 
-//========================================================================== -// -// debug.h - Functions, which are useful for debugging -// -//========================================================================== -// $Id: debug.h,v 1.8 2001/10/10 08:30:00 chris Exp $ - -#ifndef GTL_DEBUG_H -#define GTL_DEBUG_H - -#include - -#include - -__GTL_BEGIN_NAMESPACE - -// -// If _DEBUG is defined the funtions defined here will produce output. -// You can either define _DEBUG here (or undef it) or you can set it as -// option of your compiler. -// -//#define _DEBUG 1 -//#undef _DEBUG -// - -/** - * @internal - */ -class GTL_EXTERN GTL_debug { -public: - static void debug_message (const char*, ...); - static void init_debug(); - static void close_debug(); - static std::ostream& os() - { return *GTLerr; } - -private: - static std::ostream* GTLerr; -}; - -__GTL_END_NAMESPACE - -#endif // GTL_DEBUG_H - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/include/GTL/dfs.h b/src/Tracker/graph/GTL/include/GTL/dfs.h deleted file mode 100644 index 0355d3de8..000000000 --- a/src/Tracker/graph/GTL/include/GTL/dfs.h +++ /dev/null @@ -1,571 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// dfs.h -// -//========================================================================== -// $Id: dfs.h,v 1.25 2003/03/24 15:58:54 raitner Exp $ - -#ifndef GTL_DFS_H -#define GTL_DFS_H - -#include -#include - -__GTL_BEGIN_NAMESPACE - -/** - * $Date: 2003/03/24 15:58:54 $ - * $Revision: 1.25 $ - * - * @brief Depth-First-Search (DFS) %algorithm - * - * Encapsulates the DFS %algoritm together with all the data - * produced by a run of DFS. 
Since there exits so much different - * things which one might want to calculate during a DFS this - * class provides basically two different customization - * features. First it is possible to take influence on the - * behaviour of this %algortihm by changing some of the following - * options: - * - dfs::start_node - * (default: an arbitrary %node will be chosen) - * - dfs::scan_whole_graph states whether BFS will be - * continued in the unused part of the %graph, if not all - * nodes were touched at the end of DFS started at the start-%node. - * (default: disabled) - * - dfs::calc_comp_num toggle storing of completion-numbers - * for each %node, i.e. a numbering which reflects the order in which - * nodes were @em finished. (default: disabled) - * - dfs::store_preds toggle storing the predecessor of each - * %node, i.e. the father in DFS-tree. (default: disabled) - * - dfs::store_non_tree_edges toggle storing of all non-tree-edges - * (tree-edges are always stored) in a list and thus enable or disable - * iteration through all non-tree-edges. - * (default: disabled) - * - * But the trouble with most DFS-%algorithm is that one always - * wants to add a little bit of code somewhere in the - * %algorithm. And then there are only two ways to get this - * done. The more efficient one (in terms of runtime) is to - * implement the DFS anew and add the new code where - * necessary. The other way (which is more efficient in terms of - * code-writing) is to take the %algorithm as provided and run - * through the list of nodes it returns (resulting in an extra - * factor of 2). - * - * Our DFS-%algoritm class provides a new method to add small - * pieces of code to the %algorithm: Handler. These are virtual - * functions called at well-defined, important states of the - * %algorithm (e.g. before a new recursive call). So the only - * thing to do is to derive your extended DFS from this class and - * to override the handlers where needed. 
In detail there are the - * following handler supported (have a look at the source code - * for details): - * - dfs::init_handler - * - dfs::end_handler - * - dfs::entry_handler - * - dfs::leave_handler - * - dfs::before_recursive_call_handler - * - dfs::after_recursive_call_handler - * - dfs::old_adj_node_handler - * - dfs::new_start_handler - * - * @em Please @em note: We do @em not claim that this set of handlers - * is sufficient in any way. So if you believe that some new handler is - * needed urgently please let us know. - * - * There is a lot of information stored during DFS (e.g. nodes in - * dfs-order, list of non-tree-edges). Some of it can be obtained directly - * by using the corresponding member-function (e.g. dfs::dfs_num), - * but all information that can be thought of as a list (e.g. nodes in - * dfs-order) can be accessed through iterators. In detail these are (of - * course depending on what options are chosen!): - * - dfs::dfs_iterator - * - dfs::tree_edges_iterator - * - dfs::non_tree_edges_iterator - * - dfs::roots_iterator - */ -class GTL_EXTERN dfs : public algorithm -{ -public: - /** - * @brief Constructor. - */ - dfs (); - - - /** - * @brief Destructor. - */ - virtual ~dfs (); - - int run (GTL::graph& G); - - /** - * @brief Checks whether the preconditions for DFS are - * satisfied. - * - * Currently there aren't any restricitions for the DFS - * %algorithm. - * - * @param G graph. - * @retval algorithm::GTL_OK if %algorithm can be applied - * @retval algorithm::GTL_ERROR otherwise. - */ - virtual int check (GTL::graph& G); - - virtual void reset (); - - - //--------------------------------------------------------------------- - // Parameters - //--------------------------------------------------------------------- - - /** - * @brief Sets start-%node for DFS. - * - * @param n start-node. - */ - void start_node (const node& n) - { start = n; } - - /** - * @brief Returns start-%node for DFS. - * - * @return start-%node. 
- */ - node start_node () const {return start;} - - /** - * @brief Enables or disables scanning of the whole %graph. - * - * If enabled and the DFS started at the given start-%node - * stops without having touched all nodes, it will be - * continued with the next unused %node, and so on until all - * nodes were used. This makes sure that for every %node - * #dfs_number is defined. - * - * On the other hand, if this feature is disabled, one - * will be able to check what nodes can be reached, when - * starting a DFS at the start-%node, because for those not - * reached #dfs_number will be 0. - * - * @param set if true enable scanning the whole graph. - * @sa dfs::roots_begin - * @sa dfs::roots_end - */ - void scan_whole_graph (bool set) {whole_graph = set;} - - /** - * @brief Returns true iff the whole graph will be scanned. - * - * @retval true iff the whole graph will be scanned. - * @sa dfs::roots_begin - * @sa dfs::roots_end - */ - bool scan_whole_graph () const {return whole_graph;} - - /** - * @brief Enables or Disables the calculation of the completion number. - * - * @param set if true completion-numbers will be calculated. - * @sa dfs::comp_num - */ - void calc_comp_num (bool set); - - /** - * @brief Returns true iff completion-numbers will be calculated. - * - * @retval true iff completion-numbers will be calculated. - * @sa dfs::comp_num - */ - bool calc_comp_num () const {return comp_number != 0;} - - - /** - * @brief Enables or disables the storing of predecessors. - * - * If enabled for every %node the predecessor in DFS will be - * stored. - * - * @param set if true predecessors will be stored. - * @sa dfs::father - */ - void store_preds (bool set); - - /** - * @brief Returns true iff the storing of predecessors is enabled. - * - * @retval true iff the storing of predecessors is enabled. - * @sa dfs::father - */ - bool store_preds () const {return preds != 0;} - - /** - * @brief Enables the storing of back-edges. 
- * - * If enabled the list of non-tree-edges can be traversed in - * the order they occured using #non_tree_edges_iterator. - * - * @param set if true non_tree_edges will be stored. - * @sa dfs::non_tree_edges_begin - * @sa dfs::non_tree_edges_end - */ - void store_non_tree_edges (bool set); - - /** - * @brief Returns true iff the storing of non-tree-edges is enabled. - * - * @return true iff the storing of non-tree-edges is enabled. - * @sa dfs::non_tree_edges_begin - * @sa dfs::non_tree_edges_end - */ - bool store_non_tree_edges () const {return back_edges != 0;} - - //--------------------------------------------------------------------- - // Access - //---------------------------------------------------------------------- - - /** - * @brief Checks whether %node @a n was reached in last DFS. - * - * @param n %node to be checked. - * @return true iff @a n was reached. - */ - bool reached (const node& n) const - {return dfs_number[n] != 0;} - - /** - * @brief DFS-Number of @a n. - * - * Please note that DFS-Number 0 means that this %node wasn't - * reached. - * - * @param n %node. - * @return DFS-Number of @a n. - */ - int dfs_num (const node& n) const - {return dfs_number[n];} - - /** - * @brief DFS-Number of @a n. - * - * Please note that DFS-Number 0 means that this %node wasn't - * reached. - * - * @param n %node. - * @return DFS-Number of @a n. - */ - int operator[] (const node& n) const - {return dfs_number[n];} - - /** - * @brief Completion-number of %node @a n, if enabled in last - * run. - * - * @param n %node. - * @return Completion-number of @a n. - * @sa dfs::calc_comp_num - */ - int comp_num (const node& n) const - {assert (comp_number); return (*comp_number)[n];} - - /** - * @brief Returns father of node @a n in DFS-forest. - * - * If @a n is a root in the forest or wasn't reached the - * return value is @c node(). - * - * @param n %node. - * @return Father of @a n. 
- * @sa dfs::store_preds - */ - node father (const node& n) const - {assert (preds); return (*preds)[n];} - - /** - * @brief Iterator for the tree edges of the DFS-tree. - */ - typedef edges_t::const_iterator tree_edges_iterator; - - /** - * @brief Iterate through all edges picked in last DFS. - * - * Please note that this edges not always form a tree. In - * case the %graph is not (strongly) connected they form a - * forest. - * - * @return start for iteration through all edges followed in DFS. - */ - tree_edges_iterator tree_edges_begin () const - {return tree.begin();} - - /** - * @brief End-iterator for iteration through all edges picked in last DFS. - * - * @return end for iteration through all edges followed in DFS. - */ - tree_edges_iterator tree_edges_end () const - {return tree.end();} - - /** - * @brief Iterator for the (reached) nodes in DFS-order. - */ - typedef nodes_t::const_iterator dfs_iterator; - - /** - * @brief Iterate through all (reached) nodes in DFS-order. - * - * @return start for iteration through all nodes in DFS-order. - */ - dfs_iterator begin () const - {return dfs_order.begin();} - - /** - * @brief End-Iterator for iteration through all (reached) - * nodes in DFS-order. - * - * @return end for iteration through all (reached) nodes - */ - dfs_iterator end () const - {return dfs_order.end();} - - /** - * @brief Iterator for the non-tree-edges - */ - typedef edges_t::const_iterator non_tree_edges_iterator; - - /** - * @brief Iterate through all non-tree-edges (if enabled). - * - * @return start for iteration through all non-tree-edges. - * @sa dfs::store_non_tree_edges - */ - non_tree_edges_iterator non_tree_edges_begin () const - {assert (back_edges); return back_edges->begin(); } - - /** - * @brief End-iterator for iteration through all - * non-tree-edges (if enabled). - * - * @return end for iteration through all non-tree-edges. 
- * @sa dfs::store_non_tree_edges - */ - non_tree_edges_iterator non_tree_edges_end () const - {assert (back_edges); return back_edges->end(); } - - /** - * @brief Iterator for the roots of the DFS-forest. - */ - typedef std::list::const_iterator roots_iterator; - - /** - * @brief Iterator pointing towards the first root in the DFS-forest. - * - * Please note that intstead of pointing directly - * towards the node (i.e. @c *it is of type node) the - * iterator points towards a #dfs_iterator, which represents - * the root (i.e. @c *it is of type #dfs_iterator). - * - * Using this technique makes it possible not only to obtain - * all the roots in the forest, but also the whole trees - * associated with each one. This can be achieved because a - * #root_iterator specifies the exact position of the root in - * the DFS-ordering and by definition of DFS all the - * descendents of the root, i.e. the whole tree, will come - * later in DFS, such that by incrementing the #dfs_iterator, - * a #roots_iterator points at, one can traverse the whole - * tree with this given root. - * - * Of course if the root isn't the last node in the - * DFS-forest on will also traverse all following trees, but - * since the first node of such a tree one will discover is - * its root, the successor of the #roots_iterator can be used - * as end-iterator. - * - * @return start for iteration through all roots in DFS-forest. - * @sa dfs::scan_whole_graph - */ - roots_iterator roots_begin () const - {return roots.begin();} - - /** - * @brief Iterator pointing to the end of all roots. - * - * @return end for iteration through all roots in DFS-forest. - * @sa dfs::scan_whole_graph - */ - roots_iterator roots_end () const - {return roots.end();} - - /** - * @brief Number of nodes reached in last DFS. - * - * @return number of reached nodes. 
- * @sa dfs::scan_whole_graph - */ - int number_of_reached_nodes () const - {return reached_nodes;} - - - //----------------------------------------------------------------------- - // Handler - for customization purposes - //----------------------------------------------------------------------- - - /** - * @brief Handler called before the start of DFS. - * - * @param G %graph for which DFS was invoked. - */ - virtual void init_handler (GTL::graph& /*G*/) {} - - /** - * @brief Handler called at the end of DFS. - * - * @param G %graph for which DFS was invoked. - */ - virtual void end_handler (GTL::graph& /*G*/) {} - - /** - * @brief Handler called when touching %node @a n. - * - * @param G %graph for which DFS was invoked. - * @param n actual %node. - * @param f predecessor. - */ - virtual void entry_handler (GTL::graph& /*G*/, GTL::node& /*n*/, GTL::node& /*f*/) {} - - /** - * @brief Handler called after all the adjacent edges of @a n - * have been examined. - * - * @param G %graph for which DFS was invoked. - * @param n actual %node. - * @param f predecessor. - */ - virtual void leave_handler (GTL::graph& /*G*/, GTL::node& /*n*/, GTL::node& /*f*/) {} - - /** - * @brief Handler called when a unused %node @a n connected to the - * actual %node by @a e is found. - * - * @param G %graph for which DFS was invoked. - * @param e %edge connecting the actual %node to the unused one. - * @param n unused %node. - */ - virtual void before_recursive_call_handler (GTL::graph& /*G*/, GTL::edge& /*e*/, GTL::node& /*n*/) {} - - /** - * @brief Handler called after the %algorithm returns from the - * subtree starting at @a n connected to the actual %node by - * @a e. - * - * @param G %graph for which DFS was invoked. - * @param e %edge connecting the actual %node to the unused one. - * @param n unused %node. 
- */ - virtual void after_recursive_call_handler (GTL::graph& /*G*/, GTL::edge& /*e*/, GTL::node& /*n*/) {} - - /** - * @brief Handler called when a already marked %node @a n connected - * to the actual %node by @a e is found during the search of all - * adjacent edges of the actual %node. - * - * @param G %graph for which DFS was invoked. - * @param e %edge connecting the actual %node to the old one. - * @param n used %node. - */ - virtual void old_adj_node_handler (GTL::graph& /*G*/, GTL::edge& /*e*/, GTL::node& /*n*/) {} - - /** - * @brief Called when DFS is started with start-%node @a - * n. - * - * This is particularly useful when DFS was invoked with the - * #scan_whole_graph option. - * - * @param G %graph for which DFS was invoked. - * @param n start-%node. - */ - virtual void new_start_handler (GTL::graph& /*G*/, GTL::node& /*n*/) { }; - -private: - - /** - * @internal - */ - void dfs_sub (GTL::graph&, GTL::node&, GTL::node&); - -protected: - - //---------------------------------------------------------------------- - // Data - //---------------------------------------------------------------------- - - /** - * @internal - */ - int act_dfs_num; - /** - * @internal - */ - int act_comp_num; - /** - * @internal - */ - edges_t tree; - /** - * @internal - */ - nodes_t dfs_order; - /** - * @internal - */ - node_map dfs_number; - /** - * @internal - */ - int reached_nodes; - /** - * @internal - */ - edge_map* used; - /** - * @internal - */ - std::list roots; - - - //----------------------------------------------------------------------- - // Optional - //----------------------------------------------------------------------- - - /** - * @internal - */ - node_map* comp_number; - /** - * @internal - */ - node_map* preds; - /** - * @internal - */ - edges_t* back_edges; - /** - * @internal - */ - node start; - /** - * @internal - */ - bool whole_graph; -}; - -__GTL_END_NAMESPACE - -#endif // GTL_DFS_H - 
-//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/include/GTL/dijkstra.h b/src/Tracker/graph/GTL/include/GTL/dijkstra.h deleted file mode 100644 index a969fec81..000000000 --- a/src/Tracker/graph/GTL/include/GTL/dijkstra.h +++ /dev/null @@ -1,421 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// dijkstra.h -// -//========================================================================== -// $Id: dijkstra.h,v 1.8 2003/02/25 09:18:19 chris Exp $ - -#ifndef GTL_DIJKSTRA_H -#define GTL_DIJKSTRA_H - -#include -#include -#include -#include -#include - -__GTL_BEGIN_NAMESPACE - -/** - * @brief Dijkstra's Algorithm for computing single source shortest path. - * - * This class implements Dijkstra's algorithm for computing single source - * shortest path in @f$\mathcal{O}((|V| + |E|) log |V|)@f$ worst case. - * - * @sa bellman_ford - * - * @author Christian Bachmaier chris@infosun.fmi.uni-passau.de - */ -class GTL_EXTERN dijkstra : public algorithm -{ -public: - /** - * @brief Iterator type for traversing %nodes on one shortest path. - */ - typedef nodes_t::const_iterator shortest_path_node_iterator; - - /** - * @brief Iterator type for traversing %edges on one shortest path. - */ - typedef edges_t::const_iterator shortest_path_edge_iterator; - - /** - * @internal - */ - enum node_color {white, grey, black}; - - /** - * @brief Default constructor. - * - * Enables only the calculation of shortest paths. - * - * @sa algorithm::algorithm - */ - dijkstra(); - - /** - * @brief Destructor. - * - * @sa algorithm::~algorithm - */ - virtual ~dijkstra(); - - /** - * @brief Sets source %node. 
- * - * The default source is the invalid %node (GTL::node::node()), - * in this case an arbitrary %node is chosen and stored when - * this algorithm is run. - * - * @param n source node - */ - void source(const node& n); - - /** - * @brief Sets target %node. - * - * If a target is set with this method the %algorithm stops if a - * shortest distance to @p n is found. Ohterwise shortest paths are - * computed from source to any %node in the %graph. - * - * @param n target node - */ - void target(const node& n); - - /** - * @brief Sets weights of the edges. - * - * This method @b must be called before check run. - * - * @param weight weights of the %edges - */ - void weights(const edge_map& weight); - - /** - * @brief Enables or disables the storing of predecessors. - * - * If enabled for every %node the predecessor on the shortest - * path from will be stored. - * - * @param set @c true if predecessors should be stored - * - * @sa dijkstra::predecessor_node - * @sa dijkstra::predecessor_edge - */ - void store_preds(bool set); - - /** - * @brief Checks whether the preconditions for Dijkstra are satisfied. - * - * Necessary preconditions are: - * - the weights of the edges are set - * - the %graph @p G has at least one %node - * - all %edge weights must be \f$\ge 0\f$ - * - the source %node and (if set) target %node must be found in @p G - * - * @param G graph - * - * @retval algorithm::GTL_OK if %algorithm can be applied - * @retval algorithm::GTL_ERROR otherwise - * - * @sa dijkstra::source - * @sa dijkstra::weights - * @sa algorithm::check - */ - virtual int check(GTL::graph& G); - - /** - * @brief Runs shortest path %algorithm on @p G. - * - * This should return always algorithm::GTL_OK. The return value only - * tracks errors that might occur. - * Afterwards the result of the test can be accessed via access methods. 
- * - * @param G graph - * - * @retval algorithm::GTL_OK on success - * @retval algorithm::GTL_ERROR otherwise - * - * @sa algorithm::run - */ - int run(GTL::graph& G); - - /** - * @brief Returns source %node. - * - * @return source %node - */ - node source() const; - - /** - * @brief Returns target %node if set, node::node() else. - * - * @return target %node - */ - node target() const; - - /** - * @brief Returns whether the storing of predecessors is enabled. - * - * @return @c true iff the storing of predecessors is enabled - * - * @sa dijkstra::predecessor - */ - bool store_preds() const; - - /** - * @brief Returns whether @p n is reachable from source %node. - * - * @param n node - * - * @return @c true iff @p n was reached from source - */ - bool reached(const node& n) const; - - /** - * @brief Returns the distance from source %node to %node @p n. - * - * @param n node - * - * @return distance if @p n is dijkstra::reached, -1.0 else - */ - double distance(const node& n) const; - - /** - * @brief Predecessor %node of %node @p n on the shortest path from the - * source %node. - * - * If @p n is a root or wasn't reached the return value is - * the invalid %node node::node(). - * - * @param n node - * - * @return predecessor %node of @p n - * - * @sa dijkstra::store_preds - * @sa dijkstra::predecessor_edge - * - * @note The method requires that predecessor calculation option was - * enabled during last run. - */ - node predecessor_node(const node& n) const; - - /** - * @brief Predecessor %edge of %node @p n on the shortest path from the - * source %node. - * - * If @p n is a root or wasn't reached the return value is - * the invalid %edge edge::edge(). - * - * @param n node - * - * @return predecessor %edge of @p n - * - * @sa dijkstra::store_preds - * @sa dijkstra::predecessor_node - * - * @note The method requires that predecessor calculation option was - * enabled during last run. 
- */ - edge predecessor_edge(const node& n) const; - - /** - * @brief Returns an iterator to the beginning (to the source %node) of - * a shortest %node path to %node @p dest. - * - * @param dest target %node - * - * @return beginning %node iterator of a shortest path - * - * @note The method requires that predecessor calculation option was - * enabled during last run. If this method is called on the shortest - * path to @p dest for the first time (before - * dijkstra::shortest_path_nodes_end) it needs - * @f$\mathcal{O}(\mbox{length of this path})@f$ time. - */ - shortest_path_node_iterator shortest_path_nodes_begin(const node& dest); - - /** - * @brief Returns an iterator one after the end (one after - * %node @p dest) of a shortest %node path to %node @p dest. - * - * @param dest target %node - * - * @return shortest path end %node iterator - * - * @note The method requires that predecessor calculation option was - * enabled during last run. If this method is called on the shortest - * path to @p dest for the first time (before - * dijkstra::shortest_path_nodes_begin) it needs - * @f$\mathcal{O}(\mbox{length of this path})@f$ time. - */ - shortest_path_node_iterator shortest_path_nodes_end(const node& dest); - - /** - * @brief Returns an iterator to the beginning %edge of a shortest %edge - * path to %node @p dest. - * - * @param dest target %node - * - * @return beginning %edge iterator of a shortest path - * - * @note The method requires that predecessor calculation option was - * enabled during last run. If this method is called on the shortest - * path to @p dest for the first time (before - * dijkstra::shortest_path_edges_end) it needs - * @f$\mathcal{O}(\mbox{length of this path})@f$ time. - */ - shortest_path_edge_iterator shortest_path_edges_begin(const node& dest); - - /** - * @brief Returns an iterator one after the end of a shortest %edge path - * to %node @p dest. 
- * - * @param dest target %node - * - * @return shortest path end %edge iterator - * - * @note The method requires that predecessor calculation option was - * enabled during last run. If this method is called on the shortest - * path to @p dest for the first time (before - * dijkstra::shortest_path_edges_begin) it needs - * @f$\mathcal{O}(\mbox{length of this path})@f$ time. - */ - shortest_path_edge_iterator shortest_path_edges_end(const node& dest); - - /** - * @brief Resets Dijkstra's algorithm. - * - * It prepares the algorithm to be applied again, possibly to another - * graph. - * - * @note The weights are not reset. You can apply this algorithms - * - * @sa algorithm::reset - */ - virtual void reset(); -private: - /** - * @internal - * Stores source. - * - * @sa dijkstra::source. - */ - node s; - - /** - * @internal - * Stores target. - * - * @sa dijkstra::source. - */ - node t; - - /** - * @internal - * Indicates whether weights were set. - * - * @sa dijkstra::weights. - */ - bool weights_set; - - /** - * @internal - * Indicates whether predecessors should be computed. - * - * @sa dijkstra::store_preds. - */ - bool preds_set; - - /** - * @internal - * Stores the weights of the %edges. - * - * @sa dijkstra::weights. - */ - edge_map weight; - - /** - * @internal - * Stores father of each %node in shortest path tree (if enabled). - * (default: edge() (if enabled)) - * - * @sa dijkstra::store_preds - */ - node_map pred; - - /** - * @internal - * Indicates the current %node status. - * (default: black) - */ - node_map mark; - - /** - * @internal - * Distance from source @a s. - * (default: -1) - * - * @sa dijkstra::distance. - */ - node_map dist; - - /** - * @internal - * Stores for every target %node a list of nodes on the shortest path - * from source @a s to it. Filled on demand by methods creating - * iterators. 
- * (default: empty) - * - * @sa dijkstra::shortest_path_nodes_begin - * @sa dijkstra::shortest_path_nodes_end - */ - node_map shortest_path_node_list; - - /** - * @internal - * Stores for every target node a list of edges on the shortest path - * from source @a s to it. Filled on demand by methods creating - * iterators. - * (default: empty) - * - * @sa dijkstra::shortest_path_edges_begin - * @sa dijkstra::shortest_path_edges_end - */ - node_map shortest_path_edge_list; - - /** - * @internal - * Prepares the %algorithm to be applied once again. - */ - void reset_algorithm(); - - /** - * @internal - * Inits data structure. - */ - void init(GTL::graph& G); - - /** - * @internal - * Fills ordered list shortest_path_node_list[t] - * with nodes of shortest path from @a s to @p t. - */ - void fill_node_list(const node& t); - - /** - * @internal - * Fills ordered list shortest_path_edge_list[t] - * with edges of shortest path from @a s to @p t. - */ - void fill_edge_list(const node& t); -}; - -__GTL_END_NAMESPACE - -#endif // GTL_DIJKSTRA_H - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/include/GTL/edge.h b/src/Tracker/graph/GTL/include/GTL/edge.h deleted file mode 100644 index ae7618c20..000000000 --- a/src/Tracker/graph/GTL/include/GTL/edge.h +++ /dev/null @@ -1,149 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// edge.h -// -//========================================================================== -// $Id: edge.h,v 1.15 2001/04/17 14:35:25 raitner Exp $ - -#ifndef GTL_EDGE_H -#define GTL_EDGE_H - -#include - -#include -#include - -__GTL_BEGIN_NAMESPACE - -//-------------------------------------------------------------------------- -// For MSVC 5.0 edge.h has to be included before 
node.h and -// {node,edge}_data.h. So we only declare needed classnames here -//-------------------------------------------------------------------------- - -class node; -typedef std::list nodes_t; - -class edge_data; - -//-------------------------------------------------------------------------- -// edge -//-------------------------------------------------------------------------- - -/** - * @short An edge in a graph - */ -class GTL_EXTERN edge -{ -public: - /** - * Default constructor. Creates an invalid edge. - * The only way to obtain a valid edge is through @ref - * graph#new_edge. Example: - *
-     *   graph g;
-     *   node n1, n2;
-     *   edge e;
-     *
-     *   n1 = g.new_node();
-     *   n2 = g.new_node();
-     *   e = g.new_edge(n1, n2);
-     * 
- * - * @see graph#new_edge - */ - edge(); - - /** - * Returns the source node of the edge. - * - * @return source - */ - node source() const; - - /** - * Returns the target node of the edge. - * - * @return target - */ - node target() const; - const node& target_() const; - - /** - * Changes the direction of this edge. - */ - void reverse (); - - /** - * Makes n the source of this edge. Takes O(1) time. - * - * @param n new source - */ - void change_source (GTL::node n); - - /** - * Makes n the target of this edge. Takes O(1) time. - * - * @param n new target - */ - void change_target (GTL::node n); - - /** - * Returns the node opposite to n referring to - * this edge. - * - * @param n a node incident to this edge - */ - const node& opposite(GTL::node n) const; - - /** - * @internal - */ - nodes_t sources() const; - - /** - * @internal - */ - nodes_t targets() const; - - /** - * @internal - */ - int id() const; - - - /** - * Returns true iff node is hidden. - * - * @return true iff node is hidden. 
- * @see graph#hide_edge - * @see graph#restore_edge - */ - bool is_hidden () const; - - - //================================================== Implementation - -private: - edge_data *data; - - void remove_from(int where) const; // 0 = sources, 1 == targets - - friend class graph; - friend class node; - - GTL_EXTERN friend bool operator==(GTL::edge, GTL::edge); - GTL_EXTERN friend bool operator!=(GTL::edge, GTL::edge); - GTL_EXTERN friend bool operator<(GTL::edge, GTL::edge); - GTL_EXTERN friend std::ostream& operator<< (std::ostream& os, const edge& e); -}; - -typedef std::list edges_t; - -__GTL_END_NAMESPACE - -#endif // GTL_EDGE_H - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/include/GTL/edge_data.h b/src/Tracker/graph/GTL/include/GTL/edge_data.h deleted file mode 100644 index e347b06f3..000000000 --- a/src/Tracker/graph/GTL/include/GTL/edge_data.h +++ /dev/null @@ -1,42 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// edge_data.h - Internal header: DO NO USE IT DIRECTLY !!! 
-// -//========================================================================== -// $Id: edge_data.h,v 1.9 2000/02/03 12:49:50 raitner Exp $ - -#ifndef GTL_EDGE_DATA_H -#define GTL_EDGE_DATA_H - -#include -#include -#include - -#include - -__GTL_BEGIN_NAMESPACE - -/** - * @internal - */ -class GTL_EXTERN edge_data -{ -public: - int id; // internal numbering - nodes_t nodes[2]; // nodes[0] = sources, - // nodes[1] = targets - std::list adj_pos[2];// positions in the adjacency lists - // of sources and targets - edges_t::iterator pos; // position in the list of all edges - bool hidden; - graph* owner; -}; - -__GTL_END_NAMESPACE - -#endif // GTL_EDGE_DATA_H - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/include/GTL/edge_map.h b/src/Tracker/graph/GTL/include/GTL/edge_map.h deleted file mode 100644 index 42724b8b7..000000000 --- a/src/Tracker/graph/GTL/include/GTL/edge_map.h +++ /dev/null @@ -1,82 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// edge_map.h -// -//========================================================================== -// $Id: edge_map.h,v 1.8 2005/06/14 12:22:12 raitner Exp $ - -#ifndef GTL_EDGE_MAP_H -#define GTL_EDGE_MAP_H - -#include -#include -#include - -__GTL_BEGIN_NAMESPACE - -class graph; - -/** - * @short A specialized map with edges as keys - * - * A edge_map is a specialized and optimized map - * implementation with edges as keys. Using a edge_map is - * the standard way to attach user defined information to - * the edges of a graph. - * - * An example of usage: - *
- *   graph g;
- *
- *   node v1 = g.new_node();
- *   node v2 = g.new_node();
- *   edge e = g.new_edge(v1, v2);
- *
- *   edge_map<string> label(g, "Default Label");
- *
- *   label[e] = "An edge";
- *
- *   assert(label[e] == "An edge");
- * 
- * - * The edges used as keys for a edge_map MUST be edges - * of the same graph. If you want to use edges from different graphs, use - * a map<edge,T> instead. A graph and a copy of it are - * considered to be different. - * - * Most of the functionality of edge_map is inherited from - * @ref ne_map. - * - * @see node_map - */ -template > -class edge_map : public ne_map -{ -public: - - /** - * Constructs an empty edge_map not associated with any - * graph. You may (but need not) call - * ne_map::init(const graph &, T) to associate it to - * a graph. - */ - edge_map() : ne_map() {}; - - /** - * Constructs a edge_map associated to the graph - * g. - * The value associated to each edge in g is set to - * t. - */ - explicit edge_map(const graph &g, T t=T()) : - ne_map(g,t) {}; -}; - -__GTL_END_NAMESPACE - -#endif // GTL_EDGE_MAP_H - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/include/GTL/embedding.h b/src/Tracker/graph/GTL/include/GTL/embedding.h deleted file mode 100644 index 3b03a6ff6..000000000 --- a/src/Tracker/graph/GTL/include/GTL/embedding.h +++ /dev/null @@ -1,370 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// embedding.h -// -//========================================================================== -// $Id: embedding.h,v 1.20 2003/06/11 11:28:21 raitner Exp $ - -#ifndef __EMBEDDING__H -#define __EMBEDDING__H - -#include -#include -#include -#include - -__GTL_BEGIN_NAMESPACE - -/** - * @brief Ordered adjacency lists as a result of planarity testing. - * - * It is known that if a graph is planar the adjacency list of every node - * can be ordered in such a way that it reflects the order the adjacent - * edges will have in a planar drawing around the node. 
Although the tested - * graph might have been directed the planar embedding one gets will always - * correspond to the underlying undirected graph, i.e. an edge from @c n1 to - * @c n2 will occurr in both adjacency lists. - */ -class GTL_EXTERN planar_embedding -{ -public: - /** - * @internal - */ - typedef symlist adj_list; - - /** - * @internal - */ - typedef symlist::iterator iterator; -private: - /** - * @internal - * Creates an empty planar embedding not related to any graph. - * @note At the moment planar embedding are thought as an output of - * planarity testing, this why they can't be constructed from scratch. - */ - planar_embedding() : G(0) - { - } -public: - /** - * - * Make this object a copy of @p em. - * - * @param em planar embedding - */ - planar_embedding(const planar_embedding& em); - - /** - * - * Destructor. - */ - virtual ~planar_embedding() - { - } - - /** - * - * Assigns @p em to this object. All former information in this object - * will be deleted. - * - * @param em - * - * @return reference to this object - */ - planar_embedding& operator=(const planar_embedding& em); -private: - /** - * @internal - * Initializes adjacency lists. - * - * @param G graph - */ - void init(GTL::graph& G); - - /** - * @internal - * Turns adjacency list of node @p n. - * - * @param n node. - */ - void turn(GTL::node n); - - /** - * @internal - * Insert edge @p e at the end of adjacency list of @p n. - * - * @param n node - * @param e edge to be inserted - * - * @return iterator to position of insertion - */ - iterator push_back(GTL::node n, GTL::edge e); - - /** - * @internal - * Insert edge @p e at the beginning of adjacency list of @p n. - * - * @param n node - * @param e edge to be inserted - * - * @return iterator to position of insertion - */ - iterator push_front(GTL::node n, GTL::edge e); - - /** - * @internal - * Insert selfloop @p e. 
- * - * @param @p e selfloop - */ - void insert_selfloop (GTL::edge e); - - /** - * @internal - * Returns position of edge @p e in adjacency list of node @p n. - * - * @param n node - * @param e adjacent edge - * - * @return position of @p e - */ - iterator& pos (GTL::node, GTL::edge); -public: - /** - * - * Returns reference to ordered adjacency list of node @p n. - * - * @param n node - * - * @return ordered adjacency list - */ - adj_list& adjacency(GTL::node n) - { - return adj[n]; - } - - /** - * - * Returns reference to ordered adjacency list of node @p n. - * - * @param n node - * - * @return ordered adjacency list - */ - const adj_list& adjacency(GTL::node n) const - { - return adj[n]; - } - - /** - * - * Start iteration through adjacency list of node @p n. - * - * @param n node - * - * @return start iterator - */ - iterator adj_edges_begin(GTL::node n) - { - return adj[n].begin(); - } - - /** - * - * End of iteration through adjacency list of node @p n. - * - * @param @p n node - * - * @return one-past the end iterator - */ - iterator adj_edges_end(GTL::node n) - { - return adj[n].end(); - } - - /** - * - * Returns the cyclic successor of edge @p e in the adjacency list of - * node @p n. - * - * @param n node - * @param e edge adjacent to @p n - * - * @return edge following @p e in adjacency of @p n - */ - edge cyclic_next(GTL::node n, GTL::edge e); - - /** - * - * Returns the cyclic predecessor of edge @p e in the adjacency list of - * node @p n. - * - * @param n node - * @param e edge adjacent to @p n - * - * @return edge preceding @p e in adjacency of @p n - */ - edge cyclic_prev(GTL::node n, GTL::edge e); - - - /** - * - * Writes embedding with st-numbers as given by @p st to @p os. - * - * @param os output stream - * - * @param st st-numbers - */ - void write_st(std::ostream& os, st_number& st); - - /** - * - * Returns list of selfloops contained in the graph. These will not - * occur in the adjacency lists. 
- * - * @retval list of selfloops - */ - edges_t& selfloops() - { - return self; - } - - /** - * - * Returns list of selfloops contained in the graph. These will not - * occur in the adjacency lists. - * - * @retval list of selfloops - */ - const edges_t& selfloops() const - { - return self; - } - - /** - * - * Returns list of multiple edges contained in the graph. These are - * edges for which there is already another edge connecting the same - * endpoints is contained in the adjacency lists. Please note that the - * notion "connecting" is meant in an undirected sense. These edges will - * not occur it the adjacency lists. - * - * @retval list of multiple edges - */ - edges_t& multiple_edges() - { - return multi; - } - - /** - * - * Returns list of multiple edges contained in the graph. These are - * edges for which there is already another edge connecting the same - * endpoints is contained in the adjacency lists. Please note that the - * notion "connecting" is meant in an undirected sense. These edges will - * not occur it the adjacency lists. - * - * @retval list of multiple edges - */ - const edges_t& multiple_edges() const - { - return multi; - } - - /** - * - * Used for debugging only. Checks whether this is a correct planar - * embedding by checking the faces of the graph, i.e. at any node - * starting with an arbitrary adjacent edge and advancing along @c - * cyclic_next the start node must be met through the edge given by @c - * cyclic_prev of the edge we started with. - * - * @retval true iff embedding is correct - */ - bool check(); - - /** - * @internal - */ - friend class planarity; - - /** - * @internal - */ - friend class pq_tree; - - /** - * @internal - */ - GTL_EXTERN friend std::ostream& operator<< (std::ostream&, planar_embedding&); -private: - /** - * @internal - * Graph. - */ - graph* G; - - /** - * @internal - * Adjacency lists. - */ - node_map adj; - - /** - * @internal - * Positions of edges in its source's adjacency list. 
- */ - edge_map s_pos; - - /** - * @internal - * Positions of edges in its target's adjacency list. - */ - edge_map t_pos; - - /** - * @internal - * Selfloops. - */ - edges_t self; - - /** - * @internal - * Multiple edges. - */ - edges_t multi; -}; - - -// class face -// { -// public: -// face (planar_embedding& em, GTL::node n, GTL::edge e) : embed (em), -// start (n), first (e) { } -// virtual ~face () { } - -// private: -// planar_embedding& embed; -// node start; -// edge first; - -// friend class planar_embedding; -// }; - -// struct _face_iterator -// { - - -// face& _face; -// }; - -__GTL_END_NAMESPACE - -#endif - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/include/GTL/fm_partition.h b/src/Tracker/graph/GTL/include/GTL/fm_partition.h deleted file mode 100644 index fa5a54251..000000000 --- a/src/Tracker/graph/GTL/include/GTL/fm_partition.h +++ /dev/null @@ -1,751 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// fm_partition.h -// -//========================================================================== -// $Id: fm_partition.h,v 1.8 2003/01/31 08:15:05 chris Exp $ - -#ifndef GTL_FM_PARTITION_H -#define GTL_FM_PARTITION_H - -#include -#include -#include -#include -#include - -__GTL_BEGIN_NAMESPACE - - -/** - * @short Heuristic graph bi-partitioning algorithm (Fiduccia-Mattheyses). - * - * This class implements a heuristic graph bi-partitioning algorithm, based - * on iterative movement, proposed by C. M. Fiduccia and R. M. Mattheyses - * in 1982. - * - *

In the case E is the set of edges of the graph, the algorithm needs - * O(|E|) time to proceed. - * - * @see ratio_cut_partition - */ -class GTL_EXTERN fm_partition : public algorithm -{ -public: - /** - * Return type of @ref fm_partition#get_side_of_node. - * - * @see fm_partition#A - * @see fm_partition#B - */ - typedef int side_type; - - /** - * A means the node is on side A. - * - * @see fm_partition#side_type - */ - const static side_type A; - - /** - * B means the node is on side B. - * - * @see fm_partition#side_type - */ - const static side_type B; - - /** - * Fix type of each node (needed with @ref fm_partition#set_vars). - * - * @see fm_partition#FIXA - * @see fm_partition#FIXB - * @see fm_partition#UNFIXED - */ - typedef short int fix_type; - - /** - * FIXA means fix node on side A. - * - * @see fm_partition#set_vars - */ - const static fix_type FIXA; - - /** - * FIXB means fix node on side B. - * - * @see fm_partition#fixe_type - */ - const static fix_type FIXB; - - /** - * UNFIXED means node is free. - * - * @see fm_partition#fixe_type - */ - const static fix_type UNFIXED; - - /** - * Default constructor. - * - * @see fm_partition#fixe_type - */ - fm_partition(); - - /** - * Destructor. - * - * @see algorithm#~algorithm - */ - virtual ~fm_partition(); - - /** - * Sets variables. - * Must be executed before @ref fm_partition#check! - * - * @param G undirected graph - * @param node_weight weight of each node - * @param edge_weight weight of each edge - * @see fm_partition#check - */ - void set_vars(const graph& G, const node_map& node_weight, - const edge_map& edge_weight); - - /** - * Sets variables. - * Must be executed before @ref fm_partition#check! - * In order to get good results, init_side should - * almost be in balance. 
- * - * @param G undirected graph - * @param node_weight weight of each node - * @param edge_weight weight of each edge - * @param init_side initial bi-partitioning - * @see fm_partition#check - */ - void set_vars(const graph& G, const node_map& node_weight, - const edge_map& edge_weight, - const node_map& init_side); - - /** - * Sets variables. - * Must be executed before @ref fm_partition#check! - * - * @param G undirected graph - * @param node_weight weight of each node - * @param edge_weight weight of each edge - * @param fixed fixed nodes - * @see fm_partition#check - */ - void set_vars(const graph& G, const node_map& node_weight, - const edge_map& edge_weight, - const node_map& fixed); - - /** - * Sets variables. - * Must be executed before @ref fm_partition#check! - * In order to get good results, init_side should - * almost be in balance. Fixed nodes are on their fix side, their - * initial side is overwritten then. - * - * @param G undirected graph - * @param node_weight weight of each node - * @param edge_weight weight of each edge - * @param init_side initial bi-partitioning - * @param fixed fixed nodes - * @see fm_partition#check - */ - void set_vars(const graph& G, const node_map& node_weight, - const edge_map& edge_weight, - const node_map& init_side, - const node_map& fixed); - - /** - * Enables the storing of cut-edges. If enabled the list of - * cut-edges can be traversed using @ref - * fm_partition#cut_edges_iterator. - * - * @param set if true cut_edges will be stored - * @see fm_partition#cut_edges_begin - * @see fm_partition#cut_edges_end - */ - void store_cut_edges(const bool set); - - /** - * Enables the storing of nodes on their side. If enabled the nodes - * of each side can be traversed using @ref - * fm_partition#nodes_on_one_side_iterator. 
- * - * @param set if true nodes will be stored on their sides - * @see fm_partition#nodes_of_sideA_begin - * @see fm_partition#nodes_of_sideA_end - * @see fm_partition#nodes_of_sideB_begin - * @see fm_partition#nodes_of_sideB_end - */ - void store_nodesAB(const bool set); - - /** - * Checks whether following preconditions are satisfied: - *

    - *
  • @ref fm_partition#set_vars has been executed before. - *
  • graph G is undirected. - *
  • only node_weights >= 0 are applied. - *
  • only edge_weights >= 0 are applied. - *
- * - * @param G graph - * @return algorithm::GTL_OK on success, - * algorithm::GTL_ERROR otherwise - * @see fm_partition#set_vars - * @see algorithm#check - */ - virtual int check(GTL::graph& G); - - /** - * Computes a partitioning with G, that means a - * division of its vertices in two sides fm_partition::A - * and fm_partition::B. - * - * @param G graph - * @return algorithm::GTL_OK on success, - * algorithm::GTL_ERROR otherwise - * @see algorithm#run - */ - int run(GTL::graph& G); - - /** - * Gets the size of the cut after bi-partitioning. - * - * @return cutsize - */ - int get_cutsize(); - - /** - * Gets the number of passes needed to create a bi-partition with - * this heuristic. - * - * @return number of passes - */ - int get_needed_passes(); - - /** - * Gets side of the node after bi-partitioning. - * - * @param n node of graph @c G - * @return fm_partition::A if n lies on - * side A, fm_partition::B otherwise - */ - side_type get_side_of_node(const node& n) const; - - /** - * Gets side of the node after bi-partitioning. - * - * @param n node of graph @c G - * @return fm_partition::A if n lies on - * side A, fm_partition::B otherwise - * @see fm_partition#get_side_of_node - */ - side_type operator [](const node& n) const; - - /** - * Gets the sum of all node weights from nodes on side A. - * - * @param G graph - * @return node_weight_on_sideA - */ - int get_weight_on_sideA(const graph& G) const; - - /** - * Gets the sum of all node weights from nodes on side B. - * - * @param G graph - * @return node_weight_on_sideB - */ - int get_weight_on_sideB(const graph& G) const; - - /** - * Iterator type for edges which belong to the cut. - */ - typedef edges_t::const_iterator cut_edges_iterator; - - /** - * Iterate through all edges which belong to the cut, that means - * all edges with end-nodes on different sides. - * It is only valid if enabled with @ref - * fm_partition#store_cut_edges before. 
- * - * @return start for iteration through all cut edges - */ - cut_edges_iterator cut_edges_begin() const; - - /** - * End-Iterator for iteration through all edges which belong to the - * cut. - * It is only valid if enabled with @ref - * fm_partition#store_cut_edges before. - * - * @return end for iteration through all cut-edges - */ - cut_edges_iterator cut_edges_end() const; - - /** - * Iterator type of nodes of a side. - */ - typedef nodes_t::const_iterator nodes_of_one_side_iterator; - - /** - * Iterate through all nodes which belong to side A. - * It is only valid if enabled with @ref - * fm_partition#store_nodesAB before. - * - * @return start for iteration through all nodes on A - */ - nodes_of_one_side_iterator nodes_of_sideA_begin() const; - - /** - * End-Iterator for iteration through all nodes which belong to side - * A. - * It is only valid if enabled with @ref - * fm_partition#store_nodesAB before. - * - * @return end for iteration through all nodes on A - */ - nodes_of_one_side_iterator nodes_of_sideA_end() const; - - /** - * Iterate through all nodes which belong to side B, - * It is only valid if enabled with @ref - * fm_partition#store_nodesAB before. - * - * @return start for iteration through all nodes on B - */ - nodes_of_one_side_iterator nodes_of_sideB_begin() const; - - /** - * End-Iterator for iteration through all nodes which belong to side - * B, - * It is only valid if enabled with @ref - * fm_partition#store_nodesAB before. - * - * @return end for iteration through all nodes on B - */ - nodes_of_one_side_iterator nodes_of_sideB_end() const; - - /** - * Resets fm_partition, i.e. prepares the algorithm to be applied - * to another graph. - * - * @see algorithm#reset - */ - virtual void reset(); -protected: - /** - * @internal - * true, iff user enabled storing of cut-edges with - * @ref fm_partition#store_cut_edges. - */ - bool enable_cut_edges_storing; - - /** - * @internal - * List of edges which belong to the cut. 
- */ - edges_t cut_edges; - - /** - * @internal - * true, iff user enabled storing of nodes with - * @ref fm_partition#store_nodesAB. - */ - bool enable_nodesAB_storing; - - /** - * @internal - * List of nodes which belong to side A. - */ - nodes_t nodesA; - - /** - * @internal - * List of nodes which belong to side A. - */ - nodes_t nodesB; - - /** - * @internal - * true, iff user has executed @ref fm_partition# - * set_vars before @ref fm_partition#check and @ref fm_partition# - * run. - */ - bool set_vars_executed; - - /** - * @internal - * true, iff user has provided init_side - * with @ref fm_partition#set_vars, false otherwise. - */ - bool provided_initial_part; - - /** - * @internal - * true, iff user has provided fixed with - * @ref fm_partition#set_vars, false otherwise. - */ - bool provided_fix; - - /** - * @internal - * Contains information where a node is fixed. - */ - node_map fixed; - - /** - * @internal - * Contains the weight of each node. - * Corresponds to w(v) in [Leng90]. - */ - node_map node_weight; - - /** - * @internal - * Contains the maximum weight of a node in G. - * (maximum of node_weight[...]) - */ - int max_node_weight; - - /** - * @internal - * Contains the weight of each edge. - * Corresponds to c(e) in [Leng90]. - */ - edge_map edge_weight; - - /** - * @internal - * Contains the maximum weight of an edge in G. - * (maximum of edge_weight[...]) - */ - int max_edge_weight; - - /** - * @internal - * Contains the sum over all vertex weights in G. - * Corresponds to w(V) in [Leng90]. - */ - int total_node_weight; - - /** - * @internal - * Contains the sum over all vertex weights on side A. - * Corresponds to w(A) in [Leng90]. - */ - int node_weight_on_sideA; - - /** - * @internal - * Contains the sum over all vertex weights on side B. - * Corresponds to w(B) in [Leng90]. - */ - int node_weight_on_sideB; - - /** - * @internal - * Contains information about the current side of a node. 
- */ - node_map side; - - /** - * @internal - * Corresponds to CELL array in [FidMat82] - */ - node_map position_in_bucket; - - /** - * @internal - * Contains the maximal number of adjacent to a node. - */ - int max_vertex_degree; - - /** - * @internal - * Contains how many nodes an edge has on side A. - */ - edge_map aside; - - /** - * @internal - * Contains how many nodes an edge has on side B. - */ - edge_map bside; - - /** - * @internal - * Contains the unlocked nodes of an edge on side A. - * (max. 2) - */ - edge_map unlockedA; - - /** - * @internal - * Contains the unlocked nodes of an edge on side B. - * (max. 2) - */ - edge_map unlockedB; - - /** - * @internal - * Corresponds to D value in Leng[90]. - */ - node_map gain_value; - - /** - * @internal - * true, iff bucketA is empty. - */ - bool bucketA_empty; - - /** - * @internal - * true, iff bucketB is empty. - */ - bool bucketB_empty; - - /** - * @internal - * Contains the maximum gain value of a node in - * bucketA. - */ - int max_gainA; - - /** - * @internal - * Contains the maximum gain value of a node in - * bucketB. - */ - int max_gainB; - - /** - * @internal - * Like a hash table over the gain_value of each node - * on side A. (open hashing, collisions in gain buckets - * are organized through LIFO lists) - */ - std::vector bucketA; - - /** - * @internal - * Like a hash table over the gain_value of each node - * on side B. (open hashing, collisions in gain buckets - * are organized through LIFO lists) - */ - std::vector bucketB; - - /** - * @internal - * Sum over all edge_costs[e] where edge e is an - * element of the cut. - */ - int cur_cutsize; - - /** - * @internal - * Number of needed passes. - */ - int no_passes; - - /** - * @internal - * Fix FIXA nodes on side A and FIXB - * nodes on side B. - */ - void divide_up(const graph& G); - - /** - * @internal - * Hides self loops of G. 
- */ - void hide_self_loops(GTL::graph& G); - - /** - * @internal - * Computes max_edge_weight, max_node_weight - * and total_node_weight. - */ - void init_variables(const graph& G); - - /** - * @internal - * Divides nodes of G arbitrary into two sides A - * and B. Here, side will be - * filled with an arbitrary feasible solution. - */ - void create_initial_bipart(const graph& G); - - /** - * @internal - * Shuffles order of node_vector with size - * vector_size. - */ - void shuffle_vector(const int vector_size, - std::vector& node_vector); - - /** - * @internal - * Computes max_vertex_degree. - */ - void compute_max_vertex_degree(const graph& G); - - /** - * @internal - * Runs as much passes as needed. - */ - void pass_manager(const graph& G); - - /** - * @internal - * Copies side node maps. - */ - void copy_side_node_map(const graph& G, GTL::node_map& dest, - const node_map source) const; - - /** - * @internal - * Initialization of the data structure for each pass. - */ - void init_data_structure(const graph& G); - - /** - * @internal - * Computes initial gain_value for each node and inserts it in the - * corresponding bucket data structure. - */ - void init_filling_buckets(const graph& G); - - /** - * @internal - * Compute initial gain of a node on side A. - * @return initial gain_value of a node on side A - */ - int inital_gain_of_node_on_sideA(const node cur_node); - - /** - * @internal - * Compute initial gain of a node on side B. - * @return initial gain_value of a node on side B - */ - int inital_gain_of_node_on_sideB(const node cur_node); - - /** - * @internal - * Moves nodes within a pass. - */ - void move_manager(const graph& G); - - /** - * @internal - * Move a single node - * @return true if vertex stored in parameter - * moved_node has been found - */ - bool move_vertex(const graph& G, GTL::node& moved_node); - - /** - * @internal - * Only valid on unlocked nodes! 
- * @return true if a certain balance criterion can be - * hold, false otherwise - */ - bool balance_holds(const graph& G, const node cur_node); - - /** - * @internal - * Executed, if cur_node is chosen to move from side - * A to B. - */ - void update_data_structure_A2B(const node cur_node); - - /** - * @internal - * Executed, if cur_node is chosen to move from side - * B to A. - */ - void update_data_structure_B2A(const node cur_node); - - /** - * @internal - * Reorganizes bucketA if a nodes gain of it has been - * changed. - */ - void update_bucketA(const node cur_node, const int old_gain, - const int new_gain); - - /** - * @internal - * Reorganizes bucketB if a nodes gain of it has been - * changed. - */ - void update_bucketB(const node cur_node, const int old_gain, - const int new_gain); - - /** - * @internal - * Recomputes max_gainA or max_gainB - * respectively. - */ - void update_max_gain(const side_type side); - - /** - * @internal - * Transform a range from [-a..+a] to [0..2a]. - * (reverse to @ref fm_partition#range_up) - */ - inline int range_up(const int gain_value) const; - - /** - * @internal - * Transform a range from [0..2a] to [-a..+a]. - * (reverse to @ref fm_partition#range_down) - */ - inline int range_down(const int index) const; - - /** - * @internal - * Do some garbage collection. - */ - void clean_pass(const graph& G); - - /** - * @internal - * Computes list cut_edges. - */ - void compute_cut_edges(const graph& G); - - /** - * @internal - * Computes lists nodesA and nodesB. - */ - void compute_nodesAB(const graph& G); -private: -#ifdef _DEBUG - /** - * @internal - * Prints content of bucketA with associated gain values. - */ - void print_bucketA(); - - /** - * @internal - * Prints content of bucketB with associated gain values. 
- */ - void print_bucketB(); -#endif // _DEBUG -}; - - -__GTL_END_NAMESPACE - -#endif // GTL_FM_PARTITION_H - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/include/GTL/gml_parser.h b/src/Tracker/graph/GTL/include/GTL/gml_parser.h deleted file mode 100644 index 029e0e102..000000000 --- a/src/Tracker/graph/GTL/include/GTL/gml_parser.h +++ /dev/null @@ -1,80 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// gml_parser.h -// -//========================================================================== -// $Id: gml_parser.h,v 1.7 2000/01/05 16:32:36 raitner Exp $ - -#ifndef GTL_GML_PARSER_H -#define GTL_GML_PARSER_H - -#include -#include - -__GTL_BEGIN_NAMESPACE - -/** - * @internal - */ -union GTL_EXTERN GML_pair_val { - long integer; - double floating; - char* str; - struct GML_pair* list; -}; - -/** - * @internal - */ -struct GTL_EXTERN GML_pair { - char* key; - GML_value kind; - union GML_pair_val value; - struct GML_pair* next; -}; - -/** - * @internal - */ -struct GTL_EXTERN GML_list_elem { - char* key; - struct GML_list_elem* next; -}; - -/** - * @internal - */ -struct GTL_EXTERN GML_stat { - struct GML_error err; - struct GML_list_elem* key_list; -}; - -/* - * returns list of KEY - VALUE pairs. Errors and a pointer to a list - * of key-names are returned in GML_stat. Previous information contained - * in GML_stat, i.e. the key_list, will be *lost*. 
- */ - -GTL_EXTERN GML_pair* GML_parser (FILE*, GML_stat*, int); - -/* - * free memory used in a list of GML_pair - */ - -GTL_EXTERN void GML_free_list (GML_pair*, GML_list_elem*); - - -/* - * debugging - */ - -GTL_EXTERN void GML_print_list (GML_pair*, int); - -__GTL_END_NAMESPACE - -#endif // GTL_GML_PARSER_H - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/include/GTL/gml_scanner.h b/src/Tracker/graph/GTL/include/GTL/gml_scanner.h deleted file mode 100644 index 92c0c93d4..000000000 --- a/src/Tracker/graph/GTL/include/GTL/gml_scanner.h +++ /dev/null @@ -1,138 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// gml_scanner.h -// -//========================================================================== -// $Id: gml_scanner.h,v 1.11 2000/03/06 15:16:52 raitner Exp $ - -#ifndef GTL_GML_SCANNER_H -#define GTL_GML_SCANNER_H - -#include - -#include - -__GTL_BEGIN_NAMESPACE - -/* - * start-size of buffers for reading strings. If too small it will be enlarged - * dynamically - */ - -#define INITIAL_SIZE 1024 - -GTL_EXTERN typedef enum { - GML_KEY, GML_INT, GML_DOUBLE, GML_STRING, GML_L_BRACKET, - GML_R_BRACKET, GML_END, GML_LIST, GML_ERROR -} GML_value; - - -/** - * Possible errors while parsing a GML file. - */ -GTL_EXTERN typedef enum { - GML_UNEXPECTED, GML_SYNTAX, GML_PREMATURE_EOF, GML_TOO_MANY_DIGITS, - GML_OPEN_BRACKET, GML_TOO_MANY_BRACKETS, GML_OK, GML_FILE_NOT_FOUND -} GML_error_value; - - -/** - * @short Reason and position of an error in a GML file. - * - * When an error occurs while parsing the structure of a GML file - * GML_error is used to return the type and position - * of the error detected. Position is specified by - * line and column, but might be - * somewhat imprecise. 
However at least the line number should - * not differ too much from the real position. - * - * @see graph#load - */ -struct GTL_EXTERN GML_error { - /** - * Contains the error description as symbolic constant: - *
    - *
  • GML_FILE_NOT_FOUND: A file with that name - * doesn't exist.
  • - *
  • GML_OK: No error :-)
  • - *
  • GML_TOO_MANY_BRACKETS: A mismatch of - * brackets was detected, i.e. there were too many closing - * brackets (]).
  • - *
  • GML_OPEN_BRACKET: Now, there were too many - * opening brackets ([)
  • - *
  • GML_TOO_MANY_DIGITS: The number of digits a - * integer or floating point value can have is limited to - * 1024, this should be enough :-)
  • - *
  • GML_PREMATURE_EOF: An EOF occured, where it - * wasn't expected, e.g. while scanning a string.
  • - *
  • GML_SYNTAX: The file isn't a valid GML file, - * e.g. a mismatch in the key-value pairs.
  • - *
  • GML_UNEXPECTED: A character occured, where - * it makes no sense, e.g. non-numerical characters in - * numbers or keys beginning with numbers
  • - *
- * - */ - GML_error_value err_num; - - /** - * Contains the line, where the error was detected. This will - * usually be near the line where the error really is - * located. - */ - int line; - - /** - * Contains the column, where the error was detected. - */ - int column; -}; - - -union GTL_EXTERN GML_tok_val { - long integer; - double floating; - char* str; - struct GML_error err; -}; - -/** - * @internal - */ -struct GTL_EXTERN GML_token { - GML_value kind; - union GML_tok_val value; -}; - -/* - * global variables - */ - -GTL_EXTERN extern unsigned int GML_line; -GTL_EXTERN extern unsigned int GML_column; - -/* - * if you are interested in the position where an error occured it is a good - * idea to set GML_line and GML_column back. - * This is what GML_init does. - */ - -GTL_EXTERN void GML_init (); - -/* - * returns the next token in file. If an error occured it will be stored in - * GML_token. - */ - -GTL_EXTERN struct GML_token GML_scanner (FILE*); - -GTL_EXTERN extern const char* GML_table[]; - -__GTL_END_NAMESPACE - -#endif // GTL_GML_SCANNER_H - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/include/GTL/graph.h b/src/Tracker/graph/GTL/include/GTL/graph.h deleted file mode 100644 index a30ee1d91..000000000 --- a/src/Tracker/graph/GTL/include/GTL/graph.h +++ /dev/null @@ -1,818 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// graph.h -// -//========================================================================== -// $Id: graph.h,v 1.43 2002/11/06 08:49:35 raitner Exp $ - -#ifndef GTL_GRAPH_H -#define GTL_GRAPH_H - -#include -#include -#include -#include -#include -#include - -#include -#include - -__GTL_BEGIN_NAMESPACE - -/** - * $Date: 2002/11/06 08:49:35 $ - * $Revision: 
1.43 $ - * - * @brief A directed or undirected graph. - * - * A graph G=(V,E) consists of a set of nodes - * V and a set of edges E , where every - * edge can be viewed as a (ordered) pair of nodes (u,v) - * connecting source u with target v . - * Obviously this implies a direction on the edges, which is why we - * call these graphs directed (this is the default). A graph can be made - * undirected by just ignoring the (implicit) direction. - * - * @see node - * @see edge - */ - -class GTL_EXTERN graph -{ -public: - //================================================== Con-/Destructors - - /** - * Generates an empty graph, i.e. without any nodes and any edges. - */ - graph(); - - /** - * Copy constructor. Please note: This will generate an - * isomorpic copy of G. Although this graph will look - * like G it is not physically the same. - * Especially it consists of nodes and edges, which of course have - * counterparts in G, but are different. This means - * that the nodes (edges) in the copy have undefined behaviour if - * used within a @ref node_map (@ref edge_map ) of the original graph. - * - * @param G graph - */ - graph (const graph& G); - - /** - * Makes new graph isomorphic to the subgraph induced by nodes. - * The same restriction as for the ordinary copy constructor applies to - * this one. - * - * @param G graph - * @param nodes nodes of G, which form - * the induced subgraph this graph will be isomorphic to. - */ - graph(const graph& G, const nodes_t& nodes); - - /** - * Makes new graph isomorphic to the subgraph induced by the nodes - * in the range from it to end - * The same restriction as for the ordinary copy constructor applies to - * this one. - * - * @param G graph - * @param it beginning of nodes - * @param end end of nodes - */ - graph (const graph& G, - nodes_t::const_iterator it, - nodes_t::const_iterator end); - - /** - * Destructor. Deletes all nodes and edges. 
- */ - virtual ~graph(); - - //================================================== Directed/Undirected - - /** - * Makes graph directed. - */ - void make_directed(); - - /** - * Makes graph undirected. - */ - void make_undirected(); - - //================================================== Tests / Information - - /** - * Test whether the graph is directed. - * - * @return true iff the graph is directed. - */ - bool is_directed() const; - - /** - * Test whether the graph is undirected. - * - * @return true iff the graph is undirected - */ - bool is_undirected() const; - - /** - * Checks if for all edges (v, w) the reverse edge - * (w,v) is present, too. Additionally the reverse of some - * edge e will be stored as rev[e]. If there - * is no reverse edge of e rev[e] will be the - * invalid edge edge(). - * - * @param rev map associating every edge with its - * reverse edge. - * @return true iff every edge has a reverse edge. - */ - bool is_bidirected(GTL::edge_map& rev) const; - - /** - * Test whether the graph is connected - * - * @return true iff the graph is connected - * @see dfs - * @see bfs - */ - bool is_connected() const; - - /** - * Test whether the graph is acyclic - * - * @return true iff the graph contains no cycles - * @see topsort - */ - bool is_acyclic() const; - - /** - * Returns the number of nodes in the graph. - * - * @return number of nodes - */ - int number_of_nodes() const; - - /** - * Returns the number of (visible) edges in the graph - * - * @return number of edges - */ - int number_of_edges() const; - - /** - * Returns a center of the graph which is defined as a node with - * maximum excentricity. - * - * @return one node of the graph center - */ - node center() const; - - //================================================== Creation - - /** - * Adds a new node. - * - * @return new node. - */ - virtual node new_node(); - - /** - * Adds new edge from s to - * t. - * - *

- * Precondition: s,t are valid nodes in this graph. - * - * @param s source of new edge - * @param t target of new edge - * @return new edge. - */ - virtual edge new_edge(GTL::node s, GTL::node t); - - /** - * @internal - */ - virtual edge new_edge(const nodes_t &sources, const nodes_t &targets); - - //================================================== Deletion - - /** - * Deletes node n, and thus all edges incident with - * n. - * - *

- * Precondition: n is a valid visible node - * in this graph - * - * @param n visible node to be deleted - */ - void del_node(GTL::node n); - - /** - * @deprecated - * Deletes all visible nodes, i.e. the hidden ones stay. - */ - void del_all_nodes(); - - /** - * Deletes edge e. - * - *

- * Precondition: e is a valid visible edge - * in this graph. - * - * @param e edge to be deleted - */ - void del_edge(GTL::edge e); - - /** - * @deprecated - * Deletes all visible edges, i.e. the hidden ones stay. - */ - void del_all_edges(); - - /** - * Deletes all nodes and edges, even the hidden ones - */ - void clear(); - - //================================================== Iterators - - /** - * @internal - */ - typedef nodes_t::const_iterator node_iterator; - /** - * @internal - */ - typedef edges_t::const_iterator edge_iterator; - - /** - * Iterate through all nodes in the graph. - * - * @return start for iteration through all nodes in the graph. - */ - node_iterator nodes_begin() const; - - /** - * Iterate through all nodes in the graph. - * - * @return end for iteration through all nodes in the graph. - */ - node_iterator nodes_end() const; - - /** - * Iterate through all edges in the graph. - * - * @return start for iteration through all edges in the graph. - */ - edge_iterator edges_begin() const; - - /** - * Iterate through all edges in the graph. - * - * @return end for iteration through all edges in the graph. - */ - edge_iterator edges_end() const; - - //================================================== get nodes/edges - - /** - * @deprecated - * @return a list of all nodes of the graph - */ - nodes_t all_nodes() const; - - /** - * @deprecated - * @return a list of all edges of the graph - */ - edges_t all_edges() const; - - /** - * @deprecated - */ - node choose_node () const; - - //================================================== Hide / Restore - - /** - * Hides an edge. - * - *

- * Precondition: e is a valid edge in this graph - * - * @param e edge to be hidden - */ - void hide_edge (GTL::edge e); - - /** - * Restores a hidden edge - * - *

- * Precondition: e is a valid edge in this graph - * - * @param e hidden edge - */ - void restore_edge (GTL::edge e); - - /** - * Hides a node. Please note: all the edges incident with - * n will be hidden, too. All these edges are returned - * in a list. - * - *

- * Precondition: n is a valid node in this graph - * - * @param e node to be hidden - * @return list of implicitly hidden, incident edges - */ - edges_t hide_node(GTL::node n); - - /** - * Restores a hidden node. This only restores the node itself. It - * doesn't restore the incident edges, i.e. you will have to restore - * all the edges you get returned when calling @ref graph#hide_node - * yourself. - * - *

- * Precondition: n is a valid node in this graph - * @param n hidden node - */ - void restore_node (GTL::node n); - - /** - * Hides all nodes not contained in subgraph_nodes, i.e. - * (the visible part of) the graph is the induced subgraph with - * respect to the nodes in subgraph_nodes. It is allowed - * to apply this function recursively, i.e. one may call - * induced_subgraph on a graph that is already a induced - * subgraph. - * - * @param subgraph_nodes nodes of subgraph. - * @see graph#restore_graph - */ - void induced_subgraph(nodes_t& subgraph_nodes); - - /** - * Restores all hidden nodes and edges - * This means that, although the nodes - * and edges got hidden at different times, they will be restored all - * together. - * - * @see graph#induced_subgraph - * @see graph#hide_edge - * @see graph#hide_node - */ - void restore_graph (); - - //================================================== Others - - /** - * @deprecated - * inserts for all edges of the graph a reverse edge - * NOTE: this functions does NOT care about existing reverse edges - */ - edges_t insert_reverse_edges(); - - //================================================== I/O - - /** - * Load graph from a file in GML-format. The optional - * parameter preserve_ids controls whether to - * give the nodes the same ids as in the GML file. You can enable this - * for debugging but you should disable it for final releases since - * it may make node_map unecessarily large. - * - * @param filename file in GML-format. - * @param preserve_ids if true all the nodes - * will get the same id as in the GML file. If false (default) - * the nodes will be numbered consecutively beginning with 0. However - * the order of the nodes in the GML file will be preserved. - * @return detailed error description (hopefully GML_OK). For details - * see @ref GML_error#err_num. 
- */ - - GML_error load(const std::string& filename, bool preserve_ids = false) - { return load (filename.c_str(), preserve_ids); } - - - /** - * Load graph from a file in GML-format. The optional - * parameter preserve_ids controls whether to - * give the nodes the same ids as in the GML file. You can enable this - * for debugging but you should disable it for final releases since - * it may make node_map unecessarily large. - * - * @param filename file in GML-format. - * @param preserve_ids if true all the nodes - * will get the same id as in the GML file. If false (default) - * the nodes will be numbered consecutively beginning with 0. However - * the order of the nodes in the GML file will be preserved. - * @return detailed error description (hopefully GML_OK). For details - * see @ref GML_error#err_num. - */ - - GML_error load (const char* filename, bool preserve_ids = false); - - /** - * Save graph to file filename in GML-format, i.e. - * graph [ node [ id # ] ... edge [ source # target #] ... ] - * - * @param filename - * @return 0 on error 1 otherwise - */ - - int save (const char* filename) const; - - /** - * Saves graph to stream file in GML-format. - * - * @param file output stream defaults to cout. 
- */ - - void save(std::ostream* file = &std::cout) const; - - //================================================== Node handlers - - /** - * Virtual function called before a new node is created; - * can be redefined in a derived class for customization - * - * @see graph#new_node - */ - virtual void pre_new_node_handler() {} - - /** - * Virtual function called after a new node was created; - * can be redefined in a derived class for customization - * - * @param n created node - * @see graph#new_node - */ - virtual void post_new_node_handler(GTL::node /*n*/) {} - - /** - * Virtual function called before a node is deleted; - * can be redefined in a derived class for customization - * - * @param n node deleted afterwards - * @see graph#del_node - */ - virtual void pre_del_node_handler(GTL::node /*n*/) {} - - /** - * Virtual function called after a node was deleted; - * can be redefined in a derived class for customization - * - * @see graph#del_node - */ - virtual void post_del_node_handler() {} - - /** - * Virtual function called before a node gets hidden; - * can be redefined in a derived class for customization - * - * @param n node to be hidden - * @see graph#hide_node - */ - virtual void pre_hide_node_handler(GTL::node /*n*/) {} - - /** - * Virtual function called after a node got hidden; - * can be redefined in a derived class for customization - * - * @param n hidden node - * @see graph#hide_node - */ - virtual void post_hide_node_handler(GTL::node /*n*/) {} - - /** - * Virtual function called before a node is restored; - * can be redefined in a derived class for customization - * - * @param n node to be restored - * @see graph#restore_node - */ - virtual void pre_restore_node_handler(GTL::node /*n*/) {} - - /** - * Virtual function called after a node was restored; - * can be redefined in a derived class for customization - * - * @param n restored node - * @see graph#restore_node - */ - virtual void post_restore_node_handler(GTL::node /*n*/) {} - - 
//================================================== Edge handlers - - /** - * Virtual function called before a new edge is inserted; - * can be redefined in a derived class for customization - * - * @param s source of edge created afterwards - * @param t target of edge created afterwards - * @see graph#new_edge - */ - virtual void pre_new_edge_handler(GTL::node /*s*/, GTL::node /*t*/) {} - - /** - * Virtual function called after a new edge was inserted; - * can be redefined in a derived class for customization - * - * @param e created edge - * @see graph#new_edge - */ - virtual void post_new_edge_handler(GTL::edge /*e*/) {} - - /** - * Virtual function called before a edge is deleted; - * can be redefined in a derived class for customization - * - * @param e edge to be deleted - * @see graph#del_edge - */ - virtual void pre_del_edge_handler(GTL::edge /*e*/) {} - - /** - * Virtual function called after a edge was deleted; - * can be redefined in a derived class for customization - * - * @param s source of edge deleted - * @param t target of edge deleted - * @see graph#del_edge - */ - virtual void post_del_edge_handler(GTL::node, GTL::node) {} - - /** - * Virtual function called before a edge gets hidden; - * can be redefined in a derived class for customization - * - * @param e edge to be hidden - * @see graph#hide_edge - */ - virtual void pre_hide_edge_handler(GTL::edge /*e*/) {} - - /** - * Virtual function called after a edge got hidden; - * can be redefined in a derived class for customization - * - * @param e hidden edge - * @see graph#hide_edge - */ - virtual void post_hide_edge_handler(GTL::edge /*e*/) {} - - /** - * Virtual function called before a edge is restored; - * can be redefined in a derived class for customization - * - * @param e edge to be restored - * @see graph#restore_edge - */ - virtual void pre_restore_edge_handler(GTL::edge /*e*/) {} - - /** - * Virtual function called after a edge was restored; - * can be redefined in a derived class for 
customization - * - * @param e restored edge - * @see graph#restore_edge - */ - virtual void post_restore_edge_handler(GTL::edge /*e*/) {} - - //================================================== Global handlers - - /** - * Virtual function called before performing clear; - * can be redefined in a derived class for customization. - * Please note: Although nodes and edges are deleted - * during @ref graph#clear this is not achieved by calling - * @ref graph#del_node and @ref graph#del_edge, which is why - * the correspondig handler will not be called. - * - * @see graph#clear - */ - virtual void pre_clear_handler() {} - - /** - * Virtual function called after the graph was cleared; - * can be redefined in a derived class for customization - * Please note: Although nodes and edges are deleted - * during @ref graph#clear this is not achieved by calling - * @ref graph#del_node and @ref graph#del_edge, which is why - * the correspondig handler will not be called. - * - * @see graph#clear - */ - virtual void post_clear_handler() {} - - /** - * Virtual function called before performing make_directed - * (only if graph was undirected) - * can be redefined in a derived class for customization - * - * @see graph#make_directed - */ - virtual void pre_make_directed_handler() {} - - /** - * Virtual function called after performing make_directed; - * (only if graph was undirected) - * can be redefined in a derived class for customization - * - * @see graph#make_directed - */ - virtual void post_make_directed_handler() {} - - /** - * Virtual function called before performing make_undirected; - * (only if graph was directed) - * can be redefined in a derived class for customization - * - * @see graph#make_undirected - */ - virtual void pre_make_undirected_handler() {} - - /** - * Virtual function called after performing make_undirected; - * (only if graph was directed) - * can be redefined in a derived class for customization - * - * @see graph#make_undirected - */ - virtual void 
post_make_undirected_handler() {} - - - //================================================== I/O - Handler - - /** - * Called before writing the graph key to os. This can be - * used to write top-level keys that should appear before the graph in - * the file. - * - * @param os output stream. - * @see graph#save - */ - virtual void pre_graph_save_handler(std::ostream* /*os*/) const { }; - - /** - * Called before the closing bracket of the list belonging to the - * graph key is written. This can be used to write information that - * belong to the graph, and thus should appear within the list - * associated with the graph key. - * - * @param os output stream. - * @see graph#save - */ - virtual void save_graph_info_handler(std::ostream*) const { }; - - /** - * Called before the closing bracket of the list belonging to the key - * of node n is written. This can be used to write - * information belonging to the node n and thus should - * appear within the list associated with this node. - * - * @param os output stream. - * @see graph#save - */ - virtual void save_node_info_handler(std::ostream*, GTL::node) const { }; - - /** - * Called before the closing bracket of the list belonging to the key - * of edge e is written. This can be used to write - * information belonging to the edge e and thus should - * appear within the list associated with this edge. - * - * @param os output stream. - * @see graph#save - */ - virtual void save_edge_info_handler(std::ostream*, GTL::edge) const { }; - - /** - * Called after writing the graph key to os. This can be - * used to write top-level keys that should appear after the graph in - * the file. - * - * @param os output stream. - * @see graph#save - */ - virtual void after_graph_save_handler(std::ostream*) const { }; - - /** - * Called after the graph is completely built. The topmost list - * of key-value-pairs is passed to this handler. NB: This list - * also contains the graph key, which was used to build the graph. 
- * - * @param list pointer to the list of key-value pairs at - * top level - * @see graph#load - */ - virtual void top_level_key_handler (GML_pair* list); - - /** - * Called after a node is created. The whole list of key-value-pairs - * belonging to this node is passed to this handler together with the - * node itself. - * - * @param n node parsed - * @param list pointer to the list of key-value-pairs of - * this node. - * @see graph#load - */ - virtual void load_node_info_handler (GTL::node n, GML_pair* list ); - - /** - * Called after an edge is created. The whole list of key-value-pairs - * belonging to this edge is passed to this handler together with the - * edge itself. - * - * @param e edge parsed - * @param list pointer to the list of key-value-pairs of - * this edge. - * @see graph#load - */ - virtual void load_edge_info_handler (GTL::edge e, GML_pair* list); - - /** - * Called after the graph is completely built. The whole list for - * the graph key used to build this graph is passed to this handler. - * - * @param list pointer to the list of key-value-pairs of - * the graph. 
- * @see graph#load - */ - virtual void load_graph_info_handler (GML_pair* list); - -private: - - //================================================== Flags - - mutable bool directed; - - //================================================== Visible Nodes/Edges - - nodes_t nodes; - edges_t edges; - int nodes_count, edges_count; - - //================================================== Hidden Nodes/Edges - - nodes_t hidden_nodes; - edges_t hidden_edges; - int hidden_nodes_count, hidden_edges_count; - - //================================================== Node/edge numbering - - int new_node_id(); - int new_edge_id(); - - //================================================== Copy - - void copy (const graph& G, - nodes_t::const_iterator it, - nodes_t::const_iterator end); - -public: // needs to be public, because template friends are not possible - /** - * @internal - */ - int number_of_ids(GTL::node) const; - - /** - * @internal - */ - int number_of_ids(GTL::edge) const; - -private: - std::list free_node_ids; - std::list free_edge_ids; - int free_node_ids_count, free_edge_ids_count; - - //================================================== utilities - - void del_list(nodes_t &); - void del_list(edges_t &); - - GTL_EXTERN friend std::ostream& operator<< (std::ostream& os, const graph& G); -}; - -__GTL_END_NAMESPACE - -//-------------------------------------------------------------------------- -// Iteration -//-------------------------------------------------------------------------- - -#define forall_nodes(v,g) GTL_FORALL(v,g,GTL::graph::node_iterator,nodes_) -#define forall_edges(v,g) GTL_FORALL(v,g,GTL::graph::edge_iterator,edges_) - -#endif // GTL_GRAPH_H - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/include/GTL/maxflow_ff.h b/src/Tracker/graph/GTL/include/GTL/maxflow_ff.h deleted file mode 100644 index 
89dd88d78..000000000 --- a/src/Tracker/graph/GTL/include/GTL/maxflow_ff.h +++ /dev/null @@ -1,240 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// maxflow_ff.h -// -//========================================================================== -// $Id: maxflow_ff.h,v 1.5 2003/01/31 08:15:05 chris Exp $ - -#ifndef GTL_MAXFLOW_FF_H -#define GTL_MAXFLOW_FF_H - -#include -#include -#include -#include -#include - -#include - -__GTL_BEGIN_NAMESPACE - -/** - * @short Maximum flow algorithm (Edmonds-Karp). - */ -class GTL_EXTERN maxflow_ff : public algorithm -{ -public: - /** - * Default constructor. Enables only the calculation of - * maximum flow. - * - * @see algorithm#algorithm - */ - maxflow_ff(); - - /** - * Destructor. - * - * @see algorithm#~algorithm - */ - virtual ~maxflow_ff(); - - /** - * Sets capacity of every edge for maximum flow calculation - * where artificial start-node and end_node will be computed - * automatically. - * - * @param edge_capacity capacity of every edge - */ - void set_vars(const edge_map& edge_capacity); - - /** - * Sets capacity of every edge for maximum flow calculation - * - * @param edge_capacity capacity of every edge - * @param net_source start-node - * @param net_target end-node - */ - void set_vars( - const edge_map& edge_capacity, - const node& net_source, - const node& net_target); - - /** - * Checks whether following preconditions are satisfied: - *

    - *
  • @ref maxflow_ff#set_vars has been executed before. - *
  • only edge_capacities >= 0 are applied. - *
  • G is directed. - *
  • G is connected. - *
  • G has at least one edge and two nodes. - *
  • if not applied, start-nodes and end-nodes exists. - *
  • if applied, start-node is not the same node as end-node. - *
- * - * @param G graph - * @return algorithm::GTL_OK on success - * algorithm::GTL_ERROR otherwise - * @see algorithm#check - */ - virtual int check(GTL::graph& G); - - /** - * Computes maximum flow of graph G. - * - * @param G graph - * @return algorithm::GTL_OK on success - * algorithm::GTL_ERROR otherwise - * @see algorithm#run - */ - int run(GTL::graph& G); - - /** - * Returns the maximum flow of an edge. - * - * @param e edge of a graph G - * @return maximum flow value - */ - double get_max_flow(const edge& e) const; - - /** - * Returns the maximum flow of the whole graph G. - * - * @return maximum flow value - */ - double get_max_flow() const; - - /** - * Returns the remaining free capacity of an edge. - * - * @param e edge of a graph G - * @return remaining capacity - */ - double get_rem_cap(const edge& e) const; - - /** - * Resets maximum flow algorithm, i.e. prepares the - * algorithm to be applied to another graph. - * - * @see algorithm#reset - */ - virtual void reset(); -protected: - /** - * @internal - */ - enum {SP_FOUND = 2, NO_SP_FOUND = 3}; - - /** - * @internal - */ - bool artif_source_target = false; - - /** - * @internal - */ - bool set_vars_executed = false; - - /** - * @internal - */ - double max_graph_flow = 0; - - /** - * @internal - */ - node net_source; - - /** - * @internal - */ - node net_target; - - /** - * @internal edges to remove from G after run - */ - edges_t edges_not_org; - - /** - * @internal original edge or inserted back edge - */ - edge_map edge_org; - - /** - * @internal - */ - edge_map back_edge_exists; - - /** - * @internal every edge knows its back edge - */ - edge_map back_edge; - - /** - * @internal - */ - edge_map edge_capacity; - - /** - * @internal - */ - edge_map edge_max_flow; - - /** - * @internal - */ - void create_artif_source_target(GTL::graph& G); - - /** - * @internal - */ - void prepare_run(const graph& G); - - /** - * @internal - */ - void comp_single_flow(GTL::graph& G, GTL::node_map& last_edge); - - /** - 
* @internal every node knows its predecessor then - */ - int get_sp(const graph& G, GTL::node_map& last_edge); - - /** - * @internal - */ - int comp_sp( - const graph& G, - std::queue& next_nodes, - node_map& visited, - node_map& last_edge); - - /** - * @internal - */ - double extra_charge(const node_map& last_edge) const; - - /** - * @internal - */ - void create_back_edge(GTL::graph& G, const edge& org_edge); - - /** - * @internal - */ - void comp_max_flow(const graph& G); - - /** - * @internal - */ - void restore_graph(GTL::graph& G); -}; - -__GTL_END_NAMESPACE - -#endif // GTL_MAXFLOW_FF_H - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/include/GTL/maxflow_pp.h b/src/Tracker/graph/GTL/include/GTL/maxflow_pp.h deleted file mode 100644 index 36311c031..000000000 --- a/src/Tracker/graph/GTL/include/GTL/maxflow_pp.h +++ /dev/null @@ -1,305 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// maxflow_pp.h -// -//========================================================================== -// $Id: maxflow_pp.h,v 1.5 2003/01/31 08:15:05 chris Exp $ - -#ifndef GTL_MAXFLOW_PP_H -#define GTL_MAXFLOW_PP_H - -#include -#include -#include -#include -#include - -#include - -__GTL_BEGIN_NAMESPACE - -/** - * @short Maximum flow algorithm (Malhotra, Kumar, Maheshwari). - */ -class GTL_EXTERN maxflow_pp : public algorithm -{ -public: - /** - * Default constructor. Enables only the calculation of - * maximum flow. - * - * @see algorithm#algorithm - */ - maxflow_pp(); - - /** - * Destructor. - * - * @see algorithm#~algorithm - */ - virtual ~maxflow_pp(); - - /** - * Sets capacity of every edge for maximum flow calculation - * where artificial start-node and end_node will be computed - * automatically. 
- * - * @param edge_capacity capacity of every edge - */ - void set_vars(const edge_map& edge_capacity); - - /** - * Sets capacity of every edge for maximum flow calculation - * - * @param edge_capacity capacity of every edge - * @param net_source start-node - * @param net_target end-node - */ - void set_vars( - const edge_map& edge_capacity, - const node& net_source, const node& net_target); - - /** - * Checks whether following preconditions are satisfied: - *
    - *
  • @ref maxflow_pp#set_vars has been executed before. - *
  • only edge_capacities >= 0 are applied. - *
  • G is directed. - *
  • G is connected. - *
  • G has at least one edge and two nodes. - *
  • if not applied, start-nodes and end-nodes exists. - *
  • if applied, start-node is not the same node as end-node. - *
- * - * @param G graph - * @return algorithm::GTL_OK on success - * algorithm::GTL_ERROR - * otherwise - * @see algorithm#check - */ - virtual int check(GTL::graph& G); - - /** - * Computes maximum flow of graph G. - * - * @param G graph - * @return algorithm::GTL_OK on success - * algorithm::GTL_ERROR otherwise - * @see algorithm#run - */ - int run(GTL::graph& G); - - /** - * Returns the maximum flow of an edge. - * - * @param e edge of a graph @c G - * @return maximum flow value - */ - double get_max_flow(const edge& e) const; - - /** - * Returns the maximum flow of the whole graph @c G. - * - * @return maximum flow value - */ - double get_max_flow() const; - - /** - * Returns the remaining free capacity of an edge. - * - * @param e edge of a graph G - * @return remaining capacity - */ - double get_rem_cap(const edge& e) const; - - /** - * Resets maximum flow algorithm, i.e. prepares the - * algorithm to be applied to another graph. - * @see algorithm#reset - */ - virtual void reset(); -protected: - /** - * @internal - */ - enum {TARGET_FROM_SOURCE_REACHABLE = 2, TARGET_FROM_SOURCE_NOT_REACHABLE = 3}; - - /** - * @internal - */ - bool artif_source_target = false; - - /** - * @internal - */ - bool set_vars_executed = false; - - /** - * @internal - */ - double max_graph_flow = 0.; - - /** - * @internal - */ - node net_source; - - /** - * @internal - */ - node net_target; - - /** - * @internal edges to remove from G after run - */ - edges_t edges_not_org; - - /** - * @internal original edge or inserted back edge - */ - edge_map edge_org; - - /** - * @internal - */ - edge_map back_edge_exists; - - /** - * @internal every edge knows its back edge - */ - edge_map back_edge; - - /** - * @internal - */ - edge_map edge_capacity; - - /** - * @internal - */ - edge_map edge_max_flow; - - /** - * @internal - */ - edge_map flow_update; - - /** - * @internal - */ - edges_t full_edges; - - /** - * @internal - */ - nodes_t temp_unvisible_nodes; - - /** - * @internal - */ - 
edges_t temp_unvisible_edges; - - /** - * @internal - */ - void create_artif_source_target(GTL::graph& G); - - /** - * @internal - */ - void prepare_run(const graph& G); - - /** - * @internal - */ - int leveling(GTL::graph& G); - - /** - * @internal - */ - void hide_unreachable_nodes(GTL::graph& G); - - /** - * @internal - */ - void store_temp_unvisible_edges(const node& cur_node); - - /** - * @internal - */ - void min_throughput_node(const graph& G, GTL::node& min_tp_node, double& min_value); - - /** - * @internal - */ - double comp_min_throughput(const node cur_node) const; - - /** - * @internal every node knows its predecessor then - */ - void get_sp_ahead(const graph& G, const node& start_node, - node_map& last_edge); - - /** - * @internal every node knows its successor then - */ - void get_sp_backwards(const graph& G, const node& start_node, - node_map& prev_edge); - - /** - * @internal - */ - void push(GTL::graph& G, const node& start_node, const double flow_value); - - /** - * @internal - */ - void pull(GTL::graph& G, const node& start_node, const double flow_value); - - /** - * @internal - */ - void comp_rem_net(GTL::graph& G); - - /** - * @internal - */ - void single_edge_update(GTL::graph& G, GTL::edge cur_edge); - - /** - * @internal - */ - double extra_charge_ahead(const node& start_node, const - node_map& last_edge) const; - - /** - * @internal - */ - double extra_charge_backwards(const node& start_node, - const node_map& prev_edge) const; - - /** - * @internal - */ - void create_back_edge(GTL::graph& G, const edge& org_edge); - - /** - * @internal - */ - void comp_max_flow(const graph& G); - - /** - * @internal - */ - void restore_graph(GTL::graph& G); -private: -}; - -__GTL_END_NAMESPACE - -#endif // GTL_MAXFLOW_PP_H - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/include/GTL/maxflow_sap.h 
b/src/Tracker/graph/GTL/include/GTL/maxflow_sap.h deleted file mode 100644 index b18b662bc..000000000 --- a/src/Tracker/graph/GTL/include/GTL/maxflow_sap.h +++ /dev/null @@ -1,268 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// maxflow_sap.h -// -//========================================================================== -// $Id: maxflow_sap.h,v 1.4 2003/01/31 08:15:05 chris Exp $ - -#ifndef GTL_MAXFLOW_SAP_H -#define GTL_MAXFLOW_SAP_H - -#include -#include -#include -#include -#include - -#include - -__GTL_BEGIN_NAMESPACE - -/** - * @short Maximum flow algorithm with shortest augmenting paths - * - * This class implements a maximum flow algorithm with shortest augmenting - * paths due to Ahuja and Orlin. - * - *

In the case V is the set of vertices and E is the set of edges of - * the graph, the algorithm needs O(|V| * |V| * |E|) time to proceed. - * - * @see maxflow_ff - * @see maxflow_pp - */ -class GTL_EXTERN maxflow_sap : public algorithm -{ -public: - /** - * Default constructor. Enables only the calculation of - * maximum flow. - * - * @see algorithm#algorithm - */ - maxflow_sap(); - - /** - * Destructor. - * - * @see algorithm#~algorithm - */ - virtual ~maxflow_sap(); - - /** - * Sets capacity of every edge for maximum flow calculation - * where artificial start-node and end_node will be computed - * automatically. - * - * @param edge_capacity capacity of every edge - */ - void set_vars(const edge_map& edge_capacity); - - /** - * Sets capacity of every edge for maximum flow calculation - * - * @param edge_capacity capacity of every edge - * @param net_source start-node - * @param net_target end-node - */ - void set_vars(const edge_map& edge_capacity, - const node& net_source, - const node& net_target); - - /** - * Checks whether following preconditions are satisfied: - *

    - *
  • @ref maxflow_sap#set_vars has been executed before. - *
  • only edge_capacities >= 0 are applied. - *
  • G is directed. - *
  • G is connected. - *
  • G has at least one edge and two nodes. - *
  • if not applied, start-nodes and end-nodes exists. - *
  • if applied, start-node is not the same node as end-node. - *
- * - * @param G graph - * @return algorithm::GTL_OK on success - * algorithm::GTL_ERROR otherwise - * @see algorithm#check - */ - virtual int check(GTL::graph& G); - - /** - * Computes maximum flow of graph G. - * - * @param G graph - * @return algorithm::GTL_OK on success - * algorithm::GTL_ERROR otherwise - * @see algorithm#run - */ - int run(GTL::graph& G); - - /** - * Returns the maximum flow of an edge. - * - * @param e edge of a graph @c G - * @return maximum flow value - */ - double get_max_flow(const edge& e) const; - - /** - * Returns the maximum flow of the whole graph G. - * - * @return maximum flow value - */ - double get_max_flow() const; - - /** - * Returns the remaining free capacity of an edge. - * - * @param e edge of a graph @c G - * @return remaining capacity - */ - double get_rem_cap(const edge& e) const; - - /** - * Resets maximum flow algorithm, i.e. prepares the - * algorithm to be applied to another graph. - * - * @see algorithm#reset - */ - virtual void reset(); -protected: - /** - * @internal - */ - enum {AP_FOUND = 2, NO_AP_FOUND = 3}; - - /** - * @internal - */ - bool artif_source_target = false; - - /** - * @internal - */ - bool set_vars_executed = false; - - /** - * @internal - */ - double max_graph_flow = 0.; - - /** - * @internal - */ - node net_source; - - /** - * @internal - */ - node net_target; - - /** - * @internal edges to remove from G after run - */ - edges_t edges_not_org; - - /** - * @internal - */ - node_map dist_label; - - /** - * @internal original edge or inserted back edge - */ - edge_map edge_org; - - /** - * @internal - */ - edge_map back_edge_exists; - - /** - * @internal every edge knows its back edge - */ - edge_map back_edge; - - /** - * @internal - */ - edge_map edge_capacity; - - /** - * @internal - */ - edge_map edge_max_flow; - - /** - * @internal - */ - void create_artif_source_target(GTL::graph& G); - - /** - * @internal - */ - void prepare_run(const graph& G); - - /** - * @internal - */ - void 
comp_dist_labels(const graph& G, std::vector& numb); - - /** - * @internal - */ - bool has_an_admissible_arc(const node cur_node); - - /** - * @internal - */ - void advance(GTL::node& cur_node, GTL::node_map& last_edge); - - /** - * @internal - */ - void augment(GTL::graph& G, const node_map& last_edge); - - /** - * @internal - */ - bool retreat(const int number_of_nodes, - node& cur_node, - const node_map& last_edge, - std::vector& numb); - - /** - * @internal - */ - int min_neighbour_label(const int number_of_nodes, - const node cur_node) const; - - /** - * @internal - */ - double free_capacity(const node_map& last_edge) const; - - /** - * @internal - */ - void create_back_edge(GTL::graph& G, const edge& org_edge); - - /** - * @internal - */ - void comp_max_flow(const graph& G); - - /** - * @internal - */ - void restore_graph(GTL::graph& G); -}; - -__GTL_END_NAMESPACE - -#endif // GTL_MAXFLOW_SAP_H - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/include/GTL/min_tree.h b/src/Tracker/graph/GTL/include/GTL/min_tree.h deleted file mode 100644 index 874c580f8..000000000 --- a/src/Tracker/graph/GTL/include/GTL/min_tree.h +++ /dev/null @@ -1,108 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// min_tree.cpp -// -//========================================================================== -// $Id: min_tree.h,v 1.3 2001/06/21 10:55:08 chris Exp $ - -#ifndef GTL_MIN_TREE_H -#define GTL_MIN_TREE_H - -#include -#include -#include -#include - -__GTL_BEGIN_NAMESPACE - -/** - * @brief Kruskal's %algorithm for finding minimal spanning tree - * of a %graph. 
- * - * @author Marcus Raitner - */ -class min_tree: public algorithm { - -public: - - /** - * @brief Constructor - */ - min_tree (); - - /** - * @brief Destructor - */ - virtual ~min_tree () {}; - - /** - * @brief Checks whether %algorithm can be applied. - * - * The %graph must - * - be undirected - * - be connected - * - have more than 2 nodes - * - * Additionally the weights of the edges must have been set - * in advance using min_tree::set_distances. - * - * @param g graph - * @return algorithm::GTL_OK if %algorithm can be applied - * algorithm::GTL_ERROR otherwise. - */ - int check (GTL::graph& g); - - int run (GTL::graph& g); - - virtual void reset (); - - /** - * @brief Sets %edge weights. - * - * Setting of %edge weights must be done before calling - * min_tree::check and min_tree::run. - * - * @param dist %edge weigths. - */ - void set_distances (const edge_map& dist); - - /** - * @brief Edges of minimal spanning tree calculated in the - * last call of min_tree::run. - * - * @return Set of edges of representing the minimal spanning - * tree - */ - std::set get_min_tree(); - - /** - * @brief Weight of minimal spanning tree. - * - * @return weight of minimal spanning tree. 
- */ - int get_min_tree_length(); - -private: - typedef std::pair TSP_A_VALUE; - - class input_comp { - public: - bool operator()(TSP_A_VALUE x, TSP_A_VALUE y) - { return x.first > y.first;} - }; - - edge_map dist; - int weight; - std::set tree; - bool is_set_distances; -}; - -__GTL_END_NAMESPACE - -#endif // GTL_MIN_TREE_H - - - - - diff --git a/src/Tracker/graph/GTL/include/GTL/ne_map.h b/src/Tracker/graph/GTL/include/GTL/ne_map.h deleted file mode 100644 index c32e201bc..000000000 --- a/src/Tracker/graph/GTL/include/GTL/ne_map.h +++ /dev/null @@ -1,190 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// ne_map.h - common implementation of node_map and edge_map -// -//========================================================================== -// $Id: ne_map.h,v 1.20 2005/06/14 12:22:12 raitner Exp $ - -#ifndef GTL_NE_MAP_H -#define GTL_NE_MAP_H - -#include - -#include -#include - -//-------------------------------------------------------------------------- -// Class declaration -//-------------------------------------------------------------------------- - -__GTL_BEGIN_NAMESPACE - -/** - * @short Baseclass for node_map and edge_map - * - * ne_map is the common implementation of @ref node_map - * and @ref edge_map and cannot be used directly. - */ - -template > -class ne_map -{ -protected: - - //================================================== Constructors - - /** - * Constructs an empty ne_map not associated to any - * graph. - */ - ne_map(); - - /** - * Constructs a ne_map associated to the - * graph g. The value associated to each key is set - * to def. - * You may (but need not) call - * ne_map::init(const graph &, T) to associate it to - * a graph. 
- * - * @param g associated graph - * @param def default value - */ - explicit ne_map(const Graph &g, Value def=Value()); - - //================================================== Operations - -public: - - /** - * Initializes the ne_map to hold information for the elements - * of graph g. def is the value associated with all elements. - * - * @param g associated graph - * @param def default value - */ - void init(const Graph &, Value def=Value()); - - /** - * @internal - */ -#if defined(__GTL_MSVCC) && _MSC_VER < 1310 - typedef Value& value_reference; -#else - typedef typename std::vector::reference value_reference; -#endif - - /** - * @internal - */ -#if defined(__GTL_MSVCC) && _MSC_VER < 1310 - typedef const Value& const_value_reference; -#else - typedef typename std::vector::const_reference const_value_reference; -#endif - - /** - * Read/write accessor function to the value associated with - * key. - * Use this function to change the value of an element in the - * ne_map. Assume that ne is a - * ne_map<int>. Then you can assign the value - * 5 to key with: - *
-     *   ne[key] = 5;
-     * 
- * - * If there is no entry in the ne_map associated - * with key, one is created. - * - * @param key Key of the Entry to change - * @return a reference to the value associated to key. - */ - value_reference operator[](Key key); - - /** - * Read-only accessor function to the value associated with - * key. - * Use this function to read the value of an element in the - * ne_map. Assume that ne is a - * ne_map<int>. Then you can print the value - * associated with key with: - *
-     *   cout << ne[key];
-     * 
- * - * @param key Key of the Entry to look up - * @return a const reference to the value associated to - * key. - */ - const_value_reference operator[](Key key) const; - - /** - * Erases a elements of this nodemap - */ - void clear (); - - //================================================== Implementation - -private: - std::vector data; -}; - -// Implementation Begin - -template - ne_map::ne_map() -{ -} - -template -ne_map::ne_map(const Graph &g, Value t2) : - data(g.number_of_ids(Key()), t2) -{ -} - -template -void ne_map::init(const Graph &g, Value t2) -{ - int n = g.number_of_ids(Key()); - data.resize(n); - fill_n(data.begin(), n, t2); -} - -template -typename ne_map::value_reference ne_map::operator[](Key t1) -{ - if(t1.id() >= (signed)data.size()) - { - if (t1.id() >= (signed)data.capacity()) { - data.reserve((6 * t1.id()) / 5 + 1); - } - - data.insert(data.end(), t1.id()+1-data.size(), Value()); - } - return data.operator[](t1.id()); -} - -template -typename ne_map::const_value_reference ne_map::operator[](Key t1) const -{ - assert(t1.id() < (signed)data.size()); - return data.operator[](t1.id()); -} - -template -void ne_map::clear () -{ - data.clear(); -} - -// Implementation End - -__GTL_END_NAMESPACE - -#endif // GTL_NE_MAP_H - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/include/GTL/node.h b/src/Tracker/graph/GTL/include/GTL/node.h deleted file mode 100644 index 00896cf8a..000000000 --- a/src/Tracker/graph/GTL/include/GTL/node.h +++ /dev/null @@ -1,345 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// node.h -// -//========================================================================== -// $Id: node.h,v 1.20 2003/11/27 13:36:56 raitner Exp $ - -#ifndef GTL_NODE_H 
-#define GTL_NODE_H - -#include -#include - -#include - -__GTL_BEGIN_NAMESPACE - -//-------------------------------------------------------------------------- -// For MSVC 5.0 node.h has to be included before {node,edge}_data.h. -// So we only declare node_data here -//-------------------------------------------------------------------------- - -class node_data; - -//-------------------------------------------------------------------------- -// The first alternative is correct. The second one is a workaround -// for compilers that don't support namespaces and use the SGI STL -// (i.e. gcc/egcs) -//-------------------------------------------------------------------------- - -#ifdef __GTL_USE_NAMESPACES - -class node; -typedef std::iterator bi_iter_edge; -typedef std::iterator bi_iter_node; - -#else - -class node; -typedef bidirectional_iterator bi_iter_edge; -typedef bidirectional_iterator bi_iter_node; - -#endif // __GTL_USE_NAMESPACES - -//-------------------------------------------------------------------------- -// nodes -//-------------------------------------------------------------------------- - -/** - * @short A node in a graph - */ -class GTL_EXTERN node -{ -public: - /** - * Default constructor. Creates an invalid node. - * The only way to obtain a valid node is through @ref - * graph#new_node Example: - *
-     *   graph g;
-     *   node n;
-     *
-     *   n = g.new_node();
-     * 
- * - * @see graph#new_node - */ - node(); - - /** - * Returns the degree of the node, i. e. - * @ref node#outdeg + @ref node#indeg . - */ - int degree() const; - - /** - * Returns the out degree of the node, i. e. the number of outgoing edges. - */ - int outdeg() const; - - /** - * Returns the in degree of the node, i. e. the number of incoming edges. - */ - int indeg() const; - - /** - * @internal - */ - int id() const; - - /** - * Returns the node on the opposite side of e. - * - * @param e an edge incident to the node - */ - const node& opposite(GTL::edge e) const; - - /** - * @internal - */ - nodes_t opposites(GTL::edge) const; - - /** - * Returns true iff node is hidden. - * - * @return true iff node is hidden. - * @see graph#hide_edge - * @see graph#restore_edge - */ - bool is_hidden () const; - - /** - * Returns the excentricity of the node, i.e. the maximum graph-theoretic - * distance to another node - * - * @return excentricity of node. - */ - int excentricity() const; - - //================================================== Iterator types - - /** - * @internal - */ - typedef edges_t::const_iterator in_edges_iterator; - /** - * @internal - */ - typedef edges_t::const_iterator out_edges_iterator; - - /** - * @internal - */ - class inout_edges_iterator; - - /** - * @internal - */ - class adj_nodes_iterator; - - /** - * @internal - */ - class adj_edges_iterator; - - //================================================== Iterators - - /** - * Iterate through all adjacent nodes. - * - * @return start for iteration through all adjacent nodes - */ - adj_nodes_iterator adj_nodes_begin() const; - - /** - * Iterate through all adjacent nodes. - * - * @return end for iteration through all adjacent nodes - */ - adj_nodes_iterator adj_nodes_end() const; - - /** - * Iterate through all adjacent edges. - * - * @return start for iteration through all adjacent edges - */ - adj_edges_iterator adj_edges_begin() const; - - /** - * Iterate through all adjacent edges. 
- * - * @return end for iteration through all adjacent edges - */ - adj_edges_iterator adj_edges_end() const; - - /** - * Iterate through all incoming edges. - * - * @return start for iteration through all incoming edges - */ - in_edges_iterator in_edges_begin() const; - - /** - * Iterate through all incoming edges. - * - * @return end for iteration through all incoming edges - */ - in_edges_iterator in_edges_end() const; - - /** - * Iterate through all outgoing edges. - * - * @return start for iteration through all outgoing edges - */ - out_edges_iterator out_edges_begin() const; - - /** - * Iterate through all outgoing edges. - * - * @return end for iteration through all outgoing edges - */ - out_edges_iterator out_edges_end() const; - - /** - * Iterate through all incoming and outgoing edges. - * - * @return start for iteration through all incoming and outgoing edges - */ - inout_edges_iterator inout_edges_begin() const; - - /** - * Iterate through all incoming and outgoing edges. - * - * @return end for iteration through all incoming and outgoing edges - */ - inout_edges_iterator inout_edges_end() const; - - //================================================== Implementation - -private: - node_data *data; - - bool is_directed() const; - bool is_undirected() const; - - friend class graph; - friend class edge; - friend class adj_edges_iterator; - - GTL_EXTERN friend bool operator==(GTL::node, GTL::node); - GTL_EXTERN friend bool operator!=(GTL::node, GTL::node); - GTL_EXTERN friend bool operator<(GTL::node, GTL::node); - GTL_EXTERN friend std::ostream& operator<< (std::ostream& os, const node& n); -}; - -/** - * @short Iterator for adjacent edges of a node - */ -class GTL_EXTERN node::adj_edges_iterator : public bi_iter_edge -{ -public: - - // constructor - adj_edges_iterator(); - adj_edges_iterator(GTL::node, bool); - - // comparibility - bool operator==(const adj_edges_iterator&) const; - bool operator!=(const adj_edges_iterator&) const; - - // operators - 
adj_edges_iterator &operator++(); - adj_edges_iterator operator++(int); - adj_edges_iterator &operator--(); - adj_edges_iterator operator--(int); - - // dereferencing - const edge& operator*() const; - const edge* operator->() const; - -private: - in_edges_iterator akt_edge[2], last_edge[2], begin_edge[2]; - int inout; // in=0, out=1 - bool directed; // graph directed ?? -}; - -/** - * @short Iterator for all incident edges of a node - */ -class GTL_EXTERN node::inout_edges_iterator : public bi_iter_edge -{ -public: - - // constructor - inout_edges_iterator(); - inout_edges_iterator(GTL::node n, bool start); - - // comparibility - bool operator==(const inout_edges_iterator&) const; - bool operator!=(const inout_edges_iterator&) const; - - // operators - inout_edges_iterator &operator++(); - inout_edges_iterator operator++(int); - inout_edges_iterator &operator--(); - inout_edges_iterator operator--(int); - - // dereferencing - const edge& operator*() const; - const edge* operator->() const; - -private: - in_edges_iterator akt_edge[2], last_edge, begin_edge; - int inout; // in=0, out=1 -}; - -/** - * @short Iterator for adjacent nodes of a node - */ -class GTL_EXTERN node::adj_nodes_iterator : public bi_iter_node -{ -public: - - // constructor - adj_nodes_iterator(); - adj_nodes_iterator(const node&, bool); - - // comparibility - bool operator==(const adj_nodes_iterator&) const; - bool operator!=(const adj_nodes_iterator&) const; - - // operators - adj_nodes_iterator &operator++(); - adj_nodes_iterator operator++(int); - adj_nodes_iterator &operator--(); - adj_nodes_iterator operator--(int); - - // dereferencing - const node& operator*() const; - const node* operator->() const; - -private: - adj_edges_iterator akt_edge; - node int_node; -}; - - -__GTL_END_NAMESPACE - -//-------------------------------------------------------------------------- -// Iteration -//-------------------------------------------------------------------------- - -// #define 
forall_adj_nodes(v,w) GTL_FORALL(v,w,node::adj_nodes_iterator,adj_nodes_) -#define forall_out_edges(e,v) GTL_FORALL(e,v,GTL::node::out_edges_iterator,out_edges_) -#define forall_in_edges(e,v) GTL_FORALL(e,v,GTL::node::in_edges_iterator,in_edges_) -#define forall_inout_edges(e,v) GTL_FORALL(e,v,GTL::node::inout_edges_iterator,inout_edges_) -#define forall_adj_edges(e,v) GTL_FORALL(e,v,GTL::node::adj_edges_iterator,adj_edges_) - -#endif // GTL_NODE_H - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/include/GTL/node_data.h b/src/Tracker/graph/GTL/include/GTL/node_data.h deleted file mode 100644 index 251316a09..000000000 --- a/src/Tracker/graph/GTL/include/GTL/node_data.h +++ /dev/null @@ -1,42 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// node_data.h - Internal header: DO NO USE IT DIRECTLY !!! 
-// -//========================================================================== -// $Id: node_data.h,v 1.7 2000/01/05 16:32:38 raitner Exp $ - -#ifndef GTL_NODE_DATA_H -#define GTL_NODE_DATA_H - -#include -#include -#include - -#include - -__GTL_BEGIN_NAMESPACE - -class graph; - -/** - * @internal - */ -class GTL_EXTERN node_data -{ -public: - int id; // internal numbering - graph *owner; // graph containing this node - nodes_t::iterator pos; // position in the list of all nodes - edges_t edges[2]; // edges incident to this node - // edges[0] = in_edges, edges[1] = out_edges - bool hidden; -}; - -__GTL_END_NAMESPACE - -#endif // GTL_NODE_DATA_H - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/include/GTL/node_map.h b/src/Tracker/graph/GTL/include/GTL/node_map.h deleted file mode 100644 index 07fe8f489..000000000 --- a/src/Tracker/graph/GTL/include/GTL/node_map.h +++ /dev/null @@ -1,81 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// node_map.h -// -//========================================================================== -// $Id: node_map.h,v 1.8 2005/06/14 12:22:12 raitner Exp $ - -#ifndef GTL_NODE_MAP_H -#define GTL_NODE_MAP_H - -#include -#include -#include - -__GTL_BEGIN_NAMESPACE - -class graph; - -/** - * @short A specialized map with nodes as keys - * - * A node_map is a specialized and optimized map - * implementation with nodes as keys. Using a node_map is - * the standard way to attach user defined information to - * the nodes of a graph. - * - * An example of usage: - *
- *   graph g;
- *
- *   node v1 = g.new_node();
- *   node v2 = g.new_node();
- *
- *   node_map<string> label(g, "Default Label");
- *
- *   label[v1] = "v1";
- *   label[v2] = "v2";
- *
- *   assert(label[v1] != label[v2]);
- * 
- * - * The nodes used as keys for a node_map MUST be nodes - * of the same graph. If you want to use nodes from different graphs, use - * a map<node,T> instead. A graph and a copy of it are - * considered to be different. - * - * Most of the functionality of node_map is inherited from - * @ref ne_map. - * - * @see edge_map - */ -template > -class node_map : public ne_map -{ -public: - - /** - * Constructs an empty node_map not associated with any - * graph. You may (but need not) call - * ne_map::init(const graph &, T) to associate it to - * a graph. - */ - node_map() : ne_map() {}; - - /** - * Constructs a node_map associated to the graph - * g. - * The value associated to each node in g is set to - * t. - */ - explicit node_map(const graph &g, T t=T()) : ne_map(g,t) {}; -}; - -__GTL_END_NAMESPACE - -#endif // GTL_NODE_MAP_H - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/include/GTL/planarity.h b/src/Tracker/graph/GTL/include/GTL/planarity.h deleted file mode 100644 index ce4b29a7f..000000000 --- a/src/Tracker/graph/GTL/include/GTL/planarity.h +++ /dev/null @@ -1,620 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// planarity.h -// -//========================================================================== -// $Id: planarity.h,v 1.22 2008/02/03 18:17:08 chris Exp $ - -#ifndef PLANARITY_H -#define PLANARITY_H - -#include -#include -#include -#include -#include -#include -#include - -__GTL_BEGIN_NAMESPACE - -/** - * $Date: 2008/02/03 18:17:08 $ - * $Revision: 1.22 $ - * - * @brief Tests if a %graph can be drawn on a plane without any %edge - * crossings - * - * This class implements the Lempel-Even-Cederbaum %planarity test using - * PQ-trees. 
In case the %graph is planar a planar embedding is obtained, - * i.e. for each %node in the %graph an ordered adjacency list is calculated, - * such that there exists a planar drawing in which all adjacent edges - * around a %node apply to this order. - * - * If the %graph is not planar Kuratowski's famous theorem states that it - * must contain a subgraph hoemeomorphic to either K5 (the complete %graph - * with five nodes) or K3,3 (the complete bipartite %graph with three nodes - * each side). In this case the nodes and edges of the tested %graph that - * form either of these two are calculated. - * - * In case the %graph is planar and has @f$N@f$ nodes the algorithm needs - * @f$\mathcal{O}(N)@f$ time for the test (including the planar embedding). - * In case the %graph isn't planar it needs at most @f$\mathcal{O}(E)@f$ - * time if @f$E@f$ is the number of edges for both the test and the - * detection of K5 or K3,3. - */ -class GTL_EXTERN planarity : public algorithm -{ -public: - /** - * @brief Creates an object of the planarity test %algorithm. - * - * @sa algorithm - */ - planarity(); - - /** - * @brief Destructor - */ - ~planarity(); - - /** - * @brief Checks whether planarity test can be applied to @p G. - * - * This should return always @c GTL_OK. There aren't any - * restrictions on @p G, even multiple edges and selfloops - * are tolerated. - * - * @note Selfloops and multiple edges will not be added to - * the planar embedding. planar_embedding::selfloops and - * planar_embedding::multiple_edges can be used to get - * these. - * - * @param G arbitrary %graph - * - * @retval GTL_OK if %planarity test can be applied - * @retval GTL_ERROR if not - * - * @sa algorithm#check - */ - int check(GTL::graph& G); - - /** - * @brief Runs planarity test on @p G. - * - * This should return always @c GTL_OK. The return value only - * tracks errors that might occur, it is definitly @em not - * the result of the test itself. 
The result of the test is - * stored in a member variable and can be accessed via - * #is_planar. - * - * @param G arbitrary %graph - * - * @retval GTL_OK if %planarity test was sucessfully applied - * @retval GTL_ERROR if not - * - * @sa algorithm::run - */ - int run(GTL::graph& G); - - /** - * @brief Resets algorithm object, such that it can be applied to - * another graph. - * - * @sa algorithm::reset - */ - void reset(); - - /** - * @brief If @p p is true a planar embedding will be calculated in - * the next run. - * - * @param p @c true iff embedding should be calculated - * - * @sa #get_embedding - * @sa planar_embedding - */ - void calc_embedding(bool p) - { - emp = p; - if (!emp) kup = false; - } - - /** - * @brief Returns true if a planar embedding will be calculated in - * the next run. - * - * @retval true iff embedding will be calculated - * - * @sa #get_embedding - * @sa planar_embedding - */ - bool calc_embedding () const - { return emp; } - - /** - * @brief If @p p is true the obstructions to %planarity will be - * calculated in the next %run. - * - * This implies the calculation of an embedding. - * - * @param p @c true iff obstructions to %planarity should be calculated - * - * @sa #get_obstruction_edges - * @sa #get_obstruction_nodes - */ - void calc_obstruction(bool p) - { - kup = p; - if (kup) emp = true; - } - - /** - * @brief Returns true if the obstructions to %planarity will be - * calculated in the next %run. - * - * @retval true iff obstructions to %planarity will be calculated - * - * @sa #get_obstruction_edges - * @sa #get_obstruction_nodes - */ - bool calc_obstruction() const - { - return kup; - } - - /** - * @brief Determines the strategy used to test a graph which is not - * biconnected. - * - * If this is enabled the graph will be made biconnected by - * adding some new edges. This is usually faster than testing - * the biconnected components one by one, which is done if - * this option is disabled. By default this is enabled. 
- * - * @note This is not fully tested, i.e. at the moment this - * feature should be used only for the test without embedding - * or kuratowski graphs. - * - * @param p true iff %graph should be made biconnected - * - * @sa biconnectivity::make_biconnected - */ - void make_biconnected(bool p) - { - bip = p; - } - - /** - * @brief Returns strategy for testing graphs, which are not - * biconnected. - * - * @retval true iff graph will be made biconnected before test - * - * @sa biconnectivity#make_biconnected - */ - bool make_biconnected() const - { - return bip; - } - - /** - * @brief Result of last test. - * - * @retval true iff %graph in last %run was planar. - */ - bool is_planar() const - { - return planar; - } - - /** - * @brief If %graph in last #run was planar a planar embedding is - * calculated during the reductions. This function gives access to it. - * - * @return planar embedding of %graph in last %run - * - * @sa #calc_embedding - */ - planar_embedding& get_embedding() - { - return embedding; - } - - /** - * @brief Returns the edges of a subgraph homeomorphic to - * either K3,3 or K5 if %graph in last %run was not planar. - * - * @return edges of subgraph homeomorphic to either K3,3 or K5 - * - * @sa #get_obstruction_nodes - * @sa #calc_obstruction - */ - edges_t& get_obstruction_edges() - { - return ob_edges; - } - - /** - * @brief Returns the nodes of a subgraph homeomorphic to - * either K3,3 or K5 if %graph in last %run was not planar. - * - * @return nodes of subgraph homeomorphic to either K3,3 or K5 - * - * @sa #get_obstruction_edges - * @sa #calc_obstruction - */ - nodes_t& get_obstruction_nodes() - { - return ob_nodes; - } -private: - /** - * @internal - * Main procedure for planarity test. Assumes @p G to be undirected and - * biconnected. Used to test whether the biconnected components of a - * %graph are planar. 
- * - * @param G biconnected, undirected graph - * @param em planar embedding (should be empty) - * - * @retval true if @c G is planar - */ - bool run_on_biconnected(GTL::graph& G, planar_embedding& em); - - /** - * @internal - * Adds the embedding for component @c G to the embedding of the whole - * %graph. - * - * @param G biconnected graph - * @param em embedding obtained through testing @p G - */ - void add_to_embedding(GTL::graph& G, planar_embedding& em); - - /** - * @internal - * The so called upward embedding can be obtained from the list of edges - * one gets in the reduction steps of the %algorithm. The only problem - * is that some of these lists may be turned later in the algorithm. - * This procedure corrects the reversions according to the information - * stored in @p dirs. - * - * @param em embedding - * @param st st-numbers of biconnected %graph - * @param dirs direction indicators obtained after each reduction - */ - void correct_embedding(planar_embedding& em, - st_number& st, - node_map >& dirs); - - /** - * @internal - * After the embedding has been corrected by the above procedure, we - * have a so called upward embedding, this means only the edges leading - * to nodes with smaller st-number than itself are in the adjacency list - * for some node. This procedure extends the upward embedding @p em to a - * full embedding. This is a recursive procedure (well basically it's a - * DFS starting at the %node with the highest st-number). - * - * @param n current node (used for recursion) - * @param em embedding (at the beginning an upward embedding) - * @param mark marks used nodes in DFS. - * @param upward_begin marks the beginning of the upward embedding - */ - void extend_embedding( - node n, - planar_embedding& em, - node_map& mark, - node_map::iterator >& upward_begin); - - /** - * @internal - * Make @p G the component specified in @p it by hiding everything not - * in this subgraph. 
For the sake of efficiency the whole graph is - * hidden at the beginning and then only what is in this component is - * restored. - * - * @param G whole graph; partially hidden - * @param it component to highlight - * - * @sa graph::hide - */ - void switch_to_component(GTL::graph& G, - biconnectivity::component_iterator it); - - /** - * @internal - * Main procedure for detecting K5 or K3,3. Many cases have to be taken - * into account so it is split in a lot of subroutines decribed below. - * - * @param G biconnected graph. - * @param st st-numbers of @p G - * @param act node for which the reduction failed - * @param fail (PQ-) node at which no matching could be applied - * @param failed_at_root @c true iff @p fail is the root of the - * pertinent subtree. - * @param em planar embedding obtained up to the moment the matchings - * stopped - * @param dirs direction indicators obtained up to the moment the - * matchings stopped - * @param PQ tree - */ - void examine_obstruction(GTL::graph& G, - st_number& st, - node act, - pq_node* fail, - bool failed_at_root, - planar_embedding& em, - node_map >& dirs, - pq_tree* PQ); - - /** - * @internal - * Calculates a DFS-tree for the so called bush-form for the node with - * st-number @p stop, i.e. the induced subgraph consisting of all nodes - * with st-number smaller than @p stop and all edges from one of these - * to a higher numbered node lead to a virtual node with that number - * (there may be duplicates). 
- * - * @param act used in recursion; starts with node numbered 1 - * @param mark marks for DFS; initially for all nodes 0 - * @param st st-numbers for graph - * @param stop lowest st-number of virtual nodes - * @param to_father stores the edge to predecessor of each node - */ - void dfs_bushform(GTL::node act, - node_map& mark, - st_number& st, - int stop, - node_map& to_father); - - - /** - * @internal - * In case the reduction failed at a Q-node the boundary of the - * biconnected component the Q-node represents can be obtained from @p - * em. - * No return value is needed, since all the edges on the boundary are - * added to the obstruction edges (although some of them have to be - * deleted in some cases). - * - * @param n node with lowest st-number in biconnected component - * @param em planar embedding (at least for this component) - */ - void attachment_cycle (GTL::node n, planar_embedding& em); - - /** - * @internal - * Marks all neighbors of leaves in the subtree rooted at @p n. - * In some cases where the reduction fails at a Q-node, which is not the - * root of the pertinent subtree, an adjacent edge of the node for which - * the reduction failed, which does not lead to that component has to be - * found. - * - * @param n root of subtree - * @param mark edges in subtree recieve 1, all other are unchanged. - */ - void mark_all_neighbors_of_leaves (pq_node* act, GTL::node_map& mark); - - /** - * @internal - * Searches one full and one empty leaf beneath @p partial. The join of - * these leaves and the node on the boundary @p v to which @p partial is - * attached is added to the obstruction nodes. All edges that form this - * join are added to the obstruction edges. 
- * - * @param partial partial %node - * @param mark nodes already used - * @param to_father predecessor relation in DFS tree - * @param v node on the boundary - * @return empty leaf - */ - pq_leaf* run_through_partial(q_node* partial, - node_map& mark, - node_map& to_father, - node v); - - /** - * @internal - * Uses @p to_father to determine an already marked predecessor. - * - * @param act node - * @param mark nodes already used - * @param to_father predecessor relation in DFS tree - * - * @return marked node - */ - node up_until_marked(GTL::node act, - node_map& mark, - node_map& to_father); - - /** - * @internal - * Always uses a adjacent node with higher st-number as predecessor. - * Searches marked predecessor. - * - * @param act node - * @param mark nodes already used - * @param st used to determine predecessor - * - * @return marked node - */ - node up_until_marked(GTL::node act, - node_map& mark, - st_number& st); - - /** - * @internal - * Assumes that @p n is non empty. Searches full leaf beneath @p n. - * - * @param n (PQ-) node - * - * @return full leaf in subtree of @p n - */ - pq_leaf* search_full_leaf (pq_node* n); - - /** - * @internal - * Assumes that @p n is non full. Searches empty leaf beneath @p n. - * - * @param n (PQ-) node - * - * @return empty leaf in subtree of @p n - */ - pq_leaf* search_empty_leaf(pq_node* n); - - /** - * @internal - * Reduction failed at a P-%node, which had at least three pertial - * sons. - * - * @param p_fail P-%node at which reduction failed - * @param act node for which reduction failed - * @param _st st-numbers of graph - * @param to_father predecessors in DFS-tree of bushform - * @param G graph tested - */ - void case_A(p_node* p_fail, - node act, - st_number& _st, - node_map to_father, - graph& G); - - /** - * @internal - * Reduction failed at a P-%node, which isn't the root of the pertinent - * subtree and had at least two partial children. 
- * - * @param p_fail P-%node at which reduction failed - * @param act node for which reduction failed - * @param _st st-numbers of graph - * @param to_father predecessors in DFS-tree of bushform - * @param G graph tested - */ - void case_B(p_node* p_fail, - node act, - st_number& _st, - node_map to_father, - graph& G); - - /** - * @internal - * Reduction failed at a Q-node, such that there exist children a < b < - * c and a and c are both non-empty and b is non-full. - * - * @param nodes nodes on the boundary of @p q_fail to which the sons a, - * b, c are attached. - * @param leaves leaves in the subtrees of a, b, c. For a and c full - * leaves and an empty one for b. - * @param _st st-numbers of graph - * @param to_father predecessors in DFS-tree of bushform - * @param G graph tested - * @param q_fail Q-node at which reduction failed - */ - void case_C(GTL::node* nodes, - pq_leaf** leaves, - st_number& _st, - node_map to_father, - graph& G, - q_node* q_fail); - - /** - * @internal - * Reduction failed at a non-root Q-node, such that there exist children - * a < b < c and a and c are both non-full and b is non-empty. - * - * @param nodes nodes on the boundary of @p q_fail to which the sons a, - * b, c are attached. - * @param leaves leaves in the subtrees of a, b, c. For a and c full - * leaves and an empty one for b. - * @param _st st-numbers of graph - * @param to_father predecessors in DFS-tree of bushform - * @param G graph tested - * @param q_fail Q-node at which reduction failed - */ - void case_D(GTL::node* nodes, - pq_leaf** leaves, - st_number& _st, - node_map to_father, - graph& G, - q_node* q_fail); - - /** - * @internal - * Reduction failed at a non-root Q-node which has only two children, - * both partial. - * - * @param nodes nodes on the boundary of @p q_fail to which the two - * partial sons are attached. - * @param leaves two leaves in each subtree of a partial son. One full - * other empty. 
- * @param _st st-numbers of graph - * @param to_father predecessors in DFS-tree of bushform - * @param G graph tested - * @param q_fail Q-node at which reduction failed - */ - void case_E(GTL::node* nodes, - pq_leaf** leaves, - st_number& _st, - node_map to_father, - graph& G, - q_node* q_fail); - -#ifdef _DEBUG - /** - * @internal - */ - void write_bushform(GTL::graph& G, st_number& _st, int k, const char* name, - const node_map& mark, const node_map& to_father); - - /** - * @internal - */ - void write_node(std::ostream& os, int id, int label, int mark); -#endif - - /** - * @internal - */ - edges_t ob_edges; - - /** - * @internal - */ - nodes_t ob_nodes; - - /** - * @internal - */ - planar_embedding embedding; - - /** - * @internal - */ - bool planar = false; - - /** - * @internal - */ - bool emp = false; - - /** - * @internal - */ - bool kup = false; - - /** - * @internal - */ - bool bip = false; -}; - -__GTL_END_NAMESPACE - -#endif // PLANARITY_H - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/include/GTL/pq_node.h b/src/Tracker/graph/GTL/include/GTL/pq_node.h deleted file mode 100644 index df175cea4..000000000 --- a/src/Tracker/graph/GTL/include/GTL/pq_node.h +++ /dev/null @@ -1,789 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// pq_node.h -// -//========================================================================== -// $Id: pq_node.h,v 1.15 2003/04/03 11:48:26 raitner Exp $ - -#ifndef PQ_NODE_H -#define PQ_NODE_H - -#include -#include -#include - -#include -#include - -__GTL_BEGIN_NAMESPACE - -class pq_tree; -class p_node; -class q_node; -class pq_leaf; -class direction_indicator; - -/** - * @internal - */ -class GTL_EXTERN pq_node -{ -protected: - /** - * @internal - */ - 
typedef symlist::iterator iterator; - - /** - * @internal - */ - enum PQ_KIND {P_NODE, Q_NODE, LEAF, DIR}; - - /** - * @internal - */ - enum PQ_MARK {UNMARKED, QUEUED, BLOCKED, UNBLOCKED}; - - /** - * @internal - */ - pq_node (GTL::node n_, int id_) : pert_children(0), - pert_leaves(0), - mark (UNMARKED), - n (n_), - id (id_) - { - } - - /** - * @internal - */ - virtual ~pq_node (); - - /** - * @internal - * Used to identify nodes. - */ - virtual PQ_KIND kind() const = 0; - - /** - * @internal - * Called whenever a son is known to be partial during reduction phase. - */ - virtual void partial(iterator) - { - } - - /** - * @internal - * Called whenever a son is known to be full during reduction phase. - */ - virtual void full(iterator) - { - } - - /** - * @internal - * Used to write a description of this node into a stream. - */ - virtual void write(std::ostream&, int) = 0; - - /** - * @internal - * Reset node for next reduction. - */ - virtual void clear() - { - mark = UNMARKED; - pert_leaves = 0; - pert_children = 0; - } - - // type-casts - - /** - * @internal - * Interface type-cast to P-node. - */ - virtual p_node* P() = 0; - - /** - * @internal - * Interface type-cast to Q-node. - */ - virtual q_node* Q() = 0; - - /** - * @internal - * Interface type-cast to direction indicator. - */ - virtual direction_indicator* D() = 0; - - /** - * @internal - * Interface type-cast to PQ-leaf. - */ - virtual pq_leaf* L() = 0; - - // - // Data used in reductions - // - - /** - * @internal - * Number of pertinent children; is calculated during bubble-up phase - * and gets decreased whenever a pertinent child is matched in reduction - * phase, such that it can be assured that this node is matched @em - * after all its pertinent children were correctly matched. 
- */ - int pert_children = 0; - - /** - * @internal - * Number of pertinent leaves in the subtree rooted at this node; is - * calculated in the reduction phase and is used to determine the root - * of the pertinent subtree, i.e. the last node for template matchings. - */ - int pert_leaves = 0; - - /** - * @internal - * For Q-nodes it is not acceptable to maintain father pointers for @em - * all sons (cf. Booth, Luecker); fortunatly this isn't neccessary and - * the father pointer is only valid if is_endmost is true. For the sons - * of a Q-node is_endmost is only true for the first and the last son. - * For the sons of P-nodes ths flag is always true. - */ - bool is_endmost = false; - - /** - * @internal - * The main operations on PQ-trees are performed bottom up so each node - * should know its father; Because of complexity issuses this isn't - * always possible and thus father is valid iff is_endmost is true. - */ - pq_node* father = nullptr; - - /** - * @internal - * Describes the role this node plays in the reduction at the moment; - * four states are possible: - * -# @c UNMARKED: node wasn't touched so far - * -# @c BLOCKED: during bubble-up phase this node got queued, but as - * yet it was not possible to get a valid father pointer - * -# @c UNBLOCKED: node was touched during bubble-up and it either had - * a valid father pointer or one could be borrowed from one of its - * siblings - * -# @c QUEUED: node has been put into the queue - */ - PQ_MARK mark; - - /** - * @internal - * List of sons. - */ - symlist sons; - - /** - * @internal - * Position in the list of sons of node's father. - */ - iterator pos; - - /** - * @internal - * Position in the list of nodes to be cleared in reset. Each node - * touched in #bubble-up phase is stored in the list of nodes to be - * cleared. As they get matched in the reduction phase they are cleared - * and deleted from this list. 
But even if the reduction is successful - * not all nodes touched in the first phase really get matched. - */ - std::list::iterator lpos; - - // - // Application specific data (should become template parameter) - // - - /** - * @internal - * Node of the graph which this PQ-node represents. - */ - node n; - - /** - * @internal - */ - int id = 0; - - /** - * @internal - */ - node up; - - /** - * @internal - */ - int up_id = 0; - - // - // Friends - // - - /** - * @internal - * Allow q_node private access. - */ - friend class q_node; - - /** - * @internal - * Allow p_node private access. - */ - friend class p_node; - - /** - * @internal - * Allow my_pq_tree private access. - */ - friend class pq_tree; - - /** - * @internal - * Allow planarity private access. - */ - friend class planarity; - - /** - * @internal - * Allow operator<< private access. - */ - GTL_EXTERN friend std::ostream& operator<<(std::ostream&, const pq_tree&); -}; - - -/** - * @internal - */ -class GTL_EXTERN p_node : public pq_node -{ -private: - /** - * @internal - */ - p_node(GTL::node, int); - - /** - * @internal - */ - p_node(GTL::node, int, symlist&); - - // - // pq_node interface - // - - /** - * @internal - */ - void partial(iterator); - - /** - * @internal - */ - void full(iterator); - - /** - * @internal - * Determines kind of this %node. - */ - PQ_KIND kind () const - { - return P_NODE; - } - - /** - * @internal - * Print this %node in gml format. - */ - void write(std::ostream&, int); - - /** - * @internal - */ - void clear (); - - // type-casts - - /** - * @internal - * Type-cast to P-node. - */ - p_node* P() - { - return this; - } - - /** - * @internal - * Type-cast to Q-node. - */ - q_node* Q() - { - assert(false); - return 0; - } - - /** - * @internal - * Type-cast to direction indicator. - */ - direction_indicator* D() - { - assert(false); - return 0; - } - - /** - * @internal - * Type-cast to PQ-leaf. 
- */ - pq_leaf* L() - { - assert(false); - return 0; - } - - // - // Additional - // - - /** - * @internal - * Whenever a child is known to be full, it is moved from the list of - * sons to this list. - */ - symlist full_sons; - - /** - * @internal - * Whenever a child is known to be partial, it is moved from the list of - * sons to this list. - */ - symlist partial_sons; - - /** - * @internal - * Number of children. - */ - int child_count = 0; - - /** - * @internal - * Number of partial children. - */ - int partial_count = 0; - - /** - * @internal - * Number of full children. - */ - int full_count = 0; - - // - // Friends - // - - /** - * @internal - * Allow planarity private access. - */ - friend class planarity; - - /** - * @internal - * Allow pq_tree private access. - */ - friend class pq_tree; - - /** - * @internal - * Allow operator<< private access. - */ - GTL_EXTERN friend std::ostream& operator<<(std::ostream&, const pq_tree&); -}; - - -/** - * @internal - */ -class GTL_EXTERN q_node : public pq_node -{ -private: - /** - * @internal - */ - q_node (GTL::node, int); - - // - // pq_node interface - // - - /** - * @internal - */ - void partial(iterator); - - /** - * @internal - */ - void full(iterator); - - /** - * @internal - * Determines kind of this %node. - */ - PQ_KIND kind() const - { - return Q_NODE; - } - - /** - * @internal - * Print this %node in gml format. - */ - void write(std::ostream&, int); - - /** - * @internal - */ - void clear(); - - // type-casts - - /** - * @internal - * Type-cast to P-node. - */ - p_node* P() - { - assert (false); - return 0; - } - - /** - * @internal - * Type-cast to Q-node. - */ - q_node* Q() - { - return this; - } - - /** - * @internal - * Type-cast to direction indicator. - */ - direction_indicator* D() - { - assert (false); - return 0; - } - - /** - * @internal - * Type-cast to PQ-leaf. 
- */ - pq_leaf* L() - { - assert (false); - return 0; - } - - // - // Additional - // - - /** - * @internal - * Determines pert_begin and pert_end the first time a full or partial - * child is found. - */ - void pertinent(iterator); - - /** - * @internal - * In #Q2 and #Q3 matchings the sons of partial children have to be - * merged into the list of sons of this node at the partial node's - * position - */ - q_node* merge (iterator); - - /** - * @internal - * @em Depreacted. - */ - void turn (); - - /** - * @internal - * First son full or partial viewed from the beginning of the list of - * pq_node::sons. - */ - iterator pert_begin; - - /** - * @internal - * Last son full or partial; usually this is the last son. - */ - iterator pert_end; - - /** - * @internal - * Positions of the partial nodes among the sons. Normally only two - * partial sons are allowed, but the third one is needed in planarity - * testing. - */ - iterator partial_pos[3]; - - /** - * @internal - * True when all the pertinent children are consecutive; f false - * @a pert_begin lies in one block of pertinent children and @a pert_end - * in another, such that --pert_end is empty and between the - * two blocks. - */ - bool pert_cons = false; - - /** - * @internal - * Number of partial children. - */ - int partial_count = 0; - - /** - * @internal - * Number of full children. - */ - int full_count = 0; - - // - // Friends - // - - /** - * @internal - * Allow planarity private access. - */ - friend class planarity; - - /** - * @internal - * Allow pq_tree private access. - */ - friend class pq_tree; -}; - - -/** - * @internal - */ -class GTL_EXTERN pq_leaf : public pq_node -{ -public: - /** - * @internal - */ - pq_leaf (int, int, GTL::edge, GTL::node); -private: - /** - * @internal - * Determines kind of this %node. - */ - PQ_KIND kind() const - { - return LEAF; - } - - /** - * @internal - * Print this %node in gml format. 
- */ - void write(std::ostream&, int); - - // type-casts - - /** - * @internal - * Type-cast to P-node. - */ - p_node* P() - { - assert(false); - return 0; - } - - /** - * @internal - * Type-cast to Q-node. - */ - q_node* Q() - { - assert(false); - return 0; - } - - /** - * @internal - * Type-cast to direction indicator. - */ - direction_indicator* D() - { - assert(false); - return 0; - } - - /** - * @internal - * Type-cast to PQ-leaf. - */ - pq_leaf* L() - { - return this; - } - - // - // Additional - // - - /** - * @internal - */ - int other_id; - - /** - * @internal - */ - edge e; - - // - // Friends - // - - /** - * @internal - * Allow planarity private access. - */ - friend class planarity; - - /** - * @internal - * Allow pq_tree private access. - */ - friend class pq_tree; -}; - - -/** - * @internal - */ -class GTL_EXTERN direction_indicator : public pq_node -{ -private: - /** - * @internal - */ - direction_indicator (GTL::node n_, int id_) : pq_node (n_, id_) { }; - - // - // pq_node interface - // - - /** - * @internal - * Determines kind of this %node. - */ - PQ_KIND kind() const - { - return DIR; - } - - /** - * @internal - * Print this %node in gml format. - */ - void write(std::ostream& os, int); - - // type-casts - - /** - * @internal - * Type-cast to P-node. - */ - p_node* P() - { - assert(false); - return 0; - } - - /** - * @internal - * Type-cast to Q-node. - */ - q_node* Q() - { - assert(false); - return 0; - } - - /** - * @internal - * Type-cast to direction indicator. - */ - direction_indicator* D() - { - return this; - } - - /** - * @internal - * Type-cast to PQ-leaf. - */ - pq_leaf* L() - { - assert(false); - return 0; - } - - // - // Additional - // - - /** - * @internal - */ - bool direction = false; - - // - // Friends - // - - /** - * @internal - * Allow planarity private access. - */ - friend class planarity; - - /** - * @internal - * Allow pq_tree private access. 
- */ - friend class pq_tree; -}; - -__GTL_END_NAMESPACE - -#endif - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/include/GTL/pq_tree.h b/src/Tracker/graph/GTL/include/GTL/pq_tree.h deleted file mode 100644 index d8e66651a..000000000 --- a/src/Tracker/graph/GTL/include/GTL/pq_tree.h +++ /dev/null @@ -1,392 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// pq_tree.h -// -//========================================================================== -// $Id: pq_tree.h,v 1.20 2008/02/03 18:17:08 chris Exp $ - -#ifndef PQ_TREE_H -#define PQ_TREE_H - -#include -#include -#include -#include - -#include -#include - -__GTL_BEGIN_NAMESPACE - -/** - * $Date: 2008/02/03 18:17:08 $ - * $Revision: 1.20 $ - * - * @brief PQ-Trees. - * - */ -class GTL_EXTERN pq_tree -{ -public: - /** - * @internal - */ - typedef symlist sons_list; - - /** - * @internal - */ - typedef symlist::iterator sons_iterator; - - /** - * @brief Creates empty pq_tree. - */ - pq_tree() : root(0), pert_root(0), pseudo(0), fail(0) - { - } - - /** - * @brief Creates a PQ-tree consisting of a single P-node whose - * whose children are the leaves given in list @p le. - * - * @param id st-number of @p n - * @param n node in the %graph to which the P-node refers - * @param le list of children - */ - pq_tree(int id, GTL::node n, const std::list& le); - - /** - * @brief Deletes PQ-tree. - */ - ~pq_tree(); - - /** - * @brief Applies so called template matchings to the tree until either - * all leaves labeled with @c id are consecutive in all equivalent - * trees or until it is recognized that this can't be achieved. 
- * - * This operation is guaranteed to perform in O(PPT), where - * PPT is the size of the so called @em pruned @em pertinent - * @em subtree, which can be constructed, by cutting away all - * the parts of the PQ-tree, that do not contain a leaf - * labeled with @c id. - * - * @param leaves list of full leaves - * - * @retval true if tree was successfully reduced - * @retval false if reduction failed - */ - bool reduce(std::list& leaves); - - /** - * @brief Replaces all the pertinent parts of the PQ-tree after a - * (successful) reduction by a new P-node, whose children are given in - * @p le. - * - * The edges (in the %graph), represented by the leaves are stored in - * left to right order in @c em[n] They form (up to reversion) - * the so called upward-embedding. A direction indicator representing - * the direction in which the leaves were scanned is added to the sons - * of the root of the pertinent subtree (if neccessary). All direction - * indicators in the pertinent subtree are stored in @p dirs. - * - * @param id st-number of @p n - * @param n node in the %graph to which the new P-node refers - * @param le list of children - * @param em planar embedding - * @param dirs direction indicators in pertinent subtree - */ - void replace_pert(int id, - node n, - const std::list& le, - planar_embedding* em = 0, - std::list* dirs = 0); - - /** - * @brief Scans whole tree from left to right and stores edges (in the - * %graph) represented by the leaves in @p em. - * - * All direction indicators in the tree are stored in @p - * dirs. This is used in %planarity test to get the upward - * %embedding of the last node, because no reduction is - * needed in this case since all leaves are labeled with the - * same number. 
- * - * @param em planar embedding - * @param dirs direction indicators in tree - */ - void get_frontier(planar_embedding& em, std::list& dirs); - - /** - * @brief After a (successful) reduction @c reset has to be called in - * order to prepare the tree for the next reduction. - */ - void reset (); - - /** - * @brief Returns the (PQ-) node to which none of the - * template matchings were applicable. - * - * @return PQ-node at which the reduction failed - */ - pq_node* get_fail() - { - return fail; - } - - /** - * @brief Returns true iff fail is the root of the - * pertinent subtree. - * - * @retval true iff reduction failed at the root of the - * pertinent subtree. - */ - bool is_fail_root() - { - return failed_at_root; - } - - /** - * @brief Remove a direction indicator among sons of a Q-node. - * Needed for computation of the obstruction set. - * - * @param q_fail the Q-node on which the reduction failed - * @param the position of the direction indicator among the sons - * - * @retval next valid sons iterator - */ - sons_iterator remove_dir_ind(q_node* q_fail, sons_iterator s_it); - - /** - * @brief Checks the structure of the tree. - * - * @note Use this only for debugging since it scans the whole tree, - * which isn't acceptable in terms of performance in most cases. - * - * @retval true iff tree passes checks - */ - bool integrity_check () const; - -// p_node* insert_P (pq_node*, sons_list&); - -// q_node* insert_Q (pq_node*, sons_list&); - -// pq_leaf* insert_leaf (pq_node*); - -// void insert (pq_node*, pq_node*); -private: - /** - * @internal - * Tries to give all the nodes in the pertinent subtree the right father - * pointer. If either all nodes in the pertinent subtree recieved a - * valid father pointer or there was excactly one block of inner nodes - * just below the root of the pertinent subtree, the result is true. If - * @c bubble_up returns false a reduction isn't possible. 
- * - * @param leaves list of full leaves - * - * @retval true iff bubble-up succeeded - */ - bool bubble_up(std::list& leaves); - - /** - * @internal - * Scans the subtree rooted at @p p and stores edges (in the %graph) - * represented by the leaves in @p em. All direction indicators in the - * subtree are stored in @p dirs. - * - * @param p root of subtree - * @param em planar embedding - * @param dirs direction indicators in subtree - */ - void dfs(pq_node* p, - planar_embedding& em, - std::list& dirs); - - /** - * @internal - * Test whether one of the predecessors of @p le has mark @c BLOCKED. - * Used when bubble-up failed to determine a minimum subtree, whose root - * has inner pertinent children. Minimum in this regard means that no - * descendant of the subtree's root has @c BLOCKED children. - * - * @param le (PQ-)node - * - * @return @c BLOCKED node or @c 0 - */ - pq_node* leads_to_blocked(pq_node* le); - - - /** - * @internal - * Tests wheter @p le leads to @p other, i.e. if @p other is a - * predecessor of @p le. Used to limit the leaves for reduction in case - * that bubble-up failed to the leaves in the minimum subtree, whose - * root has inner pertinent children. - * - * @param le node to be tested - * @param other root of subtree - * - * @retval true iff @p le is in subtree rooted at @p other - */ - bool leads_to(pq_node* le, pq_node* other); - - - /** - * @internal - * In case bubble-up failed a (PQ-)node has to be found which has inner - * children pertinent such that no node in its subtree has inner - * children pertinet. Template matchings then are only performed in this - * subtree. - * - * @param leaves list of full leaves - * - * @return root of the minimum subtree - */ - pq_node* where_bubble_up_failed(std::list& leaves); - - - /** - * @internal - * Tests whether some descendants of @p n are @c BLOCKED. 
- * - * @param n root for subtree to be checked - * - * @return (PQ-) @c BLOCKED node or @c 0 - */ - pq_node* blocked_in_subtree(pq_node* n); - - - // - // Template Matchings - // - - //---------------------------------------------------------- P-Templates - - /** - * @internal - * Template P1. - */ - bool P1 (p_node* x, bool); - - /** - * @internal - * Template P2. - */ - bool P2 (p_node* x); - - /** - * @internal - * Template P3. - */ - bool P3 (p_node* x); - - /** - * @internal - * Template P4. - */ - bool P4 (p_node* x); - - /** - * @internal - * Template P5. - */ - bool P5 (p_node* x); - - /** - * @internal - * Template P6. - */ - bool P6 (p_node* x); - - //---------------------------------------------------------- Q-Templates - - /** - * @internal - * Template Q1. - */ - bool Q1 (q_node* x, bool); - - /** - * @internal - * Template Q2. - */ - bool Q2 (q_node* x, bool); - - /** - * @internal - * Template Q3. - */ - bool Q3 (q_node* x); - - - // - // Data - // - - /** - * @internal - * List of (PQ-) nodes to be cleared if the reduction stopped now. - */ - std::list clear_me; - - /** - * @internal - * Root of tree. - */ - pq_node* root = nullptr; - - /** - * @internal - * Root of pertinent subtree; defined after succesful reduction. - */ - pq_node* pert_root = nullptr; - - /** - * @internal - * In some cases the root of the pertinent subtree might not be known, - * because it is a Q-node and all its pertinent children are inner. In - * this case for the time of reduction an pseudo node is created as root - * of the pertinent subtree, which gets only the pertinent children as - * sons. - */ - q_node* pseudo = nullptr; - - /** - * @internal - * (PQ-) node for which the reduction failed. - */ - pq_node* fail = nullptr; - - /** - * @internal - * @c true iff reduction failed at the root of the pertinent subtree. - */ - bool failed_at_root = false; - - /** - * @internal - * Number of pertinent leaves for the current reduction; defined after - * bubble-up. 
- */ - int pert_leaves_count = 0; - - // - // Friends - // - - /** - * @internal - * Allow operator<< private access. - */ - GTL_EXTERN friend std::ostream& operator<< (std::ostream&, const pq_tree&); -}; - -__GTL_END_NAMESPACE - -#endif - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/include/GTL/ratio_cut_partition.h b/src/Tracker/graph/GTL/include/GTL/ratio_cut_partition.h deleted file mode 100644 index 8fc28a336..000000000 --- a/src/Tracker/graph/GTL/include/GTL/ratio_cut_partition.h +++ /dev/null @@ -1,909 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// ratio_cut_partition.h -// -//========================================================================== -// $Id: ratio_cut_partition.h,v 1.8 2003/01/31 08:15:04 chris Exp $ - -#ifndef GTL_RATIO_CUT_PARTITION_H -#define GTL_RATIO_CUT_PARTITION_H - -#include -#include -#include -#include -#include - -__GTL_BEGIN_NAMESPACE - - -/** - * @short Heuristic graph bi-partitioning algorithm (Wei-Cheng). - * - * This class implements a heuristic graph bi-partitioning algorithm using - * the ratio cut method proposed by Y. C. Wei and C. K. Cheng in 1991. - * - *

In the case E is the set of edges of the graph, the algorithm needs - * O(|E|) time to proceed. - * - * @see fm_partition - */ -class GTL_EXTERN ratio_cut_partition : public algorithm -{ -public: - /** - * Return type of @ref ratio_cut_partition#get_side_of_node. - * - * @see ratio_cut_partition#A - * @see ratio_cut_partition#B - */ - typedef int side_type; - - /** - * A means the node is on side A. - * - * @see ratio_cut_partition#side_type - */ - const static side_type A; - - /** - * B means the node is on side B. - * - * @see ratio_cut_partition#side_type - */ - const static side_type B; - - /** - * Fix type of each node (needed with - * @ref ratio_cut_partition#set_vars). - * - * @see ratio_cut_partition#FIXA - * @see ratio_cut_partition#FIXB - * @see ratio_cut_partition#UNFIXED - */ - typedef short int fix_type; - - /** - * FIXA means fix node on side A. - * - * @see ratio_cut_partition#set_vars - */ - const static fix_type FIXA; - - /** - * FIXB means fix node on side B. - * - * @see ratio_cut_partition#fixe_type - */ - const static fix_type FIXB; - - /** - * UNFIXED means node is free. - * - * @see ratio_cut_partition#fixe_type - */ - const static fix_type UNFIXED; - - /** - * Default constructor. - * - * @see algorithm#algorithm - */ - ratio_cut_partition(); - - /** - * Destructor. - * - * @see algorithm#~algorithm - */ - virtual ~ratio_cut_partition(); - - /** - * Sets variables. - * Must be executed before @ref ratio_cut_partition#check! - * source_node and target_node will be - * determined automatically. - * - * @param G undirected graph - * @param node_weight weight of each node - * @param edge_weight weight of each edge. - * @see ratio_cut_partition#check - */ - void set_vars(const graph& G, const node_map& node_weight, - const edge_map& edge_weight); - - /** - * Sets variables. - * Must be executed before @ref ratio_cut_partition#check! - * In order to get good results, you should take two graph - * theoretically far away nodes as source and target. 
- * - * @param G undirected graph - * @param node_weight weight of each node - * @param edge_weight weight of each edge - * @param source_node start-node, remains on side A - * @param target_node end-node, remains on side B - * @see ratio_cut_partition#check - */ - void set_vars(const graph& G, const node_map& node_weight, - const edge_map& edge_weight, const node source_node, - const node target_node); - - /** - * Sets variables. - * Must be executed before @ref ratio_cut_partition#check! - * In order to get good results, you should take two graph - * theoretically far away nodes as source and target. Additionally - * init_side should nearly be in balance. - * source_node must be on side A in - * init_side and target_node on side B - * respectively. - * - * @param G undirected graph - * @param node_weight weight of each node - * @param edge_weight weight of each edge - * @param source_node start-node, remains on side A - * @param target_node end-node, remains on side B - * @param init_side initial bi-partitioning - * @see ratio_cut_partition#check - */ - void set_vars(const graph& G, const node_map& node_weight, - const edge_map& edge_weight, const node source_node, - const node target_node, const node_map& init_side); - - /** - * Sets variables. - * Must be executed before @ref ratio_cut_partition#check! - * In order to get good results, you should take two graph - * theoretically far away nodes as source and target. - * source_node must not be fixed on side B - * . - * target_node must not be fixed on side A - * . 
- * - * @param G undirected graph - * @param node_weight weight of each node - * @param edge_weight weight of each edge - * @param source_node start-node, remains on side A - * @param target_node end-node, remains on side B - * @param fixed fixed nodes - * @see ratio_cut_partition#check - */ - void set_vars(const graph& G, const node_map& node_weight, - const edge_map& edge_weight, const node source_node, - const node target_node, const node_map& fixed); - - /** - * Sets variables. - * Must be executed before @ref ratio_cut_partition#check! - * In order to get good results, you should take two graph - * theoretically far away nodes as source and target. Additionally - * init_side should nearly be in balance. Fixed nodes - * are on their fix side, their initial side is overwritten then. - * source_node must be on side A in init_side - * and target_node on side B respectively. - * source_node must not be fixed on side B - * . - * target_node must not be fixed on side A - * . - * - * @param G undirected graph - * @param node_weight weight of each node - * @param edge_weight weight of each edge - * @param source_node start-node, remains on side A - * @param target_node end-node, remains on side B - * @param init_side initial bi-partitioning - * @param fixed fixed nodes - * @see ratio_cut_partition#check - */ - void set_vars(const graph& G, const node_map& node_weight, - const edge_map& edge_weight, const node source_node, - const node target_node, const node_map& init_side, - const node_map& fixed); - - /** - * Enables the storing of cut-edges. If enabled the list of - * cut-edges can be traversed using @ref - * ratio_cut_partition#cut_edges_iterator. - * - * @param set if true cut_edges will be stored - * @see ratio_cut_partition#cut_edges_begin - * @see ratio_cut_partition#cut_edges_end - */ - void store_cut_edges(const bool set); - - /** - * Enables the storing of nodes on their side. 
If enabled the nodes - * of each side can be traversed using - * ratio_cut_partition#nodes_of_one_side_iterator. - * - * @param set if true nodes on their side will be stored - * @see ratio_cut_partition#nodes_of_sideA_begin - * @see ratio_cut_partition#nodes_of_sideA_end - * @see ratio_cut_partition#nodes_of_sideB_begin - * @see ratio_cut_partition#nodes_of_sideB_end - */ - void store_nodesAB(const bool set); - - /** - * Checks whether following preconditions are satisfied: - *

    - *
  • One of the @ref ratio_cut_partition#set_vars procedures has - * been executed before. - *
  • graph G is undirected. - *
  • if applied, source_node and target_node - * are 2 distinct nodes with node weights > 0. - *
  • only node_weights >= 0 are applied. - *
  • only edge_weights >= 0 are applied. - *
  • if G has more than 2 nodes, then at least - * two of them have a weight > 0. - *
  • if applied fixed source node, fixed[source_node] - * is FIXA. - *
  • if applied fixed target node, fixed[target_node] - * is FIXB. - *
- * - * @param G graph - * @return algorithm::GTL_OK on success, - * algorithm::GTL_ERROR otherwise - * @see ratio_cut_partition#set_vars - * @see algorithm#check - */ - virtual int check(GTL::graph& G); - - /** - * Computes a partitioning of G, that means a division - * of its vertices in two sides ratio_cut_partition::A - * and ratio_cut_partition::B. - * - * @param G graph - * @return algorithm::GTL_OK on success, - * algorithm::GTL_ERROR otherwise - * @see algorithm#run - */ - int run(GTL::graph& G); - - /** - * Gets the size of the cut after bi-partitioning. - * - * @return cutsize - */ - int get_cutsize(); - - /** - * Gets the ratio of the cut after bi-partitioning as defined in - * [WeiChe91]. - * - * @return cutratio - */ - double get_cutratio(); - - /** - * Gets side of the node after bi-partitioning. - * - * @param n node of graph G - * @return ratio_cut_partition::A if n - * lies on side A, ratio_cut_partition::B - * otherwise - */ - side_type get_side_of_node(const node& n) const; - - /** - * Gets side of the node after bi-partitioning. - * - * @param n node of graph G - * @return ratio_cut_partition::A if n - * lies on side A, ratio_cut_partition::B - * otherwise - * @see ratio_cut_partition#get_side_of_node - */ - side_type operator [](const node& n) const; - - /** - * Gets the sum of all node weights from nodes on side A - * . - * - * @param G graph - * @return node_weight_on_sideA - */ - int get_weight_on_sideA(const graph& G) const; - - /** - * Gets the sum of all node weights from nodes on side B - * . - * - * @param G graph - * @return node_weight_on_sideB - */ - int get_weight_on_sideB(const graph& G) const; - - /** - * Iterator type for edges which belong to the cut. - */ - typedef edges_t::const_iterator cut_edges_iterator; - - /** - * Iterate through all edges which belong to the cut, that means - * all edges with end-nodes on different sides. - * It is only valid if enabled with @ref - * ratio_cut_partition#store_cut_edges before. 
- * - * @return start for iteration through all cut edges - */ - cut_edges_iterator cut_edges_begin() const; - - /** - * End-Iterator for iteration through all edges which belong to the - * cut. - * It is only valid if enabled with @ref - * ratio_cut_partition#store_cut_edges before. - * - * @return end for iteration through all cut-edges - */ - cut_edges_iterator cut_edges_end() const; - - /** - * Iterator type for nodes of a side. - */ - typedef nodes_t::const_iterator nodes_of_one_side_iterator; - - /** - * Iterate through all nodes which belong to side A, - * It is only valid if enabled with @ref - * ratio_cut_partition#store_nodesAB before. - * - * @return start for iteration through all nodes on A - */ - nodes_of_one_side_iterator nodes_of_sideA_begin() const; - - /** - * End-Iterator for iteration through all nodes which belong to side - * A, - * It is only valid if enabled with @ref - * ratio_cut_partition#store_nodesAB before. - * - * @return end for iteration through all nodes on A - */ - nodes_of_one_side_iterator nodes_of_sideA_end() const; - - /** - * Iterate through all nodes which belong to side B, - * It is only valid if enabled with @ref - * ratio_cut_partition#store_nodesAB before. - * - * @return start for iteration through all nodes on B - */ - nodes_of_one_side_iterator nodes_of_sideB_begin() const; - - /** - * End-Iterator for iteration through all nodes which belong to side - * B, - * It is only valid if enabled with @ref - * ratio_cut_partition#store_nodesAB before. - * - * @return end for iteration through all nodes on B - */ - nodes_of_one_side_iterator nodes_of_sideB_end() const; - - /** - * Resets ratio_cut_partition, i.e. prepares the algorithm to be - * applied to another graph. 
- * - * @see algorithm#reset - */ - virtual void reset(); -protected: - /** - * @internal - */ - enum direction_type {LEFT_SHIFT = 2, RIGHT_SHIFT = 3}; - - /** - * @internal - * true, iff user enabled storing of cut-edges with - * @ref ratio_cut_partition#store_cut_edges. - */ - bool enable_cut_edges_storing; - - /** - * @internal - * List of edges which belong to the cut. - */ - edges_t cut_edges; - - /** - * @internal - * true, iff user enabled storing of nodes with @ref - * ratio_cut_partition#store_nodesAB. - */ - bool enable_nodesAB_storing; - - /** - * @internal - * List of nodes which belong to side A. - */ - nodes_t nodesA; - - /** - * @internal - * List of nodes which belong to side A. - */ - nodes_t nodesB; - - /** - * @internal - * Corresponds to s in [WeiChe91]. - */ - node source_node; - - /** - * @internal - * Corresponds to t in [WeiChe91]. - */ - node target_node; - - /** - * @internal - * true, iff user has executed @ref - * ratio_cut_partition#set_vars before @ref ratio_cut_partition# - * check and @ref ratio_cut_partition#run. - */ - bool set_vars_executed; - - /** - * @internal - * true, iff user has provided source_node - * and target_node, false else. - */ - bool provided_st; - - /** - * @internal - * true, iff user has provided init_side - * with @ref ratio_cut_partition#set_vars, false - * otherwise. - */ - bool provided_initial_part; - - /** - * @internal - * true, iff user has provided fixed with - * @ref ratio_cut_partition#set_vars, false otherwise. - */ - bool provided_fix; - - /** - * @internal - * Contains information where a node is fixed. - */ - node_map fixed; - - /** - * @internal - * LEFT if @ref ratio_cut_partition#left_shift_op has - * computed last cut, RIGHT else. - */ - direction_type direction; - - /** - * @internal - * Contains the weight of each node. - * Corresponds to w(v) in [Leng90]. - */ - node_map node_weight; - - /** - * @internal - * Contains the weight of each edge. - * Corresponds to c(e) in [Leng90]. 
- */ - edge_map edge_weight; - - /** - * @internal - * Contains the maximum weight of an edge in G. - * (maximum of edge_weight[...]) - */ - int max_edge_weight; - - /** - * @internal - * Contains the sum over all vertex weights on side A. - * Corresponds to w(A) in [Leng90]. - */ - int node_weight_on_sideA; - - /** - * @internal - * Contains the sum over all vertex weights on side B. - * Corresponds to w(B) in [Leng90]. - */ - int node_weight_on_sideB; - - /** - * @internal - * Counts nodes on side A. - */ - int nodes_on_sideA; - - /** - * @internal - * Counts nodes on side B. - */ - int nodes_on_sideB; - - /** - * @internal - * Contains information about the current side of a node. - */ - node_map side; - - /** - * @internal - * Corresponds to CELL array in [FidMat82] - */ - node_map position_in_bucket; - - /** - * @internal - * Contains the maximal number of adjacent to a node. - */ - int max_vertex_degree; - - /** - * @internal - * Contains how many nodes an edge has on side A. - */ - edge_map aside; - - /** - * @internal - * Contains how many nodes an edge has on side B. - */ - edge_map bside; - - /** - * @internal - * Contains the unlocked nodes of an edge on side A. - * (max. 2) - */ - edge_map unlockedA; - - /** - * @internal - * Contains the unlocked nodes of an edge on side B. - * (max. 2) - */ - edge_map unlockedB; - - /** - * @internal - * Corresponds to D value in Leng[90]. - */ - node_map gain_value; - - /** - * @internal - * true, iff bucketA is empty. - */ - bool bucketA_empty; - - /** - * @internal - * true, iff bucketB is empty. - */ - bool bucketB_empty; - - /** - * @internal - * Contains the maximum gain value of a node in - * bucketA. - */ - int max_gainA; - - /** - * @internal - * Contains the maximum gain value of a node in - * bucketB. - */ - int max_gainB; - - /** - * @internal - * Like a hash table over the gain_value of each node - * on side A. 
(open hashing, collisions in gain buckets - * are organized through LIFO lists) - */ - std::vector bucketA; - - /** - * @internal - * Like a hash table over the gain_value of each node - * on side B. (open hashing, collisions in gain buckets - * are organized through LIFO lists) - */ - std::vector bucketB; - - /** - * @internal - * Sum over all edge_costs[e] where edge e is an - * element of the cut. - */ - int cur_cutsize; - - /** - * @internal - * Cut ratio as defined in [WeiChe91]. - */ - double cur_cutratio; - - /** - * @internal - * Fix FIXA nodes on side A and FIXB - * nodes on side B. - */ - void divide_up(const graph& G); - - /** - * @internal - * Makes G connected for the run of this algorithm. - * This is done by introducing edges with weight 0 since Ratio Cut - * works well on connected graphs only. - */ - void make_connected(GTL::graph& G, edges_t& artificial_edges); - - /** - * @internal - * Deletes the edges introduced in @ref ratio_cut_partition# - * make_connected. - */ - void restore(GTL::graph& G, edges_t& artificial_edges); - - /** - * @internal - * Corresponds to phase 1 in [WeiChe91]. - */ - void initialization(const graph& G); - - /** - * @internal - * Initialization of the data structure for each step. - */ - void init_data_structure(const graph& G); - - /** - * @internal - * Computes initial gain_value for each node and inserts it in the - * corresponding bucket data structure. - */ - void init_filling_buckets(const graph& G); - - /** - * @internal - * Compute initial gain of a node on side A. - * @return initial gain_value of a node on side A - */ - int inital_gain_of_node_on_sideA(const node cur_node); - - /** - * @internal - * Compute initial gain of a node on side B. - * @return initial gain_value of a node on side B - */ - int inital_gain_of_node_on_sideB(const node cur_node); - - /** - * @internal - * Computes some maximum variables. - */ - void init_variables(const graph& G); - - /** - * @internal - * Computes max_vertex_degree. 
- */ - void compute_max_vertex_degree(const graph& G); - - /** - * @internal - * Compute source seed [WeiChe91]. - */ - void determine_source_node(const graph& G); - - /** - * @internal - * Compute target seed [WeiChe91]. - */ - void compute_target_node(const graph& G); - - /** - * @internal - * Corresponds to right shifting operation as defined in [WeiChe91]. - * Moves nodes from side A to B. - */ - void right_shift_op(const graph& G); - - /** - * @internal - * Corresponds to left shifting operation as defined in [WeiChe91]. - * Moves nodes from side B to A. - */ - void left_shift_op(const graph& G); - - /** - * @internal - * Moves max_gain node from side A to - * B. - * @return true if vertex stored in parameter - * moved_node has been found - */ - bool move_vertex_A2B(const graph& G, GTL::node& moved_node); - - /** - * @internal - * Moves max_gain node from side B to A. - * @return true if vertex stored in parameter - * moved_node has been found - */ - bool move_vertex_B2A(const graph& G, GTL::node& moved_node); - - /** - * @internal - * Selects node with highest ratio_gain - */ - node compute_highest_ratio_node(nodes_t node_list); - - /** - * @internal - * Computes cut_ratio. - * @return cut_ratio with cutsize cur_cutsize - * and current side weights node_weight_on_sideA - * and node_weight_on_sideB - */ - double cutratio(); - - /** - * @internal - * Corresponds to r(i) in [WeiChe91]. - * @return ratio gain of a node cur_node on side - * A - */ - double ratio_of_node_A2B(const node cur_node); - - /** - * @internal - * Corresponds to r(i) in [WeiChe91]. - * @return ratio gain of a node cur_node on side - * B - */ - double ratio_of_node_B2A(const node cur_node); - - /** - * @internal - * Transform a range from [-a..+a] to [0..2a]. - * (reverse to @ref ratio_cut_partition#range_up) - */ - inline int range_up(const int gain_value) const; - - /** - * @internal - * Transform a range from [0..2a] to [-a..+a]. 
- * (reverse to @ref ratio_cut_partition#range_down) - */ - inline int range_down(const int index) const; - - /** - * @internal - * Executed, if cur_node is chosen to move from side - * A to B. - */ - void update_data_structure_A2B(const node cur_node, - const bool init_mode); - - /** - * @internal - * Executed, if cur_node is chosen to move from side - * B to A. - */ - void update_data_structure_B2A(const node cur_node, - const bool init_mode); - - /** - * @internal - * Reorganizes bucketA if a nodes gain of it has been - * changed. - */ - void update_bucketA(const node cur_node, const int old_gain, - const int new_gain, const bool init_mode); - - /** - * @internal - * Reorganizes bucketB if a nodes gain of it has been - * changed. - */ - void update_bucketB(const node cur_node, const int old_gain, - const int new_gain, const bool init_mode); - - /** - * @internal - * Recomputes max_gainA or max_gainB - * respectively. - */ - void update_max_gain(const side_type side); - - /** - * @internal - * Do some garbage collection. - */ - void clean_step(const graph& G); - - /** - * @internal - * Copies side node maps. - */ - void copy_side_node_map(const graph& G, GTL::node_map& dest, - const node_map source) const; - - /** - * @internal - * Corresponds to phase 2 in [WeiChe91]. - */ - void iterative_shifting(const graph& G); - - /** - * @internal - * Corresponds to phase 3 in [WeiChe91]. - */ - void group_swapping(const graph& G); - - /** - * @internal - * Moves nodes in group swapping phase. - * @return true on improvement, false - * else - */ - bool move_manager(const graph& G); - - /** - * @internal - * Moves a single node. - * @return true if vertex stored in parameter - * moved_node has been found - */ - bool move_vertex(const graph& G, GTL::node& moved_node); - - /** - * @internal - * Computes list cut_edges. - */ - void compute_cut_edges(const graph& G); - - /** - * @internal - * Computes lists nodesA and nodesB. 
- */ - void compute_nodesAB(const graph& G); -private: -#ifdef _DEBUG - /** - * @internal - * Prints content of bucketA with associated gain values. - */ - void print_bucketA(); - - /** - * @internal - * Prints content of bucketB with associated gain values. - */ - void print_bucketB(); -#endif // _DEBUG -}; - -__GTL_END_NAMESPACE - -#endif // GTL_RATIO_CUT_PARTITION_H - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/include/GTL/st_number.h b/src/Tracker/graph/GTL/include/GTL/st_number.h deleted file mode 100644 index cd8f827d5..000000000 --- a/src/Tracker/graph/GTL/include/GTL/st_number.h +++ /dev/null @@ -1,442 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// st_number.h -// -//========================================================================== -// $Id: st_number.h,v 1.17 2002/12/20 08:26:08 chris Exp $ - -#ifndef GTL_ST_NUMBER_H -#define GTL_ST_NUMBER_H - -#include -#include -#include -#include -#include - -#include -#include - -__GTL_BEGIN_NAMESPACE - -/** -* @internal -*/ -class GTL_EXTERN pathfinder -{ -public: - //---------------------------------------------------------- CONSTRUCTOR - - /** - * @internal - */ - pathfinder(const graph& G, GTL::edge st, GTL::node s); - - /** - * @internal - */ - bool is_valid() - { - return is_biconn; - } - - //------------------------------------------------------------- ITERATOR - - /** - * @internal - */ - class const_iterator - { - public: - /** - * @internal - */ - const_iterator(pathfinder& _pf) : pf (_pf) - { - } - - /** - * @internal - */ - const_iterator(pathfinder& _pf, GTL::node n); - - /** - * @internal - */ - const_iterator& operator++(); - /** - * @internal - */ - const_iterator operator++(int); - /** - * @internal - */ - 
const node& operator*() const - { - return curr; - } - - /** - * @internal - */ - bool operator==(const const_iterator& it) - { - return curr == it.curr; - } - - /** - * @internal - */ - bool operator!=(const const_iterator& it) - { - return curr != it.curr; - } - private: - /** - * @internal - */ - enum iteration_state {END, UP, DOWN}; - - /** - * @internal - */ - iteration_state state; - - /** - * @internal - */ - node curr; - - /** - * @internal - */ - pathfinder& pf; - }; - - /** - * @internal - */ - const_iterator path(GTL::node n) - { - return const_iterator(*this, n); - } - - /** - * @internal - */ - const_iterator end() - { - return const_iterator (*this); - } - -private: - //------------------------------------------------------------ FUNCTIONS - - /** - * @internal - */ - void dfs_sub (GTL::node&, GTL::node&); - - //-------------------------------------------------------------- MEMBERS - - /** - * @internal - */ - node_map dfs_num; - - /** - * @internal - */ - node_map low_num; - - /** - * @internal - */ - node_map tree; - - /** - * @internal - */ - node_map back; - - /** - * @internal - */ - node_map forward; - - /** - * @internal - */ - node_map to_low; - - /** - * @internal - */ - node_map to_father; - - /** - * @internal - */ - typedef std::pair pos_pair; - - /** - * @internal - */ - edge_map pos; - - /** - * @internal - */ - node_map used; - - /** - * @internal - */ - int act_dfs_num; - - /** - * @internal - */ - int new_nodes; - - /** - * @internal - */ - bool is_biconn; - - /** - * @internal - * Allows const_iterator private access. - */ - friend class const_iterator; -}; - -/** - * @brief ST-number algorithm. - * - * Encapsulates the st-number algorithm together with all the data produced - * by it. - *

- * Assigns an integer st[n] to each node @c n of a undirected, - * biconnected graph, such that each node is connected with at least one - * node having a smaller and with at least one having a larger number than - * itself. The only exception to this rule are the endpoints of edge @a st - * connecting nodes @a s (st-number 1) and @c t (highest st-number). - *

- * The following options are supported: - * - #st_edge sets/retrieves the edge that connects the node with the lowest - * number to that with the highest. - * - #s_node sets/retrieves that endpoints of the @a st_edge, which gets - * number 1. - */ -class GTL_EXTERN st_number : public algorithm -{ -public: - /** - * @brief Default constructor. - * Creates st-number object. Please note that there are no reasonable - * default settings for the parameters, i.e. the edge @s st connecting - * the lowest with highest numbers node and which of its endpoints - * should get number 1 (= node @a s) has to be specified always. - */ - st_number() : algorithm() - { - } - - /** - * @brief Destructor - */ - virtual ~st_number() - { - } - - /** - * @brief Sets edge @a st for the next run. - * - * @param e edge @a st - */ - void st_edge(GTL::edge e) - { - st = e; - } - - /** - * @brief Get edge @a st. - * - * @retval edge @a st - */ - edge st_edge() const - { - return st; - } - - /** - * @brief Sets node @a s for next run. - * - * This must be one of the endpoints of edge @a st. This node will get - * st-number 1 and thus the other endpoint will get the highest - * st-number. - * - * @param n node @a s - */ - void s_node(GTL::node n) - { - s = n; - } - - /** - * @brief Get node @a s. - * - * @retval node @a s - */ - node s_node() const - { - return s; - } - - /** - * @brief Returns st-number of node @p n as determined in the last run. - * - * @param n node - * - * @return st-number of @p n - */ - int& operator[](const node& n) - { - return st_num[n]; - } - - /** - * @internal - */ - typedef nodes_t::iterator iterator; - - /** - * @internal - */ - typedef nodes_t::reverse_iterator reverse_iterator; - - /** - * @brief Iteration through the nodes of graph st-numbered in last - * run in st-number order, i.e. from 1 to highest st-number. 
- * - * @return start of iteration through nodes in st-number order - */ - iterator begin() - { - return st_ord.begin(); - } - - /** - * @brief Iteration through nodes of graph in st-number order. - * - * @return end of iteration through nodes of graph in st-number order - */ - iterator end() - { - return st_ord.end(); - } - - /** - * @brief Iteration through the nodes of graph st-numbered in last run - * in reverse st-number order, i.e. from highest st-number down - * to 1. - * - * @return start of iteration through nodes in reverse st-number order - */ - reverse_iterator rbegin() - { - return st_ord.rbegin(); - } - - /** - * @brief End of iteration through nodes of graph in reverse st-number - * order. - * - * @return end of iteration through nodes in reverse st-number order - */ - reverse_iterator rend() - { - return st_ord.rend(); - } - - - /** - * @brief Checks whether st-number algorithm can be applied to @p G. - * - * Besides from the trivial preconditions that edge @a st and node @a s - * lie in @p G and @a s is really an endpoint of @a st (which isn't - * checked), @p G must be undirected and biconnected. - * @note As for all algorithms in GTL, #check must be called, because it - * might do some initialization. - * - * @param G graph - * - * @retval algorithm::GTL_OK iff st-number algorithm may be applied - * - * @sa algorithm::check - */ - int check(GTL::graph& G); - - - /** - * @brief Runs st-number algorithm on graph @p G. - * - * It is assumed that #check was called previously and returned - * algorithm::GTL_OK. - * - * @param G graph - * - * @return algorithm::GTL_OK iff @p G could be correctly st-numbered - * - * @sa algorithm::run - */ - int run(GTL::graph& G); - - - /** - * @brief Resets algorithm in order to be applied to the next graph. - * - * This will delete most of the information obtained in the last run. 
- * - * @sa algorithm::reset - */ - void reset() - { - st_ord.erase (st_ord.begin(), st_ord.end()); - } -protected: - /** - * @internal - */ - edge st; - - /** - * @internal - */ - node s; - - /** - * @internal - */ - pathfinder* pf = nullptr; - - /** - * @internal - */ - nodes_t st_ord; - - /** - * @internal - */ - node_map st_num; -}; - -__GTL_END_NAMESPACE - -#endif // GTL_ST_NUMBER_H - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/include/GTL/symlist.h b/src/Tracker/graph/GTL/include/GTL/symlist.h deleted file mode 100644 index c64f75d92..000000000 --- a/src/Tracker/graph/GTL/include/GTL/symlist.h +++ /dev/null @@ -1,737 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// symlist.h -// -//========================================================================== -// $Id: symlist.h,v 1.17 2002/12/20 08:26:08 chris Exp $ - -#ifndef SYMLIST_H -#define SYMLIST_H - -#include - -#include - -__GTL_BEGIN_NAMESPACE - -/** - * @internal - */ -template -struct symnode -{ - /** - * @internal - */ - symnode() - { - } - - /** - * @internal - */ - symnode(const T& n) : data(n) - { - } - - /** - * @internal - */ - symnode* adj[2]; - - /** - * @internal - */ - T data; -}; - -/** - * @internal - */ -template -struct symlist_iterator -{ - /** - * @internal - */ - typedef symlist_iterator self; - - /** - * @internal - */ - typedef symnode* linktype; - - /** - * @internal - */ - symlist_iterator() : act (0) - { - } - - /** - * @internal - */ - symlist_iterator(const self& it) : act(it.act), dir(it.dir) - { - } - - /** - * @internal - */ - symlist_iterator(linktype _act, int _dir) : act(_act), dir(_dir) - { - } - - /** - * @internal - */ - symlist_iterator(linktype _act, linktype _prev) : - act(_act), - 
dir (where_not (_act, _prev)) - { - } - - /** - * @internal - */ - self& operator=(const self& it) - { - act = it.act; - dir = it.dir; - return *this; - } - - /** - * @internal - */ - bool operator==(const self& it) const - { - return act == it.act; - } - - /** - * @internal - */ - bool operator!=(const self& it) const - { - return act != it.act; - } - - /** - * @internal - */ - Ref operator*() - { - return act->data; - } - - /** - * @internal - */ - self& operator++(); - - /** - * @internal - */ - self& operator--(); - - /** - * @internal - */ - static int where(linktype _act, linktype _prev) - { - return _prev == _act->adj[0] ? 0 : 1; - } - - /** - * @internal - */ - static int where_not(linktype _act, linktype _prev) - { - return _prev == _act->adj[1] ? 0 : 1; - } - - /** - * @internal - */ - void reverse() - { - dir = 1 - dir; - } - - /** - * @internal - */ - linktype& next() - { - return act->adj[dir]; - } - - /** - * @internal - */ - linktype& prev() - { - return act->adj[1 - dir]; - } - - /** - * @internal - */ - linktype act; - - /** - * @internal - */ - int dir = 0; -}; - -/** - * @brief List which can be reversed in @f$\mathcal{O}(1)@f$. - * - * The problem with the STL class list - as with most doubly linked lists -- - * is that isn't possible to turn it in constant time, because each entry in - * the list contains next and prev pointer and turning the list means to - * switch these two in @em each element in the list. Another point is the - * splice operation in STL lists, which is constant time, but for the same - * reason as mentioned above it is not possible to splice a list in reverse - * order into another in constant time. - *

- * The problems arise from the fact that each element "knows" what its next - * and previous elements are. An element in a symlist only knows what its - * neighbors are, what is next and what previous depends on the direction of - * iteration. This of course imposes some overhead in iteration (one - * if-statement) but allows reversion and a splice in reversed order in - * constant time. - */ -template -class symlist -{ -public: - /** - * @internal - */ - typedef symlist_iterator iterator; - - /** - * @internal - */ - typedef symlist_iterator const_iterator; - - /** - * @brief Creates empty symlist. - */ - symlist() - { - link = new symnode; - link->adj[0] = link->adj[1] = link; - } - - /** - * @brief Makes the created list a copy of @c li. - * - * @param li symlist. - */ - symlist(const symlist& li); - - /** - * @brief Assignes @c li to this list. - * - * @note All elements in this list will be deleted. - * - * @param li - * - * @return this list - */ - symlist& operator=(const symlist& li); - - /** - * @brief Destructor - */ - ~symlist(); - - /** - * @brief Checks whether list is empty. - * - * Takes constant time. - * - * @retval true iff list is empty - */ - bool empty() const - { - return link->adj[0] == link && link->adj[1] == link; - } - - /** - * @brief First element in list. - * - * Assumes that list ins't empty. - * - * @return first element - */ - T& front() - { - return link->adj[0]->data; - } - - /** - * @brief Last element in list. - * - * Assumes that list ins't empty. - * - * @return last element - */ - T& back() - { - return link->adj[1]->data; - } - - /** - * @brief Start iteration through elements of list. - * - * @return start iterator - */ - iterator begin() - { - return ++end(); - } - - /** - * @brief End of iteration through elements of list. - * - * @return end iterator - */ - iterator end() - { - return iterator(link, 0); - } - - /** - * @brief Start iteration through elements of list. 
- * - * @return start iterator - */ - const_iterator begin() const - { - return ++end(); - } - - /** - * @brief End of iteration through elements of list. - * - * @return end iterator - */ - const_iterator end () const - { - return const_iterator (link, 0); - } - - /** - * @brief Start iteration through element of list in reverse order. - * - * @return start iterator - */ - iterator rbegin() - { - return ++rend(); - } - - /** - * @brief End of iteration through elements of list in reverse order. - * - * @return end iterator - */ - iterator rend() - { - return iterator (link, 1); - } - - /** - * @brief Start iteration through element of list in reverse order. - * - * @return start iterator - */ - const_iterator rbegin() const - { - return ++rend(); - } - - /** - * @brief End of iteration through elements of list in reverse order. - * - * @return end iterator - */ - const_iterator rend() const - { - return const_iterator(link, 1); - } - - /** - * @brief Inserts @p data before @p pos in list. - * - * @param pos position - * @param data element to be inserted - * - * @return position of insertion - */ - iterator insert (iterator pos, const T& data); - - /** - * @brief Inserts the element @p it points to before @p pos into this - * list. - * - * It is assumed that the element @p it refers lies in a different list. - * All iterators to elements in either of the two lists stay valid. - * Takes constant time. - * - * @param pos position - * @param it position of element to be inserted - */ - void splice (iterator pos, iterator it); - - /** - * @brief Inserts the elements [it,end) refers to before @p pos - * into this list. - * - * It is assumed that [it,end) lies in a different - * list. All iterators to elements in either of the two lists stay - * valid. Takes constant time. 
- * - * @param pos position - * @param it position of first element to be inserted - * @param end position of one-past the last element to be inserted - */ - void splice (iterator pos, iterator it, iterator end); - - /** - * @brief Deletes element at position @p pos from list. - * - * @param pos position to be deleted - * - * @return position of next element - */ - iterator erase (iterator pos); - - /** - * @brief Deletes the elements [it, end) from list. - * - * @param it first position to be deleted - * @param end one-past the last position to be deleted - * - * @return position of next element. - */ - iterator erase (iterator it, iterator end); - - /** - * @internal - */ - void attach_sublist (iterator, iterator); - - /** - * @internal - */ - void detach_sublist (); - - /** - * @brief Change the direction of list. - * - * Takes constant time. - */ - void reverse (); -private: - /** - * @internal - */ - symnode* link; - - /** - * @internal - * - * @note Needed only when used as sublist. - */ - iterator _prev; - - /** - * @internal - * - * @note Needed only when used as sublist. 
- */ - iterator _next; -}; - - -// Implementation Begin - -template -symlist_iterator& symlist_iterator::operator++() -{ - symnode* prev = act; - act = act->adj[dir]; - dir = where_not(act, prev); - return *this; -} - - -template -symlist_iterator& symlist_iterator::operator--() -{ - symnode* prev = act; - act = act->adj[1 - dir]; - dir = where(act, prev); - return *this; -} - - -template -symlist::symlist (const symlist& l) -{ - link = new symnode; - link->adj[0] = link->adj[1] = link; - - const_iterator it = l.begin(); - const_iterator e = l.end(); - - while (it != e) - { - insert(end(), *it); - ++it; - } -} - - -template -symlist::~symlist() -{ - if (_next == iterator()) - { - erase (begin(), end()); - } - else - { - detach_sublist(); - } - - delete link; -} - - -template -symlist& symlist::operator=(const symlist& l) -{ - erase(begin(), end()); - - const_iterator it = l.begin(); - const_iterator e = l.end(); - - while (it != e) - { - insert(end(), *it); - ++it; - } - - return *this; -} - - -template -symlist_iterator symlist::insert( - symlist_iterator pos, - const T& ins) -{ - iterator prev = pos; - --prev; - symnode* n = new symnode(ins); - n->adj[0] = pos.act; - n->adj[1] = prev.act; - - if (pos == prev) - { - pos = prev; - } - - pos.prev() = n; - prev.next() = n; - - return iterator(n, 0); -} - - -template -void symlist::splice(symlist_iterator pos, - symlist_iterator beg, - symlist_iterator end) -{ - if (beg != end) - { - iterator prev = beg; - --prev; - iterator last = end; - --last; - - // - // The following seems to be rather senseless, but it is required - // since two iterator are equal, iff the point to the same element. - // This implies that they might have different directions. Suppose - // that prev == end is true and they have different directions, - // than prev.next() and end.prev() return the same element !! Thus - // the assignment prev = end corrects this, since the direction - // gets copied, too. 
- // - if (prev == end) - { - prev = end; - } - - prev.next() = end.act; - end.prev() = prev.act; - - prev = pos; - --prev; - - if (pos == prev) - { - pos = prev; - } - - if (last == beg) - { - last = beg; - } - - prev.next() = beg.act; - beg.prev() = prev.act; - pos.prev() = last.act; - last.next() = pos.act; - } -} - - -template -void symlist::splice(symlist_iterator pos, - symlist_iterator beg) -{ - iterator tmp = beg; - ++tmp; - splice(pos, beg, tmp); -} - - -template -symlist_iterator symlist::erase(symlist_iterator pos) -{ - assert (pos.act != link); - iterator prev = pos; - --prev; - iterator next = pos; - ++next; - - if (next == prev) - { - next = prev; - } - - next.prev() = prev.act; - prev.next() = next.act; - - delete (pos.act); - - return next; -} - -template -symlist_iterator symlist::erase(symlist_iterator beg, - symlist_iterator end) -{ - iterator prev = beg; - --prev; - iterator it = beg; - symnode* act; - - while (it != end) - { - assert (it.act != link); - act = it.act; - ++it; - delete (act); - } - - if (prev == end) - { - prev = end; - } - - end.prev() = prev.act; - prev.next() = end.act; - - return end; -} - - -template -void symlist::attach_sublist(symlist_iterator it, - symlist_iterator end) -{ - assert (empty()); - iterator last = end; - --last; - _prev = it; - --_prev; - _next = end; - - if (it == last) - { - it = last; - } - - link->adj[0] = it.act; - it.prev() = link; - link->adj[1] = last.act; - last.next() = link; -} - - -template -void symlist::detach_sublist() -{ - if (_next != iterator()) - { - iterator it(begin()); - iterator e(end()); - - --e; - - if (e == it) - { - e = it; - } - - _prev.next() = it.act; - it.prev() = _prev.act; - _next.prev() = e.act; - e.next() = _next.act; - link->adj[0] = link->adj[1] = link; - - _next = iterator(); - _prev = iterator(); - } -} - - -template -inline void symlist::reverse() -{ - symnode* tmp = link->adj[0]; - link->adj[0] = link->adj[1]; - link->adj[1] = tmp; -} - -// Implementation End - 
-__GTL_END_NAMESPACE - -#endif // SYMLIST_H - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/include/GTL/topsort.h b/src/Tracker/graph/GTL/include/GTL/topsort.h deleted file mode 100644 index 0753df22f..000000000 --- a/src/Tracker/graph/GTL/include/GTL/topsort.h +++ /dev/null @@ -1,143 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// topsort.h -// -//========================================================================== -// $Id: topsort.h,v 1.8 2000/09/11 07:36:43 raitner Exp $ - -#ifndef GTL_TOPSORT -#define GTL_TOPSORT - -#include -#include - -__GTL_BEGIN_NAMESPACE - -/** - * @short Topological sorting. - * - * Assigns to each node n a number top_num such - * that for every edge (u,v) top_num[u] < - * top_num[v], if possible, i.e. iff the directed graph is - * acyclic. - * - *

- * Similar to the testing of biconnectivity, which extends DFS to calculate - * low-numbers, the topsort-algorithm extends DFS to calculate the new - * numbering (and thus to test whether such a numbering is possible). - * - *

- * In order to traverse all the nodes in the order of its top-numbers, a - * new iterator, topsort_iterator is provided. - */ - -class GTL_EXTERN topsort : public dfs -{ -public: - /** - * default constructor; enables scanning of the whole_graph. - * - * @see dfs#dfs - */ - topsort () : dfs () {whole_graph = true; acyclic = true;} - - /** - * Number in topological order. - * - * @param n node. - * @return number in topological order. - */ - int top_num (const node& n) const - { return top_numbers[n]; } - - /** - * Tests if graph was acyclic. - * - * @return true iff graph was acyclic. - */ - bool is_acyclic () const - { return acyclic; } - - /** - * @internal - */ - typedef nodes_t::const_iterator topsort_iterator; - - /** - * Iterate through nodes in topsort-order. - * - * @return start-iterator. - */ - topsort_iterator top_order_begin() const - { return top_order.begin(); } - - /** - * Iterate through nodes in topsort-order. - * - * @return end-iterator. - */ - topsort_iterator top_order_end() const - { return top_order.end(); } - - /** - * Preconditions: - *

    - *
  • G is directed. - *
  • DFS may be applied - *
- * - * @param G graph. - * @return algorithm::GTL_OK if topsort may be applied to - * G. - * @see dfs#check - */ - virtual int check (GTL::graph& G); - - /** - * Reset - * @see dfs#reset - */ - virtual void reset (); - - /** - * @internal - */ - virtual void init_handler (GTL::graph& G); - - /** - * @internal - */ - virtual void leave_handler (GTL::graph&, GTL::node&, GTL::node&); - - /** - * @internal - */ - virtual void old_adj_node_handler (GTL::graph&, GTL::edge&, GTL::node&); - -protected: - /** - * @internal - */ - int act_top_num = 0; - /** - * @internal - */ - node_map top_numbers; - /** - * @internal - */ - nodes_t top_order; - /** - * @internal - */ - bool acyclic; -}; - -__GTL_END_NAMESPACE - -#endif // GTL_TOPSORT - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/include/GTL/version.h b/src/Tracker/graph/GTL/include/GTL/version.h deleted file mode 100644 index 1f3148771..000000000 --- a/src/Tracker/graph/GTL/include/GTL/version.h +++ /dev/null @@ -1,19 +0,0 @@ -//========================================================================== -// -// version.h.in - GTL version -// -//========================================================================== -// $Id: version.h.in,v 1.1 1999/02/18 18:46:59 forster Exp $ - -#ifndef GTL_VERSION_H -#define GTL_VERSION_H - -#define GTL_MAJOR_VERSION "@MAJOR_VERSION@" -#define GTL_MINOR_VERSION "@MINOR_VERSION@" -#define GTL_MINI_VERSION "@MINI_VERSION@" - -#endif // GT_VERSION_H - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/include/GTL/version.h.in b/src/Tracker/graph/GTL/include/GTL/version.h.in deleted file mode 100644 index 1f3148771..000000000 --- 
a/src/Tracker/graph/GTL/include/GTL/version.h.in +++ /dev/null @@ -1,19 +0,0 @@ -//========================================================================== -// -// version.h.in - GTL version -// -//========================================================================== -// $Id: version.h.in,v 1.1 1999/02/18 18:46:59 forster Exp $ - -#ifndef GTL_VERSION_H -#define GTL_VERSION_H - -#define GTL_MAJOR_VERSION "@MAJOR_VERSION@" -#define GTL_MINOR_VERSION "@MINOR_VERSION@" -#define GTL_MINI_VERSION "@MINI_VERSION@" - -#endif // GT_VERSION_H - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/src/bellman_ford.cpp b/src/Tracker/graph/GTL/src/bellman_ford.cpp deleted file mode 100644 index 1f8795ccd..000000000 --- a/src/Tracker/graph/GTL/src/bellman_ford.cpp +++ /dev/null @@ -1,140 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// bellman_ford.cpp -// -//========================================================================== -// $Id: bellman_ford.cpp,v 1.4 2003/01/30 17:50:56 raitner Exp $ - -#include - -__GTL_BEGIN_NAMESPACE - -bellman_ford::bellman_ford() -{ - vars_set = false; - preds = 0; -} - -bellman_ford::~bellman_ford() -{ - if (preds) delete preds; -} - -void bellman_ford::store_preds (bool set) -{ - if (set && !preds) { - preds = new node_map; - } else if (!set && preds) { - delete preds; - preds = 0; - } -} - - -int bellman_ford::check(GTL::graph& G) -{ - if (!vars_set) - { - return algorithm::GTL_ERROR; - } - - if (G.nodes_begin() == G.nodes_end()) - { - return algorithm::GTL_ERROR; - } - - return algorithm::GTL_OK; -} - -int bellman_ford::run(GTL::graph& G) -{ - if (s == node()) - { - s = *(G.nodes_begin()); - } - - 
//---------------------------------------------------------------------- - // initialize - //---------------------------------------------------------------------- - - inf.init (G, true); - - if (preds) { - preds->init (G, GTL::edge()); - } - - inf[s] = false; - d[s] = 0; - cycle = false; - - //---------------------------------------------------------------------- - // relax - //---------------------------------------------------------------------- - - graph::edge_iterator it, end; - - for (int i = 1; i < G.number_of_nodes(); ++i) - { - for (it = G.edges_begin(), end = G.edges_end(); it != end; ++it) - { - relax (*it, true); - - if (G.is_undirected()) - { - relax(*it, false); - } - } - } - - //---------------------------------------------------------------------- - // cycle detection - //---------------------------------------------------------------------- - - for (it = G.edges_begin(), end = G.edges_end(); it != end; ++it) - { - node u = it->source(); - node v = it->target(); - - if(!inf[u] && !inf[v]) - { - if (d[v] > d[u] + w[*it]) - { - cycle = true; - } - } - } - - return algorithm::GTL_OK; -} - -void bellman_ford::reset() -{ -} - -void bellman_ford::relax(const edge& e, bool dir ) -{ - node u = e.source(); - node v = e.target(); - - if (!dir) { - node tmp = u; - u = v; - v = tmp; - } - - if (!inf[u] && (inf[v] || (d[v] > d[u] + w[e]))) - { - d[v] = d[u] + w[e]; - inf[v] = false; - - if (preds) - { - (*preds)[v] = e; - } - } -} - - - -__GTL_END_NAMESPACE diff --git a/src/Tracker/graph/GTL/src/bfs.cpp b/src/Tracker/graph/GTL/src/bfs.cpp deleted file mode 100644 index 0e4343c24..000000000 --- a/src/Tracker/graph/GTL/src/bfs.cpp +++ /dev/null @@ -1,220 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// bfs.cpp -// -//========================================================================== -// $Id: bfs.cpp,v 1.11 2001/11/07 13:58:09 pick 
Exp $ - -#include -#include - -__GTL_BEGIN_NAMESPACE - -//-------------------------------------------------------------------------- -// Con-/Destructors -//-------------------------------------------------------------------------- - -bfs::bfs () : algorithm () -{ - level_number = 0; - preds = 0; - non_tree = 0; - act_bfs_num = 1; - reached_nodes = 0; - whole_graph = false; -} - -bfs::~bfs () -{ - if (level_number) delete level_number; - if (preds) delete preds; - if (non_tree) delete non_tree; -} - - -//-------------------------------------------------------------------------- -// Parameters -//-------------------------------------------------------------------------- - -void bfs::calc_level (bool set) -{ - if (set && !level_number) { - level_number = new node_map; - } else if (!set && level_number) { - delete level_number; - level_number = 0; - } -} - -void bfs::store_preds (bool set) -{ - if (set && !preds) { - preds = new node_map; - } else if (!set && preds) { - delete preds; - preds = 0; - } -} - -void bfs::store_non_tree_edges (bool set) -{ - if (set && !non_tree) - { - non_tree = new edges_t; - } - else if (!set && non_tree) - { - delete non_tree; - non_tree = 0; - } -} - -//-------------------------------------------------------------------------- -// GTL_Algorithm - Interface -//-------------------------------------------------------------------------- - -void bfs::reset () -{ - act_bfs_num = 1; - tree.erase (tree.begin(), tree.end()); - bfs_order.erase (bfs_order.begin(), bfs_order.end()); - roots.erase (roots.begin(), roots.end()); - reached_nodes = 0; - if (non_tree) { - non_tree->erase (non_tree->begin(), non_tree->end()); - } -} - - -int bfs::run (GTL::graph& G) { - - bfs_number.init (G, 0); - - if (level_number) { - level_number->init (G); - } - - if (preds) { - preds->init (G, GTL::node()); - } - - edge_map *used = 0; - - if (non_tree) { - used = new edge_map (G, 0); - } - - init_handler (G); - - // - // Set start-node - // - - if (start == node()) 
{ - start = G.choose_node(); - } - - new_start_handler (G, start); - - bfs_sub (G, start, used); - - node curr; - - if (whole_graph && reached_nodes < G.number_of_nodes()) { - forall_nodes (curr, G) { - if (bfs_number[curr] == 0) { - - new_start_handler (G, curr); - - bfs_sub (G, curr, used); - } - } - } - - if (non_tree) { - delete used; - } - - end_handler (G); - - return 1; -} - - - -//-------------------------------------------------------------------------- -// PRIVATE -//-------------------------------------------------------------------------- - - -void bfs::bfs_sub (GTL::graph& G, const node& st, GTL::edge_map* used) -{ - qu.push_back (st); - bfs_number[st] = act_bfs_num; - ++act_bfs_num; - - if (level_number) { - (*level_number)[st] = 0; - } - - while (!qu.empty()) { - node tmp = qu.front(); - qu.pop_front(); - ++reached_nodes; - - if (tmp == st) { - roots.push_back (bfs_order.insert (bfs_order.end(), tmp)); - } else { - bfs_order.push_back (tmp); - } - - popped_node_handler (G, tmp); - - node::adj_edges_iterator it = tmp.adj_edges_begin(); - node::adj_edges_iterator end = tmp.adj_edges_end(); - - for (; it != end; ++it) { - edge curr = *it; - node opp = tmp.opposite (curr); - - if (bfs_number[opp] == 0) { - bfs_number[opp] = act_bfs_num; - ++act_bfs_num; - tree.push_back (curr); - - if (non_tree) { - (*used)[curr] = 1; - } - - if (level_number) { - (*level_number)[opp] = (*level_number)[tmp] + 1; - } - - if (preds) { - (*preds)[opp] = tmp; - } - - qu.push_back (opp); - - unused_node_handler (G, opp, tmp); - - } else { - if (non_tree && !(*used)[curr]) { - (*used)[curr] = 1; - non_tree->push_back(curr); - } - - used_node_handler (G, opp, tmp); - } - } - - finished_node_handler (G, tmp); - } -} - -__GTL_END_NAMESPACE - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/src/biconnectivity.cpp 
b/src/Tracker/graph/GTL/src/biconnectivity.cpp deleted file mode 100644 index b503fc45c..000000000 --- a/src/Tracker/graph/GTL/src/biconnectivity.cpp +++ /dev/null @@ -1,279 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// biconnectivity.cpp -// -//========================================================================== -// $Id: biconnectivity.cpp,v 1.20 2002/02/28 15:40:52 raitner Exp $ - -#include - -__GTL_BEGIN_NAMESPACE - -biconnectivity::biconnectivity() : dfs() -{ - add_edges = false; - store_preds(true); - store_comp = false; - scan_whole_graph(true); - num_of_components = 0; -} - -void biconnectivity::reset() -{ - dfs::reset(); - - if (store_comp) { - while (!node_stack.empty()) { - node_stack.pop(); - } - - while (!edge_stack.empty()) { - edge_stack.pop(); - } - - components.erase(components.begin(), components.end()); - } - - if (add_edges) { - additional.erase(additional.begin(), additional.end()); - } - - cut_points.erase(cut_points.begin(), cut_points.end()); - num_of_components = 0; -} - -int biconnectivity::check(GTL::graph& G) -{ - return G.is_undirected() && preds && - dfs::check(G) == GTL_OK ? GTL_OK : GTL_ERROR; -} - - -//-------------------------------------------------------------------------- -// Handler -//-------------------------------------------------------------------------- - - -void biconnectivity::init_handler(GTL::graph& G) -{ - if (add_edges) { - dfs D; - D.scan_whole_graph(true); - D.check(G); - D.run(G); - - roots_iterator it, end; - it = D.roots_begin(); - end = D.roots_end(); - start = *(*it); - ++it; - - for (; it != end; ++it) { - additional.push_back(G.new_edge(start, *(*it))); - } - - first_child.init(G, GTL::node()); - } - - low_num.init(G); - in_component.init(G); - cut_count.init(G, 0); - - // - // Detect self loops and hide them. 
- // - - assert(self_loops.empty()); - graph::edge_iterator eit = G.edges_begin(), - eend = G.edges_end(); - - while (eit != eend) { - edge e = *eit; - eit++; - if (e.target() == e.source()) { - self_loops.push_back(e); - G.hide_edge(e); - } - } -} - -void biconnectivity::entry_handler(GTL::graph& /*G*/, GTL::node& curr, GTL::node& father) -{ - if (add_edges) { - if (father != node()) { - if (first_child[father] == node()) { - first_child[father] = curr; - } - } - } - - low_num[curr] = dfs_number[curr]; -} - -void biconnectivity::new_start_handler(GTL::graph& /*G*/, GTL::node& st) -{ - cut_count[st] = -1; - - // - // If this node has no adjacent edges, we - // must write down the component right here. This is because - // then the method after_recursive_call_handle is never - // executed. - // - // 28/2/2002 MR - // - - if (st.degree() == 0) { - ++num_of_components; - - if (store_comp) { - component_iterator li = components.insert( - components.end(), - std::pair(nodes_t(), edges_t())); - - li->first.push_back(st); - in_component[st] = li; - } - } -} - -void biconnectivity::before_recursive_call_handler(GTL::graph& /*G*/, GTL::edge& /*e*/, GTL::node& n) -{ - if (store_comp) { - node_stack.push(n); - } -} - - -void biconnectivity::after_recursive_call_handler(GTL::graph& G, GTL::edge& e, GTL::node& n) -{ - node curr = n.opposite(e); - - if (low_num[n] < low_num[curr]) { - low_num[curr] = low_num[n]; - } - - if (low_num[n] >= dfs_num(curr)) { - // - // Component found - // - - if (store_comp) { - component_iterator li = components.insert( - components.end(), - std::pair(nodes_t(), edges_t())); - - nodes_t& component = li->first; - edges_t& co_edges = li->second; - - // - // Nodes of biconnected component - // - - node tmp = node_stack.top(); - - while (dfs_num(tmp) >= dfs_num(n)) { - node_stack.pop(); - component.push_back(tmp); - in_component[tmp] = li; - if (node_stack.empty()) break; - else tmp = node_stack.top(); - } - - component.push_back(curr); - - // - // 
edges of biconnected component - // - - edge ed = edge_stack.top(); - - while ((dfs_num(ed.source()) >= dfs_num(n) && - dfs_num(ed.target()) >= dfs_num(n)) || - (dfs_num(ed.source()) == dfs_num(curr) && - dfs_num(ed.target()) >= dfs_num(n)) || - (dfs_num(ed.source()) >= dfs_num(n) && - dfs_num(ed.target()) == dfs_num(curr))) { - edge_stack.pop(); - co_edges.push_back(ed); - if (edge_stack.empty()) break; - else ed = edge_stack.top(); - } - } - - - ++num_of_components; - - // - // curr is cut point; increase counter - // - - ++cut_count[curr]; - - if (add_edges) { - node father = (*preds)[curr]; - node first = first_child[curr]; - - if (father != node() && n == first) { - additional.push_back(G.new_edge(father, first)); - } - - if (n != first) { - additional.push_back(G.new_edge(n, first)); - } - } - - } -} - -void biconnectivity::old_adj_node_handler(GTL::graph& /*G*/, GTL::edge& e, GTL::node& n) -{ - node curr = n.opposite(e); - - // - // Store backedges at lower endpoint - // - - if (store_comp) { - if (dfs_num(curr) > dfs_num(n)) { - edge_stack.push(e); - } - } - - if (dfs_num(n) < low_num[curr]) { - low_num[curr] = dfs_number[n]; - } -} - -void biconnectivity::leave_handler(GTL::graph& /*G*/, GTL::node& n, GTL::node& /*f*/) -{ - if (cut_count[n] > 0) - { - cut_points.push_back(n); - } -} - -void biconnectivity::end_handler(GTL::graph& G) -{ - edges_t::iterator it = self_loops.begin(); - edges_t::iterator end = self_loops.end(); - - while (it != end) - { - G.restore_edge(*it); - if (store_comp) - { - component_iterator cit = in_component[it->target()]; - cit->second.push_back(*it); - } - - it = self_loops.erase(it); - } -} - -__GTL_END_NAMESPACE - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/src/bid_dijkstra.cpp b/src/Tracker/graph/GTL/src/bid_dijkstra.cpp deleted file mode 100644 index 
99ffce591..000000000 --- a/src/Tracker/graph/GTL/src/bid_dijkstra.cpp +++ /dev/null @@ -1,498 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// bid_dijkstra.cpp -// -//========================================================================== -//$Id: bid_dijkstra.cpp,v 1.2 2004/05/06 11:58:19 chris Exp $ - -#include -#include - -#include -#include -#include - -__GTL_BEGIN_NAMESPACE - -/** - * @internal - * Binary predicate that compares two nodes according to their distance. - */ - class less_dist -{ -public: - /** - * @internal - * Constructor sets pointer to node distances and infimum info. - */ - less_dist(const node_map* dist, const node_map* mark) - { - this->dist = dist; - this->mark = mark; - } - - /** - * @internal - * Compares distances of @p n1 and @p n2. - */ - bool operator()(const node n1, const node n2) const - { - if (((*mark)[n1] == bid_dijkstra::black) && - ((*mark)[n2] == bid_dijkstra::black)) - { - return false; - } - else if ((*mark)[n1] == bid_dijkstra::black) - { - return false; - } - else if ((*mark)[n2] == bid_dijkstra::black) - { - return true; - } - return (*dist)[n1] < (*dist)[n2]; - } -private: - /** - * @internal - * Node distances from source. - */ - const node_map* dist; - - /** - * @internal - * Infimum distance info (color of nodes). 
- */ - const node_map* mark; -}; - -bid_dijkstra::bid_dijkstra() -{ - reset_algorithm(); -} - -bid_dijkstra::~bid_dijkstra() -{ -} - -void bid_dijkstra::source_target(const node& s, const node& t) -{ - this->s = s; - this->t = t; -} - -void bid_dijkstra::weights(const edge_map& weight) -{ - this->weight = weight; - weights_set = true; -} - -void bid_dijkstra::store_path(bool set) -{ - path_set = set; -} - -int bid_dijkstra::check(GTL::graph& G) -{ - if ((s == node()) || (t == node()) || (!weights_set)) - return GTL_ERROR; - - bool source_found = false; - bool target_found = false; - graph::node_iterator nodes_end = G.nodes_end(); - for (auto node_it = G.nodes_begin(); node_it != nodes_end; ++node_it) - { - if (*node_it == s) - { - source_found = true; - if (target_found) - break; - } - if (*node_it == t) - { - target_found = true; - if (source_found) - break; - } - } - if ((!source_found) || (!target_found)) - return(GTL_ERROR); - - graph::edge_iterator edges_end = G.edges_end(); - for (auto edge_it = G.edges_begin(); edge_it != edges_end; ++edge_it) - { - if (weight[*edge_it] < 0.0) - return false; - } - return GTL_OK; -} - -int bid_dijkstra::run(GTL::graph& G) -{ - init(G); - - double max_dist = 1; - for (auto edge_it = G.edges_begin(); edge_it != G.edges_end(); ++edge_it) - { - max_dist += weight[*edge_it]; - } - - less_dist source_prd(&source_dist, &source_mark); - less_dist target_prd(&target_dist, &target_mark); - bin_heap source_heap(source_prd, G.number_of_nodes()); - bin_heap target_heap(target_prd, G.number_of_nodes()); - - source_mark[s] = grey; - source_dist[s] = 0.0; - source_heap.push(s); - target_mark[t] = grey; - target_dist[t] = 0.0; - target_heap.push(t); - while ((!source_heap.is_empty()) || (!target_heap.is_empty())) - { - if (source_dist[source_heap.top()] <= - target_dist[target_heap.top()]) - { - - // debug: - // source_heap.print_data_container(); - - node cur_node = source_heap.top(); - source_heap.pop(); - - // debug: - // 
source_heap.print_data_container(); - - source_mark[cur_node] = white; - - if ((target_mark[cur_node] == white) && - (max_dist == source_dist[cur_node] + - target_dist[cur_node])) - { - fill_node_edge_lists(cur_node); - break; - } - - node::adj_edges_iterator adj_edges_end = - cur_node.adj_edges_end(); - for (auto adj_edge_it = cur_node.adj_edges_begin(); - adj_edge_it != adj_edges_end; - ++adj_edge_it) - { - node op_node = adj_edge_it->opposite(cur_node); - if (source_mark[op_node] == black) - { - source_mark[op_node] = grey; - source_dist[op_node] = source_dist[cur_node] + weight[*adj_edge_it]; - source_heap.push(op_node); - - // debug: - // source_heap.print_data_container(); - - if (path_set) - pred[op_node] = *adj_edge_it; - - if ((target_mark[op_node] == grey) || - (target_mark[op_node] == white)) - { - if (max_dist > source_dist[op_node] + target_dist[op_node]) - max_dist = source_dist[op_node] + target_dist[op_node]; - } - } - else if (source_mark[op_node] == grey) - { - if (source_dist[op_node] > source_dist[cur_node] + weight[*adj_edge_it]) - { - source_dist[op_node] = source_dist[cur_node] + weight[*adj_edge_it]; - source_heap.changeKey(op_node); - - // debug: - // source_heap.print_data_container(); - - if (path_set) - pred[op_node] = *adj_edge_it; - - if ((target_mark[op_node] == grey) || - (target_mark[op_node] == white)) - { - if (max_dist > source_dist[op_node] + target_dist[op_node]) - max_dist = source_dist[op_node] + target_dist[op_node]; - } - } - } - else // (source_mark[op_node] == white) - { - // nothing to do: shortest distance to op_node is already computed - } - } - } - else // (source_dist[source_heap.top()] > - // target_dist[target_heap.top()]) - { - // debug: - // target_heap.print_data_container(); - - node cur_node = target_heap.top(); - target_heap.pop(); - - // debug: - // target_heap.print_data_container(); - - target_mark[cur_node] = white; - - if ((source_mark[cur_node] == white) && - (max_dist == source_dist[cur_node] + - 
target_dist[cur_node])) - { - fill_node_edge_lists(cur_node); - break; - } - - if (G.is_directed()) - { - node::in_edges_iterator in_edges_end = cur_node.in_edges_end(); - for (auto in_edge_it = cur_node.in_edges_begin(); - in_edge_it != in_edges_end; - ++in_edge_it) - { - node op_node = in_edge_it->opposite(cur_node); - if (target_mark[op_node] == black) - { - target_mark[op_node] = grey; - target_dist[op_node] = target_dist[cur_node] + weight[*in_edge_it]; - target_heap.push(op_node); - - // debug: - // target_heap.print_data_container(); - - if (path_set) - succ[op_node] = *in_edge_it; - - if ((source_mark[op_node] == grey) || - (source_mark[op_node] == white)) - { - if (max_dist > source_dist[op_node] + target_dist[op_node]) - max_dist = source_dist[op_node] + target_dist[op_node]; - } - } - else if (target_mark[op_node] == grey) - { - if (target_dist[op_node] > target_dist[cur_node] + weight[*in_edge_it]) - { - target_dist[op_node] = target_dist[cur_node] + weight[*in_edge_it]; - target_heap.changeKey(op_node); - - // debug: - // target_heap.print_data_container(); - - if (path_set) - succ[op_node] = *in_edge_it; - - if ((source_mark[op_node] == grey) || - (source_mark[op_node] == white)) - { - if (max_dist > source_dist[op_node] + target_dist[op_node]) - max_dist = source_dist[op_node] + target_dist[op_node]; - } - } - } - else // (target_mark[op_node] == white) - { - // nothing to do: shortest distance to op_node is already computed - } - } - } - else // (G.is_undirected()) - { - node::adj_edges_iterator adj_edges_end = cur_node.adj_edges_end(); - for (auto adj_edge_it = cur_node.adj_edges_begin(); - adj_edge_it != adj_edges_end; - ++adj_edge_it) - { - node op_node = adj_edge_it->opposite(cur_node); - if (target_mark[op_node] == black) - { - target_mark[op_node] = grey; - target_dist[op_node] = target_dist[cur_node] + - weight[*adj_edge_it]; - target_heap.push(op_node); - - // debug: - // target_heap.print_data_container(); - - if (path_set) - { - 
succ[op_node] = *adj_edge_it; - } - - if ((source_mark[op_node] == grey) || - (source_mark[op_node] == white)) - { - if (max_dist > source_dist[op_node] + - target_dist[op_node]) - { - max_dist = source_dist[op_node] + - target_dist[op_node]; - } - } - } - else if (target_mark[op_node] == grey) - { - if (target_dist[op_node] > target_dist[cur_node] + - weight[*adj_edge_it]) - { - target_dist[op_node] = target_dist[cur_node] + - weight[*adj_edge_it]; - target_heap.changeKey(op_node); - - // debug: - // target_heap.print_data_container(); - - if (path_set) - { - succ[op_node] = *adj_edge_it; - } - - if ((source_mark[op_node] == grey) || - (source_mark[op_node] == white)) - { - if (max_dist > source_dist[op_node] + - target_dist[op_node]) - { - max_dist = source_dist[op_node] + - target_dist[op_node]; - } - } - } - } - else // (target_mark[op_node] == white) - { - // nothing to do: shortest distance to op_node is - // already computed - } - } - } - } - } - return GTL_OK; -} - -node bid_dijkstra::source() const -{ - return s; -} - -node bid_dijkstra::target() const -{ - return t; -} - -bool bid_dijkstra::store_path() const -{ - return path_set; -} - -bool bid_dijkstra::reached() const -{ - return reached_t; -} - -double bid_dijkstra::distance() const -{ - return dist; -} - -bid_dijkstra::shortest_path_node_iterator bid_dijkstra::shortest_path_nodes_begin() -{ - assert(path_set); - return shortest_path_node_list.begin(); -} - -bid_dijkstra::shortest_path_node_iterator bid_dijkstra::shortest_path_nodes_end() -{ - assert(path_set); - return shortest_path_node_list.end(); -} - -bid_dijkstra::shortest_path_edge_iterator bid_dijkstra::shortest_path_edges_begin() -{ - assert(path_set); - return shortest_path_edge_list.begin(); -} - -bid_dijkstra::shortest_path_edge_iterator bid_dijkstra::shortest_path_edges_end() -{ - assert(path_set); - return shortest_path_edge_list.end(); -} - -void bid_dijkstra::reset() -{ - reset_algorithm(); -} - -void bid_dijkstra::reset_algorithm() -{ 
- s = node(); - t = node(); - weights_set = false; - path_set = false; - dist = -1.0; - reached_t = false; -} - -void bid_dijkstra::init(GTL::graph& G) -{ - source_dist.init(G, -1.0); - source_mark.init(G, black); - target_dist.init(G, -1.0); - target_mark.init(G, black); - - if (path_set) - { - pred.init(G, GTL::edge()); - succ.init(G, GTL::edge()); - shortest_path_node_list.clear(); - shortest_path_edge_list.clear(); - } -} - -void bid_dijkstra::fill_node_edge_lists(const node& n) -{ - reached_t = true; - if (t == s) - return; - - dist = source_dist[n] + target_dist[n]; - if (path_set) - { - node cur_node; - edge cur_edge; - - cur_node = n; - cur_edge = pred[cur_node]; - while (cur_edge != edge()) - { - shortest_path_edge_list.push_front(cur_edge); - cur_node = cur_edge.opposite(cur_node); - cur_edge = pred[cur_node]; - shortest_path_node_list.push_front(cur_node); - } - shortest_path_node_list.push_back(n); - cur_node = n; - cur_edge = succ[cur_node]; - while (cur_edge != edge()) - { - shortest_path_edge_list.push_back(cur_edge); - cur_node = cur_edge.opposite(cur_node); - cur_edge = succ[cur_node]; - shortest_path_node_list.push_back(cur_node); - } - } -} -__GTL_END_NAMESPACE - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/src/components.cpp b/src/Tracker/graph/GTL/src/components.cpp deleted file mode 100644 index 8577b63d5..000000000 --- a/src/Tracker/graph/GTL/src/components.cpp +++ /dev/null @@ -1,70 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// components.cpp -// -//========================================================================== -// $Id: components.cpp,v 1.5 2001/11/07 13:58:09 pick Exp $ - -#include - -__GTL_BEGIN_NAMESPACE - -components::components () : dfs () 
-{ - scan_whole_graph (true); - num_of_components = 0; -} - -void components::reset () -{ - dfs::reset (); - comp.erase (comp.begin(), comp.end()); - num_of_components = 0; -} - -int components::check (GTL::graph& G) -{ - return G.is_undirected() && whole_graph && - dfs::check (G) == GTL_OK ? GTL_OK : GTL_ERROR; -} - - -//-------------------------------------------------------------------------- -// Handler -//-------------------------------------------------------------------------- - - -void components::new_start_handler (GTL::graph& /*G*/, GTL::node& st) -{ - li = comp.insert(comp.end(), std::pair(nodes_t(), edges_t())); - li->first.push_back(st); - ++num_of_components; -} - -void components::before_recursive_call_handler (GTL::graph& /*G*/, GTL::edge& /*e*/, GTL::node& n) -{ - li->first.push_back(n); - // li->second.push_back(e); -} - - -void components::old_adj_node_handler (GTL::graph& /*G*/, GTL::edge& e, GTL::node& n) -{ - node curr = n.opposite (e); - - // - // Store backedges at lower endpoint - // - - if (dfs_num (curr) > dfs_num (n)) { - li->second.push_back (e); - } -} - - -__GTL_END_NAMESPACE - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/src/debug.cpp b/src/Tracker/graph/GTL/src/debug.cpp deleted file mode 100644 index 615da85cd..000000000 --- a/src/Tracker/graph/GTL/src/debug.cpp +++ /dev/null @@ -1,59 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// debug.cpp -// -//========================================================================== -// $Id: debug.cpp,v 1.10 2001/11/07 13:58:09 pick Exp $ - -#include - -#include -#include -#include - -__GTL_BEGIN_NAMESPACE - -std::ostream* GTL_debug::GTLerr = 0; - -void GTL_debug::debug_message (const char* message, ...) 
-{ -#ifdef _DEBUG - va_list arg_list; - va_start(arg_list, message); - - char buf[1024]; - vsprintf(buf, message, arg_list); - if (GTLerr) { - os() << buf; - } -#endif -} - -void GTL_debug::init_debug () -{ - if (!GTLerr) { -#ifdef __GTL_MSVCC - GTLerr = new std::ofstream("ERRLOG.txt", std::ios::out | std::ios::app); -#else - GTLerr = &std::cerr; -#endif - } -} - -void GTL_debug::close_debug () -{ - if (GTLerr) { -#ifdef __GTL_MSVCC - ((std::ofstream*) GTLerr)->close(); - delete GTLerr; - GTLerr = 0; -#endif - } -} - -__GTL_END_NAMESPACE - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/src/dfs.cpp b/src/Tracker/graph/GTL/src/dfs.cpp deleted file mode 100644 index 17724e161..000000000 --- a/src/Tracker/graph/GTL/src/dfs.cpp +++ /dev/null @@ -1,240 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// dfs.cpp -// -//========================================================================== -// $Id: dfs.cpp,v 1.18 2001/11/07 13:58:09 pick Exp $ - -#include -#include - -__GTL_BEGIN_NAMESPACE - -//-------------------------------------------------------------------------- -// Con-/Destructors -//-------------------------------------------------------------------------- - -dfs::dfs() : algorithm() -{ - act_dfs_num = 1; - act_comp_num = 1; - reached_nodes = 0; - whole_graph = false; - comp_number = 0; - preds = 0; - used = 0; - back_edges = 0; -} - -dfs::~dfs() -{ - if (comp_number) delete comp_number; - if (preds) delete preds; - if (back_edges) { - delete back_edges; - delete used; - } -} - -//-------------------------------------------------------------------------- -// GTL_Algorithm - Interface -//-------------------------------------------------------------------------- - - -void dfs::reset() 
-{ - act_dfs_num = 1; - act_comp_num = 1; - reached_nodes = 0; - tree.erase(tree.begin(), tree.end()); - dfs_order.erase(dfs_order.begin(), dfs_order.end()); - roots.erase(roots.begin(), roots.end()); - start = node(); - - if (back_edges) { - back_edges->erase(back_edges->begin(), back_edges->end()); - } -} - - -int dfs::check(GTL::graph& /*G*/) -{ - return GTL_OK; -} - -int dfs::run(GTL::graph& G) -{ - // - // initialization - // - - node curr; - node dummy; - - dfs_number.init(G, 0); - - if (comp_number) { - comp_number->init(G); - } - - if (preds) { - preds->init(G, GTL::node()); - } - - if (back_edges) { - used = new edge_map(G, 0); - } - - init_handler(G); - - // - // Set start-node - // - - if (G.number_of_nodes() == 0) { - return GTL_OK; - } - - if (start == node()) { - start = G.choose_node(); - } - - new_start_handler(G, start); - - dfs_sub(G, start, dummy); - - if (whole_graph && reached_nodes < G.number_of_nodes()) { - - // - // Continue DFS with next unused node. - // - - forall_nodes(curr, G) { - if (dfs_number[curr] == 0) { - new_start_handler(G, curr); - dfs_sub(G, curr, dummy); - } - } - } - - if (back_edges) { - delete used; - used = 0; - } - - end_handler(G); - - return GTL_OK; -} - -//-------------------------------------------------------------------------- -// PRIVATE -//-------------------------------------------------------------------------- - - -void dfs::dfs_sub(GTL::graph& G, GTL::node& curr, GTL::node& father) -{ - node opp; - edge adj; - - if (father == node()) { - roots.push_back(dfs_order.insert(dfs_order.end(), curr)); - } - else { - dfs_order.push_back(curr); - } - - dfs_number[curr] = act_dfs_num; - reached_nodes++; - - if (preds) { - (*preds)[curr] = father; - } - - entry_handler(G, curr, father); - - ++act_dfs_num; - node::adj_edges_iterator it = curr.adj_edges_begin(); - node::adj_edges_iterator end = curr.adj_edges_end(); - - while (it != end) { - adj = *it; - opp = curr.opposite(adj); - - if (dfs_number[opp] == 0) { - 
tree.push_back(adj); - - if (back_edges) { - (*used)[adj] = 1; - } - - before_recursive_call_handler(G, adj, opp); - dfs_sub(G, opp, curr); - after_recursive_call_handler(G, adj, opp); - - } - else { - if (back_edges && !(*used)[adj]) { - (*used)[adj] = 1; - back_edges->push_back(adj); - } - - old_adj_node_handler(G, adj, opp); - } - - ++it; - } - - leave_handler(G, curr, father); - - if (comp_number) { - (*comp_number)[curr] = act_comp_num; - ++act_comp_num; - } -} - -//-------------------------------------------------------------------------- -// Parameters -//-------------------------------------------------------------------------- - -void dfs::calc_comp_num(bool set) -{ - if (set && !comp_number) { - comp_number = new node_map; - } - else if (!set && comp_number) { - delete comp_number; - comp_number = 0; - } -} - -void dfs::store_preds(bool set) -{ - if (set && !preds) { - preds = new node_map; - } - else if (!set && preds) { - delete preds; - preds = 0; - } -} - -void dfs::store_non_tree_edges(bool set) -{ - if (set && !back_edges) - { - back_edges = new edges_t; - } - else if (!set && back_edges) - { - delete back_edges; - back_edges = 0; - } -} - -__GTL_END_NAMESPACE - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/src/dijkstra.cpp b/src/Tracker/graph/GTL/src/dijkstra.cpp deleted file mode 100644 index 5ea49c82a..000000000 --- a/src/Tracker/graph/GTL/src/dijkstra.cpp +++ /dev/null @@ -1,343 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// dijkstra.cpp -// -//========================================================================== -//$Id: dijkstra.cpp,v 1.6 2002/12/23 13:46:41 chris Exp $ - -#include -#include - -#include -#include -#include - -__GTL_BEGIN_NAMESPACE - -/** - 
* @internal - * Binary predicate that compares two nodes according to their distance. - */ - class less_dist -{ -public: - /** - * @internal - * Constructor sets pointer to node distances and infimum info. - */ - less_dist(const node_map* dist, const node_map* mark) - { - this->dist = dist; - this->mark = mark; - } - - /** - * @internal - * Compares distances of @p n1 and @p n2. - */ - bool operator()(const node n1, const node n2) const - { - if (((*mark)[n1] == dijkstra::black) && - ((*mark)[n2] == dijkstra::black)) - { - return false; - } - else if ((*mark)[n1] == dijkstra::black) - { - return false; - } - else if ((*mark)[n2] == dijkstra::black) - { - return true; - } - return (*dist)[n1] < (*dist)[n2]; - } -private: - /** - * @internal - * Node distances from source. - */ - const node_map* dist; - - /** - * @internal - * Infimum distance info (color of nodes). - */ - const node_map* mark; -}; - -dijkstra::dijkstra() -{ - reset_algorithm(); -} - -dijkstra::~dijkstra() -{ -} - -void dijkstra::source(const node& n) -{ - s = n; -} - -void dijkstra::target(const node& n) -{ - t = n; -} - -void dijkstra::weights(const edge_map& weight) -{ - this->weight = weight; - weights_set = true; -} - -void dijkstra::store_preds(bool set) -{ - preds_set = set; -} - -int dijkstra::check(GTL::graph& G) -{ - if ((s == node()) || (!weights_set)) - return GTL_ERROR; - - bool source_found = false; - graph::node_iterator nodes_end = G.nodes_end(); - for (auto node_it = G.nodes_begin(); node_it != nodes_end; ++node_it) - { - if (*node_it == s) - { - source_found = true; - break; - } - } - if (!source_found) - return(GTL_ERROR); - - graph::edge_iterator edges_end = G.edges_end(); - for (auto edge_it = G.edges_begin(); edge_it != edges_end; ++edge_it) - { - if (weight[*edge_it] < 0.0) - return false; - } - - return GTL_OK; -} - -int dijkstra::run(GTL::graph& G) -{ - init(G); - - less_dist prd(&dist, &mark); - bin_heap node_heap(prd, G.number_of_nodes()); - mark[s] = grey; - dist[s] = 0.0; 
- node_heap.push(s); - while (!node_heap.is_empty()) - { - // debug: - // node_heap.print_data_container(); - - node cur_node = node_heap.top(); - node_heap.pop(); - - // debug: - // node_heap.print_data_container(); - - mark[cur_node] = white; - if (cur_node == t) - { - // if @a t is set through #target we are ready - return GTL_OK; - } - - node::adj_edges_iterator adj_edges_end = cur_node.adj_edges_end(); - for (auto adj_edge_it = cur_node.adj_edges_begin(); - adj_edge_it != adj_edges_end; - ++adj_edge_it) - { - node op_node = adj_edge_it->opposite(cur_node); - if (mark[op_node] == black) - { - mark[op_node] = grey; - dist[op_node] = dist[cur_node] + weight[*adj_edge_it]; - node_heap.push(op_node); - - // debug: - // node_heap.print_data_container(); - if (preds_set) - pred[op_node] = *adj_edge_it; - } - else if (mark[op_node] == grey) - { - if (dist[op_node] > dist[cur_node] + weight[*adj_edge_it]) - { - dist[op_node] = dist[cur_node] + weight[*adj_edge_it]; - node_heap.changeKey(op_node); - - // debug: - // node_heap.print_data_container(); - if (preds_set) - pred[op_node] = *adj_edge_it; - } - } - else // (mark[op_node] == white) - { - // nothing to do: shortest distance to op_node is already - // computed - } - } - } - return GTL_OK; -} - -node dijkstra::source() const -{ - return s; -} - -node dijkstra::target() const -{ - return t; -} - -bool dijkstra::store_preds() const -{ - return preds_set; -} - -bool dijkstra::reached(const node& n) const -{ - return mark[n] != black; -} - -double dijkstra::distance(const node& n) const -{ - return dist[n]; -} - -node dijkstra::predecessor_node(const node& n) const -{ - assert(preds_set); - if ((n == s) || (!reached(n))) - return node(); - return pred[n].opposite(n); -} - -edge dijkstra::predecessor_edge(const node& n) const -{ - assert(preds_set); - return pred[n]; -} - -dijkstra::shortest_path_node_iterator dijkstra::shortest_path_nodes_begin(const node& dest) -{ - assert(preds_set); - if 
((shortest_path_node_list[dest].empty()) && - (dest != s) && - (reached(dest))) - { - fill_node_list(dest); - } - return shortest_path_node_list[dest].begin(); -} - -dijkstra::shortest_path_node_iterator dijkstra::shortest_path_nodes_end(const node& dest) -{ - assert(preds_set); - if ((shortest_path_node_list[dest].empty()) && - (dest != s) && - (reached(dest))) - { - fill_node_list(dest); - } - return shortest_path_node_list[dest].end(); -} - -dijkstra::shortest_path_edge_iterator dijkstra::shortest_path_edges_begin(const node& dest) -{ - assert(preds_set); - if ((shortest_path_edge_list[dest].empty()) && - (dest != s) && - (reached(dest))) - { - fill_edge_list(dest); - } - return shortest_path_edge_list[dest].begin(); -} - -dijkstra::shortest_path_edge_iterator dijkstra::shortest_path_edges_end(const node& dest) -{ - assert(preds_set); - if ((shortest_path_edge_list[dest].empty()) && - (dest != s) && - (reached(dest))) - { - fill_edge_list(dest); - } - return shortest_path_edge_list[dest].end(); -} - -void dijkstra::reset() -{ - reset_algorithm(); -} - -void dijkstra::reset_algorithm() -{ - s = node(); - t = node(); - weights_set = false; - preds_set = false; -} - -void dijkstra::init(GTL::graph& G) -{ - dist.init(G, -1.0); - mark.init(G, black); - - if (preds_set) - { - pred.init(G, GTL::edge()); - graph::node_iterator nodes_end = G.nodes_end(); - for (auto node_it = G.nodes_begin(); node_it != nodes_end; ++node_it) - { - shortest_path_node_list[(*node_it)].clear(); - shortest_path_edge_list[(*node_it)].clear(); - } - } -} - -void dijkstra::fill_node_list(const node& dest) -{ - if ((dest == s) || (!reached(dest))) - return; - - GTL::node cur_node = dest; - while (cur_node != node()) - { - shortest_path_node_list[dest].push_front(cur_node); - cur_node = predecessor_node(cur_node); - } -} - -void dijkstra::fill_edge_list(const node& dest) -{ - if ((dest == s) || (!reached(dest))) - return; - - GTL::node cur_node = dest; - GTL::edge cur_edge = 
predecessor_edge(dest); - while (cur_edge != edge()) - { - shortest_path_edge_list[dest].push_front(cur_edge); - cur_node = predecessor_node(cur_node); - cur_edge = predecessor_edge(cur_node); - } -} -__GTL_END_NAMESPACE - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/src/edge.cpp b/src/Tracker/graph/GTL/src/edge.cpp deleted file mode 100644 index 20e2a484d..000000000 --- a/src/Tracker/graph/GTL/src/edge.cpp +++ /dev/null @@ -1,266 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// edge.cpp -// -//========================================================================== -// $Id: edge.cpp,v 1.17 2001/11/07 13:58:09 pick Exp $ - -#include -#include -#include -#include - -__GTL_BEGIN_NAMESPACE - -//-------------------------------------------------------------------------- -// edge -//-------------------------------------------------------------------------- - -edge::edge() : - data(0) -{ -} - -GTL_EXTERN std::ostream& operator<< (std::ostream& os, const edge& e) { - if (e != edge ()) { - return os << e.source() << "-->" << e.target(); - } else { - return os << "UNDEF"; - } -} - -node edge::source() const -{ - return data->nodes[0].front(); -} - -node edge::target() const -{ - return data->nodes[1].front(); -} - -const node& edge::target_() const -{ - return data->nodes[1].front(); -} - -void edge::change_source (GTL::node new_source) -{ - // - // First delete this edge from source's adjacency list - // and clear the list of sources - // - - nodes_t::iterator the_nodes = data->nodes[0].begin(); - nodes_t::iterator the_nodes_end = data->nodes[0].end(); - - while(the_nodes != the_nodes_end) - { - the_nodes->data->edges[1].erase (data->adj_pos[0].front()); - data->adj_pos[0].pop_front(); - - 
the_nodes = data->nodes[0].erase (the_nodes); - } - - // - // Just to be sure :) - // - - assert (data->nodes[0].empty()); - assert (data->adj_pos[0].empty()); - - // - // insert this edge in the list of outgoing edges of new_source - // - - data->adj_pos[0].push_back(new_source.data->edges[1].insert ( - new_source.data->edges[1].end(), *this)); - - // - // make new_source a source of this node. - // - - data->nodes[0].push_back (new_source); -} - - -void edge::change_target (GTL::node new_target) { - // - // First delete this edge from target's adjacency list - // and clear the list of targets - // - - nodes_t::iterator the_nodes = data->nodes[1].begin(); - nodes_t::iterator the_nodes_end = data->nodes[1].end(); - - while(the_nodes != the_nodes_end) - { - the_nodes->data->edges[0].erase (data->adj_pos[1].front()); - data->adj_pos[1].pop_front(); - - the_nodes = data->nodes[1].erase (the_nodes); - } - - // - // Just to be sure :) - // - - assert (data->nodes[1].empty()); - assert (data->adj_pos[1].empty()); - - // - // insert this edge in the list of incoming edges of new_target - // - - data->adj_pos[1].push_back(new_target.data->edges[0].insert ( - new_target.data->edges[0].end(), *this)); - - // - // make new_target a target of this node. 
- // - - data->nodes[1].push_back (new_target); -} - - -void edge::reverse () -{ - // - // First delete this edge from all adjacency lists - // - - nodes_t::iterator the_nodes = data->nodes[0].begin(); - nodes_t::iterator the_nodes_end = data->nodes[0].end(); - - while(the_nodes != the_nodes_end) - { - the_nodes->data->edges[1].erase (data->adj_pos[0].front()); - data->adj_pos[0].pop_front(); - - ++the_nodes; - } - - the_nodes = data->nodes[1].begin(); - the_nodes_end = data->nodes[1].end(); - - while(the_nodes != the_nodes_end) - { - the_nodes->data->edges[0].erase (data->adj_pos[1].front()); - data->adj_pos[1].pop_front(); - - ++the_nodes; - } - - // - // Now the lists of positions in the adjacency - lists should be empty - // - - assert (data->adj_pos[0].empty()); - assert (data->adj_pos[1].empty()); - - // - // Now insert this edge reversed - // - - the_nodes = data->nodes[1].begin(); - the_nodes_end = data->nodes[1].end(); - - while(the_nodes != the_nodes_end) - { - data->adj_pos[0].push_back(the_nodes->data->edges[1].insert ( - the_nodes->data->edges[1].end(), *this)); - - ++the_nodes; - } - - the_nodes = data->nodes[0].begin(); - the_nodes_end = data->nodes[0].end(); - - while(the_nodes != the_nodes_end) - { - data->adj_pos[1].push_back(the_nodes->data->edges[0].insert ( - the_nodes->data->edges[0].end(), *this)); - - ++the_nodes; - } - - // - // swap nodes[0] and nodes[1] - // - - nodes_t tmp = data->nodes[0]; - data->nodes[0] = data->nodes[1]; - data->nodes[1] = tmp; -} - - - -nodes_t edge::sources() const -{ - return data->nodes[0]; -} - -nodes_t edge::targets() const -{ - return data->nodes[1]; -} - -int edge::id() const -{ - return data->id; -} - -bool edge::is_hidden () const -{ - return data->hidden; -} - -void edge::remove_from(int where) const -{ - nodes_t::iterator the_nodes = data->nodes[where].begin(); - nodes_t::iterator the_nodes_end = data->nodes[where].end(); - - std::list::iterator the_adj_pos = data->adj_pos[where].begin(); - - while 
(the_nodes != the_nodes_end) - { - the_nodes->data->edges[1 - where].erase(*the_adj_pos); - - ++the_nodes; - ++the_adj_pos; - } -} - -const node& edge::opposite(GTL::node n) const -{ - // not implemented for hypergraphs - assert(data); - - node& s = *(data->nodes[0].begin()); - if (n == s) - return *(data->nodes[1].begin()); - else - return s; -} - -GTL_EXTERN bool operator==(GTL::edge e1, GTL::edge e2) -{ - return e1.data == e2.data; -} - -GTL_EXTERN bool operator!=(GTL::edge e1, GTL::edge e2) -{ - return e1.data != e2.data; -} - -GTL_EXTERN bool operator<(GTL::edge e1, GTL::edge e2) -{ - return e1.data < e2.data; -} - -__GTL_END_NAMESPACE - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/src/embedding.cpp b/src/Tracker/graph/GTL/src/embedding.cpp deleted file mode 100644 index bbcba968e..000000000 --- a/src/Tracker/graph/GTL/src/embedding.cpp +++ /dev/null @@ -1,259 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// embedding.cpp -// -//========================================================================== -// $Id: embedding.cpp,v 1.18 2002/10/04 08:07:36 chris Exp $ - -#include - -__GTL_BEGIN_NAMESPACE - -planar_embedding::planar_embedding (const planar_embedding& em) -{ - init (*(em.G)); - - node n; - forall_nodes (n, *G) { - adj_list::const_iterator it = em.adj[n].begin(); - adj_list::const_iterator end = em.adj[n].end(); - - for (; it != end; ++it) { - pos (n, *it) = push_back (n, *it); - } - } - - self.insert (self.begin(), em.self.begin(), em.self.end()); - multi.insert (multi.begin(), em.multi.begin(), em.multi.begin()); -} - - -planar_embedding& -planar_embedding::operator= (const planar_embedding& em) -{ - node n; - if (G != 0) { - forall_nodes (n, *G) { - adj[n].erase 
(adj[n].begin(), adj[n].end()); - } - } - - self.erase (self.begin(), self.end()); - multi.erase (multi.begin(), multi.end()); - - init (*(em.G)); - - forall_nodes (n, *G) { - adj_list::const_iterator it = em.adjacency(n).begin(); - adj_list::const_iterator end = em.adjacency(n).end(); - - for (; it != end; ++it) { - pos (n, *it) = push_back (n, *it); - } - } - - self.insert (self.begin(), em.self.begin(), em.self.end()); - multi.insert (multi.begin(), em.multi.begin(), em.multi.begin()); - - return *this; -} - - -void -planar_embedding::init (GTL::graph& my_G) -{ - adj.init (my_G); - - // - // There is a problem with node/edge maps of iterators with Visual C++ - // which I dont fully understand at the moment. Anyway the init for the - // maps below is only needed to allocate memory, which is done anyway, when - // values are assigned to it. - // - -#ifndef __GTL_MSVCC - s_pos.init (my_G); - t_pos.init (my_G); -#endif - G = &my_G; -} - - -symlist::iterator -planar_embedding::push_back (GTL::node n, GTL::edge e) -{ - return adj[n].insert (adj[n].end(), e); -} - - -symlist::iterator -planar_embedding::push_front (GTL::node n, GTL::edge e) -{ - return adj[n].insert (adj[n].begin(), e); -} - - -symlist::iterator& -planar_embedding::pos (GTL::node n, GTL::edge e) -{ - if (e.source() == n) { - return s_pos[e]; - } else if (e.target() == n) { - return t_pos[e]; - } else { - assert (false); - // this should not happen. 
- return s_pos[e]; - } -} - - -void -planar_embedding::insert_selfloop (GTL::edge e) -{ - node n = e.source(); - s_pos[e] = t_pos[e] = adj[n].insert (adj[n].begin(), e); -} - - -void -planar_embedding::turn (GTL::node n) -{ - adj[n].reverse(); -} - - -edge -planar_embedding::cyclic_next (GTL::node n, GTL::edge e) -{ - iterator it = pos (n, e); - ++it; - - if (it == adj[n].end()) { - ++it; - } - - return *it; -} - - -edge -planar_embedding::cyclic_prev (GTL::node n, GTL::edge e) -{ - iterator it = pos (n, e); - --it; - - if (it == adj[n].end()) { - --it; - } - - return *it; -} - -bool -planar_embedding::check () -{ - node n; - forall_nodes (n ,*G) { - iterator it, end; - - for (it = adj[n].begin(), end = adj[n].end(); it != end; ++it) { - edge curr = *it; - node other = n.opposite (curr); - - edge prev = cyclic_prev (n, curr); - edge next = cyclic_next (n, prev); - assert (next == curr); - - while (other != n) { - curr = cyclic_next (other, curr); - other = other.opposite (curr); - } - if (curr != prev) { - return false; - } - - } - } - - return true; -} - - -void -planar_embedding::write_st(std::ostream& os, st_number& st) -{ - st_number::iterator n_it = st.begin(); - st_number::iterator n_end = st.end(); - iterator it, end; - - for (; n_it != n_end; ++n_it) { - node n = *n_it; - os << "[" << st[n] << "]::"; - - it = adj[n].begin(); - end = adj[n].end(); - - for (; it != end; ++it) { - os << "[" << st[n.opposite (*it)] << "]"; - } - - os << std::endl; - } - - os << "SELFLOOPS:" << std::endl; - edges_t::iterator e_it, e_end; - for (e_it = self.begin(), e_end = self.end(); e_it != e_end; ++e_it) - { - os << st[e_it->source()] << "---" << st[e_it->target()] << std::endl; - } - - os << "MULTIPLE EDGES:" << std::endl; - for (e_it = multi.begin(), e_end = multi.end(); e_it != e_end; ++e_it) - { - os << st[e_it->source()] << "---" << st[e_it->target()] << std::endl; - } -} - -GTL_EXTERN std::ostream& operator<< (std::ostream& os, planar_embedding& em) -{ - 
graph::node_iterator n_it = em.G->nodes_begin(); - graph::node_iterator n_end = em.G->nodes_end(); - symlist::iterator it, end; - - for (; n_it != n_end; ++n_it) { - node n = *n_it; - os << n << ":: "; - - it = em.adj[n].begin(); - end = em.adj[n].end(); - - for (; it != end; ++it) { - os << n.opposite (*it) << "*"; - } - - os << std::endl; - } - - os << "SELFLOOPS:" << std::endl; - edges_t::iterator e_it, e_end; - for (e_it = em.self.begin(), e_end = em.self.end(); e_it != e_end; ++e_it) - { - os << *e_it << std::endl; - } - - os << "MULTIPLE EDGES:" << std::endl; - for (e_it = em.multi.begin(), e_end = em.multi.end(); e_it != e_end; ++e_it) - { - os << *e_it << std::endl; - } - - return os; -} - -__GTL_END_NAMESPACE - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/src/fm_partition.cpp b/src/Tracker/graph/GTL/src/fm_partition.cpp deleted file mode 100644 index f432a301e..000000000 --- a/src/Tracker/graph/GTL/src/fm_partition.cpp +++ /dev/null @@ -1,1037 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// fm_partition.cpp -// -//========================================================================== -// $Id: fm_partition.cpp,v 1.8 2001/11/07 13:58:10 pick Exp $ - -#include -#include - -#include -#include - -#include -#include -#include -#include - -__GTL_BEGIN_NAMESPACE - -const fm_partition::side_type fm_partition::A = 0; -const fm_partition::side_type fm_partition::B = 1; - -const fm_partition::fix_type fm_partition::FIXA = 0; -const fm_partition::fix_type fm_partition::FIXB = 1; -const fm_partition::fix_type fm_partition::UNFIXED = 2; - -fm_partition::fm_partition() -{ - set_vars_executed = false; - enable_cut_edges_storing = false; - enable_nodesAB_storing = false; -} - 
-fm_partition::~fm_partition() -{ -} - -void fm_partition::set_vars(const graph& G, - const node_map& node_weight, const edge_map& edge_weight) -{ - this->node_weight = node_weight; - this->edge_weight = edge_weight; - set_vars_executed = true; - provided_initial_part = false; - this->fixed.init(G, UNFIXED); - provided_fix = false; - side.init(G); -} - -void fm_partition::set_vars(const graph& G, - const node_map& node_weight, const edge_map& edge_weight, - const node_map& init_side) -{ - this->node_weight = node_weight; - this->edge_weight = edge_weight; - this->side = init_side; - set_vars_executed = true; - provided_initial_part = true; - this->fixed.init(G, UNFIXED); - provided_fix = false; -} - -void fm_partition::set_vars(const graph& G, - const node_map& node_weight, const edge_map& edge_weight, - const node_map& fixed) -{ - this->node_weight = node_weight; - this->edge_weight = edge_weight; - set_vars_executed = true; - provided_initial_part = false; - this->fixed = fixed; - provided_fix = true; - side.init(G); -} - -void fm_partition::set_vars(const graph& /*G*/, - const node_map& node_weight, const edge_map& edge_weight, - const node_map& init_side, - const node_map& fixed) -{ - this->node_weight = node_weight; - this->edge_weight = edge_weight; - this->side = init_side; - set_vars_executed = true; - provided_initial_part = true; - this->fixed = fixed; - provided_fix = true; -} - -void fm_partition::store_cut_edges(const bool set) -{ - enable_cut_edges_storing = set; -} - -void fm_partition::store_nodesAB(const bool set) -{ - enable_nodesAB_storing = set; -} - -int fm_partition::check(GTL::graph& G) -{ - if ((!set_vars_executed) || (!G.is_undirected())) - return GTL_ERROR; - - graph::edge_iterator edge_it = G.edges_begin(); - graph::edge_iterator edges_end = G.edges_end(); - while (edge_it != edges_end) - { - if (edge_weight[*edge_it] < 0) - return GTL_ERROR; - ++edge_it; - } - graph::node_iterator node_it = G.nodes_begin(); - graph::node_iterator 
nodes_end = G.nodes_end(); - while (node_it != nodes_end) - { - if (node_weight[*node_it] < 0) - return GTL_ERROR; - - ++node_it; - } - return GTL_OK; -} - -int fm_partition::run(GTL::graph& G) -{ - init_variables(G); - if ((provided_initial_part) && (provided_fix)) - divide_up(G); - - if (!provided_initial_part) - create_initial_bipart(G); - - hide_self_loops(G); - compute_max_vertex_degree(G); - - pass_manager(G); - - if (enable_cut_edges_storing) - compute_cut_edges(G); - - if (enable_nodesAB_storing) - compute_nodesAB(G); - - G.restore_graph(); - - return GTL_OK; -} - -int fm_partition::get_cutsize() -{ - return cur_cutsize; -} - -int fm_partition::get_needed_passes() -{ - return no_passes; -} - -fm_partition::side_type fm_partition::get_side_of_node(const node& n) const -{ - return side[n]; -} - -fm_partition::side_type fm_partition::operator [](const node& n) const -{ - return side[n]; -} - -int fm_partition::get_weight_on_sideA(const graph& G) const -{ - int nwA = 0; - graph::node_iterator node_it = G.nodes_begin(); - graph::node_iterator nodes_end = G.nodes_end(); - while (node_it != nodes_end) - { - if (side[*node_it] == A) - nwA += node_weight[*node_it]; - - ++node_it; - } - return nwA; -} - -int fm_partition::get_weight_on_sideB(const graph& G) const -{ - int nwB = 0; - graph::node_iterator node_it = G.nodes_begin(); - graph::node_iterator nodes_end = G.nodes_end(); - while (node_it != nodes_end) - { - if (side[*node_it] == B) - nwB += node_weight[*node_it]; - - ++node_it; - } - return nwB; -} - -fm_partition::cut_edges_iterator fm_partition::cut_edges_begin() const -{ - return cut_edges.begin(); -} - -fm_partition::cut_edges_iterator fm_partition::cut_edges_end() const -{ - return cut_edges.end(); -} - -fm_partition::nodes_of_one_side_iterator fm_partition::nodes_of_sideA_begin() const -{ - return nodesA.begin(); -} - -fm_partition::nodes_of_one_side_iterator fm_partition::nodes_of_sideA_end() const -{ - return nodesA.end(); -} - 
-fm_partition::nodes_of_one_side_iterator fm_partition::nodes_of_sideB_begin() const -{ - return nodesB.begin(); -} - -fm_partition::nodes_of_one_side_iterator fm_partition::nodes_of_sideB_end() const -{ - return nodesB.end(); -} - -void fm_partition::reset() -{ - set_vars_executed = false; - cut_edges.clear(); - nodesA.clear(); - nodesB.clear(); -} - -void fm_partition::divide_up(const graph& G) -{ - graph::node_iterator node_it = G.nodes_begin(); - graph::node_iterator nodes_end = G.nodes_end(); - while (node_it != nodes_end) - { - if (fixed[*node_it] == FIXA) - side[*node_it] = A; - - else if (fixed[*node_it] == FIXB) - side[*node_it] = B; - - ++node_it; - } -} - -void fm_partition::hide_self_loops(GTL::graph& G) -{ - graph::edge_iterator temp_it; - graph::edge_iterator edge_it = G.edges_begin(); - graph::edge_iterator edges_end = G.edges_end(); - while (edge_it != edges_end) - { - if (edge_it->source() == edge_it->target()) - { - temp_it = edge_it; - ++edge_it; - G.hide_edge(*temp_it); - } - else - { - ++edge_it; - } - } -} - -void fm_partition::init_variables(const graph& G) -{ - bool first_edge_found = true; - bool first_node_found = true; - max_edge_weight = 0; - max_node_weight = 0; - graph::edge_iterator edge_it = G.edges_begin(); - graph::edge_iterator edges_end = G.edges_end(); - while (edge_it != edges_end) - { - if (first_edge_found) - { - max_edge_weight = edge_weight[*edge_it]; - first_edge_found = false; - } - else if (edge_weight[*edge_it] > max_edge_weight) - { - max_edge_weight = edge_weight[*edge_it]; - } - ++edge_it; - } - graph::node_iterator node_it = G.nodes_begin(); - graph::node_iterator nodes_end = G.nodes_end(); - total_node_weight = 0; - while (node_it != nodes_end) - { - total_node_weight += node_weight[*node_it]; - if (first_node_found) - { - max_node_weight = node_weight[*node_it]; - first_node_found = false; - } - else if (node_weight[*node_it] > max_node_weight) - { - max_node_weight = node_weight[*node_it]; - } - ++node_it; - } -} 
- -void fm_partition::create_initial_bipart(const graph& G) -{ - int i = 0; // counter - int no_nodes = G.number_of_nodes(); - node_weight_on_sideA = 0; - node_weight_on_sideB = 0; - - graph::node_iterator node_it = G.nodes_begin(); - graph::node_iterator nodes_end = G.nodes_end(); - std::vector node_vector(G.number_of_nodes()); - while (node_it != nodes_end) - { - node_vector[i] = node_it; - if (fixed[*node_it] == FIXA) - { - side[*node_it] = A; - node_weight_on_sideA += node_weight[*node_it]; - } - else if (fixed[*node_it] == FIXB) - { - side[*node_it] = B; - node_weight_on_sideB += node_weight[*node_it]; - } - else // fixed[*node_it] == UNFIXED - { - node_weight_on_sideB += node_weight[*node_it]; - side[*node_it] = B; - } - ++i; - ++node_it; - } - shuffle_vector(no_nodes, node_vector); - - // compute best balance - int best_bal = node_weight_on_sideA * node_weight_on_sideB; - int best_pos = -1; - for (i = 0; i < no_nodes; i++) - { - if (fixed[*node_vector[i]] == UNFIXED) - { - node_weight_on_sideA += node_weight[*node_vector[i]]; - node_weight_on_sideB -= node_weight[*node_vector[i]]; - if (node_weight_on_sideA * node_weight_on_sideB > best_bal) - { - best_bal = node_weight_on_sideA * node_weight_on_sideB; - best_pos = i; - } - } - } - - // create partition with best balance - for (i = 0; i <= best_pos; i++) - { - if (fixed[*node_vector[i]] == UNFIXED) - side[*node_vector[i]] = A; - } -} - -void fm_partition::shuffle_vector(const int vector_size, std::vector& node_vector) -{ - srand((unsigned)time(NULL)); - rand(); // necessary, otherwise the next rand() returns always 0 ?-) - for (int i = 1; i <= vector_size; i++) - { - int pos_1 = (int)floor((((double)rand() / (double)RAND_MAX) * - (double)(vector_size - 1)) + 0.5); - int pos_2 = (int)floor((((double)rand() / (double)RAND_MAX) * - (double)(vector_size - 1)) + 0.5); - graph::node_iterator temp_it; - temp_it = node_vector[pos_1]; - node_vector[pos_1] = node_vector[pos_2]; - node_vector[pos_2] = temp_it; - } -} - 
-void fm_partition::compute_max_vertex_degree(const graph& G) -{ - max_vertex_degree = 0; - graph::node_iterator node_it = G.nodes_begin(); - graph::node_iterator nodes_end = G.nodes_end(); - while (node_it != nodes_end) - { - if (max_vertex_degree < node_it->degree()) - max_vertex_degree = node_it->degree(); - - ++node_it; - } -} - -void fm_partition::pass_manager(const graph& G) -{ - // final pass which doesn't improve cur_cutsize is not counted - no_passes = -1; - int best_cutsize = -1; // = -1 to avoid warning - node_map best_side(G); - bool improved_cutsize; - - do - { - init_data_structure(G); - if (no_passes == -1) - { - best_cutsize = cur_cutsize; - copy_side_node_map(G, best_side, side); - } - move_manager(G); - clean_pass(G); - improved_cutsize = false; - if (best_cutsize > cur_cutsize) - { - best_cutsize = cur_cutsize; - copy_side_node_map(G, best_side, side); - improved_cutsize = true; - } - ++no_passes; - } while (improved_cutsize); - cur_cutsize = best_cutsize; - copy_side_node_map(G, side, best_side); -} - -void fm_partition::copy_side_node_map(const graph& G, node_map& dest, const node_map source) const -{ - graph::node_iterator node_it = G.nodes_begin(); - graph::node_iterator nodes_end = G.nodes_end(); - while (node_it != nodes_end) - { - dest[*node_it] = source[*node_it]; - ++node_it; - } -} - -void fm_partition::init_data_structure(const graph& G) -{ - aside.init(G); - bside.init(G); - unlockedA.init(G); - unlockedB.init(G); - cur_cutsize = 0; - graph::edge_iterator edge_it = G.edges_begin(); - graph::edge_iterator edges_end = G.edges_end(); - while (edge_it != edges_end) - { - if ((side[edge_it->source()] == A) && - (side[edge_it->target()] == A)) - { - aside[*edge_it] = 2; - bside[*edge_it] = 0; - unlockedA[*edge_it].push_back(edge_it->source()); - unlockedA[*edge_it].push_back(edge_it->target()); - } - else if ((side[edge_it->source()] == B) && - (side[edge_it->target()] == B)) - { - aside[*edge_it] = 0; - bside[*edge_it] = 2; - 
unlockedB[*edge_it].push_back(edge_it->source()); - unlockedB[*edge_it].push_back(edge_it->target()); - } - else if ((side[edge_it->source()] == A) && - (side[edge_it->target()] == B)) - { - aside[*edge_it] = 1; - bside[*edge_it] = 1; - cur_cutsize += edge_weight[*edge_it]; - unlockedA[*edge_it].push_back(edge_it->source()); - unlockedB[*edge_it].push_back(edge_it->target()); - } - else if ((side[edge_it->source()] == B) && - (side[edge_it->target()] == A)) - { - aside[*edge_it] = 1; - bside[*edge_it] = 1; - cur_cutsize += edge_weight[*edge_it]; - unlockedA[*edge_it].push_back(edge_it->target()); - unlockedB[*edge_it].push_back(edge_it->source()); - } - ++edge_it; - } - - bucketA.resize(2 * max_vertex_degree * max_edge_weight + 1); - bucketB.resize(2 * max_vertex_degree * max_edge_weight + 1); - - init_filling_buckets(G); -} - -void fm_partition::init_filling_buckets(const graph &G) -{ - node_weight_on_sideA = 0; - node_weight_on_sideB = 0; - bucketA_empty = true; - bucketB_empty = true; - bool first_A_node = true; - bool first_B_node = true; - int index; - // position_in_bucket.init(G); - gain_value.init(G); - - graph::node_iterator node_it = G.nodes_begin(); - graph::node_iterator nodes_end = G.nodes_end(); - while (node_it != nodes_end) - { - if (side[*node_it] == A) - { - node_weight_on_sideA += node_weight[*node_it]; - gain_value[*node_it] = inital_gain_of_node_on_sideA(*node_it); - if (fixed[*node_it] == UNFIXED) - { - if (first_A_node) - { - bucketA_empty = false; - max_gainA = gain_value[*node_it]; - first_A_node = false; - } - else - { - if (max_gainA < gain_value[*node_it]) - max_gainA = gain_value[*node_it]; - } - index = range_up(gain_value[*node_it]); - position_in_bucket[*node_it] = bucketA[index].insert( - bucketA[index].end(), *node_it); - } - } - else // side[*node_it] == B - { - node_weight_on_sideB += node_weight[*node_it]; - gain_value[*node_it] = inital_gain_of_node_on_sideB(*node_it); - if (fixed[*node_it] == UNFIXED) - { - if (first_B_node) - 
{ - bucketB_empty = false; - max_gainB = gain_value[*node_it]; - first_B_node = false; - } - else - { - if (max_gainB < gain_value[*node_it]) - max_gainB = gain_value[*node_it]; - } - index = range_up(gain_value[*node_it]); - position_in_bucket[*node_it] = bucketB[index].insert( - bucketB[index].end(), *node_it); - } - } - ++node_it; - } -} - -int fm_partition::inital_gain_of_node_on_sideA(const node cur_node) -{ - int node_gain = 0; - node::adj_edges_iterator adj_edge_it = cur_node.adj_edges_begin(); - node::adj_edges_iterator adj_edges_end = cur_node.adj_edges_end(); - while (adj_edge_it != adj_edges_end) - { - if (aside[*adj_edge_it] == 1) - node_gain += edge_weight[*adj_edge_it]; - - if (bside[*adj_edge_it] == 0) - node_gain -= edge_weight[*adj_edge_it]; - - ++adj_edge_it; - } - return node_gain; -} - -int fm_partition::inital_gain_of_node_on_sideB(const node cur_node) -{ - int node_gain = 0; - node::adj_edges_iterator adj_edge_it = cur_node.adj_edges_begin(); - node::adj_edges_iterator adj_edges_end = cur_node.adj_edges_end(); - while (adj_edge_it != adj_edges_end) - { - if (bside[*adj_edge_it] == 1) - node_gain += edge_weight[*adj_edge_it]; - - if (aside[*adj_edge_it] == 0) - node_gain -= edge_weight[*adj_edge_it]; - - ++adj_edge_it; - } - return node_gain; -} - -void fm_partition::move_manager(const graph& G) -{ - int step_number = 0; - int best_tentative_move = 0; - int best_bal = node_weight_on_sideA * node_weight_on_sideB; - std::vector tentative_moves(G.number_of_nodes() + 1); - std::vector tentative_cutsize(G.number_of_nodes() + 1); - node moved_node; - tentative_cutsize[0] = cur_cutsize; - - while (move_vertex(G, moved_node)) - { - ++step_number; - tentative_cutsize[step_number] = cur_cutsize; - tentative_moves[step_number] = moved_node; - if (tentative_cutsize[best_tentative_move] > cur_cutsize) - { - best_tentative_move = step_number; - best_bal = node_weight_on_sideA * node_weight_on_sideB; - } - else if (tentative_cutsize[best_tentative_move] == 
cur_cutsize) - { - if (node_weight_on_sideA * node_weight_on_sideB > best_bal) - { - best_tentative_move = step_number; - best_bal = node_weight_on_sideA * node_weight_on_sideB; - } - } - } - - for (int i = 1; i <= best_tentative_move; i++) - { - if (side[tentative_moves[i]] == A) - side[tentative_moves[i]] = B; - else // side[tentative_moves[i]] == B - side[tentative_moves[i]] = A; - } - cur_cutsize = tentative_cutsize[best_tentative_move]; -} - -bool fm_partition::move_vertex(const graph& G, GTL::node& moved_node) -{ - node cons_nodeA; - if (!bucketA_empty) - cons_nodeA = bucketA[range_up(max_gainA)].back(); - - node cons_nodeB; - if (!bucketB_empty) - cons_nodeB = bucketB[range_up(max_gainB)].back(); - - if ((!bucketA_empty) && (!bucketB_empty) && - (balance_holds(G, cons_nodeA)) && (balance_holds(G, cons_nodeB))) - { - if (gain_value[cons_nodeA] > gain_value[cons_nodeB]) - { - update_data_structure_A2B(cons_nodeA); - moved_node = cons_nodeA; - } - else if (gain_value[cons_nodeB] > gain_value[cons_nodeA]) - { - update_data_structure_B2A(cons_nodeB); - moved_node = cons_nodeB; - } - else // gain_value[cons_nodeB] == gain_value[cons_nodeA] - { - int bal_diff_A2B = abs(node_weight_on_sideA - 2 * - node_weight[cons_nodeA] - node_weight_on_sideB); - int bal_diff_B2A = abs(node_weight_on_sideB - 2 * - node_weight[cons_nodeB] - node_weight_on_sideA); - if (bal_diff_A2B < bal_diff_B2A) - { - update_data_structure_A2B(cons_nodeA); - moved_node = cons_nodeA; - } - else if (bal_diff_B2A < bal_diff_A2B) - { - update_data_structure_B2A(cons_nodeB); - moved_node = cons_nodeB; - } - else // break remaining ties as desired [FidMat82] - { - update_data_structure_A2B(cons_nodeA); - moved_node = cons_nodeA; - } - } - } - else if ((!bucketA_empty) && (balance_holds(G, cons_nodeA))) - { - update_data_structure_A2B(cons_nodeA); - moved_node = cons_nodeA; - } - else if ((!bucketB_empty) && (balance_holds(G, cons_nodeB))) - { - update_data_structure_B2A(cons_nodeB); - moved_node = 
cons_nodeB; - } - else - { - return false; // no more vertices can be moved - } - update_max_gain(A); - update_max_gain(B); - return true; -} - -bool fm_partition::balance_holds(const graph& /*G*/, const node cur_node) -{ - if (side[cur_node] == A) - { - if ((double)node_weight_on_sideB + (double)node_weight[cur_node] - <= ((double)total_node_weight / 2.0) + (double)max_node_weight) - return true; - } - else // side[cur_node] == B - { - if ((double)node_weight_on_sideA + (double)node_weight[cur_node] - <= ((double)total_node_weight / 2.0) + (double)max_node_weight) - return true; - } - return false; -} - -void fm_partition::update_data_structure_A2B(const node cur_node) -{ - bucketA[range_up(max_gainA)].pop_back(); - node_weight_on_sideA -= node_weight[cur_node]; - node_weight_on_sideB += node_weight[cur_node]; - cur_cutsize -= gain_value[cur_node]; - - // updating gain values - node::adj_edges_iterator adj_edge_it = cur_node.adj_edges_begin(); - node::adj_edges_iterator adj_edges_end = cur_node.adj_edges_end(); - while (adj_edge_it != adj_edges_end) - { - // delete cur_node from side A -#if 1 - unlockedA[*adj_edge_it].remove(cur_node); -#else - auto& ua = unlockedA[*adj_edge_it]; - ua.erase(std::remove(ua.begin(), ua.end(), cur_node), ua.end()); -#endif - --aside[*adj_edge_it]; - if (aside[*adj_edge_it] == 0) - { - nodes_t::iterator node_it = unlockedB[*adj_edge_it].begin(); - nodes_t::iterator nodes_end = unlockedB[*adj_edge_it].end(); - while (node_it != nodes_end) - { - update_bucketB(*node_it, gain_value[*node_it], - gain_value[*node_it] - edge_weight[*adj_edge_it]); - gain_value[*node_it] -= edge_weight[*adj_edge_it]; - ++node_it; - } - } - else if (aside[*adj_edge_it] == 1) - { - nodes_t::iterator node_it = unlockedA[*adj_edge_it].begin(); - nodes_t::iterator nodes_end = unlockedA[*adj_edge_it].end(); - while (node_it != nodes_end) - { - update_bucketA(*node_it, gain_value[*node_it], - gain_value[*node_it] + edge_weight[*adj_edge_it]); - gain_value[*node_it] 
+= edge_weight[*adj_edge_it]; - ++node_it; - } - } - // add cur_node to side B - ++bside[*adj_edge_it]; - if (bside[*adj_edge_it] == 1) - { - nodes_t::iterator node_it = unlockedA[*adj_edge_it].begin(); - nodes_t::iterator nodes_end = unlockedA[*adj_edge_it].end(); - while (node_it != nodes_end) - { - update_bucketA(*node_it, gain_value[*node_it], - gain_value[*node_it] + edge_weight[*adj_edge_it]); - gain_value[*node_it] += edge_weight[*adj_edge_it]; - ++node_it; - } - } - else if (bside[*adj_edge_it] == 2) - { - nodes_t::iterator node_it = unlockedB[*adj_edge_it].begin(); - nodes_t::iterator nodes_end = unlockedB[*adj_edge_it].end(); - while (node_it != nodes_end) - { - update_bucketB(*node_it, gain_value[*node_it], - gain_value[*node_it] - edge_weight[*adj_edge_it]); - gain_value[*node_it] -= edge_weight[*adj_edge_it]; - ++node_it; - } - } - ++adj_edge_it; - } -} - -void fm_partition::update_data_structure_B2A(const node cur_node) -{ - bucketB[range_up(max_gainB)].pop_back(); - node_weight_on_sideA += node_weight[cur_node]; - node_weight_on_sideB -= node_weight[cur_node]; - cur_cutsize -= gain_value[cur_node]; - - // updating gain values - node::adj_edges_iterator adj_edge_it = cur_node.adj_edges_begin(); - node::adj_edges_iterator adj_edges_end = cur_node.adj_edges_end(); - while (adj_edge_it != adj_edges_end) - { - // delete cur_node from side B -#if 1 - unlockedB[*adj_edge_it].remove(cur_node); -#else - auto& ub = unlockedB[*adj_edge_it]; - ub.erase(std::remove(ub.begin(), ub.end(), cur_node), ub.end()); -#endif - - bside[*adj_edge_it] -= 1; - if (bside[*adj_edge_it] == 0) - { - nodes_t::iterator node_it = unlockedA[*adj_edge_it].begin(); - nodes_t::iterator nodes_end = unlockedA[*adj_edge_it].end(); - while (node_it != nodes_end) - { - update_bucketA(*node_it, gain_value[*node_it], - gain_value[*node_it] - edge_weight[*adj_edge_it]); - gain_value[*node_it] -= edge_weight[*adj_edge_it]; - ++node_it; - } - } - else if (bside[*adj_edge_it] == 1) - { - 
nodes_t::iterator node_it = unlockedB[*adj_edge_it].begin(); - nodes_t::iterator nodes_end = unlockedB[*adj_edge_it].end(); - while (node_it != nodes_end) - { - update_bucketB(*node_it, gain_value[*node_it], - gain_value[*node_it] + edge_weight[*adj_edge_it]); - gain_value[*node_it] += edge_weight[*adj_edge_it]; - ++node_it; - } - } - // add cur_node to side A - aside[*adj_edge_it] += 1; - if (aside[*adj_edge_it] == 1) - { - nodes_t::iterator node_it = unlockedB[*adj_edge_it].begin(); - nodes_t::iterator nodes_end = unlockedB[*adj_edge_it].end(); - while (node_it != nodes_end) - { - update_bucketB(*node_it, gain_value[*node_it], - gain_value[*node_it] + edge_weight[*adj_edge_it]); - gain_value[*node_it] += edge_weight[*adj_edge_it]; - ++node_it; - } - } - else if (aside[*adj_edge_it] == 2) - { - nodes_t::iterator node_it = unlockedA[*adj_edge_it].begin(); - nodes_t::iterator nodes_end = unlockedA[*adj_edge_it].end(); - while (node_it != nodes_end) - { - update_bucketA(*node_it, gain_value[*node_it], - gain_value[*node_it] - edge_weight[*adj_edge_it]); - gain_value[*node_it] -= edge_weight[*adj_edge_it]; - ++node_it; - } - } - ++adj_edge_it; - } -} - -void fm_partition::update_bucketA(const node cur_node, const int old_gain, - const int new_gain) -{ - if (fixed[cur_node] != UNFIXED) - return; // fixed nodes need no update - - bucketA[range_up(old_gain)].erase(position_in_bucket[cur_node]); - position_in_bucket[cur_node] = bucketA[range_up(new_gain)].insert( - bucketA[range_up(new_gain)].end(), cur_node); - - if (max_gainA < new_gain) - max_gainA = new_gain; -} - -void fm_partition::update_bucketB(const node cur_node, const int old_gain, const int new_gain) -{ - if (fixed[cur_node] != UNFIXED) - return; // fixed nodes need no update - - bucketB[range_up(old_gain)].erase(position_in_bucket[cur_node]); - position_in_bucket[cur_node] = bucketB[range_up(new_gain)].insert( - bucketB[range_up(new_gain)].end(), cur_node); - - if (max_gainB < new_gain) - max_gainB = 
new_gain; -} - -void fm_partition::update_max_gain(const side_type side) -{ - if ((side == A) && (!bucketA_empty)) - { - while (bucketA[range_up(max_gainA)].empty()) - { - --max_gainA; - if (range_up(max_gainA) < 0) - { - bucketA_empty = true; - return; - } - } - bucketA_empty = false; - } - if ((side == B) && (!bucketB_empty)) - { - while (bucketB[range_up(max_gainB)].empty()) - { - --max_gainB; - if (range_up(max_gainB) < 0) - { - bucketB_empty = true; - return; - } - } - bucketB_empty = false; - } -} - -inline int fm_partition::range_up(const int gain_value) const -{ - return gain_value + (max_vertex_degree * max_edge_weight); -} - -inline int fm_partition::range_down(const int index) const -{ - return index - (max_vertex_degree * max_edge_weight); -} - -void fm_partition::clean_pass(const graph& G) -{ - // clean unlocked* lists - graph::edge_iterator edge_it = G.edges_begin(); - graph::edge_iterator edges_end = G.edges_end(); - while (edge_it != edges_end) - { - unlockedA[*edge_it].clear(); - unlockedB[*edge_it].clear(); - ++edge_it; - } - - // clean buckets - for (int i = 0; i <= 2 * max_vertex_degree * max_edge_weight; i++) - { - bucketA[i].clear(); - bucketB[i].clear(); - } - bucketA.clear(); - bucketB.clear(); -} - -void fm_partition::compute_cut_edges(const graph& G) -{ - cut_edges.clear(); - graph::edge_iterator edge_it = G.edges_begin(); - graph::edge_iterator edges_end = G.edges_end(); - while (edge_it != edges_end) - { - if (side[edge_it->source()] != side[edge_it->target()]) - cut_edges.push_back(*edge_it); - - ++edge_it; - } -} - -void fm_partition::compute_nodesAB(const graph& G) -{ - nodesA.clear(); - nodesB.clear(); - graph::node_iterator node_it = G.nodes_begin(); - graph::node_iterator nodes_end = G.nodes_end(); - while (node_it != nodes_end) - { - if (side[*node_it] == A) - nodesA.push_back(*node_it); - else // side[*node_it] == B - nodesB.push_back(*node_it); - ++node_it; - } -} - -#ifdef _DEBUG -void fm_partition::print_bucketA() -{ - 
GTL_debug::init_debug(); - GTL_debug::os() << std::endl << "bucketA:" << std::endl; - for (int i = 0; i <= 2 * max_vertex_degree * max_edge_weight; i++) - { - GTL_debug::os() << range_down(i) << ": "; - nodes_t::iterator node_it = bucketA[i].begin(); - nodes_t::iterator nodes_end = bucketA[i].end(); - while (node_it != nodes_end) - { - GTL_debug::os() << *node_it << " "; - ++node_it; - } - GTL_debug::os() << std::endl; - } - GTL_debug::os() << std::endl; - GTL_debug::close_debug(); -} - -void fm_partition::print_bucketB() -{ - GTL_debug::init_debug(); - GTL_debug::os() << std::endl << "bucketB:" << std::endl; - for (int i = 0; i <= 2 * max_vertex_degree * max_edge_weight; i++) - { - GTL_debug::os() << range_down(i) << ": "; - nodes_t::iterator node_it = bucketB[i].begin(); - nodes_t::iterator nodes_end = bucketB[i].end(); - while (node_it != nodes_end) - { - GTL_debug::os() << *node_it << " "; - ++node_it; - } - GTL_debug::os() << std::endl; - } - GTL_debug::os() << std::endl; - GTL_debug::close_debug(); -} -#endif // _DEBUG - -__GTL_END_NAMESPACE - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/src/gml_parser.cpp b/src/Tracker/graph/GTL/src/gml_parser.cpp deleted file mode 100644 index f6f6bf23e..000000000 --- a/src/Tracker/graph/GTL/src/gml_parser.cpp +++ /dev/null @@ -1,283 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// gml_parser.cpp - parser for the GML-file-format specified in: -// Michael Himsolt, GML: Graph Modelling Language, -// 21.01.1997 -// -//========================================================================== -// $Id: gml_parser.cpp,v 1.9 2001/11/07 13:58:10 pick Exp $ - -#include - -#include -#include -#include -#include - -__GTL_BEGIN_NAMESPACE - -struct 
GML_pair* GML_parser (FILE* source, struct GML_stat* stat, int open) { - - struct GML_token token; - struct GML_pair* pair; - struct GML_pair* list; - struct GML_pair* tmp = NULL; - struct GML_list_elem* tmp_elem; - - assert (stat); - - pair = (struct GML_pair*) malloc (sizeof (struct GML_pair)); - pair->next = NULL; - list = pair; - - for (;;) { - token = GML_scanner (source); - - if (token.kind == GML_END) { - if (open) { - stat->err.err_num = GML_OPEN_BRACKET; - stat->err.line = GML_line; - stat->err.column = GML_column; - free (pair); - - if (tmp == NULL) { - return NULL; - } else { - tmp->next = NULL; - return list; - } - } - - break; - - } else if (token.kind == GML_R_BRACKET) { - if (!open) { - stat->err.err_num = GML_TOO_MANY_BRACKETS; - stat->err.line = GML_line; - stat->err.column = GML_column; - free (pair); - - if (tmp == NULL) { - return NULL; - } else { - tmp->next = NULL; - return list; - } - } - - break; - - } else if (token.kind == GML_ERROR) { - stat->err.err_num = token.value.err.err_num; - stat->err.line = token.value.err.line; - stat->err.column = token.value.err.column; - free (pair); - - if (tmp == NULL) { - return NULL; - } else { - tmp->next = NULL; - return list; - } - - } else if (token.kind != GML_KEY) { - stat->err.err_num = GML_SYNTAX; - stat->err.line = GML_line; - stat->err.column = GML_column; - free (pair); - - if (token.kind == GML_STRING) { - free (token.value.str); - } - - if (tmp == NULL) { - return NULL; - } else { - tmp->next = NULL; - return list; - } - } - - if (!stat->key_list) { - stat->key_list = (struct GML_list_elem*) - malloc (sizeof (struct GML_list_elem)); - stat->key_list->next = NULL; - stat->key_list->key = token.value.str; - pair->key = token.value.str; - - } else { - tmp_elem = stat->key_list; - - while (tmp_elem) { - if (!strcmp (tmp_elem->key, token.value.str)) { - free (token.value.str); - pair->key = tmp_elem->key; - break; - } - - tmp_elem = tmp_elem->next; - } - - if (!tmp_elem) { - tmp_elem = (struct 
GML_list_elem*) - malloc (sizeof (struct GML_list_elem)); - tmp_elem->next = stat->key_list; - stat->key_list = tmp_elem; - tmp_elem->key = token.value.str; - pair->key = token.value.str; - } - } - - token = GML_scanner (source); - - switch (token.kind) { - case GML_INT: - pair->kind = GML_INT; - pair->value.integer = token.value.integer; - break; - - case GML_DOUBLE: - pair->kind = GML_DOUBLE; - pair->value.floating = token.value.floating; - break; - - case GML_STRING: - pair->kind = GML_STRING; - pair->value.str = token.value.str; - break; - - case GML_L_BRACKET: - pair->kind = GML_LIST; - pair->value.list = GML_parser (source, stat, 1); - - if (stat->err.err_num != GML_OK) { - return list; - } - - break; - - case GML_ERROR: - stat->err.err_num = token.value.err.err_num; - stat->err.line = token.value.err.line; - stat->err.column = token.value.err.column; - free (pair); - - if (tmp == NULL) { - return NULL; - } else { - tmp->next = NULL; - return list; - } - - default: - stat->err.line = GML_line; - stat->err.column = GML_column; - stat->err.err_num = GML_SYNTAX; - free (pair); - - if (tmp == NULL) { - return NULL; - } else { - tmp->next = NULL; - return list; - } - } - - tmp = pair; - pair = (struct GML_pair*) malloc (sizeof (struct GML_pair)); - tmp->next = pair; - pair->next = NULL; - } - - stat->err.err_num = GML_OK; - free (pair); - - if (tmp == NULL) { - return NULL; - } else { - tmp->next = NULL; - return list; - } -} - - -void GML_free_list (struct GML_pair* list, struct GML_list_elem* keys) { - - struct GML_pair* tmp = list; - struct GML_list_elem* tmp_key; - - while (keys) { - free (keys->key); - tmp_key = keys->next; - free (keys); - keys = tmp_key; - } - - while (list) { - - switch (list->kind) { - case GML_LIST: - GML_free_list (list->value.list, NULL); - break; - - case GML_STRING: - free (list->value.str); - break; - - default: - break; - } - - tmp = list->next; - free (list); - list = tmp; - } -} - - - -void GML_print_list (struct GML_pair* list, 
int level) { - - struct GML_pair* tmp = list; - int i; - - while (tmp) { - - for (i = 0; i < level; i++) { - printf (" "); - } - - printf ("*KEY* : %s", tmp->key); - - switch (tmp->kind) { - case GML_INT: - printf (" *VALUE* (long) : %ld \n", tmp->value.integer); - break; - - case GML_DOUBLE: - printf (" *VALUE* (double) : %f \n", tmp->value.floating); - break; - - case GML_STRING: - printf (" *VALUE* (string) : %s \n", tmp->value.str); - break; - - case GML_LIST: - printf (" *VALUE* (list) : \n"); - GML_print_list (tmp->value.list, level+1); - break; - - default: - break; - } - - tmp = tmp->next; - } -} - -__GTL_END_NAMESPACE - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/src/gml_scanner.cpp b/src/Tracker/graph/GTL/src/gml_scanner.cpp deleted file mode 100644 index 33a858a1e..000000000 --- a/src/Tracker/graph/GTL/src/gml_scanner.cpp +++ /dev/null @@ -1,426 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// gml_scanner.cpp - Scanner for the GML - file format -// -//========================================================================== -// $Id: gml_scanner.cpp,v 1.10 2001/11/07 13:58:10 pick Exp $ - -#include - -#include -#include -#include -#include - -__GTL_BEGIN_NAMESPACE - -/* - * ISO8859-1 coding of chars >= 160 - */ - -const char* GML_table[] = { - " ", /* 160 */ - "¡", - "¢", - "£", - "¤", - "¥", - "¦", - "§", - "¨", - "©", - "ª", /* 170 */ - "«", - "¬", - "­", - "®", - "¯", - "°", - "±", - "²", - "³", /* 180 */ - "´", - "µ", - "¶", - "·", - "¸", - "¹", - "º", - "»", - "¼", - "½", - "¾", /* 190 */ - "¿", - "À", - "Á", - "Â", - "Ã", - "Ä", - "Å", - "Æ", - "Ç", - "È", /* 200 */ - "É", - "Ê", - "Ë", - "Ì", - "Í", - "Î", - "Ï", - "Ð", - "Ñ", - "Ò", /* 210 */ - "Ó", - "Ô", - "Õ", 
- "Ö", - "×", - "Ø", - "Ù", - "Ú", - "Û", - "Ü", /* 220 */ - "Ý", - "Þ", - "ß", - "à", - "á", - "â", - "ã", - "ä", - "å", - "æ", /* 230 */ - "ç", - "è", - "é", - "ê", - "ë", - "ì", - "í", - "î", - "ï", - "ð", /* 240 */ - "ñ", - "ò", - "ó", - "ô", - "õ", - "ö", - "÷", - "ø", - "ù", - "ú", /* 250 */ - "û", - "ü", - "ý", - "þ", - "ÿ" -}; - - -unsigned int GML_line; -unsigned int GML_column; - - -int GML_search_ISO (char* str, int len) { - - int i; - int ret = '&'; - - // - // First check the extraordinary ones - // - - if (!strncmp (str, """, len)) { - return 34; - } else if (!strncmp (str, "&", len)) { - return 38; - } else if (!strncmp (str, "<", len)) { - return 60; - } else if (!strncmp (str, ">", len)) { - return 62; - } - - for (i = 0; i < 96; i++) { - if (!strncmp (str, GML_table[i], len)) { - ret = i + 160; - break; - } - } - - return ret; -} - - -void GML_init () { - - GML_line = 1; - GML_column = 1; -} - - - -struct GML_token GML_scanner (FILE* source) { - - int cur_max_size = INITIAL_SIZE; - static char buffer[INITIAL_SIZE]; - char* tmp = buffer; - char* ret = tmp; - struct GML_token token; - int is_float = 0; - int count = 0; - int next; - char ISO_buffer[8]; - int ISO_count; - - assert (source != NULL); - - /* - * eliminate preceeding white spaces - */ - - do { - next = fgetc (source); - GML_column++; - - if (next == '\n') { - GML_line++; - GML_column = 1; - } else if (next == EOF) { - token.kind = GML_END; - return token; - } - } while (isspace (next)); - - if (isdigit (next) || next == '.' || next == '+' || next == '-') { - - /* - * floating point or integer - */ - - do { - if (count == INITIAL_SIZE - 1) { - token.value.err.err_num = GML_TOO_MANY_DIGITS; - token.value.err.line = GML_line; - token.value.err.column = GML_column + count; - token.kind = GML_ERROR; - return token; - } - - if (next == '.' 
|| next == 'E') { - is_float = 1; - } - - buffer[count] = next; - count++; - next = fgetc (source); - - } while (!isspace(next) && next != ']'); - - if (next == ']') { - ungetc (next, source); - } - - buffer[count] = 0; - - if (next == '\n') { - GML_line++; - GML_column = 1; - } else { - GML_column += count; - } - - if (is_float) { - token.value.floating = atof (tmp); - token.kind = GML_DOUBLE; - } else { - token.value.integer = atol (tmp); - token.kind = GML_INT; - } - - return token; - - } else if (isalpha (next) || next == '_') { - - /* - * key - */ - - do { - if (count == cur_max_size - 1) { - *tmp = 0; - tmp = (char*) malloc(2 * cur_max_size * sizeof (char)); - strcpy (tmp, ret); - - if (cur_max_size > INITIAL_SIZE) { - free (ret); - } - - ret = tmp; - tmp += count; - cur_max_size *= 2; - } - - if (!isalnum (next) && next != '_') { - token.value.err.err_num = GML_UNEXPECTED; - token.value.err.line = GML_line; - token.value.err.column = GML_column + count; - token.kind = GML_ERROR; - - if (cur_max_size > INITIAL_SIZE) { - free (ret); - } - - return token; - } - - *tmp++ = next; - count++; - next = fgetc (source); - } while (!isspace (next) && next != EOF); - - if (next == '\n') { - GML_line++; - GML_column = 1; - } else { - GML_column += count; - } - - *tmp = 0; - token.kind = GML_KEY; - token.value.str = (char*) malloc((count+1) * sizeof (char)); - strcpy (token.value.str, ret); - - if (cur_max_size > INITIAL_SIZE) { - free (ret); - } - - return token; - - } else { - /* - * comments, brackets and strings - */ - - switch (next) { - case '#': - do { - next = fgetc (source); - } while (next != '\n' && next != EOF); - - GML_line++; - GML_column = 1; - return GML_scanner (source); - - case '[': - token.kind = GML_L_BRACKET; - return token; - - case ']': - token.kind = GML_R_BRACKET; - return token; - - case '"': - next = fgetc (source); - GML_column++; - - while (next != '"') { - - if (count >= cur_max_size - 8) { - *tmp = 0; - tmp = (char*) malloc (2 * 
cur_max_size * sizeof(char)); - strcpy (tmp, ret); - - if (cur_max_size > INITIAL_SIZE) { - free (ret); - } - - ret = tmp; - tmp += count; - cur_max_size *= 2; - } - - if (next == '&') { - ISO_count = 0; - - while (next != ';') { - if (next == '"' || next == EOF) { - ungetc (next, source); - ISO_count = 0; - break; - } - - if (ISO_count < 8) { - ISO_buffer[ISO_count] = next; - ISO_count++; - } - - next = fgetc (source); - } - - if (ISO_count == 8) { - ISO_count = 0; - } - - if (ISO_count) { - ISO_buffer[ISO_count] = ';'; - ISO_count++; - next = GML_search_ISO (ISO_buffer, ISO_count); - ISO_count = 0; - } else { - next = '&'; - } - } - - *tmp++ = next; - count++; - GML_column++; - - next = fgetc (source); - - if (next == EOF) { - token.value.err.err_num = GML_PREMATURE_EOF; - token.value.err.line = GML_line; - token.value.err.column = GML_column + count; - token.kind = GML_ERROR; - - if (cur_max_size > INITIAL_SIZE) { - free (ret); - } - - return token; - } - - if (next == '\n') { - GML_line++; - GML_column = 1; - } - } - - *tmp = 0; - token.kind = GML_STRING; - token.value.str = (char*) malloc((count+1) * sizeof (char)); - strcpy (token.value.str, ret); - - if (cur_max_size > INITIAL_SIZE) { - free (ret); - } - - return token; - - default: - token.value.err.err_num = GML_UNEXPECTED; - token.value.err.line = GML_line; - token.value.err.column = GML_column; - token.kind = GML_ERROR; - return token; - } - } -} - -__GTL_END_NAMESPACE - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/src/graph.cpp b/src/Tracker/graph/GTL/src/graph.cpp deleted file mode 100644 index b21eea4c6..000000000 --- a/src/Tracker/graph/GTL/src/graph.cpp +++ /dev/null @@ -1,1123 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== 
-// -// graph.cpp -// -//========================================================================== -// $Id: graph.cpp,v 1.58 2003/01/14 16:47:14 raitner Exp $ - -#include -#include -#include -#include - -#include -#include - -#include -#include - -#include -#include -#include -#include - -#include -#include -#include -#include - -__GTL_BEGIN_NAMESPACE - -//-------------------------------------------------------------------------- -// Con-/Destructors -//-------------------------------------------------------------------------- - -graph::graph() : - directed(true), - nodes_count(0), edges_count(0), - hidden_nodes_count(0), hidden_edges_count(0), - free_node_ids_count(0), free_edge_ids_count(0) -{ -} - -graph::graph(const graph &G) : - directed(G.directed), - nodes_count(0), edges_count(0), - hidden_nodes_count(0), hidden_edges_count(0), - free_node_ids_count(0), free_edge_ids_count(0) -{ - copy (G, G.nodes.begin(), G.nodes.end()); -} - - -graph::graph(const graph& G, const nodes_t& nod) : - directed(G.directed), - nodes_count(0), edges_count(0), - hidden_nodes_count(0), hidden_edges_count(0), - free_node_ids_count(0), free_edge_ids_count(0) -{ - copy (G, nod.begin(), nod.end()); -} - - -graph::graph (const graph& G, - nodes_t::const_iterator it, - nodes_t::const_iterator end) : - directed(G.directed), - nodes_count(0), edges_count(0), - hidden_nodes_count(0), hidden_edges_count(0), - free_node_ids_count(0), free_edge_ids_count(0) -{ - copy (G, it, end); -} - -void graph::copy (const graph& G, - nodes_t::const_iterator it, - nodes_t::const_iterator end) -{ - node_map copy (G, GTL::node()); - nodes_t::const_iterator n_it; - nodes_t::const_iterator n_end; - - for(n_it = it, n_end = end; n_it != n_end; ++n_it) - { - copy[*n_it] = new_node(); - } - - for(n_it = it, n_end = end; n_it != n_end; ++n_it) - { - node::out_edges_iterator e_it, e_end; - - for (e_it = n_it->out_edges_begin(), e_end = n_it->out_edges_end(); - e_it != e_end; ++e_it) { - - if (copy[e_it->target()] 
!= node()) { - new_edge(copy[e_it->source()], copy[e_it->target()]); - } - } - } -} - - - -graph::~graph() -{ - clear(); -} - -//------------------------------------------------------------------------- -// Output -//------------------------------------------------------------------------- - -GTL_EXTERN std::ostream& operator<< (std::ostream& os, const graph& G) { - node n; - edge out; - std::string conn; - - if (G.is_directed()) - conn = "-->"; - else - conn = "<-->"; - - forall_nodes (n, G) { - os << n << ":: "; - - forall_adj_edges (out, n) { - os << conn << n.opposite (out); - } - - os << std::endl; - } - - return os; -} - -//-------------------------------------------------------------------------- -// Directed/Undirected -//-------------------------------------------------------------------------- - -void graph::make_directed() -{ - if (!directed) - { - pre_make_directed_handler(); - directed = true; - post_make_directed_handler(); - } -} - -void graph::make_undirected() -{ - if (directed) - { - pre_make_undirected_handler(); - directed = false; - post_make_undirected_handler(); - } -} - -bool graph::is_directed() const -{ - return directed; -} - -bool graph::is_undirected() const -{ - return !directed; -} - -//-------------------------------------------------------------------------- -// Creation -//-------------------------------------------------------------------------- - -node graph::new_node() -{ - pre_new_node_handler(); - - // create node - - node n; - n.data = new node_data; - - // set data variables - - n.data->id = new_node_id(); - n.data->owner = this; - n.data->pos = nodes.insert(nodes.end(), n); - n.data->hidden = false; - ++nodes_count; - - // done - - post_new_node_handler(n); - - return n; -} - -edge graph::new_edge(GTL::node source, GTL::node target) -{ - assert(source.data); - assert(target.data); - assert(source.data->owner == this); - assert(target.data->owner == this); - - pre_new_edge_handler(source, target); - - // create edge - - edge 
e; - e.data = new edge_data; - - // set id - - e.data->owner = this; - e.data->id = new_edge_id(); - - // set sources and targets - - e.data->nodes[0].push_back(source); - e.data->nodes[1].push_back(target); - - // set pos - - e.data->pos = edges.insert(edges.end(), e); - e.data->hidden = false; - ++edges_count; - - // set adj_pos - - edges_t& source_adj = source.data->edges[1]; - edges_t& target_adj = target.data->edges[0]; - - e.data->adj_pos[0].push_back(source_adj.insert(source_adj.begin(), e)); - e.data->adj_pos[1].push_back(target_adj.insert(target_adj.begin(), e)); - - // done - - post_new_edge_handler(e); - - return e; -} - -edge graph::new_edge(const nodes_t &/*sources*/, const nodes_t &/*targets*/) -{ - // not implemented - - return edge(); -} - -//-------------------------------------------------------------------------- -// Deletion -//-------------------------------------------------------------------------- - -void graph::del_node(GTL::node n) -{ - assert (n.data); - assert (n.data->owner == this); - - // delete edges - - while(n.in_edges_begin() != n.in_edges_end()) - { - del_edge (*n.in_edges_begin()); - } - - while(n.out_edges_begin() != n.out_edges_end()) - { - del_edge (*n.out_edges_begin()); - } - - // - // delete hidden edges adjacent to n. - // - // [ TODO ] This is only a quick fix and should be thought - // over some time or the other. 
- // - - edges_t::iterator it = hidden_edges.begin(); - edges_t::iterator end = hidden_edges.end(); - - while (it != end) - { - if (it->source() == n || it->target() == n) - { - delete it->data; - it = hidden_edges.erase (it); - } - else - { - ++it; - } - } - - // delete node - - pre_del_node_handler(n); - - nodes.erase(n.data->pos); - --nodes_count; - free_node_ids.push_back(n.data->id); - ++free_node_ids_count; - delete n.data; - - post_del_node_handler(); -} - -void graph::del_edge(GTL::edge e) -{ - assert (e.data->owner == this); - assert (e.data->owner == this); - - pre_del_edge_handler(e); - node s = e.source(); - node t = e.target(); - - e.remove_from(0); - e.remove_from(1); - edges.erase(e.data->pos); - --edges_count; - free_edge_ids.push_back(e.data->id); - ++free_edge_ids_count; - delete e.data; - - post_del_edge_handler(s, t); -} - -void graph::clear() -{ - pre_clear_handler(); - - del_list(edges); - del_list(hidden_edges); - del_list(nodes); - del_list(hidden_nodes); - - free_node_ids.clear(); - free_edge_ids.clear(); - - nodes_count = edges_count = 0; - hidden_nodes_count = hidden_edges_count = 0; - free_node_ids_count = free_edge_ids_count = 0; - - post_clear_handler(); -} - -void graph::del_all_nodes() -{ - assert(false); - // not fully implemented: - // * update id lists !!! - // * call handlers - - del_list(edges); - del_list(nodes); - - nodes_count = edges_count = 0; -} - -void graph::del_all_edges() -{ - assert(false); - // not fully implemented: - // * update id lists !!! 
- // * call handlers - del_list(edges); - - edges_count = 0; - - nodes_t::iterator it = nodes.begin(); - nodes_t::iterator end = nodes.end(); - - while(it != end) - { - it->data->edges[0].clear(); - it->data->edges[1].clear(); - } -} - -//-------------------------------------------------------------------------- -// Informations -//-------------------------------------------------------------------------- - - - -bool graph::is_bidirected(GTL::edge_map& rev) const { - edge e1; - node target, source; - bool bidirected = true; - node::out_edges_iterator it; - node::out_edges_iterator end; - - forall_edges (e1, *this) { - target = e1.target (); - source = e1.source (); - end = target.out_edges_end (); - it = target.out_edges_begin (); - - // - // Search all out-edges of target if they are connected to the actual - // edges source. - // - - while (it != end) { - if (it->target () == source) { - break; - } - ++it; - } - - if (it == end) { - bidirected = false; - rev[e1] = edge (); - } else { - rev[e1] = *it; - } - } - - return bidirected; -} - -bool graph::is_connected() const -{ - bool save_directed = directed; - directed = false; - - dfs d; - d.run(*const_cast(this)); - - directed = save_directed; - - return d.number_of_reached_nodes() == number_of_nodes(); -} - -bool graph::is_acyclic() const -{ - topsort t; - t.run(*const_cast(this)); - - return t.is_acyclic(); -} - -int graph::number_of_nodes() const -{ - return nodes_count - hidden_nodes_count; -} - -int graph::number_of_edges() const -{ - return edges_count - hidden_edges_count; -} - -node graph::center() const -{ - int min_excentricity = number_of_nodes()+1; - node n, center; - forall_nodes(n, *this) - { - int excentricity = n.excentricity(); - if(excentricity < min_excentricity) - { - center = n; - min_excentricity = excentricity; - } - } - return center; -} - -//-------------------------------------------------------------------------- -// Iterators 
-//-------------------------------------------------------------------------- - -graph::node_iterator graph::nodes_begin() const -{ - return nodes.begin(); -} - -graph::node_iterator graph::nodes_end() const -{ - return nodes.end(); -} - -graph::edge_iterator graph::edges_begin() const -{ - return edges.begin(); -} - -graph::edge_iterator graph::edges_end() const -{ - return edges.end(); -} - -//-------------------------------------------------------------------------- -// Node/Edge lists -//-------------------------------------------------------------------------- - -nodes_t graph::all_nodes() const -{ - return nodes; -} - -edges_t graph::all_edges() const -{ - return edges; -} - -//-------------------------------------------------------------------------- -// Hide -// If an edge is already hidden (this really happens :-), it will not be -// hidden for the second time -//-------------------------------------------------------------------------- - -void graph::hide_edge (GTL::edge e) -{ - assert (e.data->owner == this); - assert (e.data->owner == this); - - pre_hide_edge_handler (e); - - if (!e.is_hidden()) { - - // - // remove e from all sources and targets adjacency lists - // - e.remove_from(0); - e.remove_from(1); - - // - // clear the list of positions - // - e.data->adj_pos[0].erase - (e.data->adj_pos[0].begin(), e.data->adj_pos[0].end()); - e.data->adj_pos[1].erase - (e.data->adj_pos[1].begin(), e.data->adj_pos[1].end()); - - // - // remove e from the list of all edges - // - edges.erase (e.data->pos); - - // - // insert e in hidden edges list - // - e.data->pos = hidden_edges.insert(hidden_edges.end(), e); - e.data->hidden = true; - ++hidden_edges_count; - } - - post_hide_edge_handler (e); -} - -//-------------------------------------------------------------------------- -// restore_edge -// An edge will be restored only if it is hidden (sounds wise, hmm ...) 
-//-------------------------------------------------------------------------- - -void graph::restore_edge (GTL::edge e) -{ - assert (e.data->owner == this); - assert (e.data->owner == this); - - pre_restore_edge_handler (e); - - if (e.is_hidden()) { - // - // remove e from hidden edges list - // - hidden_edges.erase (e.data->pos); - --hidden_edges_count; - - // - // for each source of e insert e in its list of out-edges and store - // the position in e's list of positions - // - nodes_t::iterator it; - nodes_t::iterator end = e.data->nodes[0].end(); - - for (it = e.data->nodes[0].begin (); it != end; ++it) - { - edges_t& adj = it->data->edges[1]; - e.data->adj_pos[0].push_back(adj.insert(adj.begin(), e)); - } - - // - // for each target of e insert e in its list of in-edges and store - // the pos - // - end = e.data->nodes[1].end(); - - for (it = e.data->nodes[1].begin (); it != end; ++it) - { - edges_t& adj = it->data->edges[0]; - e.data->adj_pos[1].push_back(adj.insert(adj.begin(), e)); - } - - e.data->pos = edges.insert(edges.end(), e); - e.data->hidden = false; - } - - post_restore_edge_handler (e); -} - -//-------------------------------------------------------------------------- -// Hide -// If an node is already hidden (this really happens :-), it will not be -// hidden for the second time -// Note: also all adjacent edges will be hidden -//-------------------------------------------------------------------------- - -edges_t graph::hide_node(GTL::node n) -{ - assert (n.data->owner == this); - - pre_hide_node_handler (n); - edges_t implicitly_hidden_edges; - - if (!n.is_hidden()){ - // hide all connected egdes - for (int i = 0; i <= 1; ++i) - { - edges_t::iterator end = n.data->edges[i].end(); - edges_t::iterator edge = n.data->edges[i].begin(); - while (edge != end) - { - implicitly_hidden_edges.push_back(*edge); - hide_edge(*edge); - edge = n.data->edges[i].begin(); - } - } - - // hide node - hidden_nodes.push_back(n); - nodes.erase(n.data->pos); - 
n.data->hidden = true; - ++hidden_nodes_count; - } - - post_hide_node_handler (n); - - return implicitly_hidden_edges; -} - -//-------------------------------------------------------------------------- -// restore_node -// A node will be restored only if it is hidden (sounds wise, hmm ...) -// connected nodes won't be restored automatically ! -//-------------------------------------------------------------------------- - -void graph::restore_node (GTL::node n) -{ - assert (n.data->owner == this); - - pre_restore_node_handler(n); - - if (n.is_hidden()) - { - // node is hidden - - nodes.push_back(n); - n.data->pos = --nodes.end(); - -#if 1 - hidden_nodes.remove(n); -#else - hidden_nodes.erase(std::remove(hidden_nodes.begin(), hidden_nodes.end(), n), hidden_nodes.end()); -#endif - n.data->hidden = false; - --hidden_nodes_count; - } - - post_restore_node_handler (n); -} - - -void graph::induced_subgraph(nodes_t& sub_nodes) -{ - node_map in_sub (*this, 0); - nodes_t::iterator it, end, tmp; - - for (it = sub_nodes.begin(), end = sub_nodes.end(); it != end; ++it) { - in_sub[*it] = 1; - } - - it = nodes.begin(); - end = nodes.end(); - - while (it != end) { - tmp = it; - ++tmp; - - if (!in_sub[*it]) { - hide_node (*it); - } - - it = tmp; - } -} - -void graph::restore_graph () -{ - nodes_t::iterator it, end, tmp; - - it = hidden_nodes.begin(); - end = hidden_nodes.end(); - - while (it != end) - { - tmp = it; - ++tmp; - restore_node(*it); - it = tmp; - } - - edges_t::iterator e_it = hidden_edges.begin(); - edges_t::iterator e_end = hidden_edges.end(); - - while (e_it != e_end) - { - edges_t::iterator e_tmp = e_it; - ++e_tmp; - restore_edge (*e_it); - e_it = e_tmp; - } -} - -//-------------------------------------------------------------------------- -// Node/edge numbering -//-------------------------------------------------------------------------- - -int graph::number_of_ids(GTL::node) const -{ - return - free_node_ids_count + - nodes_count; -} - -int 
graph::number_of_ids(GTL::edge) const -{ - return - free_edge_ids_count + - edges_count; -} - -int graph::new_node_id() -{ - if(free_node_ids.empty()) - return nodes_count; - - int id = free_node_ids.back(); - free_node_ids.pop_back(); - --free_node_ids_count; - return id; -} - -int graph::new_edge_id() -{ - if(free_edge_ids.empty()) - return edges_count; - - int id = free_edge_ids.back(); - free_edge_ids.pop_back(); - --free_edge_ids_count; - return id; -} - -//-------------------------------------------------------------------------- -// Utilities -//-------------------------------------------------------------------------- - -void graph::del_list(nodes_t& l) -{ - nodes_t::const_iterator it = l.begin(); - nodes_t::const_iterator end = l.end(); - - while(it != end) - { - delete it->data; - ++it; - } - - l.clear(); -} - -void graph::del_list(edges_t& l) -{ - edges_t::const_iterator it = l.begin(); - edges_t::const_iterator end = l.end(); - - while(it != end) - { - delete it->data; - ++it; - } - - l.clear(); -} - -//-------------------------------------------------------------------------- -// Others -//-------------------------------------------------------------------------- - -edges_t graph::insert_reverse_edges() { - edges_t rev; - edge e; - - node::out_edges_iterator it, end; - - forall_edges (e, *this) { - it = e.target().out_edges_begin(); - end = e.target().out_edges_end(); - - while (it != end) { - if (it->target() == e.source()) - break; - ++it; - } - - if (it == end) { - rev.push_back(new_edge (e.target(), e.source())); - } - } - - return rev; -} - -node graph::choose_node () const -{ - // Well, probably doesn't guarantee uniform distribution :-) - return nodes.empty() ? 
node() : nodes.front(); -} - -//-------------------------------------------------------------------------- -// I/O -//-------------------------------------------------------------------------- - -GML_error graph::load (const char* filename, bool preserve_ids) { - - GML_stat stat; - stat.key_list = NULL; - GML_pair* key_list; - GML_pair* orig_list; - - FILE* file = fopen (filename, "r"); - - if (!file) { - stat.err.err_num = GML_FILE_NOT_FOUND; - return stat.err; - } - - GML_init (); - key_list = GML_parser (file, &stat, 0); - fclose (file); - - if (stat.err.err_num != GML_OK) { - GML_free_list (key_list, stat.key_list); - return stat.err; - } - - // - // This file is a valid GML-file, let's build the graph. - // - - clear(); - orig_list = key_list; - - - - // - // get the first entry with key "graph" in the list - // - - while (key_list) { - if (!strcmp ( "graph", key_list->key)) { - break; - } - - key_list = key_list->next; - } - - assert (key_list); - - key_list = key_list->value.list; - GML_pair* graph_list = key_list; - - GML_pair* tmp_list; - // GML_pair* node_entries = 0; - // GML_pair* edge_entries = 0; - - std::list > node_entries; - std::list, GML_pair*> > edge_entries; - - int num_nodes = 0; - - bool target_found; - bool source_found; - - // - // Node and edge keys may come in arbitrary order, so sort them such - // that all nodes come before all edges. 
- // - - while (key_list) { - if (!strcmp (key_list->key, "node")) { - - // - // Search the list associated with this node for the id - // - - assert (key_list->kind == GML_LIST); - tmp_list = key_list->value.list; - std::pair n; - n.second = tmp_list; - - while (tmp_list) { - if (!strcmp (tmp_list->key, "id")) { - assert (tmp_list->kind == GML_INT); - n.first = tmp_list->value.integer; - break; - } - - tmp_list = tmp_list->next; - } - - assert (tmp_list); - node_entries.push_back(n); - ++num_nodes; - - } else if (!strcmp (key_list->key, "edge")) { - - // - // Search for source and target entries - // - - assert (key_list->kind == GML_LIST); - tmp_list = key_list->value.list; - source_found = false; - target_found = false; - std::pair, GML_pair*> e; - e.second = tmp_list; - - while (tmp_list) { - if (!strcmp (tmp_list->key, "source")) { - assert (tmp_list->kind == GML_INT); - source_found = true; - e.first.first = tmp_list->value.integer; - if (target_found) break; - - } else if (!strcmp (tmp_list->key, "target")) { - assert (tmp_list->kind == GML_INT); - target_found = true; - e.first.second = tmp_list->value.integer; - if (source_found) break; - } - - tmp_list = tmp_list->next; - } - - assert (source_found && target_found); - edge_entries.push_back (e); - - } else if (!strcmp (key_list->key, "directed")) { - directed = (key_list->value.integer != 0); - } - - key_list = key_list->next; - } - - // - // make this graph the graph decribed in list - // - - std::map id_2_node; - node source, target; - node tmp_node; - edge tmp_edge; - std::list >::iterator it, end; - std::vector node_ids; - node_ids.reserve(num_nodes); - - for (it = node_entries.begin(), end = node_entries.end(); - it != end; ++it) { - tmp_node = new_node (); - if (preserve_ids) { - tmp_node.data->id = it->first; - node_ids.push_back(it->first); - } - id_2_node[it->first] = tmp_node; - load_node_info_handler (tmp_node, it->second); - } - - std::list, GML_pair*> >::iterator eit, eend; - for (eit = 
edge_entries.begin(), eend = edge_entries.end(); - eit != eend; ++eit) { - source = id_2_node[eit->first.first]; - target = id_2_node[eit->first.second]; - tmp_edge = new_edge (source, target); - load_edge_info_handler (tmp_edge, eit->second); - } - - load_graph_info_handler (graph_list); - top_level_key_handler (orig_list); - - std::sort(node_ids.begin(),node_ids.end()); - - std::vector::iterator iit, iend; - int prev = 0; - - for (iit = node_ids.begin(), iend = node_ids.end(); - iit != iend; ++iit) - { - if (iit != node_ids.begin()) { - free_node_ids_count += *iit - prev - 1; - } else { - free_node_ids_count += *iit; - } - prev = *iit; - } - - GML_free_list (orig_list, stat.key_list); - stat.err.err_num = GML_OK; - return stat.err; -} - -void graph::load_node_info_handler (GTL::node /*n*/, GML_pair* /*li*/) { -} - - -void graph::load_edge_info_handler (GTL::edge /*e*/, GML_pair* /*li*/) { -} - -void graph::load_graph_info_handler (GML_pair* /*li*/) { -} - -void graph::top_level_key_handler (GML_pair* /*li*/) { -} - - -void graph::save(std::ostream* file) const { - pre_graph_save_handler (file); - (*file) << "graph [" << std::endl; - (*file) << "directed " << (directed ? 
"1" : "0") << std::endl; - - node_iterator it = nodes_begin(); - node_iterator end = nodes_end(); - - for (;it != end; ++it) { - (*file) << "node [\n" << "id " << it->id() << "\n"; - save_node_info_handler (file, *it); - (*file) << " ]" << std::endl; - } - - edge_iterator e_it = edges_begin(); - edge_iterator e_end = edges_end(); - - for (; e_it != e_end; ++e_it) { - (*file) << "edge [\n" << "source " << e_it->source().id() << "\n"; - (*file) << "target " << e_it->target().id() << "\n"; - save_edge_info_handler (file, *e_it); - (*file) << " ]" << std::endl; - } - - save_graph_info_handler (file); - - (*file) << "]" << std::endl; - after_graph_save_handler (file); -} - -int graph::save (const char* filename) const { - - std::ofstream file(filename); - if (!file) return 0; - - save (&file); - - return 1; -} - - -// void graph::top_level_key_handler (GML_pair_list::const_iterator it, -// GML_pair_list::const_iterator end) -// { -// cout << "TOP_LEVEL_HANDLER" << endl; - -// for (; it != end; ++it) { -// cout << *it << endl; -// } -// } - -// void graph::load_graph_info_handler (GML_pair_list::const_iterator it, -// GML_pair_list::const_iterator end) -// { -// cout << "GRAPH_INFO_HANDLER" << endl; - -// for (; it != end; ++it) { -// cout << *it << endl; -// } -// } - -// void graph::load_node_info_handler (GTL::node n, GML_pair_list::const_iterator it, -// GML_pair_list::const_iterator end) -// { -// cout << "NODE_INFO_HANDLER for " << n << endl; - -// for (; it != end; ++it) { -// cout << *it << endl; -// } -// } - -// void graph::load_edge_info_handler (GTL::edge e, GML_pair_list::const_iterator it, -// GML_pair_list::const_iterator end) -// { -// cout << "EDGE_INFO_HANDLER for " << e.source() << "-->" -// << e.target() << endl; - -// for (; it != end; ++it) { -// cout << *it << endl; -// } -// } - -__GTL_END_NAMESPACE - -//-------------------------------------------------------------------------- -// end of file 
-//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/src/maxflow_ff.cpp b/src/Tracker/graph/GTL/src/maxflow_ff.cpp deleted file mode 100644 index 9d05b9fa8..000000000 --- a/src/Tracker/graph/GTL/src/maxflow_ff.cpp +++ /dev/null @@ -1,311 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// maxflow_ff.cpp -// -//========================================================================== -// $Id: maxflow_ff.cpp,v 1.7 2001/11/07 13:58:10 pick Exp $ - -#include - -#include -#include -#include - -__GTL_BEGIN_NAMESPACE - -maxflow_ff::maxflow_ff() -{ - max_graph_flow = 0.0; - set_vars_executed = false; -} - -maxflow_ff::~maxflow_ff() -{ -} - -void maxflow_ff::set_vars(const edge_map& edge_capacity) -{ - this->edge_capacity = edge_capacity; - artif_source_target = true; - max_graph_flow = 0.0; - set_vars_executed = true; -} - -void maxflow_ff::set_vars(const edge_map& edge_capacity, const node& net_source, const node& net_target) -{ - this->edge_capacity = edge_capacity; - this->net_source = net_source; - this->net_target = net_target; - artif_source_target = false; - max_graph_flow = 0.0; - set_vars_executed = true; -} - -int maxflow_ff::check(GTL::graph& G) -{ - if (!set_vars_executed) - return(GTL_ERROR); - - graph::edge_iterator edge_it = G.edges_begin(); - graph::edge_iterator edges_end = G.edges_end(); - while (edge_it != edges_end) - { - if (edge_capacity[*edge_it] < 0) - return(GTL_ERROR); - ++edge_it; - } - // G.is_acyclic may be false - if ((G.number_of_nodes() <= 1) || (!G.is_connected()) || (G.is_undirected())) - return(GTL_ERROR); - - if (artif_source_target) - { - bool source_found = false; - bool target_found = false; - graph::node_iterator node_it = G.nodes_begin(); - graph::node_iterator nodes_end = G.nodes_end(); - while (node_it != nodes_end) - { - if (node_it->indeg() == 0) - 
source_found = true; - - if (node_it->outdeg() == 0) - target_found = true; - - ++node_it; - } - if (!(source_found && target_found)) - return(GTL_ERROR); - } - else - { - if (net_source == net_target) - return(GTL_ERROR); - } - return(GTL_OK); // ok -} - -int maxflow_ff::run(GTL::graph& G) -{ - // init - if (artif_source_target) - create_artif_source_target(G); - - prepare_run(G); - - node_map last_edge(G); - - while (get_sp(G, last_edge) == SP_FOUND) - { - comp_single_flow(G, last_edge); - } - - restore_graph(G); - return(GTL_OK); -} - -double maxflow_ff::get_max_flow(const edge& e) const -{ - return(edge_max_flow[e]); -} - -double maxflow_ff::get_max_flow() const -{ - return(max_graph_flow); -} - -double maxflow_ff::get_rem_cap(const edge& e) const -{ - return(edge_capacity[e] - edge_max_flow[e]); -} - -void maxflow_ff::reset() -{ -} - -void maxflow_ff::create_artif_source_target(GTL::graph& G) -{ - net_source = G.new_node(); - net_target = G.new_node(); - edge e; - graph::node_iterator node_it = G.nodes_begin(); - graph::node_iterator nodes_end = G.nodes_end(); - while (node_it != nodes_end) - { - if (*node_it != net_source && node_it->indeg() == 0) - { - e = G.new_edge(net_source, *node_it); - edge_capacity[e] = 1.0; // 1.0 prevents e from hiding - node::out_edges_iterator out_edge_it = node_it->out_edges_begin(); - node::out_edges_iterator out_edges_end = node_it->out_edges_end(); - while (out_edge_it != out_edges_end) - { - edge_capacity[e] += edge_capacity[*out_edge_it]; - ++out_edge_it; - } - } - if (*node_it != net_target && node_it->outdeg() == 0) - { - e = G.new_edge(*node_it, net_target); - edge_capacity[e] = 1.0; // 1.0 prevents e from hiding - node::in_edges_iterator in_edge_it = node_it->in_edges_begin(); - node::in_edges_iterator in_edges_end = node_it->in_edges_end(); - while (in_edge_it != in_edges_end) - { - edge_capacity[e] += edge_capacity[*in_edge_it]; - ++in_edge_it; - } - } - ++node_it; - } -} - -void maxflow_ff::prepare_run(const graph& G) 
-{ - edge_max_flow.init(G, 0.0); - edge_org.init(G, true); - back_edge_exists.init(G, false); - max_graph_flow = 0.0; -} - -void maxflow_ff::comp_single_flow(GTL::graph& G, GTL::node_map& last_edge) -{ - double min_value = extra_charge(last_edge); - - node cur_node = net_target; - do - { - if (edge_org[last_edge[cur_node]]) // shortest path runs over a org. edge - { - if (!back_edge_exists[last_edge[cur_node]]) // create back edge - { - create_back_edge(G, last_edge[cur_node]); - } - edge_max_flow[last_edge[cur_node]] += min_value; - G.restore_edge(back_edge[last_edge[cur_node]]); - edge_capacity[back_edge[last_edge[cur_node]]] += min_value; - } - else // shortest path runs over a inserted back edge - { - edge oe = back_edge[last_edge[cur_node]]; - G.restore_edge(oe); - edge_max_flow[oe] -= min_value; - edge_capacity[last_edge[cur_node]] -= min_value; - } - if (edge_capacity[last_edge[cur_node]] <= edge_max_flow[last_edge[cur_node]]) - G.hide_edge(last_edge[cur_node]); - cur_node = last_edge[cur_node].source(); - } while (cur_node != net_source); -} - - -int maxflow_ff::get_sp(const graph& G, GTL::node_map& last_edge) -{ - std::queue next_nodes; - node_map visited(G, false); - next_nodes.push(net_source); - visited[net_source] = true; - - if (comp_sp(G, next_nodes, visited, last_edge) == SP_FOUND) - return(SP_FOUND); - else - return(NO_SP_FOUND); -} - -int maxflow_ff::comp_sp(const graph& /*G*/, std::queue& next_nodes, node_map& visited, GTL::node_map& last_edge) -{ - node cur_node; - - while (!next_nodes.empty()) - { - cur_node = next_nodes.front(); - next_nodes.pop(); - - node::out_edges_iterator out_edge_it = cur_node.out_edges_begin(); - node::out_edges_iterator out_edges_end = cur_node.out_edges_end(); - while (out_edge_it != out_edges_end) - { - node next = out_edge_it->target(); - if (!visited[next]) - { - last_edge[next] = *out_edge_it; - if (next == net_target) - { - return(SP_FOUND); - } - else - { - next_nodes.push(next); - visited[next] = true; - } - } 
- ++out_edge_it; - } - } - return(NO_SP_FOUND); -} - -double maxflow_ff::extra_charge(const node_map& last_edge) const -{ - node cur_node = net_target; - double min_value = edge_capacity[last_edge[cur_node]] - edge_max_flow[last_edge[cur_node]]; - - do - { - double cur_capacity = edge_capacity[last_edge[cur_node]] - edge_max_flow[last_edge[cur_node]]; - - if (cur_capacity < min_value) - min_value = cur_capacity; - cur_node = last_edge[cur_node].source(); - } while (cur_node != net_source); - return(min_value); -} - -void maxflow_ff::create_back_edge(GTL::graph& G, const edge& org_edge) -{ - edge be = G.new_edge(org_edge.target(), org_edge.source()); - edge_org[be] = false; - edges_not_org.push_back(be); - back_edge[org_edge] = be; - back_edge[be] = org_edge; - edge_max_flow[be] = 0.0; - edge_capacity[be] = 0.0; - back_edge_exists[org_edge] = true; - back_edge_exists[be] = true; // a back edge always has a org. edge ;-) -} - -void maxflow_ff::comp_max_flow(const graph& /*G*/) -{ - max_graph_flow = 0.0; - - node::out_edges_iterator out_edge_it = net_source.out_edges_begin(); - node::out_edges_iterator out_edges_end = net_source.out_edges_end(); - while (out_edge_it != out_edges_end) - { - max_graph_flow += edge_max_flow[*out_edge_it]; - ++out_edge_it; - } -} - -void maxflow_ff::restore_graph(GTL::graph& G) -{ - G.restore_graph(); // hidden edges can not be deleted! 
- while (!edges_not_org.empty()) - { - G.del_edge(edges_not_org.front()); - edges_not_org.pop_front(); - } - comp_max_flow(G); - if (artif_source_target) - { - G.del_node(net_source); - G.del_node(net_target); - } -} - -__GTL_END_NAMESPACE - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/src/maxflow_pp.cpp b/src/Tracker/graph/GTL/src/maxflow_pp.cpp deleted file mode 100644 index 80ca81bf6..000000000 --- a/src/Tracker/graph/GTL/src/maxflow_pp.cpp +++ /dev/null @@ -1,669 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// maxflow_pp.cpp -// -//========================================================================== -// $Id: maxflow_pp.cpp,v 1.7 2001/11/07 13:58:10 pick Exp $ - -#include - -#include -#include -#include - -__GTL_BEGIN_NAMESPACE - -maxflow_pp::maxflow_pp() -{ - max_graph_flow = 0.0; - set_vars_executed = false; -} - -maxflow_pp::~maxflow_pp() -{ -} - -void maxflow_pp::set_vars(const edge_map& edge_capacity) -{ - this->edge_capacity = edge_capacity; - artif_source_target = true; - max_graph_flow = 0.0; - set_vars_executed = true; -} - -void maxflow_pp::set_vars(const edge_map& edge_capacity, const node& net_source, const node& net_target) -{ - this->edge_capacity = edge_capacity; - this->net_source = net_source; - this->net_target = net_target; - artif_source_target = false; - max_graph_flow = 0.0; - set_vars_executed = true; -} - -int maxflow_pp::check(GTL::graph& G) -{ - if (!set_vars_executed) - return(GTL_ERROR); - - graph::edge_iterator edge_it = G.edges_begin(); - graph::edge_iterator edges_end = G.edges_end(); - while (edge_it != edges_end) - { - if (edge_capacity[*edge_it] < 0) - return(GTL_ERROR); - - ++edge_it; - } - // G.is_acyclic may be false - if 
((G.number_of_nodes() <= 1) || (!G.is_connected()) || (G.is_undirected())) - return(GTL_ERROR); - - if (artif_source_target) - { - bool source_found = false; - bool target_found = false; - graph::node_iterator node_it = G.nodes_begin(); - graph::node_iterator nodes_end = G.nodes_end(); - while (node_it != nodes_end) - { - if (node_it->indeg() == 0) - source_found = true; - - if (node_it->outdeg() == 0) - target_found = true; - - ++node_it; - } - if (!(source_found && target_found)) - return(GTL_ERROR); - } - else - { - if (net_source == net_target) - return(GTL_ERROR); - } - return(GTL_OK); // ok -} - -int maxflow_pp::run(GTL::graph& G) -{ - // init - if (artif_source_target) - create_artif_source_target(G); - - prepare_run(G); - - double flow_value = 0; - node min_tp_node; - while (leveling(G) == TARGET_FROM_SOURCE_REACHABLE) - { - hide_unreachable_nodes(G); - min_throughput_node(G, min_tp_node, flow_value); - push(G, min_tp_node, flow_value); - pull(G, min_tp_node, flow_value); - comp_rem_net(G); - } - - restore_graph(G); - return(GTL_OK); -} - -double maxflow_pp::get_max_flow(const edge& e) const -{ - return(edge_max_flow[e]); -} - -double maxflow_pp::get_max_flow() const -{ - return(max_graph_flow); -} - -double maxflow_pp::get_rem_cap(const edge& e) const -{ - return(edge_capacity[e] - edge_max_flow[e]); -} - -void maxflow_pp::reset() -{ -} - -void maxflow_pp::create_artif_source_target(GTL::graph& G) -{ - net_source = G.new_node(); - net_target = G.new_node(); - edge e; - graph::node_iterator node_it = G.nodes_begin(); - graph::node_iterator nodes_end = G.nodes_end(); - while (node_it != nodes_end) - { - if (*node_it != net_source && node_it->indeg() == 0) - { - e = G.new_edge(net_source, *node_it); - edge_capacity[e] = 1.0; // 1.0 prevents e from hiding - node::out_edges_iterator out_edge_it = node_it->out_edges_begin(); - node::out_edges_iterator out_edges_end = node_it->out_edges_end(); - while (out_edge_it != out_edges_end) - { - edge_capacity[e] += 
edge_capacity[*out_edge_it]; - ++out_edge_it; - } - } - if (*node_it != net_target && node_it->outdeg() == 0) - { - e = G.new_edge(*node_it, net_target); - edge_capacity[e] = 1.0; // 1.0 prevents e from hiding - node::in_edges_iterator in_edge_it = node_it->in_edges_begin(); - node::in_edges_iterator in_edges_end = node_it->in_edges_end(); - while (in_edge_it != in_edges_end) - { - edge_capacity[e] += edge_capacity[*in_edge_it]; - ++in_edge_it; - } - } - ++node_it; - } -} - -void maxflow_pp::prepare_run(const graph& G) -{ - flow_update.init(G, 0.0); - edge_max_flow.init(G, 0.0); - edge_org.init(G, true); - back_edge_exists.init(G, false); - max_graph_flow = 0.0; - full_edges.clear(); - temp_unvisible_nodes.clear(); - temp_unvisible_edges.clear(); -} - -int maxflow_pp::leveling(GTL::graph& G) -{ - bool source_target_con = false; - node_map level(G, -1); // -1 means no level yet! - std::queue next_nodes; - next_nodes.push(net_source); - level[net_source] = 0; - node cur_node; - while (!next_nodes.empty()) - { - cur_node = next_nodes.front(); - next_nodes.pop(); - node::out_edges_iterator out_edge_it = cur_node.out_edges_begin(); - node::out_edges_iterator out_edges_end = cur_node.out_edges_end(); - while (out_edge_it != out_edges_end) - { - if (level[out_edge_it->target()] == -1) - { - if (out_edge_it->target() == net_target) - source_target_con = true; - - level[out_edge_it->target()] = level[cur_node] + 1; - next_nodes.push(out_edge_it->target()); - ++out_edge_it; - } - else if (level[out_edge_it->target()] <= level[cur_node]) - { - node::out_edges_iterator temp_it = out_edge_it; - ++out_edge_it; - temp_unvisible_edges.push_back(*temp_it); - G.hide_edge(*temp_it); - } - else - { - ++out_edge_it; - } - } - } - if (source_target_con) - return(TARGET_FROM_SOURCE_REACHABLE); - else - return(TARGET_FROM_SOURCE_NOT_REACHABLE); -} - -void maxflow_pp::hide_unreachable_nodes(GTL::graph& G) -{ - node_map reachable_from_net_source(G, false); - node_map 
reachable_from_net_target(G, false); - std::queue next_nodes; - node cur_node; - - next_nodes.push(net_source); - reachable_from_net_source[net_source] = true; - while (!next_nodes.empty()) - { - cur_node = next_nodes.front(); - next_nodes.pop(); - node::out_edges_iterator out_edge_it = cur_node.out_edges_begin(); - node::out_edges_iterator out_edges_end = cur_node.out_edges_end(); - while (out_edge_it != out_edges_end) - { - node next = out_edge_it->target(); - if (!reachable_from_net_source[next]) - { - next_nodes.push(next); - reachable_from_net_source[next] = true; - } - ++out_edge_it; - } - } - - next_nodes.push(net_target); - reachable_from_net_target[net_target] = true; - while (!next_nodes.empty()) - { - cur_node = next_nodes.front(); - next_nodes.pop(); - node::in_edges_iterator in_edge_it = cur_node.in_edges_begin(); - node::in_edges_iterator in_edges_end = cur_node.in_edges_end(); - while (in_edge_it != in_edges_end) - { - node next = in_edge_it->source(); - if (!reachable_from_net_target[next]) - { - next_nodes.push(next); - reachable_from_net_target[next] = true; - } - ++in_edge_it; - } - } - - graph::node_iterator node_it = G.nodes_begin(); - graph::node_iterator nodes_end = G.nodes_end(); - while (node_it != nodes_end) - { - if ((!reachable_from_net_source[*node_it]) || - (!reachable_from_net_target[*node_it])) - { - graph::node_iterator temp_it = node_it; - ++node_it; - temp_unvisible_nodes.push_back(*temp_it); - store_temp_unvisible_edges(*temp_it); - G.hide_node(*temp_it); - } - else - { - ++node_it; - } - } -} - -void maxflow_pp::store_temp_unvisible_edges(const node& cur_node) -{ - node::in_edges_iterator in_it = cur_node.in_edges_begin(); - node::in_edges_iterator in_edges_end = cur_node.in_edges_end(); - while (in_it != in_edges_end) - { - temp_unvisible_edges.push_back(*in_it); - ++in_it; - } - node::out_edges_iterator out_it = cur_node.out_edges_begin(); - node::out_edges_iterator out_edges_end = cur_node.out_edges_end(); - while (out_it != 
out_edges_end) - { - temp_unvisible_edges.push_back(*out_it); - ++out_it; - } -} - -void maxflow_pp::min_throughput_node(const graph& G, GTL::node& min_tp_node, - double& flow_value) -{ - min_tp_node = net_source; - flow_value = comp_min_throughput(min_tp_node); - - graph::node_iterator node_it = G.nodes_begin(); - graph::node_iterator nodes_end = G.nodes_end(); - double cur_tp; - while (node_it != nodes_end) - { - cur_tp = comp_min_throughput(*node_it); - if (cur_tp < flow_value) - { - min_tp_node = *node_it; - flow_value = cur_tp; - } - ++node_it; - } -} - -double maxflow_pp::comp_min_throughput(const node cur_node) const -{ - double in_flow = 0.0; - double out_flow = 0.0; - node::in_edges_iterator in_it = cur_node.in_edges_begin(); - node::in_edges_iterator in_edges_end = cur_node.in_edges_end(); - while (in_it != in_edges_end) - { - in_flow += edge_capacity[*in_it] - edge_max_flow[*in_it]; - ++in_it; - } - node::out_edges_iterator out_it = cur_node.out_edges_begin(); - node::out_edges_iterator out_edges_end = cur_node.out_edges_end(); - while (out_it != out_edges_end) - { - out_flow += edge_capacity[*out_it] - edge_max_flow[*out_it]; - ++out_it; - } - if (cur_node == net_source) - return(out_flow); - - if (cur_node == net_target) - return(in_flow); - - return(in_flow < out_flow ? 
in_flow : out_flow); -} - -void maxflow_pp::get_sp_ahead(const graph& G, const node& start_node, node_map& last_edge) -{ - std::queue next_nodes; - node_map visited(G, false); - next_nodes.push(start_node); - visited[start_node] = true; - - node cur_node; - while (!next_nodes.empty()) - { - cur_node = next_nodes.front(); - next_nodes.pop(); - - node::out_edges_iterator out_edge_it = cur_node.out_edges_begin(); - node::out_edges_iterator out_edges_end = cur_node.out_edges_end(); - while (out_edge_it != out_edges_end) - { - node next = out_edge_it->target(); - if (!visited[next]) - { - last_edge[next] = *out_edge_it; - if (next == net_target) - return; // sp found - - next_nodes.push(next); - visited[next] = true; - } - ++out_edge_it; - } - } -} - -void maxflow_pp::get_sp_backwards(const graph& G, const node& start_node, node_map& prev_edge) -{ - std::queue next_nodes; - node_map visited(G, false); - next_nodes.push(start_node); - visited[start_node] = true; - - node cur_node; - while (!next_nodes.empty()) - { - cur_node = next_nodes.front(); - next_nodes.pop(); - - node::in_edges_iterator in_edge_it = cur_node.in_edges_begin(); - node::in_edges_iterator in_edges_end = cur_node.in_edges_end(); - while (in_edge_it != in_edges_end) - { - node next = in_edge_it->source(); - if (!visited[next]) - { - prev_edge[next] = *in_edge_it; - if (next == net_source) - return; // sp found - - next_nodes.push(next); - visited[next] = true; - } - ++in_edge_it; - } - } -} - -void maxflow_pp::push(GTL::graph& G, const node& start_node, const double flow_value) -{ - node_map last_edge; - double cur_flow = flow_value; - double min_value = 0.0; - - if (start_node == net_target) - return; // no push necessary - - do - { - get_sp_ahead(G, start_node, last_edge); - min_value = extra_charge_ahead(start_node, last_edge); - if (min_value > cur_flow) - min_value = cur_flow; - - GTL::node cur_node = net_target; - do - { - if (edge_org[last_edge[cur_node]]) - { - edge_max_flow[last_edge[cur_node]] 
+= min_value; - if (back_edge_exists[last_edge[cur_node]]) - flow_update[back_edge[last_edge[cur_node]]] += min_value; - - } - else - { - edge_capacity[last_edge[cur_node]] -= min_value; - flow_update[back_edge[last_edge[cur_node]]] += min_value; - } - if (edge_capacity[last_edge[cur_node]] <= - edge_max_flow[last_edge[cur_node]]) - { - full_edges.push_back(last_edge[cur_node]); - G.hide_edge(last_edge[cur_node]); - } - cur_node = last_edge[cur_node].source(); - } while (cur_node != start_node); - cur_flow -= min_value; - if (cur_flow < 1e-015) // quite hacky ;-) - cur_flow = 0.0; // to avoid rounding errors - - } while (cur_flow > 0.0); -} - -void maxflow_pp::pull(GTL::graph& G, const node& start_node, const double flow_value) -{ - node_map prev_edge; - double cur_flow = flow_value; - double min_value = 0.0; - - if (start_node == net_source) - return; // pull not necessary - - do - { - get_sp_backwards(G, start_node, prev_edge); - min_value = extra_charge_backwards(start_node, prev_edge); - if (min_value > cur_flow) - min_value = cur_flow; - - GTL::node cur_node = net_source; - do - { - if (edge_org[prev_edge[cur_node]]) - { - edge_max_flow[prev_edge[cur_node]] += min_value; - if (back_edge_exists[prev_edge[cur_node]]) - flow_update[back_edge[prev_edge[cur_node]]] += min_value; - } - else - { - edge_capacity[prev_edge[cur_node]] -= min_value; - flow_update[back_edge[prev_edge[cur_node]]] += min_value; - } - if (edge_capacity[prev_edge[cur_node]] <= - edge_max_flow[prev_edge[cur_node]]) - { - full_edges.push_back(prev_edge[cur_node]); - G.hide_edge(prev_edge[cur_node]); - } - cur_node = prev_edge[cur_node].target(); - } while (cur_node != start_node); - cur_flow -= min_value; - if (cur_flow < 1e-015) // quite hacky ;-) - cur_flow = 0.0; // to avoid rounding errors - } while (cur_flow > 0.0); -} - -void maxflow_pp::comp_rem_net(GTL::graph& G) -{ - // update back_edges - graph::edge_iterator edge_it = G.edges_begin(); - graph::edge_iterator edges_end = G.edges_end(); 
- while (edge_it != edges_end) - { - single_edge_update(G, *edge_it); - ++edge_it; - } - edges_t::iterator list_it = full_edges.begin(); - edges_t::iterator list_end = full_edges.end(); - while (list_it != list_end) - { - G.restore_edge(*list_it); - if (flow_update[*list_it] > 0.0) - { - single_edge_update(G, *list_it); - edges_t::iterator temp_it = list_it; - ++list_it; - full_edges.erase(temp_it); // now it's visible again - } - else - { - if (!back_edge_exists[*list_it]) - { - create_back_edge(G, *list_it); - edge_capacity[back_edge[*list_it]] = edge_max_flow[*list_it]; - } - G.hide_edge(*list_it); - ++list_it; - } - } - - // make hidden levels visible again - nodes_t::iterator temp_un_node_it = temp_unvisible_nodes.begin(); - nodes_t::iterator temp_un_nodes_end = temp_unvisible_nodes.end(); - while (temp_un_node_it != temp_un_nodes_end) - { - G.restore_node(*temp_un_node_it); - ++temp_un_node_it; - } - edges_t::iterator temp_un_edge_it = temp_unvisible_edges.begin(); - edges_t::iterator temp_un_edges_end = temp_unvisible_edges.end(); - while (temp_un_edge_it != temp_un_edges_end) - { - G.restore_edge(*temp_un_edge_it); - if (flow_update[*temp_un_edge_it] > 0.0) - single_edge_update(G, *temp_un_edge_it); - - ++temp_un_edge_it; - } - temp_unvisible_nodes.clear(); - temp_unvisible_edges.clear(); -} - -void maxflow_pp::single_edge_update(GTL::graph& G, GTL::edge cur_edge) -{ - if (edge_org[cur_edge]) - { - edge_max_flow[cur_edge] -= flow_update[cur_edge]; - flow_update[cur_edge] = 0.0; - if (!back_edge_exists[cur_edge]) - { - if (edge_max_flow[cur_edge] > 0.0) - { - create_back_edge(G, cur_edge); - edge_capacity[back_edge[cur_edge]] = edge_max_flow[cur_edge]; - } - } - } - else - { - edge_capacity[cur_edge] += flow_update[cur_edge]; - flow_update[cur_edge] = 0.0; - } -} - -double maxflow_pp::extra_charge_ahead(const node& start_node, const node_map& last_edge) const -{ - node cur_node = net_target; - double min_value = edge_capacity[last_edge[cur_node]] - - 
edge_max_flow[last_edge[cur_node]]; - double cur_capacity; - - do - { - cur_capacity = edge_capacity[last_edge[cur_node]] - - edge_max_flow[last_edge[cur_node]]; - if (cur_capacity < min_value) min_value = cur_capacity; - cur_node = last_edge[cur_node].source(); - } while (cur_node != start_node); - return(min_value); -} - -double maxflow_pp::extra_charge_backwards(const node& start_node, const node_map& prev_edge) const -{ - node cur_node = net_source; - double min_value = edge_capacity[prev_edge[cur_node]] - edge_max_flow[prev_edge[cur_node]]; - - do - { - double cur_capacity = edge_capacity[prev_edge[cur_node]] - - edge_max_flow[prev_edge[cur_node]]; - if (cur_capacity < min_value) min_value = cur_capacity; - cur_node = prev_edge[cur_node].target(); - } while (cur_node != start_node); - return(min_value); -} - -void maxflow_pp::create_back_edge(GTL::graph& G, const edge& org_edge) -{ - edge be = G.new_edge(org_edge.target(), org_edge.source()); - edge_org[be] = false; - edges_not_org.push_back(be); - back_edge[org_edge] = be; - back_edge[be] = org_edge; - edge_max_flow[be] = 0.0; - edge_capacity[be] = 0.0; - back_edge_exists[org_edge] = true; - back_edge_exists[be] = true; // a back edge always has a org. 
edge ;-) - flow_update[be] = 0.0; -} - -void maxflow_pp::comp_max_flow(const graph& /*G*/) -{ - max_graph_flow = 0.0; - - node::out_edges_iterator out_edge_it = net_source.out_edges_begin(); - node::out_edges_iterator out_edges_end = net_source.out_edges_end(); - while (out_edge_it != out_edges_end) - { - max_graph_flow += edge_max_flow[*out_edge_it]; - ++out_edge_it; - } -} - -void maxflow_pp::restore_graph(GTL::graph& G) -{ - G.restore_graph(); - while (!edges_not_org.empty()) - { - G.del_edge(edges_not_org.front()); - edges_not_org.pop_front(); - } - comp_max_flow(G); - if (artif_source_target) - { - G.del_node(net_source); - G.del_node(net_target); - } -} - -__GTL_END_NAMESPACE - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/src/maxflow_sap.cpp b/src/Tracker/graph/GTL/src/maxflow_sap.cpp deleted file mode 100644 index d8a6a8165..000000000 --- a/src/Tracker/graph/GTL/src/maxflow_sap.cpp +++ /dev/null @@ -1,435 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// maxflow_sap.cpp -// -//========================================================================== -// $Id: maxflow_sap.cpp,v 1.6 2001/11/07 13:58:10 pick Exp $ - -#include -#include -#include -#include - -__GTL_BEGIN_NAMESPACE - -maxflow_sap::maxflow_sap() -{ - max_graph_flow = 0.0; - set_vars_executed = false; -} - - -maxflow_sap::~maxflow_sap() -{ -} - - -void maxflow_sap::set_vars(const edge_map& edge_capacity) -{ - this->edge_capacity = edge_capacity; - artif_source_target = true; - max_graph_flow = 0.0; - set_vars_executed = true; -} - - -void maxflow_sap::set_vars(const edge_map& edge_capacity, - const node& net_source, const node& net_target) -{ - this->edge_capacity = edge_capacity; - this->net_source = net_source; - 
this->net_target = net_target; - artif_source_target = false; - max_graph_flow = 0.0; - set_vars_executed = true; -} - - -int maxflow_sap::check(GTL::graph& G) -{ - if (!set_vars_executed) - { - return(GTL_ERROR); - } - graph::edge_iterator edge_it = G.edges_begin(); - graph::edge_iterator edges_end = G.edges_end(); - while (edge_it != edges_end) - { - if (edge_capacity[*edge_it] < 0) - { - return(GTL_ERROR); - } - ++edge_it; - } - // G.is_acyclic may be false - if ((G.number_of_nodes() <= 1) || (!G.is_connected()) - || (G.is_undirected())) - { - return(GTL_ERROR); - } - if (artif_source_target) - { - bool source_found = false; - bool target_found = false; - graph::node_iterator node_it = G.nodes_begin(); - graph::node_iterator nodes_end = G.nodes_end(); - while (node_it != nodes_end) - { - if (node_it->indeg() == 0) - { - source_found = true; - } - if (node_it->outdeg() == 0) - { - target_found = true; - } - ++node_it; - } - if (!(source_found && target_found)) - { - return(GTL_ERROR); - } - } - else - { - if (net_source == net_target) - return(GTL_ERROR); - } - return(GTL_OK); // everything ok -} - - -int maxflow_sap::run(GTL::graph& G) -{ - // init - if (artif_source_target) - { - create_artif_source_target(G); - } - bool go_on = true; - node_map last_edge(G); - node cur_node; - int number_of_nodes = G.number_of_nodes(); - std::vector numb(number_of_nodes, 0); - prepare_run(G); - - comp_dist_labels(G, numb); - cur_node = net_source; - - while (go_on) - { - if (has_an_admissible_arc(cur_node)) - { - advance(cur_node, last_edge); - if (cur_node == net_target) - { - augment(G, last_edge); - cur_node = net_source; - } - } - else // only inadmissible edges - { - go_on = retreat(number_of_nodes, cur_node, last_edge, numb); - } - } - - restore_graph(G); - return(GTL_OK); -} - - -double maxflow_sap::get_max_flow(const edge& e) const -{ - return(edge_max_flow[e]); -} - - -double maxflow_sap::get_max_flow() const -{ - return(max_graph_flow); -} - - -double 
maxflow_sap::get_rem_cap(const edge& e) const -{ - return(edge_capacity[e] - edge_max_flow[e]); -} - - -void maxflow_sap::reset() -{ - max_graph_flow = 0.0; - set_vars_executed = false; -} - - -void maxflow_sap::create_artif_source_target(GTL::graph& G) -{ - net_source = G.new_node(); - net_target = G.new_node(); - edge e; - graph::node_iterator node_it = G.nodes_begin(); - graph::node_iterator nodes_end = G.nodes_end(); - while (node_it != nodes_end) - { - if (*node_it != net_source && node_it->indeg() == 0) - { - e = G.new_edge(net_source, *node_it); - edge_capacity[e] = 1.0; // 1.0 prevents e from hiding - node::out_edges_iterator out_edge_it = - node_it->out_edges_begin(); - node::out_edges_iterator out_edges_end = - node_it->out_edges_end(); - while (out_edge_it != out_edges_end) - { - edge_capacity[e] += edge_capacity[*out_edge_it]; - ++out_edge_it; - } - } - if (*node_it != net_target && node_it->outdeg() == 0) - { - e = G.new_edge(*node_it, net_target); - edge_capacity[e] = 1.0; // 1.0 prevents e from hiding - node::in_edges_iterator in_edge_it = - node_it->in_edges_begin(); - node::in_edges_iterator in_edges_end = - node_it->in_edges_end(); - while (in_edge_it != in_edges_end) - { - edge_capacity[e] += edge_capacity[*in_edge_it]; - ++in_edge_it; - } - } - ++node_it; - } -} - - -void maxflow_sap::prepare_run(const graph& G) -{ - edge_max_flow.init(G, 0.0); - edge_org.init(G, true); - back_edge_exists.init(G, false); - max_graph_flow = 0.0; -} - - -void maxflow_sap::comp_dist_labels(const graph& G, std::vector& numb) -{ - std::queue next_nodes; - node_map visited(G, false); - next_nodes.push(net_target); - visited[net_target] = true; - dist_label[net_target] = 0; - numb[0] = 1; // only one sink - node cur_node; - - while (!next_nodes.empty()) - { - cur_node = next_nodes.front(); - next_nodes.pop(); - node::in_edges_iterator in_edge_it = cur_node.in_edges_begin(); - node::in_edges_iterator in_edges_end = cur_node.in_edges_end(); - while (in_edge_it != 
in_edges_end) - { - node next = in_edge_it->source(); - if (!visited[next]) - { - next_nodes.push(next); - visited[next] = true; - dist_label[next] = dist_label[cur_node] + 1; - ++numb[dist_label[next]]; - } - ++in_edge_it; - } - } -} - - -bool maxflow_sap::has_an_admissible_arc(const node cur_node) -{ - node::out_edges_iterator out_edge_it = cur_node.out_edges_begin(); - node::out_edges_iterator out_edges_end = cur_node.out_edges_end(); - while (out_edge_it != out_edges_end) - { - if (dist_label[cur_node] == dist_label[out_edge_it->target()] + 1) - { - return true; - } - ++out_edge_it; - } - return false; -} - - -void maxflow_sap::advance(GTL::node& cur_node, GTL::node_map& last_edge) -{ - node::out_edges_iterator out_edge_it = cur_node.out_edges_begin(); - node::out_edges_iterator out_edges_end = cur_node.out_edges_end(); - while (out_edge_it != out_edges_end) - { - if (dist_label[cur_node] == dist_label[out_edge_it->target()] + 1) - { - last_edge[out_edge_it->target()] = *out_edge_it; - cur_node = out_edge_it->target(); - } - ++out_edge_it; - } -} - - -void maxflow_sap::augment(GTL::graph& G, const node_map& last_edge) -{ - double additional_flow = free_capacity(last_edge); - node cur_node = net_target; - - do - { - if (edge_org[last_edge[cur_node]]) - // shortest path runs over a org. 
edge - { - if (!back_edge_exists[last_edge[cur_node]]) // create back edge - { - create_back_edge(G, last_edge[cur_node]); - } - edge_max_flow[last_edge[cur_node]] += additional_flow; - G.restore_edge(back_edge[last_edge[cur_node]]); - edge_capacity[back_edge[last_edge[cur_node]]] += - additional_flow; - } - else // shortest path runs over a inserted back edge - { - edge oe = back_edge[last_edge[cur_node]]; - G.restore_edge(oe); - edge_max_flow[oe] -= additional_flow; - edge_capacity[last_edge[cur_node]] -= additional_flow; - } - if (edge_capacity[last_edge[cur_node]] <= - edge_max_flow[last_edge[cur_node]]) - { - G.hide_edge(last_edge[cur_node]); - } - cur_node = last_edge[cur_node].source(); - } - while (cur_node != net_source); -} - - -bool maxflow_sap::retreat(const int number_of_nodes, - node& cur_node, - const node_map& last_edge, - std::vector& numb) -{ - --numb[dist_label[cur_node]]; - if (numb[dist_label[cur_node]] == 0) - { - return false; - } - else - { - dist_label[cur_node] = - min_neighbour_label(number_of_nodes, cur_node) + 1; - ++numb[dist_label[cur_node]]; - if (cur_node != net_source) - { - cur_node = last_edge[cur_node].source(); - } - return true; - } -} - - -int maxflow_sap::min_neighbour_label(const int number_of_nodes, - const node cur_node) const -{ - int min_value = number_of_nodes; // if no out edge exists - - node::out_edges_iterator out_edge_it = cur_node.out_edges_begin(); - node::out_edges_iterator out_edges_end = cur_node.out_edges_end(); - while (out_edge_it != out_edges_end) - { - if (min_value > dist_label[out_edge_it->target()]) - { - min_value = dist_label[out_edge_it->target()]; - } - ++out_edge_it; - } - return min_value; -} - - -double maxflow_sap::free_capacity(const node_map& last_edge) const -{ - node cur_node = net_target; - double min_value = - edge_capacity[last_edge[cur_node]] - - edge_max_flow[last_edge[cur_node]]; - double cur_capacity; - - do - { - cur_capacity = - edge_capacity[last_edge[cur_node]] - - 
edge_max_flow[last_edge[cur_node]]; - - if (cur_capacity < min_value) - { - min_value = cur_capacity; - } - cur_node = last_edge[cur_node].source(); - } - while (cur_node != net_source); - return(min_value); -} - - -void maxflow_sap::create_back_edge(GTL::graph& G, const edge& org_edge) -{ - edge be = G.new_edge(org_edge.target(), org_edge.source()); - edge_org[be] = false; - edges_not_org.push_back(be); - back_edge[org_edge] = be; - back_edge[be] = org_edge; - edge_max_flow[be] = 0.0; - edge_capacity[be] = 0.0; - back_edge_exists[org_edge] = true; - back_edge_exists[be] = true; // a back edge always has a org. edge -} - - -void maxflow_sap::comp_max_flow(const graph& /*G*/) -{ - max_graph_flow = 0.0; - - node::out_edges_iterator out_edge_it = net_source.out_edges_begin(); - node::out_edges_iterator out_edges_end = net_source.out_edges_end(); - while (out_edge_it != out_edges_end) - { - max_graph_flow += edge_max_flow[*out_edge_it]; - ++out_edge_it; - } -} - - -void maxflow_sap::restore_graph(GTL::graph& G) -{ - G.restore_graph(); // hidden edges can not be deleted! 
- while (!edges_not_org.empty()) - { - G.del_edge(edges_not_org.front()); - edges_not_org.pop_front(); - } - comp_max_flow(G); - if (artif_source_target) - { - G.del_node(net_source); - G.del_node(net_target); - } -} - -__GTL_END_NAMESPACE - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/src/min_tree.cpp b/src/Tracker/graph/GTL/src/min_tree.cpp deleted file mode 100644 index 9ff3c1b50..000000000 --- a/src/Tracker/graph/GTL/src/min_tree.cpp +++ /dev/null @@ -1,134 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// min_tree.cpp -// -//========================================================================== -// $Id: min_tree.cpp,v 1.4 2001/11/07 13:58:10 pick Exp $ - -#include -#include -#include -#include - -__GTL_BEGIN_NAMESPACE - -min_tree::min_tree() { - is_set_distances = false; - weight = 0; -} - -int min_tree::check(GTL::graph& g) { - if (g.is_directed()) return GTL_ERROR; - else if (g.number_of_nodes() < 2) return GTL_ERROR; - else if (!g.is_connected()) return GTL_ERROR; - else if (!is_set_distances) return GTL_ERROR; - else return GTL_OK; -} - -void min_tree::set_distances(const edge_map& dist) { - this->dist = dist; - is_set_distances = true; -} - -std::set min_tree::get_min_tree() { - return this->tree; -} - -int min_tree::get_min_tree_length() { - int sum; - std::set::iterator tree_it; - - sum = 0; - - for (tree_it = tree.begin(); tree_it != tree.end(); tree_it++) - sum += dist[*tree_it]; - - return sum; -} - -int min_tree::run(GTL::graph& g) { - std::priority_queue , input_comp> node_distances; - node::adj_edges_iterator adj_it, adj_end; - std::set tree_nodes; - std::set::iterator tree_it; - edge curr; - node new_node; - graph::edge_iterator edge_it, edges_end; - unsigned 
int number_of_nodes; - int min_dist; - - - // making out the start edge - - edge_it = g.edges_begin(); - edges_end = g.edges_end(); - - curr = *edge_it; - min_dist = dist[*edge_it]; - - for (; edge_it != edges_end; edge_it++) { - if (dist[*edge_it] < min_dist) { - curr = *edge_it; - min_dist = dist[*edge_it]; - } - } - - tree.insert(curr); - - tree_nodes.insert(curr.source()); - tree_nodes.insert(curr.target()); - - - for (tree_it = tree_nodes.begin(); tree_it != tree_nodes.end(); tree_it++) { - adj_it = tree_it->adj_edges_begin(); - adj_end = tree_it->adj_edges_end(); - - for (; adj_it != adj_end; adj_it++) { - node_distances.push(TSP_A_VALUE(dist[*adj_it], adj_it)); - } - } - - // create the min_tree - - number_of_nodes = g.number_of_nodes(); - - while (tree.size() < number_of_nodes - 1) { - curr = *((node_distances.top()).second); - - node_distances.pop(); - - if (tree_nodes.find(curr.source()) != tree_nodes.end() && - tree_nodes.find(curr.target()) != tree_nodes.end()) { - } - else { - tree.insert(curr); - weight += dist[curr]; - - if (tree_nodes.find(curr.source()) != tree_nodes.end()) { - new_node = curr.target(); - } - else { - new_node = curr.source(); - } - - tree_nodes.insert(new_node); - - adj_it = new_node.adj_edges_begin(); - adj_end = new_node.adj_edges_end(); - - for (; adj_it != adj_end; adj_it++) { - node_distances.push(TSP_A_VALUE(dist[*adj_it], adj_it)); - } - } - } - return GTL_OK; -} - -void min_tree::reset() -{ - tree.erase(tree.begin(), tree.end()); - weight = 0; -} - -__GTL_END_NAMESPACE diff --git a/src/Tracker/graph/GTL/src/node.cpp b/src/Tracker/graph/GTL/src/node.cpp deleted file mode 100644 index 5860ec204..000000000 --- a/src/Tracker/graph/GTL/src/node.cpp +++ /dev/null @@ -1,428 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// node.cpp -// 
-//========================================================================== -// $Id: node.cpp,v 1.18 2001/11/07 13:58:10 pick Exp $ - -#include -#include -#include -#include - -#include -#include - -__GTL_BEGIN_NAMESPACE - -node::node() : - data(0) -{ -} - -GTL_EXTERN std::ostream& operator<< (std::ostream& os, const node& n) { - if (n != node()) { - return os << "[" << n.id() << "]"; - } else { - return os << "[ UNDEF ]"; - } -} - -node::adj_nodes_iterator node::adj_nodes_begin() const -{ - return node::adj_nodes_iterator(*this, true); -} - -node::adj_nodes_iterator node::adj_nodes_end() const -{ - return node::adj_nodes_iterator(*this, false); -} - -node::adj_edges_iterator node::adj_edges_begin() const -{ - return node::adj_edges_iterator(*this, true); -} - -node::adj_edges_iterator node::adj_edges_end() const -{ - return node::adj_edges_iterator(*this, false); -} - -node::inout_edges_iterator node::inout_edges_begin() const -{ - return node::inout_edges_iterator(*this, true); -} - -node::inout_edges_iterator node::inout_edges_end() const -{ - return node::inout_edges_iterator(*this, false); -} - -node::in_edges_iterator node::in_edges_begin() const -{ - return data->edges[0].begin(); -} - -node::in_edges_iterator node::in_edges_end() const -{ - return data->edges[0].end(); -} - -node::out_edges_iterator node::out_edges_begin() const -{ - return data->edges[1].begin(); -} - -node::out_edges_iterator node::out_edges_end() const -{ - return data->edges[1].end(); -} - -int node::degree() const -{ - return outdeg() + indeg(); -} - -int node::outdeg() const -{ - return data->edges[1].size(); -} - -int node::indeg() const -{ - return data->edges[0].size(); -} - -int node::id() const -{ - return data->id; -} - -bool node::is_directed() const -{ - return data->owner->is_directed(); -} - -bool node::is_undirected() const -{ - return data->owner->is_undirected(); -} - -const node& node::opposite(GTL::edge e) const -{ - // not implemented for hypergraphs - 
assert(e.data); - - node& s = *(e.data->nodes[0].begin()); - if (*this == s) - return *(e.data->nodes[1].begin()); - else - return s; -} - -nodes_t node::opposites(GTL::edge) const -{ - // not implemented yet - return nodes_t(); // to avoid compiler warnings -} - -bool node::is_hidden () const -{ - return data->hidden; -} - -int node::excentricity() const -{ - bfs b; - b.start_node(*this); - b.calc_level(true); - b.run(*data->owner); - - node last_node = *(--b.end()); - - return b.level(last_node); -} - -GTL_EXTERN bool operator==(GTL::node v1, GTL::node v2) -{ - return v1.data == v2.data; -} - -GTL_EXTERN bool operator!=(GTL::node v1, GTL::node v2) -{ - return v1.data != v2.data; -} - -GTL_EXTERN bool operator<(GTL::node v1, GTL::node v2) -{ - return v1.data < v2.data; -} - -//-------------------------------------------------------------------------- -// adj_edges_iterator -//-------------------------------------------------------------------------- - -node::adj_edges_iterator::adj_edges_iterator() -{ -} - -node::adj_edges_iterator::adj_edges_iterator(GTL::node n, bool start) -{ - // iterators that are used everytime - last_edge[0] = n.out_edges_end(); - last_edge[1] = n.in_edges_end(); - directed = n.is_directed(); - if (!directed) - { - begin_edge[0] = n.out_edges_begin(); - begin_edge[1] = n.in_edges_begin(); - } - - // set at start or end - if (start) - { - inout = 0; - akt_edge[0] = n.out_edges_begin(); - if (!directed) - { - akt_edge[1] = n.in_edges_begin(); - if (akt_edge[0] == last_edge[0]) - inout = 1; - } - } - else - { - inout = directed ? 
0 : 1; - akt_edge[0] = n.out_edges_end(); - if (!directed) - akt_edge[1] = n.in_edges_end(); - } -} - -bool node::adj_edges_iterator::operator==(const - node::adj_edges_iterator& i) const -{ - return i.akt_edge[i.inout] == akt_edge[inout]; -} - -bool node::adj_edges_iterator::operator!=(const - node::adj_edges_iterator& i) const -{ - return i.akt_edge[i.inout] != akt_edge[inout]; -} - -node::adj_edges_iterator& node::adj_edges_iterator::operator++() -{ - if (directed) - ++akt_edge[inout]; - else - { - if (inout == 0) - { - ++akt_edge[0]; - if (akt_edge[0] == last_edge[0]) - ++inout; - } - else // inout == 1 - { - if (akt_edge[1] == last_edge[1]) - { - inout = 0; - akt_edge[0] = begin_edge[0]; - akt_edge[1] = begin_edge[1]; - if (begin_edge[0] == last_edge[0]) - inout = 1; - } - else - ++akt_edge[inout]; - } - } - return *this; -} - -node::adj_edges_iterator node::adj_edges_iterator::operator++(int) -{ - node::adj_edges_iterator tmp = *this; - operator++(); - return tmp; -} - -node::adj_edges_iterator& node::adj_edges_iterator::operator--() -{ - if (!directed && inout == 1 && akt_edge[1] == begin_edge[1]) - inout = 0; - --akt_edge[inout]; - return *this; -} - -node::adj_edges_iterator node::adj_edges_iterator::operator--(int) -{ - node::adj_edges_iterator tmp = *this; - operator--(); - return tmp; -} - -const edge& node::adj_edges_iterator::operator*() const -{ - return *akt_edge[inout]; -} - -const edge* node::adj_edges_iterator::operator->() const -{ - return akt_edge[inout].operator->(); -} - -//-------------------------------------------------------------------------- -// inout_edges_iterator -//-------------------------------------------------------------------------- - -node::inout_edges_iterator::inout_edges_iterator() -{ -} - -node::inout_edges_iterator::inout_edges_iterator(GTL::node n, bool start) -{ - // iterators that are used everytime - last_edge = n.in_edges_end(); - begin_edge = n.out_edges_begin(); - - // set at start or end - if (start) - { - inout 
= 0; - akt_edge[0] = n.in_edges_begin(); - akt_edge[1] = n.out_edges_begin(); - if (akt_edge[0] == last_edge) - inout = 1; - } - else - { - inout = 1; - akt_edge[0] = n.in_edges_end(); - akt_edge[1] = n.out_edges_end(); - } -} - -bool node::inout_edges_iterator::operator==(const - node::inout_edges_iterator& i) const -{ - return i.akt_edge[i.inout] == akt_edge[inout]; -} - -bool node::inout_edges_iterator::operator!=(const - node::inout_edges_iterator& i) const -{ - return i.akt_edge[i.inout] != akt_edge[inout]; -} - -node::inout_edges_iterator& node::inout_edges_iterator::operator++() -{ - ++akt_edge[inout]; - if ((akt_edge[inout] == last_edge) && (inout==0)) - ++inout; - return *this; -} - -node::inout_edges_iterator node::inout_edges_iterator::operator++(int) -{ - node::inout_edges_iterator tmp = *this; - operator++(); - return tmp; -} - -node::inout_edges_iterator& node::inout_edges_iterator::operator--() -{ - if (inout == 1 && (akt_edge[1] == begin_edge)) - inout = 0; - --akt_edge[inout]; - return *this; -} - -node::inout_edges_iterator node::inout_edges_iterator::operator--(int) -{ - node::inout_edges_iterator tmp = *this; - operator--(); - return tmp; -} - -const edge& node::inout_edges_iterator::operator*() const -{ - return *akt_edge[inout]; -} - -const edge* node::inout_edges_iterator::operator->() const -{ - return akt_edge[inout].operator->(); -} - -//-------------------------------------------------------------------------- -// adj_nodes_iterator -//-------------------------------------------------------------------------- - -node::adj_nodes_iterator::adj_nodes_iterator() -{ -} - -node::adj_nodes_iterator::adj_nodes_iterator(const node& n, bool start) -{ - int_node = n; - if (start) - akt_edge = n.adj_edges_begin(); - else - akt_edge = n.adj_edges_end(); -} - -bool node::adj_nodes_iterator::operator==(const - node::adj_nodes_iterator& i) const -{ - return i.akt_edge == akt_edge; -} - -bool node::adj_nodes_iterator::operator!=(const - 
node::adj_nodes_iterator& i) const -{ - return i.akt_edge != akt_edge; -} - -node::adj_nodes_iterator& node::adj_nodes_iterator::operator++() -{ - ++akt_edge; - return *this; -} - -node::adj_nodes_iterator node::adj_nodes_iterator::operator++(int) -{ - node::adj_nodes_iterator tmp = *this; - operator++(); - return tmp; -} - -node::adj_nodes_iterator& node::adj_nodes_iterator::operator--() -{ - --akt_edge; - return *this; -} - -node::adj_nodes_iterator node::adj_nodes_iterator::operator--(int) -{ - node::adj_nodes_iterator tmp = *this; - operator--(); - return tmp; -} - -const node& node::adj_nodes_iterator::operator*() const -{ - return int_node.opposite(*akt_edge); -} - -const node* node::adj_nodes_iterator::operator->() const -{ - return &(int_node.opposite(*akt_edge)); -} - -__GTL_END_NAMESPACE - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/src/planarity.cpp b/src/Tracker/graph/GTL/src/planarity.cpp deleted file mode 100644 index 5e67d5943..000000000 --- a/src/Tracker/graph/GTL/src/planarity.cpp +++ /dev/null @@ -1,1672 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// planarity.cpp -// -//========================================================================== -// $Id: planarity.cpp,v 1.28 2008/02/03 18:12:07 chris Exp $ - -#include -#include -#include -#include - -#include -#include -#include -#include - -__GTL_BEGIN_NAMESPACE - -//-------------------------------------------------------------------------- -// Planarity Test -//-------------------------------------------------------------------------- - - -planarity::planarity() : - algorithm (), emp (false), kup (false), bip (true) -{ -#ifdef _DEBUG - GTL_debug::init_debug(); -#endif -} - -planarity::~planarity() -{ -#ifdef 
_DEBUG - GTL_debug::close_debug(); -#endif -} - - -int planarity::check (GTL::graph& /*G*/) -{ - return algorithm::GTL_OK; -} - -bool planarity::run_on_biconnected (GTL::graph& G, planar_embedding& em) -{ - - if (G.number_of_edges() == 0) return algorithm::GTL_OK; - - st_number st_; - - // - // The graph may have self loops. Make sure that we - // choose a normal edge for st. - // - - graph::edge_iterator - edge_it = G.edges_begin(), - edge_end = G.edges_end(); - - edge st; - - while (edge_it != edge_end) { - if (edge_it->source() != edge_it->target()) { - st = *edge_it; - break; - } - ++edge_it; - } - - // - // G has only selfloops - // - - if (st == edge()) { - if (emp) { - em.init (G); - edge_it = G.edges_begin(); - edge_end = G.edges_end(); - - for (;edge_it != edge_end; ++edge_it) { - em.self.push_back (*edge_it); - } - } - - return algorithm::GTL_OK; - } - - st_.st_edge (st); - st_.s_node (st.source()); - int res = st_.check(G); - assert (res == algorithm::GTL_OK); - res = st_.run(G); - assert (res == algorithm::GTL_OK); - int size = G.number_of_nodes(); - - if (emp) { - em.init (G); - } - - std::list neighbors; - st_number::iterator st_it = st_.begin(); - node curr = *st_it; - node::out_edges_iterator o_it = curr.out_edges_begin(); - node::out_edges_iterator o_end = curr.out_edges_end(); - node::in_edges_iterator i_it = curr.in_edges_begin(); - node::in_edges_iterator i_end = curr.in_edges_end(); - edges_t self_loops; - node opp; - node_map visited_from (G, 0); - pq_leaf* tmp_leaf; - std::vector< std::list > leaves(size); - - for (; o_it != o_end; ++o_it) { - opp = curr.opposite (*o_it); - - if (opp != curr) { - if (visited_from[opp] == st_[curr] && emp) { - em.multi.push_back (*o_it); - } else { - visited_from[opp] = st_[curr]; - tmp_leaf = new pq_leaf (st_[opp], st_[curr], *o_it, opp); - leaves[st_[opp]-1].push_back (tmp_leaf); - neighbors.push_back (tmp_leaf); - } - - } else if (emp) { - em.self.push_back (*o_it); - } - } - - for (; i_it != i_end; ++i_it) 
{ - opp = curr.opposite (*i_it); - - if (opp != curr) { - if (visited_from[opp] == st_[curr] && emp) { - em.multi.push_back (*i_it); - } else { - visited_from[opp] = st_[curr]; - tmp_leaf = new pq_leaf (st_[opp], st_[curr], *i_it, opp); - leaves[st_[opp]-1].push_back (tmp_leaf); - neighbors.push_back (tmp_leaf); - } - } - } - - node_map > dirs; - - // - // There is a problem with node/edge maps of iterators with Visual C++ - // which I dont fully understand at the moment. Fortunatly the init for the - // maps below is only needed to allocate memory, which is done anyway, when - // values are assigned to it. - // - -#ifndef __GTL_MSVCC - dirs.init (G); -#endif - pq_tree PQ (st_[curr], curr, neighbors); - neighbors.erase (neighbors.begin(), neighbors.end()); - ++st_it; - curr = *st_it; - - while (st_[curr] < size) { - -#ifdef _DEBUG - char filename[10] = "out"; - char buffer[12]; -#ifdef __GTL_MSVCC - _snprintf (buffer, 12, "%s%d.gml", filename, st_[curr]); -#else - snprintf (buffer, 12, "%s%d.gml", filename, st_[curr]); -#endif - std::ofstream os(buffer, std::ios::out | std::ios::trunc); - os << PQ << std::endl; - os.close(); - bool ret_flag = PQ.integrity_check(); - assert(ret_flag); -#endif - - if (!PQ.reduce (leaves[st_[curr]-1])) { -#ifdef _DEBUG - os.open("fail.gml", std::ios::out | std::ios::trunc); - os << PQ << std::endl; - os.close (); -#endif - if (kup) { - examine_obstruction (G, st_, curr, - PQ.get_fail(), PQ.is_fail_root(), em, dirs, &PQ); - } - - PQ.reset(); - return false; - } - - - // - // It seems to be not very comfortable to use in and out iterators to - // go through the adjacency of a node. For graphs without selfloops this - // could be replaced by using adj_iterator, but if there are selfloops - // they will occur in both the list of outgoing and the one of incoming - // edges, and thus two times in the adjacency. 
- // - - o_it = curr.out_edges_begin(); - o_end = curr.out_edges_end(); - i_it = curr.in_edges_begin(); - i_end = curr.in_edges_end(); - - for (; o_it != o_end; ++o_it) { - opp = curr.opposite (*o_it); - - if (st_[opp] > st_[curr]) { - if (visited_from[opp] == st_[curr] && emp) { - em.multi.push_back (*o_it); - } else { - visited_from[opp] = st_[curr]; - tmp_leaf = new pq_leaf (st_[opp], st_[curr], *o_it, opp); - leaves[st_[opp]-1].push_back (tmp_leaf); - neighbors.push_back (tmp_leaf); - } - - } else if (st_[opp] == st_[curr] && emp) { - em.self.push_back (*o_it); - } - } - - for (; i_it != i_end; ++i_it) { - opp = curr.opposite (*i_it); - - if (st_[opp] > st_[curr]) { - if (visited_from[opp] == st_[curr] && emp) { - em.multi.push_back (*i_it); - } else { - visited_from[opp] = st_[curr]; - tmp_leaf = new pq_leaf (st_[opp], st_[curr], *i_it, opp); - leaves[st_[opp]-1].push_back (tmp_leaf); - neighbors.push_back (tmp_leaf); - } - } - } - - if (emp) { - PQ.replace_pert (st_[curr], curr, neighbors, &em, &(dirs[curr])); -#ifdef _DEBUG - GTL_debug::os() << "Embedding of " << st_[curr] << ":: "; - planar_embedding::iterator adit, adend; - for (adit = em.adj_edges_begin (curr), adend = em.adj_edges_end (curr); adit != adend; ++adit) { - GTL_debug::os() << "[" << st_[curr.opposite (*adit)] << "]"; - } - GTL_debug::os() << std::endl; - GTL_debug::os() << "Direction Indicators for: " << st_[curr] << ":: "; - std::list::iterator dit, dend; - - for (dit = dirs[curr].begin(), dend = dirs[curr].end(); dit != dend; ++dit) { - GTL_debug::os() << "["; - if (dit->direction) - GTL_debug::os() << ">> " << dit->id << " >>"; - else - GTL_debug::os() << "<< " << dit->id << " <<"; - GTL_debug::os() << "]"; - } - GTL_debug::os() << std::endl; -#endif - - } else { - PQ.replace_pert (st_[curr], curr, neighbors); - } - - PQ.reset(); - - - neighbors.erase (neighbors.begin(), neighbors.end()); - ++st_it; - curr = *st_it; - } - - if (emp) { - PQ.get_frontier (em, dirs[curr]); - - - // - // get 
self_loops for last node - // - - o_it = curr.out_edges_begin(); - o_end = curr.out_edges_end(); - - for (; o_it != o_end; ++o_it) { - if (o_it->target() == o_it->source()) { - em.self.push_back (*o_it); - } - } - - // - // some adjcacency list of the embedding obtained so far have to be - // turned. - // - - correct_embedding(em, st_, dirs); - - node_map mark; - mark.init (G, 0); - node_map::iterator > upward_begin; - upward_begin.init (G); - node tmp; - - forall_nodes (tmp, G) { - upward_begin[tmp] = em.adjacency(tmp).begin(); - } - - extend_embedding(curr, em, mark, upward_begin); - } - - return true; -} - - -int planarity::run (GTL::graph& G) -{ - bool directed = false; - - if (G.is_directed()) { - G.make_undirected(); - directed = true; - } - - biconnectivity biconn; - - if (bip) { - biconn.make_biconnected (true); - } else { - biconn.store_components (true); - } - - biconn.check (G); - biconn.run (G); - - if (emp) { - embedding.init (G); - } - - planar_embedding em; - - if (!biconn.is_biconnected() && !bip) { - biconnectivity::component_iterator c_it, c_end; - - for (c_it = biconn.components_begin(), c_end = biconn.components_end(); - c_it != c_end; ++c_it) { - - switch_to_component (G, c_it); - -#ifdef _DEBUG - GTL_debug::os() << "Component is: " << std::endl; - GTL_debug::os() << G << std::endl; -#endif - if (!run_on_biconnected (G, em)) { - if (directed) { - G.make_directed(); - } - - G.restore_graph(); - planar = false; - return algorithm::GTL_OK; - } - - if (emp) { - add_to_embedding (G, em); - } - } - - G.restore_graph(); - - } else { - - // - // G is already biconnected - // - - GTL_debug::debug_message ("graph is biconnected\n"); - - if (!run_on_biconnected (G, embedding)) { - if (directed) { - G.make_directed(); - } - - planar = false; - return algorithm::GTL_OK; - } - } - - if (bip) { - edges_t::iterator it, end; - it = biconn.additional_begin(); - end = biconn.additional_end(); - - for (; it != end; ++it) { - - if (emp) { - node s = it->source(); - 
node t = it->target(); - embedding.adj[s].erase (embedding.s_pos[*it]); - embedding.adj[t].erase (embedding.t_pos[*it]); - } - - G.del_edge (*it); - } - } - - if (directed) { - G.make_directed(); - } - - planar = true; - return algorithm::GTL_OK; -} - -void planarity::add_to_embedding (GTL::graph& G, planar_embedding& em) -{ - node n; - forall_nodes (n, G) { - planar_embedding::iterator it = em.adj_edges_begin (n); - planar_embedding::iterator end = em.adj_edges_end (n); - - for (; it != end; ++it) { - embedding.pos (n, *it) = em.pos (n, *it); - } - - embedding.adjacency(n).splice ( - embedding.adj_edges_end (n), - em.adj_edges_begin (n), - em.adj_edges_end (n)); - } - - embedding.self.splice ( - embedding.self.end(), - em.self, em.self.begin(), em.self.end()); - embedding.multi.splice ( - embedding.multi.end(), - em.multi, em.multi.begin(), em.multi.end()); -} - - -void planarity::reset () -{ - ob_edges.erase (ob_edges.begin(), ob_edges.end()); - ob_nodes.erase (ob_nodes.begin(), ob_nodes.end()); -} - - -void planarity::correct_embedding ( - planar_embedding& em, - st_number& st_, - node_map >& dirs) -{ - st_number::reverse_iterator it = st_.rbegin(); - st_number::reverse_iterator end = st_.rend(); - bool* turn = new bool[st_[*it]]; - - for (int i = 0; i < st_[*it]; ++i) { - turn[i] = false; - } - - while (it != end) { - node curr = *it; - - if (turn[st_[curr] - 1]) { - em.adjacency(curr).reverse(); - } - - std::list::iterator d_it = dirs[curr].begin(); - - while (!dirs[curr].empty()) - { - - if ((d_it->direction && turn[st_[curr] - 1]) || - (!d_it->direction && !turn[st_[curr] - 1])) - { - turn[d_it->id - 1] = true; - } - - d_it = dirs[curr].erase (d_it); - } - - ++it; - } - - delete[] turn; -} - - -void planarity::extend_embedding ( - node n, - planar_embedding& em, - node_map& mark, - node_map::iterator >& upward_begin) -{ - mark[n] = 1; - - symlist::iterator it = upward_begin[n]; - symlist::iterator end = em.adjacency(n).end(); - node other; - - for (; it != 
end; ++it) { - em.pos (n, *it) = it; - other = n.opposite (*it); - em.pos (other, *it) = em.push_front (other, *it); - - if (mark[other] == 0) { - extend_embedding (other, em, mark, upward_begin); - } - } -} - -void planarity::switch_to_component (GTL::graph& G, - biconnectivity::component_iterator c_it) -{ - // - // hide all nodes - // - - nodes_t dummy; - G.induced_subgraph (dummy); - - // - // Restore nodes in this component. - // - - nodes_t::iterator it = c_it->first.begin(); - nodes_t::iterator end = c_it->first.end(); - - for (; it != end; ++it) { - G.restore_node (*it); - } - - // - // Restore edges in this component. - // - - edges_t::iterator e_it = c_it->second.begin(); - edges_t::iterator e_end = c_it->second.end(); - - for (; e_it != e_end; ++e_it) { - G.restore_edge (*e_it); - } -} - -void planarity::examine_obstruction (GTL::graph& G, - st_number& st_, - node act, - pq_node* fail, - bool is_root, - planar_embedding& em, - node_map >& dirs, - pq_tree* PQ) -{ - node_map used (G, 0); - node_map to_father (G); - - // - // Create a dfs-tree of the so called bush form. This is basically a normal dfs - // applied to the induced subgraph of G consisting only of the nodes with st_number - // 1, ..., st_[act] - 1. The only difference is that edges are always directed from - // the lower numbered vertex to higher numbered one. - // - - dfs_bushform (st_.s_node(), used, st_, st_[act], to_father); - - if (fail->kind() == pq_node::Q_NODE) { - - // - // In case the reduction failed at a Q-Node we need to know the edges that - // form the boundary of the biconnected component, which this Q-Node represents. - // These can easily be obtained from the embedding we got so far. 
- // - - q_node* q_fail = fail->Q(); - - pq_tree::sons_iterator s_it = q_fail->sons.begin(); - pq_tree::sons_iterator s_end = q_fail->sons.end(); - node greatest = fail->n; - - while (s_it != s_end) { - if ((*s_it)->kind() == pq_node::DIR) { - direction_indicator* dir = (*s_it)->D(); - pq_tree::sons_iterator tmp = s_it; - - if (++tmp == ++(dir->pos)) { - dir->direction = true; - } else { - dir->direction = false; - } - - dirs[act].push_back (*dir); - - // - // chris 2/3/2008: - // - // To avoid a memory leak, it is not sufficient to erase it from the - // PQ-tree (-node). The direction indicator object also has to be - // deleted. Since it is then not a member of the pertinent subtree any - // more, it must not be cleared by PQ->reset(). The instance in the - // dirs node map is a clone! - // - - // s_it = q_fail->sons.erase (s_it); - s_it = PQ->remove_dir_ind(q_fail, s_it); - } else { - if (st_[(*s_it)->up] > st_[greatest]) { - greatest = (*s_it)->up; - } - - ++s_it; - } - } - - correct_embedding (em, st_, dirs); - node_map mark; - mark.init (G, 0); - node_map::iterator > upward_begin; - upward_begin.init (G); - node tmp; - - em.adjacency(fail->n).erase ( - em.adjacency(fail->n).begin(), - em.adjacency(fail->n).end()); - - forall_nodes (tmp, G) { - upward_begin[tmp] = em.adjacency(tmp).begin(); - } - - // - // chris 2/3/2008: - // - // With the code of MR 11/27/2001 the component of the failing Q-node is not found - // correctly. - // - - extend_embedding(greatest, em, mark, upward_begin); - - /* - // - // MR 11/27/2001: - // - // This is important! We restricted building the embedding to the nodes in - // the biconnected component which the Q-node fail refers to. But the st-number - // obtained for the whole graph restricted to these nodes will not be a st-numbering - // for this biconnected component. 
- // - - st_number::reverse_iterator st_it, st_end; - - for (st_it = st_.rbegin(), st_end = st_.rend(); - st_it != st_end; - ++st_it) - { - if (mark[*st_it] == 0) { - extend_embedding (*st_it, em, mark, upward_begin); - } - } - */ -#ifdef _DEBUG - GTL_debug::os() << "Embedding so far (st_numbered): " << std::endl; - em.write_st (GTL_debug::os(), st_); -#endif - - attachment_cycle (fail->n, em); - - if (!q_fail->pert_cons) { - - // - // the reduction failed because there was more than one block - // of pertinent children. The reduction in this case assures that - // pert_begin and pert_end lie in different blocks and that - // --pert_end is empty and lies between these two blocks. - // - // This is one of the two cases that may apply when the reduction - // fails already in bubble up. The reduction takes care of this. - // - - GTL_debug::debug_message ("CASE C (non consecutive pertinent children)\n"); - pq_tree::sons_iterator tmp = q_fail->pert_begin; - pq_leaf* leaves[3]; - node nodes[3]; - leaves[0] = search_full_leaf (*tmp); - nodes[0] = (*tmp)->up; - - tmp = q_fail->pert_end; - leaves[2] = search_full_leaf (*tmp); - nodes[2] = (*tmp)->up; - - --tmp; - while ((*tmp)->kind() == pq_node::DIR) { - --tmp; - } - - leaves[1] = search_empty_leaf (*tmp); - nodes[1] = (*tmp)->up; - - case_C (nodes, leaves, st_, to_father, G, q_fail); - - } else if (!(*(q_fail->pert_end))->is_endmost && !is_root) { - - GTL_debug::debug_message ("CASE D (non-root q-node with both endmost sons empty)\n"); - pq_tree::sons_iterator tmp = q_fail->sons.begin(); - pq_leaf* leaves[3]; - node nodes[3]; - leaves[0] = search_empty_leaf (*tmp); - nodes[0] = (*tmp)->up; - - tmp = --(q_fail->sons.end()); - leaves[2] = search_empty_leaf (*tmp); - nodes[2] = (*tmp)->up; - - tmp = q_fail->pert_begin; - leaves[1] = search_full_leaf (*tmp); - nodes[1] = (*tmp)->up; - - case_D (nodes, leaves, st_, to_father, G, q_fail); - - } else if (q_fail->partial_count == 1) { - if (q_fail->partial_pos[0] == 
q_fail->pert_end) { - GTL_debug::debug_message ("CASE D (non-root q-node with partial child at end of pertinent children\n"); - pq_tree::sons_iterator tmp = q_fail->sons.begin(); - pq_leaf* leaves[3]; - node nodes[3]; - leaves[0] = search_empty_leaf (*tmp); - nodes[0] = (*tmp)->up; - - tmp = q_fail->pert_end; - leaves[2] = search_empty_leaf (*tmp); - nodes[2] = (*tmp)->up; - - tmp = q_fail->pert_begin; - leaves[1] = search_full_leaf (*tmp); - nodes[1] = (*tmp)->up; - - case_D (nodes, leaves, st_, to_father, G, q_fail); - } else { - GTL_debug::debug_message ("CASE C (q-node with partial children surrounded by pertinent children)\n"); - pq_tree::sons_iterator tmp = q_fail->pert_begin; - pq_leaf* leaves[3]; - node nodes[3]; - leaves[0] = search_full_leaf (*tmp); - nodes[0] = (*tmp)->up; - - tmp = q_fail->pert_end; - leaves[2] = search_full_leaf (*tmp); - nodes[2] = (*tmp)->up; - - tmp = q_fail->partial_pos[0]; - leaves[1] = search_empty_leaf (*tmp); - nodes[1] = (*tmp)->up; - - - case_C (nodes, leaves, st_, to_father, G, q_fail); - } - - } else if ((q_fail->partial_pos[0] == q_fail->pert_begin || - q_fail->partial_pos[0] == q_fail->pert_end) && - (q_fail->partial_pos[1] == q_fail->pert_begin || - q_fail->partial_pos[1] == q_fail->pert_end)) { - - if (++(q_fail->sons.begin()) == --(q_fail->sons.end())) { - - // - // q_node with two children, which are both partial. 
- // - - pq_tree::sons_iterator tmp = q_fail->sons.begin(); - pq_leaf* leaves[4]; - node nodes[2]; - leaves[0] = search_empty_leaf (*tmp); - nodes[0] = (*tmp)->up; - leaves[1] = search_full_leaf (*tmp); - - ++tmp; - leaves[2] = search_empty_leaf (*tmp); - nodes[1] = (*tmp)->up; - leaves[3] = search_full_leaf (*tmp); - - case_E (nodes, leaves, st_, to_father, G, q_fail); - - } else if (q_fail->partial_count == 2) { - GTL_debug::debug_message ("CASE D (non-root q_node with first and last pertinent children partial)\n"); - - // - // sons.begin() is empty, pert_begin is partial, pert_end is partial - // - - pq_tree::sons_iterator tmp = q_fail->sons.begin(); - pq_leaf* leaves[3]; - node nodes[3]; - leaves[0] = search_empty_leaf (*tmp); - nodes[0] = (*tmp)->up; - - tmp = q_fail->pert_end; - leaves[2] = search_empty_leaf (*tmp); - nodes[2] = (*tmp)->up; - - tmp = q_fail->pert_begin; - - if (tmp == q_fail->sons.begin()) { - ++tmp; - } - - leaves[1] = search_full_leaf (*tmp); - nodes[1] = (*tmp)->up; - - case_D (nodes, leaves, st_, to_father, G, q_fail); - - } else { - GTL_debug::debug_message ("CASE C (q_node with at least three partial children)\n"); - - // - // There must be at least one other partial child among the pertinent. - // - - pq_tree::sons_iterator tmp = q_fail->pert_begin; - pq_leaf* leaves[3]; - node nodes[3]; - leaves[0] = search_full_leaf (*tmp); - nodes[0] = (*tmp)->up; - - tmp = q_fail->pert_end; - leaves[2] = search_full_leaf (*tmp); - nodes[2] = (*tmp)->up; - - tmp = q_fail->partial_pos[2]; - leaves[1] = search_empty_leaf (*tmp); - nodes[1] = (*tmp)->up; - - case_C (nodes, leaves, st_, to_father, G, q_fail); - } - - } else { - - // - // At least one partial son is in between the pertinent sons. 
- // - - GTL_debug::debug_message ("CASE C (q_node with at least two partial children, at least one surrounded by pertinent)\n"); - pq_tree::sons_iterator tmp = q_fail->pert_begin; - pq_leaf* leaves[3]; - node nodes[3]; - leaves[0] = search_full_leaf (*tmp); - nodes[0] = (*tmp)->up; - - tmp = q_fail->pert_end; - leaves[2] = search_full_leaf (*tmp); - nodes[2] = (*tmp)->up; - - tmp = q_fail->partial_pos[0]; - - if (tmp == q_fail->pert_begin || tmp == q_fail->pert_end) { - tmp = q_fail->partial_pos[1]; - } - - leaves[1] = search_empty_leaf (*tmp); - nodes[1] = (*tmp)->up; - - case_C (nodes, leaves, st_, to_father, G, q_fail); - } - - } else { - - // - // pert_root is a P-Node ==> at least two partial children. - // - - p_node* p_fail = fail->P(); - - if (p_fail->partial_count == 2) { - GTL_debug::debug_message ("CASE B (non-root p-node with two partial children)\n"); - case_B (p_fail, act, st_, to_father, G); - - } else { - - // - // We have at least three partial children - // - - GTL_debug::debug_message ("CASE A (p-node with at least three partial children)\n"); - case_A (p_fail, act, st_, to_father, G); - } - } -} - - - -void planarity::case_A (p_node* p_fail, - node act, - st_number& st_, - node_map to_father, - graph& G) -{ - node art = p_fail->n; - ob_nodes.push_back (art); - ob_nodes.push_back (act); - node_map mark (G, 0); - mark[art] = 1; - pq_leaf* empty[3]; - pq_tree::sons_iterator part_pos = p_fail->partial_sons.begin(); - int i; - - for (i = 0; i < 3; ++i) { - q_node* q_part = (*part_pos)->Q(); - empty[i] = run_through_partial (q_part, mark, to_father, art); - ++part_pos; - } - - node t_node = st_.s_node().opposite (st_.st_edge()); - mark[t_node] = 1; - node tmp[3]; - - for (i = 0; i < 3; ++i) { - tmp[i] = up_until_marked (empty[i]->n, mark, st_); - } - - assert (tmp[0] == t_node); - node tmp_node; - - // - // The three paths found meet at the nodes tmp[1] and tmp[2]; the one - // one with the higher st_number is the one we are looking for. 
Since the - // first path always ends at t and it may happen that the paths meet below - // t, we might have to delete some of the edges found in the first path. - // - - if (st_[tmp[1]] < st_[tmp[2]]) { - tmp_node = tmp[2]; - ob_nodes.push_back (tmp[1]); - } else { - tmp_node = tmp[1]; - ob_nodes.push_back (tmp[2]); - } - - - if (tmp_node != t_node) { - edges_t::iterator it, end; - int max_st = st_[tmp_node]; - - it = ob_edges.begin(); - end = ob_edges.end(); - - while (it != end) { - edge cur = *it; - - if (st_[cur.source()] > max_st || st_[cur.target()] > max_st) { - it = ob_edges.erase (it); - } else { - ++it; - } - } - } -} - - - -void planarity::case_B (p_node* p_fail, - node act, - st_number& st_, - node_map to_father, - graph& G) -{ - // - // P-Node, which is not the root of the pertinent subtree, but has - // two partial children. - // - - node art = p_fail->n; - ob_nodes.push_back (art); - ob_nodes.push_back (act); - node_map mark (G, 0); - node_map below (G, 0); - mark[art] = 1; - - // - // mark edges leading to full leaves from full sons. - // - - pq_tree::sons_iterator it, end; - for (it = p_fail->full_sons.begin(), end = p_fail->full_sons.end(); it != end; ++it) { - mark_all_neighbors_of_leaves (*it, below); - } - - // - // search paths from one full and one empty leaf to the articulation point - // in TBk. mark edges leading to full leaves from pertinent sons of part. - // - - pq_tree::sons_iterator part_pos = p_fail->partial_sons.begin(); - q_node* q_part = (*part_pos)->Q(); - pq_leaf* empty1 = run_through_partial (q_part, mark, to_father, art); - - for (it = q_part->pert_begin, end = ++(q_part->pert_end); it != end; ++it) { - mark_all_neighbors_of_leaves (*it, below); - } - - // - // search paths from one full and one empty leaf to the articulation point - // in TBk. mark edges leading to full leaves from pertinent sons of part. 
- // - - ++part_pos; - q_part = (*part_pos)->Q(); - pq_leaf* empty2 = run_through_partial (q_part, mark, to_father, art); - - - for (it = q_part->pert_begin, end = ++(q_part->pert_end); it != end; ++it) { - mark_all_neighbors_of_leaves (*it, below); - } - - // - // now that all the adjacent edges of act, that lead to art in TBk have been - // marked search an unmarked adj. edge of act, - // - - node::adj_edges_iterator a_it, a_end; - - for (a_it = act.adj_edges_begin(), a_end = act.adj_edges_end(); a_it != a_end; ++a_it) { - if (below[act.opposite (*a_it)] == 0 && st_[act.opposite (*a_it)] < st_[act]) { - break; - } - } - - assert (a_it != a_end); - mark[st_.s_node()] = 1; - mark[art] = 0; - node tmp = up_until_marked (art, mark, to_father); - assert (tmp == st_.s_node()); - tmp = up_until_marked (act.opposite (*a_it), mark, to_father); - assert(tmp != art); - ob_nodes.push_back (tmp); - ob_edges.push_back (*a_it); - ob_edges.push_back (st_.st_edge()); - - // - // search from empty1 and empty2 to t. 
- // - - node t_node = st_.s_node().opposite (st_.st_edge()); - mark[t_node] = 1; - tmp = up_until_marked (empty1->n, mark, st_); - assert (tmp == t_node); - tmp = up_until_marked (empty2->n, mark, st_); - ob_nodes.push_back (tmp); -} - - -void planarity::case_C (GTL::node* nodes, - pq_leaf** leaves, - st_number& st_, - node_map to_father, - graph& G, - q_node* q_fail) -{ - int i; - node_map mark (G, 0); - node y_0 = q_fail->n; - - for (i = 0; i < 3; ++i) { - mark[nodes[i]] = 1; - edge tmp_edge = leaves[i]->e; - node tmp_node = leaves[i]->n; - ob_edges.push_back (tmp_edge); - tmp_node = up_until_marked (tmp_node.opposite (tmp_edge), mark, to_father); - assert (tmp_node == nodes[i]); - ob_nodes.push_back (nodes[i]); - } - - ob_nodes.push_back (y_0); - mark[st_.s_node()] = 1; - node tmp = up_until_marked (y_0, mark, to_father); - assert (tmp == st_.s_node ()); - - ob_nodes.push_back (leaves[2]->n); - ob_edges.push_back (st_.st_edge()); - - node t_node = st_.s_node().opposite (st_.st_edge()); - mark[t_node] = 1; - tmp = up_until_marked (leaves[1]->n, mark, st_); - assert (tmp == t_node); - tmp = up_until_marked (leaves[2]->n, mark, st_); - ob_nodes.push_back (tmp); -} - - -void planarity::case_D (GTL::node* nodes, - pq_leaf** leaves, - st_number& st_, - node_map to_father, - graph& G, - q_node* q_fail) -{ - // - // Mark all edges from leaves leading to this component. 
- // - - node y_0 = q_fail->n; - pq_tree::sons_iterator it, end; - node_map below (G, 0); - node act = leaves[1]->n; - - for (it = q_fail->sons.begin(), end = q_fail->sons.end(); it != end; ++it) { - mark_all_neighbors_of_leaves (*it, below); - } - - node::adj_edges_iterator a_it, a_end; - - for (a_it = act.adj_edges_begin(), a_end = act.adj_edges_end(); a_it != a_end; ++a_it) { - if (below[act.opposite (*a_it)] == 0 && st_[act.opposite (*a_it)] < st_[act]) { - break; - } - } - - - // - // Since q_fail can't be the root of the pertinent subtree, there must - // be at least one edge from act not leading to the component described by - // q_fail. - // - - assert (a_it != a_end); - - int i; - node_map mark (G, 0); - - for (i = 0; i < 3; ++i) { - mark[nodes[i]] = 1; - edge tmp_edge = leaves[i]->e; - node tmp_node = leaves[i]->n; - ob_edges.push_back (tmp_edge); - tmp_node = up_until_marked (tmp_node.opposite (tmp_edge), mark, to_father); - assert (tmp_node == nodes[i]); - ob_nodes.push_back (nodes[i]); - } - - ob_nodes.push_back (y_0); - mark[st_.s_node()] = 1; - node tmp = up_until_marked (y_0, mark, to_father); - assert (tmp == st_.s_node ()); - ob_edges.push_back (*a_it); - tmp = up_until_marked (act.opposite (*a_it), mark, to_father); - - - // - // The paths from y_0 and from act meet in tmp. If tmp != s_node we have - // to delete some edges. 
- // - - if (tmp != st_.s_node()) { - edges_t::iterator it, end; - int min_st = st_[tmp]; - it = ob_edges.begin(); - end = ob_edges.end(); - - while (it != end) { - edge cur = *it; - - if (st_[cur.source()] < min_st || st_[cur.target()] < min_st) { - it = ob_edges.erase (it); - } else { - ++it; - } - } - } - - ob_nodes.push_back (act); - - node t_node = st_.s_node().opposite (st_.st_edge()); - mark[t_node] = 1; - node tmp_nodes[3]; - - for (i = 0; i < 3; ++i) { - tmp_nodes[i] = up_until_marked (leaves[i]->n, mark, st_); - } - - assert (tmp_nodes[0] == t_node); - - // - // The three paths found meet at the nodes tmp[1] and tmp[2]; the one - // one with the higher st_number is the one we are looking for. Since the - // first path always ends at t and it may happen that the paths meet below - // t, we might have to delete some of the edges found in the first path. - // - - if (st_[tmp_nodes[1]] < st_[tmp_nodes[2]]) { - tmp = tmp_nodes[2]; - ob_nodes.push_back (tmp_nodes[1]); - } else { - tmp = tmp_nodes[1]; - ob_nodes.push_back (tmp_nodes[2]); - } - - - if (tmp != t_node) { - edges_t::iterator it, end; - int max_st = st_[tmp]; - it = ob_edges.begin(); - end = ob_edges.end(); - - while (it != end) { - edge cur = *it; - - if (st_[cur.source()] > max_st || st_[cur.target()] > max_st) { - it = ob_edges.erase (it); - } else { - ++it; - } - } - } -} - - -void planarity::case_E (GTL::node* nodes, - pq_leaf** leaves, - st_number& st_, - node_map to_father, - graph& G, - q_node* q_fail) -{ - - // - // Mark all edges from the act node leading to this component. 
- // - - node y_0 = q_fail->n; - pq_tree::sons_iterator it, end; - node_map below (G, 0); - node act = leaves[1]->n; - - for (it = q_fail->pert_begin, end = ++(q_fail->pert_end); it != end; ++it) { - mark_all_neighbors_of_leaves (*it, below); - } - - node::adj_edges_iterator a_it, a_end; - - for (a_it = act.adj_edges_begin(), a_end = act.adj_edges_end(); a_it != a_end; ++a_it) { - if (below[act.opposite (*a_it)] == 0 && st_[act.opposite (*a_it)] < st_[act]) { - break; - } - } - - - // - // Since q_fail can't be the root of the pertinent subtree, there must - // be at least one edge from act not leading to the component described by - // q_fail. - // - - assert (a_it != a_end); - - // - // The list ob_edges at the moment contains the boundary. we need to know the paths - // from y_0 to nodes[0] ( = y_1), from nodes[0] to nodes[1] ( = y_2 ) and from nodes[1] - // back to y_0, because some of them will be eventually deleted later. - // - - edges_t::iterator paths_begin[3]; - edges_t::iterator l_it, l_end; - node next = y_0; - - for (l_it = ob_edges.begin(), l_end = ob_edges.end(); l_it != l_end; ++l_it) { - next = next.opposite (*l_it); - - if (next == nodes[1]) { - node tmp = nodes[1]; - nodes[1] = nodes[0]; - nodes[0] = tmp; - pq_leaf* tmp_leaf = leaves[2]; - leaves[2] = leaves[0]; - leaves[0] = tmp_leaf; - tmp_leaf = leaves[3]; - leaves[3] = leaves[1]; - leaves[1] = tmp_leaf; - - paths_begin[0] = l_it; - ++paths_begin[0]; - break; - } else if (next == nodes[0]) { - paths_begin[0] = l_it; - ++paths_begin[0]; - break; - } - } - - assert (l_it != l_end); - ++l_it; - assert (l_it != l_end); - - for (; l_it != l_end; ++l_it) { - next = next.opposite (*l_it); - - if (next == nodes[1]) { - paths_begin[1] = l_it; - ++paths_begin[1]; - break; - } - } - - assert (l_it != l_end); - - paths_begin[2] = --l_end; - - node y[3]; - int i; - node_map mark (G, 0); - edges_t from_act[3]; - edges_t::iterator pos; - - for (i = 0; i < 2; ++i) { - mark[nodes[i]] = 1; - edge tmp_edge = 
leaves[2 * i]->e; - node tmp_node = leaves[2 * i]->n; - ob_edges.push_back (tmp_edge); - tmp_node = up_until_marked (tmp_node.opposite (tmp_edge), mark, to_father); - assert (tmp_node == nodes[i]); - tmp_edge = leaves[2 * i + 1]->e; - tmp_node = leaves[2 * i + 1]->n; - pos = ob_edges.insert (ob_edges.end(), tmp_edge); - y[i + 1] = up_until_marked (tmp_node.opposite (tmp_edge), mark, to_father); - from_act[i + 1].splice (from_act[i + 1].begin(), ob_edges, pos, ob_edges.end()); - } - - mark[st_.s_node()] = 1; - node tmp = up_until_marked (y_0, mark, to_father); - assert (tmp == st_.s_node ()); - pos = ob_edges.insert (ob_edges.end(), *a_it); - y[0] = up_until_marked (act.opposite (*a_it), mark, to_father); - from_act[0].splice (from_act[0].begin(), ob_edges, pos, ob_edges.end()); - - node t_node = st_.s_node().opposite (st_.st_edge()); - mark[t_node] = 1; - node tmp_nodes[3]; - node_map from_where (G, 0); - - for (i = 0; i < 2; ++i) { - pos = --(ob_edges.end()); - tmp_nodes[i] = up_until_marked (leaves[2 * i]->n, mark, st_); - for (l_it = ++pos, l_end = ob_edges.end(); l_it != l_end; ++l_it) { - from_where[l_it->source()] = i + 1; - from_where[l_it->target()] = i + 1; - } - } - - assert (tmp_nodes[0] == t_node); - - if (y_0 != y[0]) { - ob_nodes.push_back (y_0); - ob_nodes.push_back (y[0]); - ob_nodes.push_back (y[1]); - ob_nodes.push_back (y[2]); - ob_nodes.push_back (act); - ob_nodes.push_back (tmp_nodes[1]); - - l_it = paths_begin[0]; - l_end = paths_begin[1]; - ob_edges.erase (l_it, l_end); - - for (i = 0; i < 3; ++i) { - ob_edges.splice (ob_edges.end(), from_act[i], from_act[i].begin(), from_act[i].end()); - } - - GTL_debug::debug_message ("CASE E(i)\n"); - - } else if (nodes[0] != y[1]) { - ob_nodes.push_back (y_0); - ob_nodes.push_back (y[1]); - ob_nodes.push_back (nodes[0]); - ob_nodes.push_back (y[2]); - ob_nodes.push_back (act); - ob_nodes.push_back (tmp_nodes[1]); - l_it = paths_begin[1]; - l_end = paths_begin[2]; - ++l_end; - ob_edges.erase (l_it, l_end); 
- - for (i = 0; i < 3; ++i) { - ob_edges.splice (ob_edges.end(), from_act[i], from_act[i].begin(), from_act[i].end()); - } - - GTL_debug::debug_message ("CASE E(ii)\n"); - - } else if (nodes[1] != y[2]) { - ob_nodes.push_back (y_0); - ob_nodes.push_back (y[1]); - ob_nodes.push_back (nodes[1]); - ob_nodes.push_back (y[2]); - ob_nodes.push_back (act); - ob_nodes.push_back (tmp_nodes[1]); - l_it = ob_edges.begin(); - l_end = paths_begin[0]; - ob_edges.erase (l_it, l_end); - - for (i = 0; i < 3; ++i) { - ob_edges.splice (ob_edges.end(), from_act[i], from_act[i].begin(), from_act[i].end()); - } - - GTL_debug::debug_message ("CASE E(ii)\n"); - - } else { - tmp_nodes[2] = up_until_marked (leaves[1]->n, mark, st_); - ob_nodes.push_back (y_0); - ob_nodes.push_back (y[1]); - ob_nodes.push_back (tmp_nodes[1]); - ob_nodes.push_back (y[2]); - ob_nodes.push_back (act); - - if (st_[tmp_nodes[1]] < st_[tmp_nodes[2]]) { - ob_nodes.push_back (tmp_nodes[2]); - l_it = paths_begin[0]; - l_end = paths_begin[1]; - ob_edges.erase (l_it, l_end); - - for (i = 1; i < 3; ++i) { - ob_edges.splice (ob_edges.end(), from_act[i], from_act[i].begin(), from_act[i].end()); - } - - GTL_debug::debug_message ("CASE E(iii) (1)\n"); - - } else if (st_[tmp_nodes[1]] > st_[tmp_nodes[2]]) { - edge last_edge = *(--(ob_edges.end())); - ob_nodes.push_back (tmp_nodes[2]); - ob_edges.splice (ob_edges.end(), from_act[0], from_act[0].begin(), from_act[0].end()); - int from; - - if (from_where[last_edge.source()] > 0) { - from = from_where[last_edge.source()]; - } else { - from = from_where[last_edge.target()]; - } - - assert (from > 0); - - if (from == 1) { - ob_edges.splice (ob_edges.end(), from_act[2], from_act[2].begin(), from_act[2].end()); - - l_it = paths_begin[1]; - l_end = paths_begin[2]; - ++l_end; - ob_edges.erase (l_it, l_end); - - } else { - ob_edges.splice (ob_edges.end(), from_act[1], from_act[1].begin(), from_act[1].end()); - - l_it = ob_edges.begin(); - l_end = paths_begin[0]; - ob_edges.erase 
(l_it, l_end); - } - - GTL_debug::debug_message ("CASE E(iii) (2)\n"); - - } else { - for (i = 0; i < 3; ++i) { - ob_edges.splice (ob_edges.end(), from_act[i], from_act[i].begin(), from_act[i].end()); - } - - GTL_debug::debug_message ("CASE E(iii) (3)\n"); - } - } - - ob_edges.push_back (st_.st_edge()); -} - - -pq_leaf* planarity::search_full_leaf (pq_node* n) -{ - switch (n->kind()) { - case pq_node::LEAF: - return n->L(); - - case pq_node::P_NODE: - case pq_node::Q_NODE: - return search_full_leaf (*(--(n->sons.end()))); - - default: - assert (false); - return 0; - } -} - - -pq_leaf* planarity::search_empty_leaf (pq_node* n) -{ - switch (n->kind()) { - case pq_node::LEAF: - return n->L(); - - case pq_node::Q_NODE: - case pq_node::P_NODE: - return search_empty_leaf (*(n->sons.begin())); - - default: - assert (false); - return 0; - } -} - - - -void planarity::mark_all_neighbors_of_leaves (pq_node* act, GTL::node_map& mark) -{ - if (act->kind() == pq_node::LEAF) { - mark[((pq_leaf*)act)->e.opposite(((pq_leaf*)act)->n)] = 1; - } else { - pq_tree::sons_iterator it, end; - - for (it = act->sons.begin(), end = act->sons.end(); it != end; ++it) { - mark_all_neighbors_of_leaves (*it, mark); - } - } -} - - -pq_leaf* planarity::run_through_partial (q_node* part, GTL::node_map& mark, GTL::node_map& to_father, GTL::node v) -{ - pq_leaf* tmp = search_full_leaf (part); - edge tmp_edge = tmp->e; - node tmp_node = tmp->n; - ob_edges.push_back (tmp_edge); - tmp_node = up_until_marked (tmp_node.opposite (tmp_edge), mark, to_father); - - tmp = search_empty_leaf (part); - tmp_node = tmp->n; - tmp_edge = tmp->e; - ob_edges.push_back (tmp_edge); - tmp_node = up_until_marked (tmp_node.opposite (tmp_edge), mark, to_father); - assert (tmp_node != v); - ob_nodes.push_back (tmp_node); - - return tmp->L(); -} - - -node planarity::up_until_marked (GTL::node act, GTL::node_map& mark, GTL::node_map& to_father) -{ - while (mark[act] == 0) { - mark[act] = 1; - edge next = to_father[act]; - 
ob_edges.push_back (next); - act = act.opposite (next); - } - - return act; -} - -node planarity::up_until_marked (GTL::node act, GTL::node_map& mark, st_number& st_) -{ - while (mark[act] == 0) { - mark[act] = 1; - node opp; - node::adj_edges_iterator it, end; - - for (it = act.adj_edges_begin(), end = act.adj_edges_end(); it != end; ++it) { - opp = act.opposite (*it); - if (st_[opp] > st_[act]) { - break; - } - } - - assert (it != end); - ob_edges.push_back (*it); - act = opp; - } - - return act; -} - -void planarity::attachment_cycle (GTL::node start, planar_embedding& em) -{ - edge act = em.adjacency(start).front(); - node next = start.opposite (act); - ob_edges.push_back (act); - - while (next != start) { - act = em.cyclic_next (next, act); - next = next.opposite (act); - ob_edges.push_back (act); - } -} - - -void planarity::dfs_bushform (GTL::node n, - node_map& used, - st_number& st_, - int stop, - node_map& to_father) -{ - used[n] = 1; - node::adj_edges_iterator it, end; - - for (it = n.adj_edges_begin(), end = n.adj_edges_end(); it != end; ++it) { - edge act = *it; - node other = n.opposite(act); - - if (used[other] == 0 && st_[other] < stop) { - to_father[other] = *it; - dfs_bushform (other, used, st_, stop, to_father); - } - } -} - -#ifdef _DEBUG - -void planarity::write_node(std::ostream& os, int id, int label, int mark) { - os << "node [\n" << "id " << id << std::endl; - os << "label \"" << label << "\"\n"; - os << "graphics [\n" << "x 100\n" << "y 100 \n"; - if (mark == 1) { - os << "outline \"#ff0000\"\n"; - } - os << "]\n"; - os << "]" << std::endl; -} -#endif - -#ifdef _DEBUG -void planarity::write_bushform(GTL::graph& G, st_number& st_, int k, const char* name, const node_map& mark, - const node_map& to_father) -{ - // create the bushform Bk for the k where the test failed - st_number::iterator st_it, st_end; - int id = 0; - node_map ids (G); - std::ofstream os(name, std::ios::out | std::ios::trunc); - - os << "graph [\n" << "directed 1" << 
std::endl; - - for (st_it = st_.begin(), st_end = st_.end(); st_it != st_end && st_[*st_it] <= k; ++st_it) { - write_node(os, id, st_[*st_it], mark[*st_it]); - ids[*st_it] = id; - id++; - } - - for (st_it = st_.begin(), st_end = st_.end(); st_it != st_end && st_[*st_it] <= k; ++st_it) { - node::adj_edges_iterator ait, aend; - - for (ait = st_it->adj_edges_begin(), aend = st_it->adj_edges_end(); ait != aend; ait++) { - node other = ait->opposite(*st_it); - int other_id; - if (st_[*st_it] < st_[other]) { - if(st_[other] > k) { - write_node(os, id, st_[other], mark[other]); - other_id = id; - id++; - } else { - other_id = ids[other]; - } - - os << "edge [\n" << "source " << ids[*st_it] << "\ntarget " << other_id << std::endl; - if (*ait == to_father[*st_it] || *ait == to_father[other]) { - os << "graphics [\n" << "fill \"#0000ff\"" << std::endl; - os << "width 4.0\n]" << std::endl; - } - os << "\n]" << std::endl; - } - } - } - - os << "]" << std::endl; -} - -#endif - -__GTL_END_NAMESPACE - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/src/pq_node.cpp b/src/Tracker/graph/GTL/src/pq_node.cpp deleted file mode 100644 index dabf49431..000000000 --- a/src/Tracker/graph/GTL/src/pq_node.cpp +++ /dev/null @@ -1,359 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// pq_node.cpp -// -//========================================================================== -// $Id: pq_node.cpp,v 1.12 2002/10/04 08:07:36 chris Exp $ - -#include - -__GTL_BEGIN_NAMESPACE - -pq_node::~pq_node () -{ - while (!sons.empty()) { - pq_node* tmp = sons.front(); - sons.erase (sons.begin()); - delete tmp; - } -} - - -//-------------------------------------------------------------------------- -// P-NODE 
-//-------------------------------------------------------------------------- - -p_node::p_node (GTL::node n_, int id_) : pq_node (n_, id_), partial_count (0), full_count (0) -{ -} - -p_node::p_node (GTL::node n_, int id_, symlist& s) : - pq_node (n_, id_), child_count (0), partial_count (0), full_count (0) -{ - sons.splice (sons.end(), s.begin(), s.end()); - - iterator it = sons.begin(); - iterator end = sons.end(); - - for (; it != end; ++it) { - ++child_count; - (*it)->is_endmost = true; - (*it)->father = this; - } -} - -void p_node::clear () -{ - pq_node::clear(); - partial_count = full_count = 0; - if (!full_sons.empty()) - sons.splice (sons.end(), full_sons.begin(), full_sons.end()); - - if (!partial_sons.empty()) - sons.splice (sons.end(), partial_sons.begin(), partial_sons.end()); -} - -inline void p_node::partial (iterator it) -{ - ++partial_count; - pert_leaves += (*it)->pert_leaves; - partial_sons.splice (partial_sons.end(), it); -} - -inline void p_node::full (iterator it) -{ - ++full_count; - pert_leaves += (*it)->pert_leaves; - full_sons.splice (full_sons.end(), it); -} - - -inline void p_node::write(std::ostream& os, int _id) -{ - os << "node [\n" << "id " << _id << std::endl; - os << "label \"" << id << "\nP" << "\"\n"; - os << "graphics [\n" << "x 100\n" << "y 100\n"; - if (mark == UNBLOCKED) { - os << "outline \"#0000ff\"\n"; - } else if (mark == BLOCKED) { - os << "outline \"#ff0000\"\n"; - } - os << "type \"oval\"\n" << "]" << std::endl; - os << "LabelGraphics [\n"; - os << "type \"text\"\n]\n]" << std::endl; -} - -//-------------------------------------------------------------------------- -// Q-NODE -//-------------------------------------------------------------------------- - -q_node::q_node (GTL::node n_, int id_) : pq_node (n_, id_), partial_count (0), full_count (0) -{ -} - -inline void q_node::partial (iterator it) -{ - if (partial_count < 3) { - partial_pos[partial_count] = it; - } - - pert_leaves += (*it)->pert_leaves; - 
++partial_count; - - if (pert_begin == iterator()) { - pertinent (it); - } -} - - -inline void q_node::full (iterator it) -{ - ++full_count; - pert_leaves += (*it)->pert_leaves; - - - if (pert_begin == iterator()) { - pertinent (it); - } -} - - -void q_node::pertinent (iterator it) -{ - iterator end = sons.end(); - iterator tmp = it; - pq_node* first; - pq_node* last; - pert_end = it; - ++tmp; - int pert_block_count = 1; - - while (tmp != end) { - if ((*tmp)->mark != UNBLOCKED) { - break; - } - - if ((*tmp)->kind () != DIR) { - ++pert_block_count; - pert_end = tmp; - } - - ++tmp; - } - - last = *pert_end; - - pert_begin = tmp = it; - --tmp; - - while (tmp != end) { - if ((*tmp)->mark != UNBLOCKED) { - break; - } - - if ((*tmp)->kind () != DIR) { - ++pert_block_count; - pert_begin = tmp; - } - - --tmp; - } - - first = *pert_begin; - pert_cons = (pert_block_count == pert_children); - - // - // it must be true, that either first or last is in - // {sons.front(), sons.back()} (or both). Thus we are able to achive - // the following normalization: pert_end is *always* sons.last() and - // pert_begin is some other child, such that ++pert_begin leads towards - // pert_end. - // - - if (pert_cons) { - if (last == sons.front()) { - turn(); - } else if (last != sons.back()) { - tmp = pert_begin; - pert_begin = pert_end; - pert_end = tmp; - pert_end.reverse(); - pert_begin.reverse(); - - if (first == sons.front()) { - turn(); - } else if (first != sons.back()) { - - // - // This should not happen. In this case the pertinent children are - // BLOCKED and thus this method wouldt be called. - // - // 17.3. Now this can happen. - // - - // pert_cons = false; - - // assert (false); - } - } - - } else { - - // - // In case that there are more than one block of pertinent children although - // bubble up didnt fail (e.g. pp...pe...ep...pp or ee...ep...pe...ep...pp) - // we need some element of the second block in order to find K5 or K33. 
So we - // leave pert_begin as it is, but assign pert_end to something in the second - // block - // - - tmp = pert_begin; - --tmp; - - while (tmp != sons.end()) { - if ((*tmp)->mark == UNBLOCKED && (*tmp)->kind () != DIR) { - break; - } - - --tmp; - } - - - // - // We need an empty child. So we always keep the invariant that --pert_end - // leads to an empty child. Please note that --pert_end might be a DI. - // - - tmp.reverse(); - - if (tmp == sons.end()) { - tmp = pert_end; - ++tmp; - - while (tmp != sons.end()) { - if ((*tmp)->mark == UNBLOCKED && (*tmp)->kind () != DIR) { - break; - } - - ++tmp; - } - - assert (tmp != sons.end()); - } - - pert_end = tmp; - } - - // - // In the case that there is in fact only one pertinent child we so far - // only assured that it is the last child, but it is still possible - // that pert_begin (and pert_end, too) is headed the wrong way. - // - - if (pert_begin == pert_end && pert_cons && pert_end == --(sons.end())) { - pert_begin = pert_end = --(sons.end()); - } -} - -inline void q_node::clear () -{ - pq_node::clear(); - partial_count = full_count = 0; - pert_begin = symlist::iterator(); - pert_end = symlist::iterator(); -} - -inline void q_node::write(std::ostream& os, int _id) -{ - os << "node [\n" << "id " << _id << std::endl; - os << "label \"" << id << "\n" << "Q" << "\"\n"; - os << "graphics [\n" << "x 100\n" << "y 100 \n"; - if (mark == UNBLOCKED) { - os << "outline \"#0000ff\"\n"; - } else if (mark == BLOCKED) { - os << "outline \"#ff0000\"\n"; - } - os << "]\n"; - os << "LabelGraphics [\n"; - os << "type \"text\"\n]\n]" << std::endl; -} - -q_node* q_node::merge (iterator it) -{ - assert ((*it)->kind() == pq_node::Q_NODE); - q_node* part = (q_node*) *it; - - if (part == sons.front()) { - part->sons.front()->father = this; - part->sons.back()->is_endmost = false; - } else if (part == sons.back()){ - part->sons.back()->father = this; - part->sons.front()->is_endmost = false; - } else { - part->sons.front()->is_endmost 
= false; - part->sons.back()->is_endmost = false; - } - - sons.splice (it, part->sons.begin(), part->sons.end()); - sons.erase (it); - - return part; -} - - -void q_node::turn () -{ - sons.reverse(); -} - - -//-------------------------------------------------------------------------- -// LEAF -//-------------------------------------------------------------------------- - - -pq_leaf::pq_leaf (int id_, int other_, GTL::edge e_, GTL::node n_) : pq_node (n_, id_) -{ - up_id = other_; - up = n_.opposite (e_); - other_id = other_; - e = e_; -} - -inline void pq_leaf::write(std::ostream& os, int _id) -{ - os << "node [\n" << "id " << _id << std::endl; - os << "label \"" << other_id << "\n" << id << "\"\n"; - os << "graphics [\n" << "x 100\n" << "y 100 \n"; - if (mark == UNBLOCKED) { - os << "outline \"#0000ff\"\n"; - } else if (mark == BLOCKED) { - os << "outline \"#ff0000\"\n"; - } - os << "]\n"; - os << "LabelGraphics [\n"; - os << "type \"text\"\n]\n]" << std::endl; -} - - -void direction_indicator::write(std::ostream& os, int _id) -{ - os << "node [\n" << "id " << _id << std::endl; - os << "label \"DIR\n" << id << "\"\n"; - os << "graphics [\n" << "x 100\n" << "y 100 \n"; - if (mark == UNBLOCKED) { - os << "outline \"#0000ff\"\n"; - } else if (mark == BLOCKED) { - os << "outline \"#ff0000\"\n"; - } - os << "]\n"; - os << "LabelGraphics [\n"; - os << "type \"text\"\n]\n]" << std::endl; -} - -__GTL_END_NAMESPACE - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/src/pq_tree.cpp b/src/Tracker/graph/GTL/src/pq_tree.cpp deleted file mode 100644 index 0131e0f60..000000000 --- a/src/Tracker/graph/GTL/src/pq_tree.cpp +++ /dev/null @@ -1,1574 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// 
pq_tree.cpp -// -//========================================================================== -// $Id: pq_tree.cpp,v 1.22 2008/02/03 18:12:07 chris Exp $ - -#include -#include - -#include -#include -#include -#include - -__GTL_BEGIN_NAMESPACE - -pq_tree::pq_tree (int id, GTL::node n, const std::list& li) -{ -#ifdef _DEBUG - GTL_debug::init_debug(); -#endif - std::list::const_iterator it; - std::list::const_iterator end = li.end(); - sons_list sons; - pq_leaf* tmp; - - for (it = li.begin(); it != end; ++it) { - tmp = *it; - tmp->pos = sons.insert (sons.end(), tmp); - } - - root = new p_node(n, id, sons); - pert_root = 0; - fail = 0; - pseudo = 0; -} - -pq_tree::~pq_tree () -{ -#ifdef _DEBUG - GTL_debug::close_debug(); -#endif - reset (); - - if (root) { - delete root; - } -} - - -bool pq_tree::bubble_up (std::list& leaves) -{ - std::queue qu; - int block_count = 0; - int blocked_siblings = 0; - pert_leaves_count = 0; - int off_the_top = 0; - pq_node* tmp; - - assert (clear_me.empty()); - - std::list::const_iterator it = leaves.begin(); - std::list::const_iterator lend = leaves.end(); - - while (it != lend) { - qu.push (*it); - (*it)->lpos = clear_me.insert (clear_me.end(), *it); - pert_leaves_count++; - ++it; - } - - sons_iterator next, prev, end; - pq_node* father = nullptr; - int size = pert_leaves_count; - - while (size + block_count + off_the_top > 1) { - if (size == 0) { - return false; - } - - tmp = qu.front(); - qu.pop(); - size--; - tmp->pert_leaves = 0; - - if (tmp == root) { - off_the_top = 1; - tmp->mark = pq_node::UNBLOCKED; - continue; - } - - tmp->mark = pq_node::BLOCKED; - - if (tmp->is_endmost) { - father = tmp->father; - tmp->mark = pq_node::UNBLOCKED; - end = father->sons.end(); - - if (father->kind() == pq_node::Q_NODE) { - blocked_siblings = 0; - next = tmp->pos; - prev = tmp->pos; - ++next; - --prev; - - if (next != end) { - if ((*next)->mark == pq_node::BLOCKED) { - ++blocked_siblings; - } - } else if (prev != end) { - if ((*prev)->mark == 
pq_node::BLOCKED) { - ++blocked_siblings; - } - } - } - - } else { - next = tmp->pos; - prev = tmp->pos; - ++next; - --prev; - blocked_siblings = 0; - - if ((*prev)->mark == pq_node::UNBLOCKED) { - tmp->mark = pq_node::UNBLOCKED; - tmp->father = (*prev)->father; - father = tmp->father; - end = father->sons.end(); - } else if ((*prev)->mark == pq_node::BLOCKED) { - blocked_siblings++; - } - - if ((*next)->mark == pq_node::UNBLOCKED) { - tmp->mark = pq_node::UNBLOCKED; - tmp->father = (*next)->father; - father = tmp->father; - end = father->sons.end(); - } else if ((*next)->mark == pq_node::BLOCKED) { - blocked_siblings++; - } - } - - if (tmp->mark == pq_node::UNBLOCKED) { - ++(father->pert_children); - - if (father->mark == pq_node::UNMARKED) { - qu.push (father); - father->lpos = clear_me.insert (clear_me.end(), father); - size++; - father->mark = pq_node::QUEUED; - } - - if (father->kind() == pq_node::Q_NODE) { - pq_node* tmp; - - while (next != end) { - tmp = *next; - if (tmp->mark == pq_node::BLOCKED) { - tmp->father = father; - tmp->mark = pq_node::UNBLOCKED; - - if (tmp->kind () != pq_node::DIR) { - ++(father->pert_children); - } - } else if (tmp->kind () == pq_node::DIR && - tmp->mark == pq_node::UNMARKED) { - tmp->lpos = clear_me.insert (clear_me.end(), tmp); - tmp->father = father; - tmp->mark = pq_node::UNBLOCKED; - } else { - break; - } - - ++next; - } - - while (prev != end) { - tmp = *prev; - if (tmp->mark == pq_node::BLOCKED) { - tmp->father = father; - tmp->mark = pq_node::UNBLOCKED; - - if (tmp->kind () != pq_node::DIR) { - ++(father->pert_children); - } - } else if (tmp->kind () == pq_node::DIR && - tmp->mark == pq_node::UNMARKED) { - tmp->lpos = clear_me.insert (clear_me.end(), tmp); - tmp->father = father; - tmp->mark = pq_node::UNBLOCKED; - } else { - break; - } - - --prev; - } - - block_count -= blocked_siblings; - } - - } else { - - // - // tmp is BLOCKED - // - - while ((*next)->kind() == pq_node::DIR && - (*next)->mark == pq_node::UNMARKED) { 
- (*next)->mark = pq_node::BLOCKED; - (*next)->lpos = clear_me.insert (clear_me.end(), *next); - ++next; - } - - while ((*prev)->kind() == pq_node::DIR && - (*prev)->mark == pq_node::UNMARKED) { - (*prev)->mark = pq_node::BLOCKED; - (*prev)->lpos = clear_me.insert (clear_me.end(), *prev); - --prev; - } - - block_count += 1 - blocked_siblings; - } - } - - return true; -} - - -pq_node* pq_tree::where_bubble_up_failed (std::list& leaves) -{ - - // - // Search the first leaf that leads to an interior block. - // - - pq_leaf* le; - pq_node* blocked = 0; - - std::list::iterator l_it = leaves.begin(); - std::list::iterator l_end = leaves.end(); - q_node* father = 0; - - while (l_it != l_end ) { - le = *l_it; - blocked = leads_to_blocked (le); - - if (blocked != 0) { - // - // Search the father of this block. - // - - sons_iterator it = blocked->pos; - while (!(*it)->is_endmost) { - ++it; - } - - father = (*it)->father->Q(); - - // - // give all the children the right father. - // - - it = father->sons.begin(); - sons_iterator end = father->sons.end(); - - while (it != end) { - if ((*it)->mark == pq_node::BLOCKED) { - (*it)->father = father; - (*it)->mark = pq_node::UNBLOCKED; - if ((*it)->kind() != pq_node::DIR) { - ++(father->pert_children); - } - } - - ++it; - } - - // - // We have to assure that there isn't any other interior block in the - // subtree of father. 
- // - - pq_node* another = blocked_in_subtree (father); - - if (another == 0) { - break; - } - } - - ++l_it; - } - - assert (father != 0); - - // - // delete all pertinent leaves that do not lead to father - // - - l_it = leaves.begin(); - - while (l_it != l_end ) { - le = *l_it; - - if (!leads_to (le, father)) { - l_it = leaves.erase (l_it); - } else { - ++l_it; - } - } - - return father; -} - - -pq_node* pq_tree::blocked_in_subtree (pq_node* n) -{ - if (n->kind() == pq_node::LEAF) { - return 0; - } - - if (n->mark == pq_node::BLOCKED) { - return n; - } - - sons_iterator it = n->sons.begin(); - sons_iterator end = n->sons.end(); - - while (it != end) { - pq_node* bl = blocked_in_subtree (*it); - - if (bl) { - return bl; - } - - ++it; - } - - return 0; -} - - -bool pq_tree::leads_to (pq_node* le, pq_node* other) -{ - if (le == root) { - return false; - } else if (le->mark == pq_node::BLOCKED) { - return false; - } else if (le->mark == pq_node::UNMARKED) { - return false; - } else if (le->father == other) { - return true; - } else { - return leads_to (le->father, other); - } -} - -pq_node* pq_tree::leads_to_blocked (pq_node* le) -{ - if (le == root) { - return 0; - } else if (le->mark == pq_node::BLOCKED) { - return le; - } else if (le->mark == pq_node::UNMARKED) { - return 0; - } else { - return leads_to_blocked (le->father); - } -} - - -bool pq_tree::reduce (std::list& leaves) -{ - - GTL_debug::debug_message ("REDUCING %d\n", leaves.front()->id); - fail = 0; - - if (!bubble_up (leaves)) { - - // - // Find the node that has an interior block. 
- // - - GTL_debug::debug_message ("Bubble-Up failed !!\n"); - fail = where_bubble_up_failed (leaves); - } - - std::queue qu; - pq_leaf* le; - std::list::iterator l_it = leaves.begin(); - std::list::iterator l_end = leaves.end(); - - while (l_it != l_end ) { - le = *l_it; - qu.push (le); - le->pert_leaves = 1; - ++l_it; - } - - pq_node* tmp; - - while (!qu.empty()) { - tmp = qu.front(); - qu.pop(); - clear_me.erase (tmp->lpos); - - if (tmp->mark == pq_node::BLOCKED) { - pseudo = new q_node (GTL::node(), 0); - sons_iterator past = tmp->pos; - - // - // Get maximal connected block of BLOCKED siblings right of tmp - // - - while ((*past)->mark == pq_node::BLOCKED) { - (*past)->mark = pq_node::UNBLOCKED; - (*past)->father = pseudo; - - if ((*past)->kind() != pq_node::DIR) { - pseudo->pert_children++; - } - - ++past; - } - - - // - // Delete surrounding direction indicators - // - - --past; - - while ((*past)->kind() == pq_node::DIR) { - (*past)->clear(); - clear_me.erase ((*past)->lpos); - --past; - } - - pseudo->pert_end = past; - - // - // Get maximal connected block of BLOCKED siblings left of tmp - // - - sons_iterator first = tmp->pos; - --first; - - while ((*first)->mark == pq_node::BLOCKED) { - (*first)->mark = pq_node::UNBLOCKED; - (*first)->father = pseudo; - - if ((*first)->kind() != pq_node::DIR) { - pseudo->pert_children++; - } - - --first; - } - - - // - // Delete surrounding direction indicators - // - - ++first; - - while ((*first)->kind() == pq_node::DIR) { - (*first)->clear(); - clear_me.erase ((*first)->lpos); - ++first; - } - - pseudo->pert_begin = first; - - GTL_debug::debug_message ("creating pseudo-node as root\n"); - pseudo->mark = pq_node::UNBLOCKED; - ++past; - pseudo->sons.attach_sublist (first, past); - pseudo->pert_cons = true; - pseudo->lpos = clear_me.insert (clear_me.end(), pseudo); - } - - if (tmp->pert_leaves == pert_leaves_count) { - - // - // tmp is the root of the pertinent subtree - // - - if (tmp->kind() == pq_node::LEAF) { - 
pert_root = tmp; - GTL_debug::debug_message ("full leaf is root\n"); - } else if (tmp->kind() == pq_node::P_NODE) { - if (P1 (tmp->P(), true)) { - GTL_debug::debug_message ("P1 matched for root\n"); - } else if (P2 (tmp->P())) { - GTL_debug::debug_message ("P2 matched for root\n"); - } else if (P4 (tmp->P())) { - GTL_debug::debug_message ("P4 matched for root\n"); - } else if (P6 (tmp->P())) { - GTL_debug::debug_message ("P6 matched for root\n"); - } else { - GTL_debug::debug_message ("NO MATCHING FOR P-ROOT\n"); - fail = tmp; - failed_at_root = true; - return false; - } - } else { - if (!tmp->Q()->pert_cons) { - GTL_debug::debug_message ("pertinent children not consecutive\n"); - fail = tmp; - failed_at_root = true; - return false; - } else if (Q1 (tmp->Q(), true)) { - GTL_debug::debug_message ("Q1 matched for root\n"); - } else if (Q2 (tmp->Q(), true)) { - GTL_debug::debug_message ("Q2 matched for root\n"); - } else if (Q3 (tmp->Q())) { - GTL_debug::debug_message ("Q3 matched for root\n"); - } else { - GTL_debug::debug_message ("NO MATCHING FOR Q-ROOT\n"); - - if (tmp == pseudo) { - - // - // search the real father - // - - sons_iterator it = pseudo->sons.begin(); - pseudo->sons.front()->is_endmost = false; - pseudo->sons.back()->is_endmost = false; - pseudo->sons.detach_sublist(); - assert (pseudo->sons.empty()); - - while (!(*it)->is_endmost) { - --it; - } - - tmp = (*it)->father; - q_node* q_tmp = tmp->Q(); - q_tmp->pert_begin = pseudo->pert_begin; - q_tmp->pert_end = pseudo->pert_end; - q_tmp->partial_count = pseudo->partial_count; - q_tmp->full_count = pseudo->full_count; - q_tmp->pert_cons = pseudo->pert_cons; - - for (int i = 0; i < q_tmp->partial_count; ++i) { - q_tmp->partial_pos[i] = pseudo->partial_pos[i]; - } - - delete pseudo; - pseudo = 0; - } - - fail = tmp; - failed_at_root = true; - return false; - } - } - - } else { - - // - // tmp is not the root of the pertinent subtree. 
- // - - if (tmp == pseudo || tmp == root) { - - // - // This should not happen when bubble_up was true. - // - - assert (false); - - } else { - pq_node* father = tmp->father; - - if (tmp->kind() == pq_node::LEAF) { - father->full (tmp->pos); - tmp->clear(); - GTL_debug::debug_message ("full leaf processed\n"); - - } else if (tmp->kind() == pq_node::P_NODE) { - if (P1 (tmp->P(), false)) { - GTL_debug::debug_message ("P1 matched for non-root\n"); - } else if (P3 (tmp->P())) { - GTL_debug::debug_message ("P3 matched for non-root\n"); - } else if (P5 (tmp->P())) { - GTL_debug::debug_message ("P5 matched for non-root\n"); - } else { - GTL_debug::debug_message ("NO MATCHING FOR P-NON-ROOT\n"); - fail = tmp; - failed_at_root = false; - return false; - } - - } else { - if (!tmp->Q()->pert_cons) { - GTL_debug::debug_message ("pertinent children not consecutive\n"); - fail = tmp; - return false; - } else if (Q1 (tmp->Q(), false)) { - GTL_debug::debug_message ("Q1 matched for non-root\n"); - } else if (Q2 (tmp->Q(), false)) { - GTL_debug::debug_message ("Q2 matched for non-root\n"); - } else { - GTL_debug::debug_message ("NO MATCHING FOR Q-NON-ROOT\n"); - fail = tmp; - failed_at_root = false; - return false; - } - } - - - // - // If all the other pertinent siblings of tmp have already been - // matched father of tmp is queued. 
- // - - --(father->pert_children); - - if (father->pert_children == 0) { - if (father == fail) { - failed_at_root = false; - return false; - } else { - qu.push (father); - } - } - } - } - } - - return true; -} - - -void pq_tree::reset () -{ - pq_node* tmp; - - while (!clear_me.empty()) { - tmp = clear_me.front(); - GTL_debug::debug_message ("Clearing %d\n", tmp->id); - clear_me.pop_front(); - tmp->clear(); - tmp->pert_children = 0; - } - - if (pert_root) { - pert_root->clear(); - pert_root = 0; - } - - if (pseudo) { - pseudo->sons.front()->is_endmost = false; - pseudo->sons.back()->is_endmost = false; - pseudo->sons.detach_sublist(); - assert (pseudo->sons.empty()); - delete pseudo; - pseudo = 0; - } - - if (fail) { - fail->clear(); - fail = 0; - } -} - - -void pq_tree::dfs (pq_node* act, planar_embedding& em, - std::list& dirs) -{ - if (act->kind() == pq_node::LEAF) { - em.push_back (act->n, ((pq_leaf*) act)->e); - return; - } - - sons_iterator it = act->sons.begin(); - sons_iterator end = act->sons.end(); - - while (it != end) { - if ((*it)->kind() == pq_node::DIR) { - direction_indicator* dir = (*it)->D(); - if (dir->mark != pq_node::UNMARKED) { - clear_me.erase (dir->lpos); - } - sons_iterator tmp = it; - - if (++tmp == ++(dir->pos)) { - dir->direction = true; - } else { - dir->direction = false; - } - - dirs.push_back (*dir); - - } else { - dfs (*it, em, dirs); - } - - ++it; - } -} - - -void pq_tree::replace_pert (int id, GTL::node _n, const std::list& li, - planar_embedding* em, std::list* dirs) -{ - assert (pert_root); - assert (!li.empty()); - pq_leaf* tmp = 0; - std::list::const_iterator it; - std::list::const_iterator end = li.end(); - sons_list sons; - int size = 0; - - for (it = li.begin(); it != end; ++it) { - tmp = *it; - tmp->pos = sons.insert (sons.end(), tmp); - ++size; - } - - pq_node* ins; - - if (size == 1) { - sons.erase (tmp->pos); - ins = tmp; - } else { - ins = new p_node(_n, id, sons); - } - - if (pert_root->kind() == pq_node::Q_NODE) { - 
q_node* q_root = pert_root->Q(); - sons_iterator it = q_root->pert_begin; - sons_iterator end = q_root->pert_end; - sons_iterator tmp = it; - sons_iterator sons_end = q_root->sons.end(); - --tmp; - - while (tmp != sons_end) { - if ((*tmp)->kind() != pq_node::DIR) { - break; - } - - --tmp; - } - - it = ++tmp; - - tmp = end; - ++tmp; - - while (tmp != sons_end) { - if ((*tmp)->kind() != pq_node::DIR) { - break; - } - - ++tmp; - } - - end = --tmp; - - ins->is_endmost = (*end)->is_endmost; - ++end; - - while (it != end) { - if (em && dirs) { - if ((*it)->kind() == pq_node::DIR) { - direction_indicator* dir = (*it)->D(); - clear_me.erase (dir->lpos); - sons_iterator tmp = it; - - if (++tmp == ++(dir->pos)) { - dir->direction = true; - } else { - dir->direction = false; - } - - dirs->push_back (*dir); - } else { - dfs (*it, *em, *dirs); - } - } - - delete *it; - it = pert_root->sons.erase (it); - } - - if (pert_root->sons.empty() && pert_root != pseudo) { - ins->pos = pert_root->pos; - ins->father = pert_root->father; - ins->is_endmost = pert_root->is_endmost; - ins->up = pert_root->up; - ins->up_id = pert_root->up_id; - - if (root == pert_root) { - root = ins; - } else { - *(pert_root->pos) = ins; - } - - delete pert_root; - pert_root = 0; - - } else { - if (em && dirs) { - direction_indicator* ind = new direction_indicator (_n, id); - ind->is_endmost = false; - ind->pos = pert_root->sons.insert (end, ind); - } - - ins->pos = pert_root->sons.insert (end, ins); - ins->father = pert_root; - ins->up = _n; - ins->up_id = id; - } - - } else { - if (em && dirs) { - dfs (pert_root, *em, *dirs); - } - - ins->is_endmost = pert_root->is_endmost; - ins->father = pert_root->father; - ins->pos = pert_root->pos; - ins->up = pert_root->up; - ins->up_id = pert_root->up_id; - - if (root == pert_root) { - root = ins; - } else { - *(pert_root->pos) = ins; - } - - delete pert_root; - pert_root = 0; - } -} - -void pq_tree::get_frontier (planar_embedding& em, - std::list& dirs) -{ - dfs 
(root, em, dirs); -} - -//------------------------------------------------------------------------ P1 -// Requirements: -// -// * x is a P-node having only full children -// * wheter x is the root or not is specified by the second parameter -// - -bool pq_tree::P1 (p_node* x, bool is_root) -{ - if (x->child_count == x->full_count) { - if (!is_root) { - x->father->full (x->pos); - } else { - pert_root = x; - } - - x->sons.splice (x->sons.end(), x->full_sons.begin(), - x->full_sons.end()); - x->clear(); - return true; - } - - return false; -} - - -//----------------------------------------------------------------------- P2 -// Requirements: -// -// * x is a P-node having both full and empty children -// * x has no partial children -// * x is the root of the pertinent subtree -// ==> more than one pertinent child -// * P1 didn't match -// ==> at least one non-full child -// -bool pq_tree::P2 (p_node* x) -{ - if (x->partial_count != 0) { - return false; - } - - p_node* ins = new p_node(x->n, x->id, x->full_sons); - ins->father = x; - ins->is_endmost = true; - ins->pos = x->sons.insert (x->sons.end(), ins); - x->child_count -= (x->full_count - 1); - x->clear(); - pert_root = ins; - return true; -} - - -//------------------------------------------------------------------------ P3 -// Requirements: -// -// * x is a P-node having both full and empty children. -// * x isn't the root of the pertinent subtree. -// * P1 didn't match. -// ==> at least one non-full child. 
-// * x has no partial children -// - -bool pq_tree::P3 (p_node* x) -{ - if (x->partial_count != 0) { - return false; - } - - q_node* new_q = new q_node (x->n, x->id); - pq_node* father = x->father; - pq_node* ins; - - *(x->pos) = new_q; - new_q->pos = x->pos; - new_q->up = x->up; - new_q->up_id = x->up_id; - new_q->is_endmost = x->is_endmost; - new_q->father = father; - new_q->pert_leaves = x->pert_leaves; - - if (x->full_count > 1) { - ins = new p_node (x->n, x->id, x->full_sons); - } else { - ins = x->full_sons.front(); - x->full_sons.erase (x->full_sons.begin()); - assert (x->full_sons.empty()); - } - - ins->up = x->n; - ins->up_id = x->id; - ins->is_endmost = true; - ins->father = new_q; - ins->pos = new_q->sons.insert (new_q->sons.end(), ins); - new_q->pert_cons = true; - new_q->pert_begin = ins->pos; - new_q->pert_end = ins->pos; - - if (x->child_count - x->full_count > 1) { - ins = x; - ins->up = x->n; - ins->up_id = x->id; - x->child_count -= x->full_count; - x->clear(); - } else { - ins = x->sons.front(); - ins->up = x->n; - ins->up_id = x->id; - x->sons.erase (x->sons.begin()); - assert (x->sons.empty()); - delete x; - } - - ins->is_endmost = true; - ins->father = new_q; - ins->pos = new_q->sons.insert (new_q->pert_begin, ins); - father->partial (new_q->pos); - - return true; -} - -//------------------------------------------------------------------------ P4 -// Requirements: -// -// * x is a P-node and the root of the pertinent subtree. -// ==> more than one non-empty child, i.e. at least one full child. 
-// * x has excactly one partial child -// * P1 and P2 didn't match -// ==> at least one partial child -// -bool pq_tree::P4 (p_node* x) -{ - if (x->partial_count > 1) { - return false; - } - - q_node* part = x->partial_sons.front()->Q(); - part->n = x->n; - part->id = x->id; - pq_node* ins; - - if (x->full_count > 1) { - ins = new p_node (x->n, x->id, x->full_sons); - } else { - ins = x->full_sons.front(); - x->full_sons.erase (x->full_sons.begin()); - assert (x->full_sons.empty()); - } - - part->sons.back()->is_endmost = false; - ins->is_endmost = true; - - ins->up = x->n; - ins->up_id = x->id; - ins->father = part; - ins->pos = part->sons.insert (part->sons.end(), ins); - part->pert_end = ins->pos; - x->child_count -= x->full_count; - - if (x->child_count == 1) { - if (root == x) { - root = part; - } else { - *(x->pos) = part; - } - - part->pos = x->pos; - part->is_endmost = x->is_endmost; - part->father = x->father; - part->up = x->up; - part->up_id = x->up_id; - x->partial_sons.erase (x->partial_sons.begin()); - - delete x; - } else { - x->sons.splice (x->sons.end(), part->pos); - x->clear(); - } - - pert_root = part; - return true; -} - - -//------------------------------------------------------------------------ P5 -// Requirements: -// -// * x is a P-node and not the root of the pertinent subtree -// * x has exactly one partial child. 
-// * P1 and P3 didn't match -// ==> at least one partial child -// -bool pq_tree::P5 (p_node* x) -{ - if (x->partial_count > 1) { - return false; - } - - pq_node* father = x->father; - q_node* part = x->partial_sons.front()->Q(); - part->n = x->n; - part->id = x->id; - part->up = x->up; - part->up_id = x->up_id; - - x->partial_sons.erase (x->partial_sons.begin()); - part->is_endmost = x->is_endmost; - part->father = father; - *(x->pos) = part; - part->pos = x->pos; - part->pert_leaves = x->pert_leaves; - pq_node* ins; - - if (x->full_count > 1) { - ins = new p_node (x->n, x->id, x->full_sons); - } else if (x->full_count == 1) { - ins = x->full_sons.front(); - x->full_sons.erase (x->full_sons.begin()); - assert (x->full_sons.empty()); - } else { - ins = 0; - } - - if (ins) { - ins->up = x->n; - ins->up_id = x->id; - part->sons.back()->is_endmost = false; - ins->is_endmost = true; - ins->father = part; - ins->pos = part->sons.insert (part->sons.end(), ins); - part->pert_end = ins->pos; - } - - x->child_count -= (x->full_count + 1); - - if (x->child_count > 1) { - ins = x; - ins->up = x->n; - ins->up_id = x->id; - x->clear(); - } else if (x->child_count == 1) { - ins = x->sons.front(); - ins->up = x->n; - ins->up_id = x->id; - x->sons.erase (x->sons.begin()); - delete x; - } else { - ins = 0; - delete x; - } - - if (ins) { - part->sons.front()->is_endmost = false; - ins->is_endmost = true; - ins->father = part; - ins->pos = part->sons.insert (part->sons.begin(), ins); - } - - father->partial (part->pos); - return true; -} - - -//------------------------------------------------------------------------ P6 -// Requirements: -// -// * x is the root of the pertinent subtree and has two partial children. -// * P1, P2 and P4 didn't match -// ==> at least two partial children. 
-// -bool pq_tree::P6 (p_node* x) -{ - if (x->partial_count > 2) { - return false; - } - - - q_node* part2 = x->partial_sons.front()->Q(); - x->partial_sons.erase (x->partial_sons.begin()); - q_node* part1 = x->partial_sons.front()->Q(); - part1->n = x->n; - part1->id = x->id; - pq_node* ins; - - if (x->full_count > 1) { - ins = new p_node (x->n, x->id, x->full_sons); - } else if (x->full_count == 1) { - ins = x->full_sons.front(); - x->full_sons.erase (x->full_sons.begin()); - assert (x->full_sons.empty()); - } else { - ins = 0; - } - - part1->sons.back()->is_endmost = false; - - if (ins) { - ins->up = x->n; - ins->up_id = x->id; - ins->is_endmost = false; - ins->pos = part1->sons.insert (part1->sons.end(), ins); - } - - part2->turn (); - part2->sons.front()->is_endmost = false; - part2->sons.back()->father = part1; - part1->sons.splice (part1->sons.end(), part2->sons.begin(), - part2->sons.end()); - part1->pert_end = part2->pert_begin; - part1->pert_end.reverse(); - x->child_count -= (x->full_count + 1); - delete part2; - - if (x->child_count == 1) { - if (root == x) { - root = part1; - } else { - *(x->pos) = part1; - } - part1->pos = x->pos; - part1->is_endmost = x->is_endmost; - part1->father = x->father; - part1->up = x->up; - part1->up_id = x->up_id; - x->partial_sons.erase (x->partial_sons.begin()); - - delete x; - } else { - x->sons.splice (x->sons.end(), x->partial_sons.begin()); - x->clear(); - } - - pert_root = part1; - return true; -} - -//------------------------------------------------------------------------ Q1 -// Requirements: -// -// * x is a Q-node having only full children -// * wheter x is the root or not is specified by the second parameter -// -bool pq_tree::Q1 (q_node* x, bool is_root) -{ - if (x->partial_count > 0) return false; - - if (*(x->pert_begin) == x->sons.front() - && *(x->pert_end) == x->sons.back()) { - - if (!is_root) { - x->father->full (x->pos); - } else { - pert_root = x; - } - - return true; - } - - return false; -} - - 
-//------------------------------------------------------------------------ Q2 -// Requirements: -// -// * Q1 didn't match ==> x has at least one non-full child. -// * wheter x is the root or not is specified by the second parameter -// * x has at most one partial child -// * If x has empty children, the partial child must be at pert_begin; -// if x hasn't any empty children the partial child is allowed to be at -// pert_end, since this can be transformed into the former case. -// -bool pq_tree::Q2 (q_node* x, bool is_root) -{ - if (x->partial_count > 1) { - return false; - } - - if (x->partial_count == 1) { - if (x->partial_pos[0] == x->pert_end && - x->pert_begin == x->sons.begin() && - x->pert_begin != x->pert_end) - { - if (!is_root) { - q_node* part = (*(x->pert_end))->Q(); - x->turn(); - sons_iterator tmp = x->pert_begin; - x->pert_begin = x->pert_end; - x->pert_end = tmp; - x->pert_begin.reverse(); - x->pert_end.reverse(); - x->merge (x->pert_begin); - x->pert_begin = part->pert_begin; - delete part; - } else { - q_node* part = (*(x->pert_end))->Q(); - part->turn(); - x->merge (x->pert_end); - x->pert_end = x->pert_begin; - x->pert_begin = part->pert_begin; - x->pert_end.reverse(); - // x->pert_begin.reverse(); - delete part; - } - - } else if (x->partial_pos[0] != x->pert_begin) { - return false; - } else { - // - // Partial child is at pert_begin and x has at least one - // empty child (i.e. pert_begin != sons.begin()) - // - - q_node* part = x->merge (x->pert_begin); - - if (x->pert_begin == x->pert_end) { - x->pert_end = part->pert_end; - } - - x->pert_begin = part->pert_begin; - delete part; - } - } - - if (!is_root) { - x->father->partial (x->pos); - } else { - pert_root = x; - } - - return true; -} - - -//------------------------------------------------------------------------ Q3 -// Requirements: -// -// * x is the root of the pertinent subtree. 
-// * Q1 and Q2 didn't match -// ==> at least one partial child -// * if there is only one partial child it must be at pert_end, and x must -// have at least one empty and one full child. -// if there are two partial children they must be at pert_begin and -// pert_end. -// -bool pq_tree::Q3 (q_node* x) -{ - if (x->partial_count > 2 || x->partial_count < 1) return false; - - if (x->partial_count == 1) { - if (x->partial_pos[0] != x->pert_end) return false; - - // - // One partial child at pert_end. - // - - } else { - if (x->partial_pos[0] != x->pert_end) { - if (x->partial_pos[1] != x->pert_end || - x->partial_pos[0] != x->pert_begin) return false; - } else { - if (x->partial_pos[1] != x->pert_begin) return false; - } - - // - // One partial child at pert_begin and one at pert_end - // - } - - q_node* part = (*(x->pert_end))->Q(); - part->turn(); - x->merge (x->pert_end); - x->pert_end = part->pert_begin; - x->pert_end.reverse(); - delete part; - - if (x->partial_count == 2) { - part = x->merge (x->pert_begin); - x->pert_begin = part->pert_begin; - delete part; - } - - pert_root = x; - return true; -} - - - - -GTL_EXTERN std::ostream& operator<< (std::ostream& os, const pq_tree& tree) -{ - if (!tree.root) return os; - - int id = 0; - std::pair tmp; - std::queue > qu; - pq_node* act; - - os << "graph [\n" << "directed 1" << std::endl; - tree.root->write (os, id); - tmp.first = tree.root; - tmp.second = id; - ++id; - qu.push (tmp); - - while (!qu.empty()) { - tmp = qu.front(); - qu.pop(); - - if (tmp.first->kind() == pq_node::Q_NODE || tmp.first->kind() == pq_node::P_NODE) { - pq_tree::sons_iterator it = tmp.first->sons.begin(); - pq_tree::sons_iterator end = tmp.first->sons.end(); - - for (; it != end; ++it) { - act = *it; - act->write (os, id); - - os << "edge [\n" << "source " << tmp.second << std::endl; - os << "target " << id << "\n]" << std::endl; - - qu.push(std::pair(act, id)); - ++id; - } - } - - if (tmp.first->kind() == pq_node::P_NODE) { - p_node* P = 
tmp.first->P(); - pq_tree::sons_iterator it = P->full_sons.begin(); - pq_tree::sons_iterator end = P->full_sons.end(); - - for (; it != end; ++it) { - act = *it; - act->write (os, id); - - os << "edge [\n" << "source " << tmp.second << std::endl; - os << "target " << id << "\n]" << std::endl; - - qu.push(std::pair(act, id)); - ++id; - } - - it = P->partial_sons.begin(); - end = P->partial_sons.end(); - - for (; it != end; ++it) { - act = *it; - act->write (os, id); - - os << "edge [\n" << "source " << tmp.second << std::endl; - os << "target " << id << "\n]" << std::endl; - - qu.push(std::pair(act, id)); - ++id; - } - } - } - - os << "]" << std::endl; - - return os; -} - -pq_tree::sons_iterator -pq_tree::remove_dir_ind(q_node* q_fail, sons_iterator s_it) -{ - direction_indicator* dir = (*s_it)->D(); - sons_iterator res = q_fail->sons.erase(s_it); - clear_me.erase(dir->lpos); - delete dir; - return res; -} - - -//-------------------------------------------------------------------------- -// DEBUGGING -//-------------------------------------------------------------------------- - -bool pq_tree::integrity_check () const -{ - if (!root) return true; - - std::queue qu; - qu.push (root); - pq_node* tmp; - - while (!qu.empty()) { - tmp = qu.front(); - qu.pop(); - - if (tmp->kind() == pq_node::LEAF) continue; - if (tmp->kind() == pq_node::DIR) continue; - - sons_iterator it = tmp->sons.begin(); - sons_iterator end = tmp->sons.end(); - int count = 0; - int endmost_count = 0; - - for (; it != end; ++it) { - ++count; - if ((*it)->is_endmost) { - ++endmost_count; - - if ((*it)->father != tmp) { - GTL_debug::debug_message ("Wrong father !!!\n"); - GTL_debug::close_debug(); - return false; - } - } - - if ((*it)->pos != it) { - GTL_debug::debug_message ("Wrong position !!\n"); - GTL_debug::close_debug(); - return false; - } - - qu.push (*it); - } - - if (tmp->kind() == pq_node::P_NODE - && count != (tmp->P()->child_count)) { - GTL_debug::debug_message ("Wrong number of children 
!!!\n"); - GTL_debug::close_debug(); - return false; - } - - if (tmp->kind() == pq_node::Q_NODE && count < 2) { - GTL_debug::debug_message ("Q-Node with too few children !!\n"); - GTL_debug::close_debug(); - return false; - } - - if (tmp->kind() == pq_node::P_NODE && count < 2) { - GTL_debug::debug_message ("P-Node with too few children !!\n"); - GTL_debug::close_debug(); - return false; - } - - if (tmp->kind() == pq_node::Q_NODE) { - if (endmost_count == 2) { - if (!(tmp->sons.front()->is_endmost && - tmp->sons.back()->is_endmost)) { - GTL_debug::debug_message ("Q-node with inner children labeled endmost\n"); - GTL_debug::close_debug(); - return false; - } - } else { - GTL_debug::debug_message ("Q-node with too many or too few endmost children\n"); - GTL_debug::close_debug(); - return false; - } - } - } - - return true; -} - -/* -void pq_tree::insert (pq_node* father, pq_node* ins) { - ins->father = father; - ins->is_endmost = true; - - if (father->kind() == pq_node::Q_NODE) { - father->sons.back()->is_endmost = false; - } else { - ((p_node*)father)->child_count++; - } - - ins->pos = father->sons.insert (father->sons.end(), ins); -} - - -p_node* pq_tree::insert_P (pq_node* father, sons_list& sons) -{ - p_node* p = new p_node(); - insert (father, p); - pq_node* tmp; - - sons_iterator it = sons.begin(); - sons_iterator end = sons.end(); - - for (; it != end; ++it) { - p->child_count++; - tmp = *it; - tmp->father = p; - tmp->is_endmost = true; - tmp->pos = p->sons.insert (p->sons.end(), tmp); - - if (tmp->kind() == pq_node::LEAF) { - leaves.push_back ((pq_leaf*)tmp); - } - } - - return p; -} - - -q_node* pq_tree::insert_Q (pq_node* father, sons_list& sons) -{ - q_node* q = new q_node(); - insert (father, q); - pq_node* tmp; - sons_iterator it = sons.begin(); - sons_iterator end = sons.end(); - - for (; it != end; ++it) { - tmp = *it; - tmp->is_endmost = false; - tmp->pos = q->sons.insert (q->sons.end(), tmp); - - if (tmp->kind() == pq_node::LEAF) { - leaves.push_back 
(tmp->L()); - } - } - - q->sons.front()->father = q; - q->sons.front()->is_endmost = true; - q->sons.back()->father = q; - q->sons.back()->is_endmost = true; - - return q; -} - -*/ - -__GTL_END_NAMESPACE - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/src/ratio_cut_partition.cpp b/src/Tracker/graph/GTL/src/ratio_cut_partition.cpp deleted file mode 100644 index f238ed481..000000000 --- a/src/Tracker/graph/GTL/src/ratio_cut_partition.cpp +++ /dev/null @@ -1,1579 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// ratio_cut_partition.cpp -// -//========================================================================== -// $Id: ratio_cut_partition.cpp,v 1.9 2001/11/07 13:58:11 pick Exp $ - -#include -#include -#include - -#include -#include - -#include -#include -#include -#include - -__GTL_BEGIN_NAMESPACE - - -const ratio_cut_partition::side_type ratio_cut_partition::A = 0; -const ratio_cut_partition::side_type ratio_cut_partition::B = 1; - - -const ratio_cut_partition::fix_type ratio_cut_partition::FIXA = 0; -const ratio_cut_partition::fix_type ratio_cut_partition::FIXB = 1; -const ratio_cut_partition::fix_type ratio_cut_partition::UNFIXED = 2; - - -ratio_cut_partition::ratio_cut_partition() -{ - set_vars_executed = false; - enable_cut_edges_storing = false; - enable_nodesAB_storing = false; -} - - -ratio_cut_partition::~ratio_cut_partition() -{ -} - - -void ratio_cut_partition::set_vars(const graph& G, - const node_map& node_weight, const edge_map& edge_weight) -{ - this->node_weight = node_weight; - this->edge_weight = edge_weight; - set_vars_executed = true; - provided_st = false; - provided_fix = false; - this->fixed.init(G, UNFIXED); - provided_initial_part = false; - side.init(G); -} 
- - -void ratio_cut_partition::set_vars(const graph& G, - const node_map& node_weight, const edge_map& edge_weight, - const node source_node, const node target_node) -{ - this->node_weight = node_weight; - this->edge_weight = edge_weight; - this->source_node = source_node; - this->target_node = target_node; - set_vars_executed = true; - provided_st = true; - provided_fix = false; - this->fixed.init(G, UNFIXED); - provided_initial_part = false; - side.init(G); -} - - -void ratio_cut_partition::set_vars(const graph& G, - const node_map& node_weight, const edge_map& edge_weight, - const node source_node, const node target_node, - const node_map& init_side) -{ - this->node_weight = node_weight; - this->edge_weight = edge_weight; - this->source_node = source_node; - this->target_node = target_node; - this->side = init_side; - set_vars_executed = true; - provided_st = true; - provided_fix = false; - this->fixed.init(G, UNFIXED); - provided_initial_part = true; -} - - -void ratio_cut_partition::set_vars(const graph& G, - const node_map& node_weight, const edge_map& edge_weight, - const node source_node, const node target_node, - const node_map& fixed) -{ - this->node_weight = node_weight; - this->edge_weight = edge_weight; - this->source_node = source_node; - this->target_node = target_node; - this->fixed = fixed; - set_vars_executed = true; - provided_st = true; - provided_fix = true; - provided_initial_part = false; - side.init(G); -} - - -void ratio_cut_partition::set_vars(const graph& /*G*/, - const node_map& node_weight, const edge_map& edge_weight, - const node source_node, const node target_node, - const node_map& init_side, const node_map& fixed) -{ - this->node_weight = node_weight; - this->edge_weight = edge_weight; - this->source_node = source_node; - this->target_node = target_node; - this->side = init_side; - this->fixed = fixed; - set_vars_executed = true; - provided_st = true; - provided_fix = true; - provided_initial_part = true; -} - - -void 
ratio_cut_partition::store_cut_edges(const bool set) -{ - enable_cut_edges_storing = set; -} - - -void ratio_cut_partition::store_nodesAB(const bool set) -{ - enable_nodesAB_storing = set; -} - - -int ratio_cut_partition::check(GTL::graph& G) -{ - if ((!set_vars_executed) || (!G.is_undirected())) - { - return GTL_ERROR; - } - - graph::edge_iterator edge_it = G.edges_begin(); - graph::edge_iterator edges_end = G.edges_end(); - while (edge_it != edges_end) - { - if (edge_weight[*edge_it] < 0) - { - return GTL_ERROR; - } - ++edge_it; - } - int real_node_weights = 0; - graph::node_iterator node_it = G.nodes_begin(); - graph::node_iterator nodes_end = G.nodes_end(); - while (node_it != nodes_end) - { - if (node_weight[*node_it] > 0) - { - ++real_node_weights; - } - if (node_weight[*node_it] < 0) - { - return GTL_ERROR; - } - ++node_it; - } - if ((G.number_of_nodes() >= 2) && (real_node_weights < 2)) - { - return GTL_ERROR; - } - - if ((provided_st) && (source_node == target_node) && - (G.number_of_nodes() > 1)) - { - return GTL_ERROR; - } - - if ((provided_initial_part) && ((side[source_node] != A) || - (side[target_node] != B))) - { - return GTL_ERROR; - } - - if ((provided_fix) && ((fixed[source_node] == FIXB) || - (fixed[target_node] == FIXA))) - { - return GTL_ERROR; - } - - if ((provided_st) && (node_weight[source_node] == 0 || - node_weight[target_node] == 0)) - { - return GTL_ERROR; - } - - return GTL_OK; -} - - -int ratio_cut_partition::run(GTL::graph& G) -{ - cur_cutsize = 0; - cur_cutratio = 0.0; - if (G.number_of_nodes() == 0) - { - return GTL_OK; // nothing to do - } - if (G.number_of_nodes() == 1) - { - side[*G.nodes_begin()] = A; - return GTL_OK; - } - - edges_t artificial_edges; - if (!G.is_connected()) - { - make_connected(G, artificial_edges); - } - - if (provided_fix) - { - divide_up(G); - } - - if (!provided_st) - { - determine_source_node(G); - compute_target_node(G); - } - - if (provided_initial_part) - { - init_variables(G); - 
init_data_structure(G); - direction = LEFT_SHIFT; - clean_step(G); - } - else - { - initialization(G); - } - iterative_shifting(G); - group_swapping(G); - - if (enable_cut_edges_storing) - { - compute_cut_edges(G); - } - if (enable_nodesAB_storing) - { - compute_nodesAB(G); - } - restore(G, artificial_edges); - - return GTL_OK; -} - - -int ratio_cut_partition::get_cutsize() -{ - return cur_cutsize; -} - - -double ratio_cut_partition::get_cutratio() -{ - return cur_cutratio; -} - - -ratio_cut_partition::side_type -ratio_cut_partition::get_side_of_node(const node& n) const -{ - return side[n]; -} - - -ratio_cut_partition::side_type ratio_cut_partition::operator [] -(const node& n) const -{ - return side[n]; -} - - -int ratio_cut_partition::get_weight_on_sideA(const graph& G) const -{ - int nwA = 0; - graph::node_iterator node_it = G.nodes_begin(); - graph::node_iterator nodes_end = G.nodes_end(); - while (node_it != nodes_end) - { - if (side[*node_it] == A) - { - nwA += node_weight[*node_it]; - } - ++node_it; - } - return nwA; -} - - -int ratio_cut_partition::get_weight_on_sideB(const graph& G) const -{ - int nwB = 0; - graph::node_iterator node_it = G.nodes_begin(); - graph::node_iterator nodes_end = G.nodes_end(); - while (node_it != nodes_end) - { - if (side[*node_it] == B) - { - nwB += node_weight[*node_it]; - } - ++node_it; - } - return nwB; -} - - -ratio_cut_partition::cut_edges_iterator -ratio_cut_partition::cut_edges_begin() const -{ - return cut_edges.begin(); -} - - -ratio_cut_partition::cut_edges_iterator -ratio_cut_partition::cut_edges_end() const -{ - return cut_edges.end(); -} - - -ratio_cut_partition::nodes_of_one_side_iterator -ratio_cut_partition::nodes_of_sideA_begin() const -{ - return nodesA.begin(); -} - - -ratio_cut_partition::nodes_of_one_side_iterator -ratio_cut_partition::nodes_of_sideA_end() const -{ - return nodesA.end(); -} - - -ratio_cut_partition::nodes_of_one_side_iterator -ratio_cut_partition::nodes_of_sideB_begin() const -{ - return 
nodesB.begin(); -} - - -ratio_cut_partition::nodes_of_one_side_iterator -ratio_cut_partition::nodes_of_sideB_end() const -{ - return nodesB.end(); -} - - -void ratio_cut_partition::reset() -{ - set_vars_executed = false; - cut_edges.clear(); - nodesA.clear(); - nodesB.clear(); -} - - -void ratio_cut_partition::divide_up(const graph& G) -{ - graph::node_iterator node_it = G.nodes_begin(); - graph::node_iterator nodes_end = G.nodes_end(); - while (node_it != nodes_end) - { - if (fixed[*node_it] == FIXA) - { - side[*node_it] = A; - } - else if (fixed[*node_it] == FIXB) - { - side[*node_it] = B; - } - ++node_it; - } -} - - -void ratio_cut_partition::make_connected(GTL::graph& G, - edges_t& artificial_edges) -{ - dfs conn; - conn.scan_whole_graph(true); - conn.check(G); - conn.run(G); - - // connect dfs roots with zero edges - dfs::roots_iterator root_it = conn.roots_begin(); - dfs::roots_iterator rootes_end = conn.roots_end(); - while (root_it != rootes_end) - { - node edge_start = **root_it; - ++root_it; - if (root_it != rootes_end) - { - edge ne = G.new_edge(edge_start, **root_it); - edge_weight[ne] = 0; // this edge has no cut costs - artificial_edges.push_back(ne); - } - } -} - - -void ratio_cut_partition::restore(GTL::graph& G, edges_t& artificial_edges) -{ - edges_t::iterator edge_it = artificial_edges.begin(); - edges_t::iterator edges_end = artificial_edges.end(); - while (edge_it != edges_end) - { - G.del_edge(*edge_it); - ++edge_it; - } -} - - -void ratio_cut_partition::initialization(const graph& G) -{ - int cutsize_A2B, cutsize_B2A; - double cutratio_A2B, cutratio_B2A; - node_map side_B2A(G); - - init_variables(G); - - // start with moves from B to A - graph::node_iterator node_it = G.nodes_begin(); - graph::node_iterator nodes_end = G.nodes_end(); - while (node_it != nodes_end) - { - if (fixed[*node_it] == UNFIXED) - { - side[*node_it] = B; - } - ++node_it; - } - side[source_node] = A; - side[target_node] = B; - init_data_structure(G); - if 
(fixed[target_node] == UNFIXED) - { - bucketB[range_up(gain_value[target_node])]. - erase(position_in_bucket[target_node]); - update_max_gain(B); - } - left_shift_op(G); - clean_step(G); - cutsize_B2A = cur_cutsize; - cutratio_B2A = cur_cutratio; - copy_side_node_map(G, side_B2A, side); - - // continue with moves from A to B - node_it = G.nodes_begin(); - while (node_it != nodes_end) - { - if (fixed[*node_it] == UNFIXED) - { - side[*node_it] = A; - } - ++node_it; - } - side[source_node] = A; - side[target_node] = B; - init_data_structure(G); - if (fixed[source_node] == UNFIXED) - { - bucketA[range_up(gain_value[source_node])]. - erase(position_in_bucket[source_node]); - update_max_gain(A); - } - right_shift_op(G); - clean_step(G); - cutsize_A2B = cur_cutsize; - cutratio_A2B = cur_cutratio; - - if (cutratio_B2A < cutratio_A2B) - { - copy_side_node_map(G, side, side_B2A); - cur_cutsize = cutsize_B2A; - cur_cutratio = cutratio_B2A; - direction = LEFT_SHIFT; - } - else - { - // copy_side_node_map(...) 
not necessary - cur_cutsize = cutsize_A2B; - cur_cutratio = cutratio_A2B; - direction = RIGHT_SHIFT; - } -} - - -void ratio_cut_partition::init_data_structure(const graph& G) -{ - aside.init(G); - bside.init(G); - unlockedA.init(G); - unlockedB.init(G); - cur_cutsize = 0; - graph::edge_iterator edge_it = G.edges_begin(); - graph::edge_iterator edges_end = G.edges_end(); - while (edge_it != edges_end) - { - if ((side[edge_it->source()] == A) && - (side[edge_it->target()] == A)) - { - aside[*edge_it] = 2; - bside[*edge_it] = 0; - unlockedA[*edge_it].push_back(edge_it->source()); - unlockedA[*edge_it].push_back(edge_it->target()); - } - else if ((side[edge_it->source()] == B) && - (side[edge_it->target()] == B)) - { - aside[*edge_it] = 0; - bside[*edge_it] = 2; - unlockedB[*edge_it].push_back(edge_it->source()); - unlockedB[*edge_it].push_back(edge_it->target()); - } - else if ((side[edge_it->source()] == A) && - (side[edge_it->target()] == B)) - { - aside[*edge_it] = 1; - bside[*edge_it] = 1; - cur_cutsize += edge_weight[*edge_it]; - unlockedA[*edge_it].push_back(edge_it->source()); - unlockedB[*edge_it].push_back(edge_it->target()); - } - else if ((side[edge_it->source()] == B) && - (side[edge_it->target()] == A)) - { - aside[*edge_it] = 1; - bside[*edge_it] = 1; - cur_cutsize += edge_weight[*edge_it]; - unlockedA[*edge_it].push_back(edge_it->target()); - unlockedB[*edge_it].push_back(edge_it->source()); - } - ++edge_it; - } - - bucketA.resize(2 * max_vertex_degree * max_edge_weight + 1); - bucketB.resize(2 * max_vertex_degree * max_edge_weight + 1); - - init_filling_buckets(G); - cur_cutratio = cutratio(); -} - - -void ratio_cut_partition::init_filling_buckets(const graph &G) -{ - node_weight_on_sideA = 0; - node_weight_on_sideB = 0; - nodes_on_sideA = 0; - nodes_on_sideB = 0; - bucketA_empty = true; - bucketB_empty = true; - bool first_A_node = true; - bool first_B_node = true; - int index; - // position_in_bucket.init(G); - gain_value.init(G); - - 
graph::node_iterator node_it = G.nodes_begin(); - graph::node_iterator nodes_end = G.nodes_end(); - while (node_it != nodes_end) - { - if (side[*node_it] == A) - { - node_weight_on_sideA += node_weight[*node_it]; - ++nodes_on_sideA; - gain_value[*node_it] = inital_gain_of_node_on_sideA(*node_it); - if (fixed[*node_it] == UNFIXED) - { - if (first_A_node) - { - bucketA_empty = false; - max_gainA = gain_value[*node_it]; - first_A_node = false; - } - else - { - if (max_gainA < gain_value[*node_it]) - { - max_gainA = gain_value[*node_it]; - } - } - index = range_up(gain_value[*node_it]); - position_in_bucket[*node_it] = bucketA[index].insert( - bucketA[index].begin(), *node_it); - } - } - else // side[*node_it] == B - { - node_weight_on_sideB += node_weight[*node_it]; - ++nodes_on_sideB; - gain_value[*node_it] = inital_gain_of_node_on_sideB(*node_it); - if (fixed[*node_it] == UNFIXED) - { - if (first_B_node) - { - bucketB_empty = false; - max_gainB = gain_value[*node_it]; - first_B_node = false; - } - else - { - if (max_gainB < gain_value[*node_it]) - { - max_gainB = gain_value[*node_it]; - } - } - index = range_up(gain_value[*node_it]); - position_in_bucket[*node_it] = bucketB[index].insert( - bucketB[index].begin(), *node_it); - } - } - ++node_it; - } -} - - -int ratio_cut_partition::inital_gain_of_node_on_sideA(const node cur_node) -{ - int node_gain = 0; - node::adj_edges_iterator adj_edge_it = cur_node.adj_edges_begin(); - node::adj_edges_iterator adj_edges_end = cur_node.adj_edges_end(); - while (adj_edge_it != adj_edges_end) - { - if (aside[*adj_edge_it] == 1) - { - node_gain += edge_weight[*adj_edge_it]; - } - if (bside[*adj_edge_it] == 0) - { - node_gain -= edge_weight[*adj_edge_it]; - } - ++adj_edge_it; - } - return node_gain; -} - - -int ratio_cut_partition::inital_gain_of_node_on_sideB(const node cur_node) -{ - int node_gain = 0; - node::adj_edges_iterator adj_edge_it = cur_node.adj_edges_begin(); - node::adj_edges_iterator adj_edges_end = 
cur_node.adj_edges_end(); - while (adj_edge_it != adj_edges_end) - { - if (bside[*adj_edge_it] == 1) - { - node_gain += edge_weight[*adj_edge_it]; - } - if (aside[*adj_edge_it] == 0) - { - node_gain -= edge_weight[*adj_edge_it]; - } - ++adj_edge_it; - } - return node_gain; -} - - -void ratio_cut_partition::init_variables(const graph& G) -{ - compute_max_vertex_degree(G); - bool first_edge_found = true; - max_edge_weight = 0; - graph::edge_iterator edge_it = G.edges_begin(); - graph::edge_iterator edges_end = G.edges_end(); - while (edge_it != edges_end) - { - if (first_edge_found) - { - max_edge_weight = edge_weight[*edge_it]; - first_edge_found = false; - } - else if (edge_weight[*edge_it] > max_edge_weight) - { - max_edge_weight = edge_weight[*edge_it]; - } - ++edge_it; - } -} - - -void ratio_cut_partition::compute_max_vertex_degree(const graph& G) -{ - max_vertex_degree = 0; - graph::node_iterator node_it = G.nodes_begin(); - graph::node_iterator nodes_end = G.nodes_end(); - while (node_it != nodes_end) - { - if (max_vertex_degree < node_it->degree()) - { - max_vertex_degree = node_it->degree(); - } - ++node_it; - } -} - - -void ratio_cut_partition::determine_source_node(const graph& G) -{ - srand((unsigned)time(NULL)); - rand(); // necessary, otherwise the next rand() returns always 0 ?-) - int node_id = (int)floor((((double)rand() / (double)RAND_MAX) * - (double)(G.number_of_nodes() - 1)) + 0.5); - graph::node_iterator node_it = G.nodes_begin(); - for (int i = 1; i <= node_id; i++) - { - ++node_it; - } - source_node = *node_it; - if (node_weight[source_node] == 0) - { - node_it = G.nodes_begin(); - while (node_weight[*node_it] == 0) - { - ++node_it; - } - source_node = *node_it; - } -} - - -void ratio_cut_partition::compute_target_node(const graph& G) -{ - node cur_node, next; - node_map visited(G, false); - std::queue next_nodes; - next_nodes.push(source_node); - visited[source_node] = true; - - while (!next_nodes.empty()) - { - cur_node = next_nodes.front(); 
- next_nodes.pop(); - - node::adj_edges_iterator adj_edge_it = cur_node.adj_edges_begin(); - node::adj_edges_iterator adj_edges_end = cur_node.adj_edges_end(); - while (adj_edge_it != adj_edges_end) - { - if (adj_edge_it->target() != cur_node) - { - next = adj_edge_it->target(); - } - else - { - next = adj_edge_it->source(); - } - if (!visited[next]) - { - next_nodes.push(next); - visited[next] = true; - } - ++adj_edge_it; - } - } - target_node = cur_node; - if (node_weight[target_node] == 0) - { - graph::node_iterator node_it = G.nodes_begin(); - while ((node_weight[*node_it] == 0) || (*node_it == source_node)) - { - ++node_it; - } - target_node = *node_it; - } -} - - -void ratio_cut_partition::right_shift_op(const graph& G) -{ - int step_number = 0; - int best_tentative_move = 0; - int best_bal = node_weight_on_sideA * node_weight_on_sideB; - std::vector tentative_moves(G.number_of_nodes() + 1); - std::vector tentative_cutratio(G.number_of_nodes() + 1); - node moved_node; - tentative_cutratio[0] = cur_cutratio; - int best_cutsize = cur_cutsize; - - while (move_vertex_A2B(G, moved_node)) - { - ++step_number; - tentative_cutratio[step_number] = cur_cutratio; - tentative_moves[step_number] = moved_node; - if (tentative_cutratio[best_tentative_move] > cur_cutratio) - { - best_tentative_move = step_number; - best_cutsize = cur_cutsize; - best_bal = node_weight_on_sideA * node_weight_on_sideB; - } - else if (tentative_cutratio[best_tentative_move] == cur_cutratio) - { - if (node_weight_on_sideA * node_weight_on_sideB > best_bal) - { - best_tentative_move = step_number; - best_cutsize = cur_cutsize; - best_bal = node_weight_on_sideA * node_weight_on_sideB; - } - } - } - - for (int i = 1; i <= best_tentative_move; i++) - { - if (side[tentative_moves[i]] == A) - { - side[tentative_moves[i]] = B; - } - else // side[tentative_moves[i]] == B - { - side[tentative_moves[i]] = A; - } - } - cur_cutratio = tentative_cutratio[best_tentative_move]; - cur_cutsize = best_cutsize; -} 
- - -void ratio_cut_partition::left_shift_op(const graph& G) -{ - int step_number = 0; - int best_tentative_move = 0; - int best_bal = node_weight_on_sideA * node_weight_on_sideB; - std::vector tentative_moves(G.number_of_nodes() + 1); - std::vector tentative_cutratio(G.number_of_nodes() + 1); - node moved_node; - tentative_cutratio[0] = cur_cutratio; - int best_cutsize = cur_cutsize; - - while (move_vertex_B2A(G, moved_node)) - { - ++step_number; - tentative_cutratio[step_number] = cur_cutratio; - tentative_moves[step_number] = moved_node; - if (tentative_cutratio[best_tentative_move] > cur_cutratio) - { - best_tentative_move = step_number; - best_cutsize = cur_cutsize; - } - else if (tentative_cutratio[best_tentative_move] == cur_cutratio) - { - if (node_weight_on_sideA * node_weight_on_sideB > best_bal) - { - best_tentative_move = step_number; - best_cutsize = cur_cutsize; - best_bal = node_weight_on_sideA * node_weight_on_sideB; - } - } - } - - for (int i = 1; i <= best_tentative_move; i++) - { - if (side[tentative_moves[i]] == A) - { - side[tentative_moves[i]] = B; - } - else // side[tentative_moves[i]] == B - { - side[tentative_moves[i]] = A; - } - } - cur_cutratio = tentative_cutratio[best_tentative_move]; - cur_cutsize = best_cutsize; -} - - -bool ratio_cut_partition::move_vertex_A2B(const graph &/*G*/, GTL::node& moved_node) -{ - if (!bucketA_empty) - { - node cons_nodeA = - compute_highest_ratio_node(bucketA[range_up(max_gainA)]); - bucketA[range_up(max_gainA)].erase(position_in_bucket[cons_nodeA]); - update_data_structure_A2B(cons_nodeA, true); - moved_node = cons_nodeA; - } - else - { - return false; // no more vertices can be moved - } - update_max_gain(A); - return true; -} - - -bool ratio_cut_partition::move_vertex_B2A(const graph &/*G*/, GTL::node& moved_node) -{ - if (!bucketB_empty) - { - node cons_nodeB = - compute_highest_ratio_node(bucketB[range_up(max_gainB)]); - bucketB[range_up(max_gainB)].erase(position_in_bucket[cons_nodeB]); - 
update_data_structure_B2A(cons_nodeB, true); - moved_node = cons_nodeB; - } - else - { - return false; // no more vertices can be moved - } - update_max_gain(B); - return true; -} - - -node ratio_cut_partition::compute_highest_ratio_node(nodes_t node_list) -{ - node cons_node = node_list.front(); - double ratio, best_ratio; - if (side[cons_node] == A) - { - best_ratio = ratio_of_node_A2B(cons_node); - } - else // side[cons_node] == B - { - best_ratio = ratio_of_node_B2A(cons_node); - } - - nodes_t::iterator node_it = node_list.begin(); - nodes_t::iterator nodes_end = node_list.end(); - while (node_it != nodes_end) - { - if (side[cons_node] == A) - { - ratio = ratio_of_node_A2B(*node_it); - } - else // side[cons_node] == B - { - ratio = ratio_of_node_B2A(*node_it); - } - if (ratio > best_ratio) // choose node with highest ratio - { - best_ratio = ratio; - cons_node = *node_it; - } - ++node_it; - } - return cons_node; -} - - -double ratio_cut_partition::cutratio() -{ - double number_of_nodes = (double)(nodes_on_sideA + nodes_on_sideB); - return ((double)cur_cutsize + number_of_nodes) / (double) - (node_weight_on_sideA * node_weight_on_sideB); -} - - -double ratio_cut_partition::ratio_of_node_A2B(const node cur_node) -{ - return (double)gain_value[cur_node] / - ((double)((node_weight_on_sideB + node_weight[cur_node]) * - (node_weight_on_sideA - node_weight[cur_node]))); -} - - -double ratio_cut_partition::ratio_of_node_B2A(const node cur_node) -{ - return (double)gain_value[cur_node] / - ((double)((node_weight_on_sideA + node_weight[cur_node]) * - (node_weight_on_sideB - node_weight[cur_node]))); -} - - -inline int ratio_cut_partition::range_up(const int gain_value) const -{ - return gain_value + (max_vertex_degree * max_edge_weight); -} - - -inline int ratio_cut_partition::range_down(const int index) const -{ - return index - (max_vertex_degree * max_edge_weight); -} - - -void ratio_cut_partition::update_data_structure_A2B(const node cur_node, - const bool init_mode) 
-{ - node_weight_on_sideA -= node_weight[cur_node]; - node_weight_on_sideB += node_weight[cur_node]; - --nodes_on_sideA; - ++nodes_on_sideB; - cur_cutsize -= gain_value[cur_node]; - cur_cutratio = cutratio(); - - // updating gain values - node::adj_edges_iterator adj_edge_it = cur_node.adj_edges_begin(); - node::adj_edges_iterator adj_edges_end = cur_node.adj_edges_end(); - while (adj_edge_it != adj_edges_end) - { - // delete cur_node from side A -#if 1 - unlockedA[*adj_edge_it].remove(cur_node); -#else - auto& ua = unlockedA[*adj_edge_it]; - ua.erase(std::remove(ua.begin(), ua.end(), cur_node), ua.end()); -#endif - --aside[*adj_edge_it]; - if (aside[*adj_edge_it] == 0) - { - nodes_t::iterator node_it = unlockedB[*adj_edge_it].begin(); - nodes_t::iterator nodes_end = unlockedB[*adj_edge_it].end(); - while (node_it != nodes_end) - { - update_bucketB(*node_it, gain_value[*node_it], - gain_value[*node_it] - edge_weight[*adj_edge_it], - init_mode); - gain_value[*node_it] -= edge_weight[*adj_edge_it]; - ++node_it; - } - } - else if (aside[*adj_edge_it] == 1) - { - nodes_t::iterator node_it = unlockedA[*adj_edge_it].begin(); - nodes_t::iterator nodes_end = unlockedA[*adj_edge_it].end(); - while (node_it != nodes_end) - { - update_bucketA(*node_it, gain_value[*node_it], - gain_value[*node_it] + edge_weight[*adj_edge_it], - init_mode); - gain_value[*node_it] += edge_weight[*adj_edge_it]; - ++node_it; - } - } - // add cur_node to side B - ++bside[*adj_edge_it]; - if (bside[*adj_edge_it] == 1) - { - nodes_t::iterator node_it = unlockedA[*adj_edge_it].begin(); - nodes_t::iterator nodes_end = unlockedA[*adj_edge_it].end(); - while (node_it != nodes_end) - { - update_bucketA(*node_it, gain_value[*node_it], - gain_value[*node_it] + edge_weight[*adj_edge_it], - init_mode); - gain_value[*node_it] += edge_weight[*adj_edge_it]; - ++node_it; - } - } - else if (bside[*adj_edge_it] == 2) - { - nodes_t::iterator node_it = unlockedB[*adj_edge_it].begin(); - nodes_t::iterator nodes_end = 
unlockedB[*adj_edge_it].end(); - while (node_it != nodes_end) - { - update_bucketB(*node_it, gain_value[*node_it], - gain_value[*node_it] - edge_weight[*adj_edge_it], - init_mode); - gain_value[*node_it] -= edge_weight[*adj_edge_it]; - ++node_it; - } - } - ++adj_edge_it; - } -} - - -void ratio_cut_partition::update_data_structure_B2A(const node cur_node, - const bool init_mode) -{ - node_weight_on_sideA += node_weight[cur_node]; - node_weight_on_sideB -= node_weight[cur_node]; - ++nodes_on_sideA; - --nodes_on_sideB; - cur_cutsize -= gain_value[cur_node]; - cur_cutratio = cutratio(); - - // updating gain values - node::adj_edges_iterator adj_edge_it = cur_node.adj_edges_begin(); - node::adj_edges_iterator adj_edges_end = cur_node.adj_edges_end(); - while (adj_edge_it != adj_edges_end) - { - // delete cur_node from side B -#if 1 - unlockedB[*adj_edge_it].remove(cur_node); -#else - auto& ub = unlockedB[*adj_edge_it]; - ub.erase(std::remove(ub.begin(), ub.end(), cur_node), ub.end()); -#endif - bside[*adj_edge_it] -= 1; - if (bside[*adj_edge_it] == 0) - { - nodes_t::iterator node_it = unlockedA[*adj_edge_it].begin(); - nodes_t::iterator nodes_end = unlockedA[*adj_edge_it].end(); - while (node_it != nodes_end) - { - update_bucketA(*node_it, gain_value[*node_it], - gain_value[*node_it] - edge_weight[*adj_edge_it], - init_mode); - gain_value[*node_it] -= edge_weight[*adj_edge_it]; - ++node_it; - } - } - else if (bside[*adj_edge_it] == 1) - { - nodes_t::iterator node_it = unlockedB[*adj_edge_it].begin(); - nodes_t::iterator nodes_end = unlockedB[*adj_edge_it].end(); - while (node_it != nodes_end) - { - update_bucketB(*node_it, gain_value[*node_it], - gain_value[*node_it] + edge_weight[*adj_edge_it], - init_mode); - gain_value[*node_it] += edge_weight[*adj_edge_it]; - ++node_it; - } - } - // add cur_node to side A - aside[*adj_edge_it] += 1; - if (aside[*adj_edge_it] == 1) - { - nodes_t::iterator node_it = unlockedB[*adj_edge_it].begin(); - nodes_t::iterator nodes_end = 
unlockedB[*adj_edge_it].end(); - while (node_it != nodes_end) - { - update_bucketB(*node_it, gain_value[*node_it], - gain_value[*node_it] + edge_weight[*adj_edge_it], - init_mode); - gain_value[*node_it] += edge_weight[*adj_edge_it]; - ++node_it; - } - } - else if (aside[*adj_edge_it] == 2) - { - nodes_t::iterator node_it = unlockedA[*adj_edge_it].begin(); - nodes_t::iterator nodes_end = unlockedA[*adj_edge_it].end(); - while (node_it != nodes_end) - { - update_bucketA(*node_it, gain_value[*node_it], - gain_value[*node_it] - edge_weight[*adj_edge_it], - init_mode); - gain_value[*node_it] -= edge_weight[*adj_edge_it]; - ++node_it; - } - } - ++adj_edge_it; - } -} - - -void ratio_cut_partition::update_bucketA(const node cur_node, - const int old_gain, const int new_gain, const bool init_mode) -{ - if ((init_mode) && (cur_node == source_node)) - { - return; // this one needs no update with init_mode - } - if (fixed[cur_node] != UNFIXED) - { - return; // fixed nodes need no update - } - bucketA[range_up(old_gain)].erase(position_in_bucket[cur_node]); - bucketA[range_up(new_gain)].push_front(cur_node); - position_in_bucket[cur_node] = bucketA[range_up(new_gain)].begin(); - if (max_gainA < new_gain) - { - max_gainA = new_gain; - } -} - - -void ratio_cut_partition::update_bucketB(const node cur_node, - const int old_gain, const int new_gain, const bool init_mode) -{ - if ((init_mode) && (cur_node == target_node)) - { - return; // this one needs no update with init_mode - } - if (fixed[cur_node] != UNFIXED) - { - return; // fixed nodes need no update - } - bucketB[range_up(old_gain)].erase(position_in_bucket[cur_node]); - bucketB[range_up(new_gain)].push_front(cur_node); - position_in_bucket[cur_node] = bucketB[range_up(new_gain)].begin(); - if (max_gainB < new_gain) - { - max_gainB = new_gain; - } -} - - -void ratio_cut_partition::update_max_gain(const side_type side) -{ - if ((side == A) && (!bucketA_empty)) - { - while (bucketA[range_up(max_gainA)].begin() == - 
bucketA[range_up(max_gainA)].end()) - { - --max_gainA; - if (range_up(max_gainA) < 0) - { - bucketA_empty = true; - return; - } - } - bucketA_empty = false; - } - if ((side == B) && (!bucketB_empty)) - { - while (bucketB[range_up(max_gainB)].begin() == - bucketB[range_up(max_gainB)].end()) - { - --max_gainB; - if (range_up(max_gainB) < 0) - { - bucketB_empty = true; - return; - } - } - bucketB_empty = false; - } -} - - -void ratio_cut_partition::clean_step(const graph& G) -{ - // clean unlocked* lists - graph::edge_iterator edge_it = G.edges_begin(); - graph::edge_iterator edges_end = G.edges_end(); - while (edge_it != edges_end) - { - unlockedA[*edge_it].clear(); - unlockedB[*edge_it].clear(); - ++edge_it; - } - - // clean buckets - for (int i = 0; i <= 2 * max_vertex_degree * max_edge_weight; i++) - { - bucketA[i].clear(); - bucketB[i].clear(); - } - bucketA.clear(); - bucketB.clear(); -} - - -void ratio_cut_partition::copy_side_node_map(const graph& G, - node_map& dest, const node_map source) const -{ - graph::node_iterator node_it = G.nodes_begin(); - graph::node_iterator nodes_end = G.nodes_end(); - while (node_it != nodes_end) - { - dest[*node_it] = source[*node_it]; - ++node_it; - } -} - - -void ratio_cut_partition::iterative_shifting(const graph& G) -{ - bool continue_loop = true; - double old_cutratio = cur_cutratio; - - while (continue_loop) - { - if (direction == LEFT_SHIFT) - { - init_data_structure(G); - if (fixed[source_node] == UNFIXED) - { - bucketA[range_up(gain_value[source_node])]. - erase(position_in_bucket[source_node]); - update_max_gain(A); - } - right_shift_op(G); - clean_step(G); - if (cur_cutratio < old_cutratio) - { - continue_loop = true; - direction = RIGHT_SHIFT; - old_cutratio = cur_cutratio; - } - else - { - continue_loop = false; - } - } - else // direction == RIGHT_SHIFT - { - init_data_structure(G); - if (fixed[target_node] == UNFIXED) - { - bucketB[range_up(gain_value[target_node])]. 
- erase(position_in_bucket[target_node]); - update_max_gain(B); - } - left_shift_op(G); - clean_step(G); - if (cur_cutratio < old_cutratio) - { - continue_loop = true; - direction = LEFT_SHIFT; - old_cutratio = cur_cutratio; - } - else - { - continue_loop = false; - } - } - } -} - - -void ratio_cut_partition::group_swapping(const graph& G) -{ - bool improved_cutratio; - - do - { - init_data_structure(G); - improved_cutratio = move_manager(G); - clean_step(G); - } while (improved_cutratio); -} - - -bool ratio_cut_partition::move_manager(const graph& G) -{ - int step_number = 0; - int best_tentative_move = 0; - int best_bal = node_weight_on_sideA * node_weight_on_sideB; - std::vector tentative_moves(G.number_of_nodes() + 1); - std::vector tentative_cutratio(G.number_of_nodes() + 1); - node moved_node; - tentative_cutratio[0] = cur_cutratio; - int best_cutsize = cur_cutsize; - - while (move_vertex(G, moved_node)) - { - ++step_number; - tentative_moves[step_number] = moved_node; - tentative_cutratio[step_number] = cur_cutratio; - if (tentative_cutratio[best_tentative_move] > cur_cutratio) - { - best_tentative_move = step_number; - best_cutsize = cur_cutsize; - best_bal = node_weight_on_sideA * node_weight_on_sideB; - } - else if (tentative_cutratio[best_tentative_move] == cur_cutratio) - { - if (node_weight_on_sideA * node_weight_on_sideB > best_bal) - { - best_tentative_move = step_number; - best_cutsize = cur_cutsize; - best_bal = node_weight_on_sideA * node_weight_on_sideB; - } - } - } - - for (int i = 1; i <= best_tentative_move; i++) - { - if (side[tentative_moves[i]] == A) - { - side[tentative_moves[i]] = B; - } - else // side[tentative_moves[i]] == B - { - side[tentative_moves[i]] = A; - } - } - cur_cutratio = tentative_cutratio[best_tentative_move]; - cur_cutsize = best_cutsize; - if (best_tentative_move > 0) // cutratio improved - { - return true; - } - return false; // best_move == 0 --> cutratio not improved -} - - -bool 
ratio_cut_partition::move_vertex(const graph &/*G*/, GTL::node& moved_node) -{ - bool consA_ok = false, consB_ok = false; - node cons_nodeA, cons_nodeB; - - if (!bucketA_empty) - { - cons_nodeA = - compute_highest_ratio_node(bucketA[range_up(max_gainA)]); - consA_ok = true; - if (node_weight_on_sideA - node_weight[cons_nodeA] == 0) - { - node temp_node = cons_nodeA; - bucketA[range_up(gain_value[cons_nodeA])]. - erase(position_in_bucket[cons_nodeA]); - update_max_gain(A); - if (!bucketA_empty) // nodes with smaller weight available? - { - cons_nodeA = compute_highest_ratio_node - (bucketA[range_up(max_gainA)]); - } - else - { - consA_ok = false; - } - bucketA_empty = false; - bucketA[range_up(gain_value[temp_node])].push_front(temp_node); - position_in_bucket[temp_node] = - bucketA[range_up(gain_value[temp_node])].begin(); - max_gainA = gain_value[temp_node]; - } - } - if (!bucketB_empty) - { - cons_nodeB = - compute_highest_ratio_node(bucketB[range_up(max_gainB)]); - consB_ok = true; - if (node_weight_on_sideB - node_weight[cons_nodeB] == 0) - { - node temp_node = cons_nodeB; - bucketB[range_up(gain_value[cons_nodeB])]. - erase(position_in_bucket[cons_nodeB]); - update_max_gain(B); - if (!bucketB_empty) // nodes with smaller weight available? - { - cons_nodeB = compute_highest_ratio_node - (bucketB[range_up(max_gainB)]); - } - else - { - consB_ok = false; - } - bucketB_empty = false; - bucketB[range_up(gain_value[temp_node])].push_front(temp_node); - position_in_bucket[temp_node] = - bucketB[range_up(gain_value[temp_node])].begin(); - max_gainB = gain_value[temp_node]; - } - } - - if (consA_ok && consB_ok) - { - double ratio_A2B = ratio_of_node_A2B(cons_nodeA); - double ratio_B2A = ratio_of_node_B2A(cons_nodeB); - if (ratio_A2B > ratio_B2A) - { - moved_node = cons_nodeA; - bucketA[range_up(max_gainA)]. 
- erase(position_in_bucket[cons_nodeA]); - update_data_structure_A2B(cons_nodeA, false); - } - else // ratio_A2B <= ratio_B2A - { - moved_node = cons_nodeB; - bucketB[range_up(max_gainB)]. - erase(position_in_bucket[cons_nodeB]); - update_data_structure_B2A(cons_nodeB, false); - } - } - else if (consA_ok) - { - moved_node = cons_nodeA; - bucketA[range_up(max_gainA)].erase(position_in_bucket[cons_nodeA]); - update_data_structure_A2B(cons_nodeA, false); - } - else if (consB_ok) - { - moved_node = cons_nodeB; - bucketB[range_up(max_gainB)].erase(position_in_bucket[cons_nodeB]); - update_data_structure_B2A(cons_nodeB, false); - } - else - { - return false; // no more vertices can be moved - } - update_max_gain(A); - update_max_gain(B); - return true; -} - - -void ratio_cut_partition::compute_cut_edges(const graph& G) -{ - cut_edges.clear(); - graph::edge_iterator edge_it = G.edges_begin(); - graph::edge_iterator edges_end = G.edges_end(); - while (edge_it != edges_end) - { - if (side[edge_it->source()] != side[edge_it->target()]) - { - cut_edges.push_back(*edge_it); - } - ++edge_it; - } -} - - -void ratio_cut_partition::compute_nodesAB(const graph& G) -{ - nodesA.clear(); - nodesB.clear(); - graph::node_iterator node_it = G.nodes_begin(); - graph::node_iterator nodes_end = G.nodes_end(); - while (node_it != nodes_end) - { - if (side[*node_it] == A) - { - nodesA.push_back(*node_it); - } - else // side[*node_it] == B - { - nodesB.push_back(*node_it); - } - ++node_it; - } -} - - -#ifdef _DEBUG -void ratio_cut_partition::print_bucketA() -{ - GTL_debug::init_debug(); - GTL_debug::os() << std::endl << "bucketA:" << std::endl; - for (int i = 0; i <= 2 * max_vertex_degree * max_edge_weight; i++) - { - GTL_debug::os() << range_down(i) << ": "; - nodes_t::iterator node_it = bucketA[i].begin(); - nodes_t::iterator nodes_end = bucketA[i].end(); - while (node_it != nodes_end) - { - GTL_debug::os() << *node_it << " "; - ++node_it; - } - GTL_debug::os() << std::endl; - } - 
GTL_debug::os() << std::endl; - GTL_debug::close_debug(); -} - - -void ratio_cut_partition::print_bucketB() -{ - GTL_debug::init_debug(); - GTL_debug::os() << std::endl << "bucketB:" << std::endl; - for (int i = 0; i <= 2 * max_vertex_degree * max_edge_weight; i++) - { - GTL_debug::os() << range_down(i) << ": "; - nodes_t::iterator node_it = bucketB[i].begin(); - nodes_t::iterator nodes_end = bucketB[i].end(); - while (node_it != nodes_end) - { - GTL_debug::os() << *node_it << " "; - ++node_it; - } - GTL_debug::os() << std::endl; - } - GTL_debug::os() << std::endl; - GTL_debug::close_debug(); -} -#endif // _DEBUG - - -__GTL_END_NAMESPACE - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/src/st_number.cpp b/src/Tracker/graph/GTL/src/st_number.cpp deleted file mode 100644 index a2ff89906..000000000 --- a/src/Tracker/graph/GTL/src/st_number.cpp +++ /dev/null @@ -1,259 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// st_number.cpp -// -//========================================================================== -// $Id: st_number.cpp,v 1.10 2001/11/07 13:58:11 pick Exp $ - -#include - -#include - -__GTL_BEGIN_NAMESPACE - -pathfinder::pathfinder (const graph& G, GTL::edge st, GTL::node s) -{ - node t = s.opposite (st); - dfs_num.init (G, 0); - low_num.init (G); - tree.init(G, edges_t()); - back.init(G, edges_t()); - forward.init(G, edges_t()); - - // - // There is a problem with node/edge maps of iterators with Visual C++ - // which I dont fully understand at the moment. Anyway the init for the - // maps below is only needed to allocate memory, which is done anyway, when - // values are assigned to it. 
- // - -#ifndef __GTL_MSVCC - to_low.init (G); - to_father.init (G); - pos.init (G); -#endif - - used.init (G,0); - act_dfs_num = 1; - new_nodes = G.number_of_nodes(); - is_biconn = true; - - // - // Do DFS with biconnectivity extensions. - // - - dfs_num[t] = act_dfs_num++; - low_num[t] = dfs_num[t]; - new_nodes--; - - dfs_sub (s, t); - - if (new_nodes != 0) { - is_biconn = false; - } - - used[t] = used[s] = 1; -} - - -void pathfinder::dfs_sub (GTL::node& curr, GTL::node& father) -{ - low_num[curr] = dfs_num[curr] = act_dfs_num++; - new_nodes--; - - node::adj_edges_iterator it = curr.adj_edges_begin(); - node::adj_edges_iterator end = curr.adj_edges_end(); - - while (it != end) { - edge adj = *it; - node opp = curr.opposite(adj); - - if (dfs_num[opp] == 0) { - - edges_t::iterator tmp = tree[curr].insert (tree[curr].end(), adj); - to_father[opp] = tmp; - - dfs_sub (opp, curr); - - if (low_num[opp] < low_num[curr]) { - low_num[curr] = low_num[opp]; - to_low[curr] = tmp; - } - - if (low_num[opp] >= dfs_num[curr]) { - is_biconn = false; - } - - } else if (opp != father && dfs_num[opp] < dfs_num[curr]) { - edges_t::iterator back_pos = back[curr].insert (back[curr].end(), adj); - edges_t::iterator forward_pos = forward[opp].insert (forward[opp].end(), adj); - pos[adj] = pos_pair (forward_pos, back_pos); - - if (dfs_num[opp] < low_num[curr]) { - low_num[curr] = dfs_num[opp]; - to_low[curr] = back_pos; - } - } - - ++it; - } -} - - -//-------------------------------------------------------------------------- -// ITERATOR -//-------------------------------------------------------------------------- - -pathfinder::const_iterator::const_iterator (pathfinder& _pf, GTL::node n) : - pf (_pf) -{ - if (!pf.back[n].empty()) { - edge back = pf.back[n].front(); - curr = n.opposite (back); - pf.used[curr] = 1; - pf.back[n].pop_front(); - pf.forward[curr].erase (pf.pos[back].first); - state = END; - - } else if (!pf.tree[n].empty()) { - curr = n.opposite (pf.tree[n].front()); - 
pf.used[curr] = 1; - pf.tree[n].pop_front(); - state = DOWN; - - } else if (!pf.forward[n].empty()) { - edge forward = pf.forward[n].front(); - curr = n.opposite (forward); - pf.forward[n].pop_front(); - pf.back[curr].erase (pf.pos[forward].second); - - if (pf.used[curr]) { - state = END; - } else { - pf.used[curr] = 1; - state = UP; - } - } -} - -pathfinder::const_iterator& pathfinder::const_iterator::operator++ () -{ - edges_t::iterator tmp; - edge adj; - node opp; - - switch (state) { - case END : - curr = node(); - break; - - case UP : - tmp = pf.to_father[curr]; - curr = curr.opposite (*tmp); - pf.tree[curr].erase (tmp); - - if (pf.used[curr]) { - state = END; - } else { - pf.used[curr] = 1; - } - - break; - - case DOWN : - tmp = pf.to_low[curr]; - adj = *tmp; - opp = curr.opposite (adj); - - if (pf.used[opp]) { - pf.forward[opp].erase (pf.pos[adj].first); - pf.back[curr].erase (tmp); - state = END; - } else { - pf.tree[curr].erase (tmp); - pf.used[opp] = 1; - } - - curr = opp; - break; - - default: - assert (0); - } - - return *this; -} - - -pathfinder::const_iterator pathfinder::const_iterator::operator++ (int) -{ - const_iterator tmp = *this; - operator++(); - return tmp; -} - - -//-------------------------------------------------------------------------- -// ST-NUMBER -//-------------------------------------------------------------------------- - -int st_number::check (GTL::graph& G) -{ - if (G.is_directed()) return GTL_ERROR; - - pf = new pathfinder (G, st, s); - - return pf->is_valid() ? 
GTL_OK : GTL_ERROR; -} - - -int st_number::run (GTL::graph& /*G*/) -{ - nodes_t order; - node t = s.opposite (st); - order.push_back (t); - node tmp = s; - pathfinder::const_iterator end = pf->end(); - int act_st = 1; - - while (tmp != t) - { - pathfinder::const_iterator it = pf->path(tmp); - nodes_t::iterator pos; - - if (it == end) - { - st_num[tmp] = act_st++; - st_ord.push_back(tmp); - tmp = order.back(); - order.pop_back(); - - } - else - { - pos = order.end(); - - while (it != end) - { - pos = order.insert(pos, *it); - ++it; - } - - order.erase(pos); - } - } - - st_num[t] = act_st; - st_ord.push_back (t); - - delete pf; - - return GTL_OK; -} - -__GTL_END_NAMESPACE - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/GTL/src/topsort.cpp b/src/Tracker/graph/GTL/src/topsort.cpp deleted file mode 100644 index 4e30ed115..000000000 --- a/src/Tracker/graph/GTL/src/topsort.cpp +++ /dev/null @@ -1,63 +0,0 @@ -/* This software is distributed under the GNU Lesser General Public License */ -//========================================================================== -// -// topsort.cpp -// -//========================================================================== -// $Id: topsort.cpp,v 1.7 2001/11/07 13:58:12 pick Exp $ - -#include - -__GTL_BEGIN_NAMESPACE - -//-------------------------------------------------------------------------- -// algorithm - interface -//-------------------------------------------------------------------------- - - -void topsort::reset () -{ - dfs::reset(); - acyclic = true; - top_order.erase (top_order.begin(), top_order.end());; -} - -int topsort::check (GTL::graph& G) -{ - return G.is_directed() ? 
GTL_OK : GTL_ERROR; -} - - - -//-------------------------------------------------------------------------- -// Handler -//-------------------------------------------------------------------------- - - -void topsort::init_handler (GTL::graph& G) -{ - top_numbers.init (G, 0); - act_top_num = G.number_of_nodes(); -} - - -void topsort::leave_handler (GTL::graph& /*G*/, GTL::node& n, GTL::node& /*f*/) -{ - top_numbers[n] = act_top_num; - act_top_num--; - top_order.push_front (n); -} - - -void topsort::old_adj_node_handler (GTL::graph& /*G*/, GTL::edge& /*adj*/, GTL::node& opp) -{ - if (top_numbers[opp] == 0) { - acyclic = false; - } -} - -__GTL_END_NAMESPACE - -//-------------------------------------------------------------------------- -// end of file -//-------------------------------------------------------------------------- diff --git a/src/Tracker/graph/components.cpp b/src/Tracker/graph/components.cpp deleted file mode 100644 index b78ce8ed0..000000000 --- a/src/Tracker/graph/components.cpp +++ /dev/null @@ -1,111 +0,0 @@ -// $Id: components.cpp,v 1.1 2006/01/18 17:40:47 rdmp1c Exp $ - -// -// Simple program to extract all the components of a GML format graph -// - - -#include -#include - -#include -#include - -#include "mygraph.h" - - - -int main (int argc, const char * argv[]) -{ - if (argc < 2) - { - cout << "Usage: components " << endl; - exit(1); - } - char filename[256]; - strcpy (filename, argv[1]); - - // --------------------------------------------------------- - // Read graph - - MyGraph G; - - GML_error err = G.load (filename); - if (err.err_num != GML_OK) - { - cerr << "Error (" << err.err_num << ") loading graph from file \"" << filename << "\""; - switch (err.err_num) - { - case GML_FILE_NOT_FOUND: cerr << "A file with that name doesn't exist."; break; - case GML_TOO_MANY_BRACKETS: cerr << "A mismatch of brackets was detected, i.e. 
there were too many closing brackets (])."; break; - case GML_OPEN_BRACKET: cerr << "Now, there were too many opening brackets ([)"; break; - case GML_TOO_MANY_DIGITS: cerr << "The number of digits a integer or floating point value can have is limited to 1024, this should be enough :-)"; break; - case GML_PREMATURE_EOF: cerr << "An EOF occured, where it wasn't expected, e.g. while scanning a string."; break; - case GML_SYNTAX: cerr << "The file isn't a valid GML file, e.g. a mismatch in the key-value pairs."; break; - case GML_UNEXPECTED: cerr << "A character occured, where it makes no sense, e.g. non-numerical characters"; break; - case GML_OK: break; - } - cerr << endl; - exit(1); - } - else - { - cout << "Graph read from file \"" << filename << "\" has " << G.number_of_nodes() << " nodes and " << G.number_of_edges() << " edges" << endl; - } - - // Components - G.make_undirected(); - - if (!G.is_connected()) - { - // 2. Get components - components cp; - if (cp.check(G) != algorithm::GTL_OK) - { - cerr << "component check failed at line " << __LINE__ << endl; - exit(1); - } - else - { - if (cp.run(G) != algorithm::GTL_OK) - { - cerr << "component algorithm failed at line " << __LINE__ << endl; - exit(1); - } - else - { - cout << "Graph has " << cp.number_of_components() << " components" << endl; - - G.make_directed(); - - // Dump components - int count = 0; - components::component_iterator it = cp.components_begin (); - components::component_iterator end = cp.components_end (); - while (it != end) - { - - list comp = (*it).first; - - G.induced_subgraph (comp); - - char buf[64]; - sprintf (buf, "%d.%d.gml", comp.size(), count); - - G.save(buf); - - count++; - - G.restore_graph(); - - it++; - } - - - } - } - } - - - return 0; -} diff --git a/src/Tracker/graph/fheap.c b/src/Tracker/graph/fheap.c deleted file mode 100644 index 98295fda0..000000000 --- a/src/Tracker/graph/fheap.c +++ /dev/null @@ -1,488 +0,0 @@ -// $Id: fheap.c,v 1.1.1.1 2003/11/05 15:19:14 rdmp1c Exp 
$ - -/** - * @file fheap.c - * - * Fibonacci heap - * - */ - -/* -This code comes from -A Comparison of Data Structures for Dijkstra's -Single Source Shortest Path Algorithm by Shane Saunders (Department -of Computer Science, University of Canterbury, NZ). -The code itself is available from Tadao Takaoka's - Algorithm Repository Home Page -*/ - - -/*** Fibonacci Heap Implementation ***/ -/* - * Shane Saunders - */ -#include -#include -#if FHEAP_DUMP -#include -#endif -#include "fheap.h" - - - -/*** Prototypes of functions only visible within this file. ***/ -void fh_dump_nodes(fheap_node_t *ptr, int level); -void fh_meld(fheap_t *h, fheap_node_t *tree_list); - - - -/*** Definitions for functions that are visible outside this file. ***/ - -/* fh_alloc() - creates and and returns a pointer to a F-heap which can contian - * up to max_nodes nodes. - */ -fheap_t *fh_alloc(int max_nodes) -{ - fheap_t *h; -#if FHEAP_DUMP -printf("alloc, "); -#endif - - /* Create the heap. */ - h = (fheap_t *)malloc(sizeof(fheap_t)); - - h->max_trees = (int)(1.0 + 1.44 * log(max_nodes)/log(2.0)); - h->max_nodes = max_nodes; - h->trees = (fheap_node_t **)calloc(h->max_trees, sizeof(fheap_node_t *)); - h->nodes = (fheap_node_t **)calloc(max_nodes, sizeof(fheap_node_t *)); - h->n = 0; - - /* The value of the heap helps to keep track of the maximum rank while - * nodes are inserted or deleted. - */ - h->value = 0; - - /* For experimental purposes, we keep a count of the number of key - * comparisons. - */ - h->key_comps = 0; - -#if FHEAP_DUMP -printf("alloc-exited, "); -#endif - return h; -} - - -/* fh_free() - destroys the heap pointed to by h, freeing up any space that was - * used by it. 
- */ -void fh_free(fheap_t *h) -{ - int i; - -#if FHEAP_DUMP -printf("free, "); -#endif - - for(i = 0; i < h->max_nodes; i++) { - free(h->nodes[i]); - } - - free(h->nodes); - free(h->trees); - free(h); - -#if FHEAP_DUMP -printf("free-exited, "); -#endif -} - - -/* fh_insert() - creates and inserts new a node representing vertex_no with key - * k into the heap h. - */ -void fh_insert(fheap_t *h, int vertex_no, long k) -{ - fheap_node_t *newn; - -#if FHEAP_DUMP -printf("insert, "); -#endif - - /* Create an initialise the new node. */ - newn = (fheap_node_t *)malloc(sizeof(fheap_node_t)); - newn->child = NULL; - newn->left = newn->right = newn; - newn->rank = 0; - newn->vertex_no = vertex_no; - newn->key = k; - - /* Maintain a pointer vertex_no's new node in the heap. */ - h->nodes[vertex_no] = newn; - - /* Meld the new node into the heap. */ - fh_meld(h, newn); - - /* Update the heaps node count. */ - h->n++; - -#if FHEAP_DUMP -printf("insert-exited, "); -#endif -} - - -/* fh_delete_min() - deletes the minimum node from the heap pointed to by h and - * returns its vertex number. - */ -int fh_delete_min(fheap_t *h) -{ - fheap_node_t *min_node, *child, *next; - long k, k2; - int r, v, vertex_no; - -#if FHEAP_DUMP -printf("delete_min, "); -#endif - - /* First we determine the maximum rank in the heap. */ - v = h->value; - r = -1; - while(v) { - v = v >> 1; - r++; - }; - - /* Now determine which root node is the minimum. */ - min_node = h->trees[r]; - k = min_node->key; - while(r > 0) { - r--; - next = h->trees[r]; - if(next) { - if((k2 = next->key) < k) { - k = k2; - min_node = next; - } - h->key_comps++; - } - } - - /* We remove the minimum node from the heap but keep a pointer to it. */ - r = min_node->rank; - h->trees[r] = NULL; - h->value -= (1 << r); - - child = min_node->child; - if(child) fh_meld(h, child); - - /* Record the vertex no of the old minimum node before deleting it. 
*/ - vertex_no = min_node->vertex_no; - h->nodes[vertex_no] = NULL; - free(min_node); - h->n--; - -#if FHEAP_DUMP -printf("delete_min-exited, "); -#endif - - return vertex_no; -} - - -/* fh_decrease_key() - decreases the key used for vertex, vertex_no, to - * new_value. No check is made to ensure that new_value is in-fact less than - * the current value so it is up to the user of this function to ensure that - * it is. - */ -void fh_decrease_key(fheap_t *h, int vertex_no, long new_value) -{ - fheap_node_t *cut_node, *parent, *new_roots, *r, *l; - int prev_rank; - -#if FHEAP_DUMP -printf("decrease_key on vn = %d, ", vertex_no); -#endif - - /* Obtain a pointer to the decreased node and its parent then decrease the - * nodes key. - */ - cut_node = h->nodes[vertex_no]; - parent = cut_node->parent; - cut_node->key = new_value; - - /* No reinsertion occurs if the node changed was a root. */ - if(!parent) { -#if FHEAP_DUMP -printf("decrease_key-exited, "); -#endif - return; - } - - /* Update the left and right pointers of cut_node and its two neighbouring - * nodes. - */ - l = cut_node->left; - r = cut_node->right; - l->right = r; - r->left = l; - cut_node->left = cut_node->right = cut_node; - - /* Initially the list of new roots contains only one node. */ - new_roots = cut_node; - - /* While there is a parent node that is marked a cascading cut occurs. */ - while(parent && parent->marked) { - - /* Decrease the rank of cut_node's parent an update its child pointer. - */ - parent->rank--; - if(parent->rank) { - if(parent->child == cut_node) parent->child = r; - } - else { - parent->child = NULL; - } - - /* Update the cut_node and parent pointers to the parent. */ - cut_node = parent; - parent = cut_node->parent; - - /* Update the left and right pointers of cut_nodes two neighbouring - * nodes. - */ - l = cut_node->left; - r = cut_node->right; - l->right = r; - r->left = l; - - /* Add cut_node to the list of nodes to be reinserted as new roots. 
*/ - l = new_roots->left; - new_roots->left = l->right = cut_node; - cut_node->left = l; - cut_node->right = new_roots; - new_roots = cut_node; - } - - /* If the root node is being relocated then update the trees[] array. - * Otherwise mark the parent of the last node cut. - */ - if(!parent) { - prev_rank = cut_node->rank + 1; - h->trees[prev_rank] = NULL; - h->value -= (1 << prev_rank); - } - else { - /* Decrease the rank of cut_node's parent an update its child pointer. - */ - parent->rank--; - if(parent->rank) { - if(parent->child == cut_node) parent->child = r; - } - else { - parent->child = NULL; - } - - parent->marked = 1; - } - - /* Meld the new roots into the heap. */ - fh_meld(h, new_roots); - -#if FHEAP_DUMP -printf("decrease_key-exited, "); -#endif -} - - - -/*** Definitions of functions that are only visible within this file. ***/ - -/* fh_meld() - melds the linked list of trees pointed to by *tree_list into - * the heap pointed to by h. - */ -void fh_meld(fheap_t *h, fheap_node_t *tree_list) -{ - fheap_node_t *first, *next, *node_ptr, *new_root, *temp, *temp2, *lc, *rc; - int r; - -#if FHEAP_DUMP -printf("meld: "); -#endif - - /* We meld each tree in the circularly linked list back into the root level - * of the heap. Each node in the linked list is the root node of a tree. - * The circularly linked list uses the sibling pointers of nodes. This - * makes melding of the child nodes from a delete_min operation simple. - */ - node_ptr = first = tree_list; - - do { - -#if FHEAP_DUMP -printf("%d, ", GTL::node_ptr->vertex_no); -#endif - - /* Keep a pointer to the next node and remove sibling and parent links - * from the current node. node_ptr points to the current node. - */ - next = node_ptr->right; - node_ptr->right = node_ptr->left = node_ptr; - node_ptr->parent = NULL; - - /* We merge the current node, GTL::node_ptr, by inserting it into the - * root level of the heap. 
- */ - new_root = node_ptr; - r = node_ptr->rank; - - /* This loop inserts the new root into the heap, possibly restructuring - * the heap to ensure that only one tree for each degree exists. - */ - do { - - /* Check if there is already a tree of degree r in the heap. - * If there is then we need to link it with new_root so it will be - * reinserted into a new place in the heap. - */ - if((temp = h->trees[r])) { - - /* temp will be linked to new_root and relocated so we no - * longer will have a tree of degree r. - */ - h->trees[r] = NULL; - h->value -= (1 << r); - - /* Swap temp and new_root if necessary so that new_root always - * points to the root node which has the smaller key of the - * two. - */ - if(temp->key < new_root->key) { - temp2 = new_root; - new_root = temp; - temp = temp2; - } - h->key_comps++; - - /* Link temp with new_root, making sure that sibling pointers - * get updated if rank is greater than 0. Also, increase r for - * the next pass through the loop since the rank of new has - * increased. - */ - if(r++ > 0) { - rc = new_root->child; - lc = rc->left; - temp->left = lc; - temp->right = rc; - lc->right = rc->left = temp; - } - new_root->child = temp; - new_root->rank = r; - temp->parent = new_root; - temp->marked = 0; - } - /* Otherwise if there is not a tree of degree r in the heap we - * allow new_root, which possibly carries moved trees in the heap, - * to be a tree of degree r in the heap. - */ - else { - - h->trees[r] = new_root; - h->value += (1 << r);; - - /* NOTE: Because new_root is now a root we ensure it is - * marked. - */ - new_root->marked = 1; - } - - /* Note that temp will be NULL if and only if there was not a tree - * of degree r. - */ - } while(temp); - - node_ptr = next; - - } while(node_ptr != first); - -#if FHEAP_DUMP -printf("meld-exited, "); -#endif -} - - - -/*** Debugging functions ***/ - -/* Recursively print the nodes of a Fibonacci heap. 
*/ -#define FHEAP_DUMP 0 -#if FHEAP_DUMP -void fh_dump_nodes(fheap_node_t *ptr, int level) -{ - fheap_node_t *child_ptr, *partner; - int i, ch_count; - - /* Print leading whitespace for this level. */ - for(i = 0; i < level; i++) printf(" "); - - printf("%d(%ld)[%d]\n", ptr->vertex_no, ptr->key, ptr->rank); - - if((child_ptr = ptr->child)) { - child_ptr = ptr->child->right; - - ch_count = 0; - - do { - fh_dump_nodes(child_ptr, level+1); - if(child_ptr->dim > ptr->dim) { - for(i = 0; i < level+1; i++) printf(" "); - printf("error(dim)\n"); exit(1); - } - if(child_ptr->parent != ptr) { - for(i = 0; i < level+1; i++) printf(" "); - printf("error(parent)\n"); - } - child_ptr = child_ptr->right; - ch_count++; - } while(child_ptr != ptr->child->right); - - if(ch_count != ptr->dim) { - for(i = 0; i < level; i++) printf(" "); - printf("error(ch_count)\n"); exit(1); - } - } - else { - if(ptr->dim != 0) { - for(i = 0; i < level; i++) printf(" "); - printf("error(dim)\n"); exit(1); - } - } - -} -#endif - -/* Print out a Fibonacci heap. */ -#if FHEAP_DUMP -void fh_dump(fheap_t *h) -{ - int i; - fheap_node_t *ptr; - - printf("\n"); - printf("value = %d\n", h->value); - printf("array entries 0..max_trees ="); - for(i=0; imax_trees; i++) { - printf(" %d", h->trees[i] ? 
1 : 0 ); - } - printf("\n\n"); - for(i=0; imax_trees; i++) { - if((ptr = h->trees[i])) { - printf("tree %d\n\n", i); - fh_dump_nodes(ptr, 0); - printf("\n"); - } - } - fflush(stdout); -} -#endif diff --git a/src/Tracker/graph/fheap.h b/src/Tracker/graph/fheap.h deleted file mode 100644 index 15a014fe5..000000000 --- a/src/Tracker/graph/fheap.h +++ /dev/null @@ -1,149 +0,0 @@ -// $Id : $ - -/** - * @file fheap.h - * - * Fibonacci heap - * - */ - -#ifndef FHEAP_H -#define FHEAP_H - -// rdmp -#ifdef __cplusplus -extern "C" { -#endif - - -/* -This code comes from -A Comparison of Data Structures for Dijkstra's -Single Source Shortest Path Algorithm by Shane Saunders (Department -of Computer Science, University of Canterbury, NZ). -The code itself is available from Tadao Takaoka's - Algorithm Repository Home Page -*/ - -/*** Header File for the Fibonacci Heap Implementation ***/ -/* - * Shane Saunders - */ - - - -/* Option to allow printing of debugging information. Use 1 for yes, or 0 for - * no. - */ -#define FHEAP_DUMP 0 - -#if FHEAP_DUMP - #include -#endif - - - -/*** Definitions of structure types. ***/ - -/* The structure type for Fibonacci heap nodes. - * - * Nodes have the following pointers: - * parent - a pointer to the nodes parent node (if any). - * child - a pointer to a child node (typically the highest rank child). - * left, right - sibling pointers which provide a circular doubly linked list - * containing all the parents nodes children. - * - * The remaining structure fields are: - * rank - the nodes rank, that is, the number of children it has. - * `key' - the nodes key. - * vertex_no - the number of the graph vertex that the node corresponds to. - * Vertex numbering in the graph should be: - * 1, 2, 3, ... max_vertex. 
- */ -typedef struct fheap_node { - struct fheap_node *parent; - struct fheap_node *left, *right; - struct fheap_node *child; - int rank; - int marked; - long key; - int vertex_no; -} fheap_node_t; - -/* The structure type for a Fibonacci heap. - * - * trees - An array of pointers to trees at root level in the heap. Entry i - * in the array points to the root node of a tree that has nodes of - * dimension i on the main trunk. - * nodes - An array of pointers to nodes in the heap. Nodes are indexed - * according to their vertex number. This array can then be used to - * look up the node for corresponding to a vertex number, and is - * useful when freeing space taken up by the heap. - * max_nodes - The maximum number of nodes allowed in the heap. - * max_trees - The maximum number of trees allowed in the heap (calculated from - * max_nodes). - * n - The current number of nodes in the heap. - * value - The binary value represented by trees in the heap. - * By maintaining this it is easy to keep track of the maximum rank - * tree in the heap. - * key_comps - can be used for experimental purposes when counting the number - * of key comparisons. - */ -typedef struct fheap { - fheap_node_t **trees; - fheap_node_t **nodes; - int max_nodes, max_trees, n, value; - long key_comps; -} fheap_t; - - - -/*** Function prototypes. ***/ - -/* Fibonacci heap functions. */ -/* Note that fheap_t pointers are used so that function definitions are compatible - * with those of other heaps. This allows any type heap to be given as an - * argument to a particular algorithm. It is up to the user to ensure the - * correct heap type is passed to the given functions. - */ - -/* fh_alloc() - creates and and returns a pointer to a F-heap which can contain - * up to max_nodes nodes. - */ -fheap_t *fh_alloc(int max_nodes); - -/* fh_free() - destroys the heap pointed to by h, freeing up any space that was - * used by it. 
- */ -void fh_free(fheap_t *h); - -/* fh_insert() - creates and inserts new a node representing vertex_no with key - * k into the heap h. - */ -void fh_insert(fheap_t *h, int vertex_no, long k); - -/* fh_delete_min() - deletes the minimum node from the heap pointed to by h and - * returns its vertex number. - */ -int fh_delete_min(fheap_t *h); - -/* fh_decrease_key() - decreases the key used for vertex, vertex_no, to - * new_value. No check is made to ensure that new_value is in-fact less than - * the current value so it is up to the user of this function to ensure that - * it is. - */ -void fh_decrease_key(fheap_t *h, int vertex_no, long new_value); - -/* Debugging functions. */ -#if FHEAP_DUMP -void fh_dump(fheap_t *h); -#endif - - -// rdmp -#ifdef __cplusplus -} -#endif - - -#endif diff --git a/src/Tracker/graph/gdefs.h b/src/Tracker/graph/gdefs.h deleted file mode 100644 index 8eada9fed..000000000 --- a/src/Tracker/graph/gdefs.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifndef GDEFH -#define GDEFH - -// Determine which platform we are building for - - - -#if __BORLANDC__ // Borland specific options - #define GPORT_WINDOWS 1 // Windows - #define GPORT_MAC 0 -#endif - -#ifdef __MWERKS__ // Metrowerks specific options - #ifdef __INTEL__ - #define GPORT_WINDOWS 1 // Windows - #define GPORT_MAC 0 - #define __WIN32__ // MetroWerks only supports Win32 - #endif /* __INTEL__ */ - - #ifdef macintosh // MacOS - #ifdef __WXMAC__ - #define USE_WXWINDOWS 1 // wxWindows - #define GPORT_MAC 0 - #define GPORT_WINDOWS 0 - #else - #define GPORT_MAC 1 // Macintosh - #define GPORT_WINDOWS 0 - #endif - #endif /* macintosh */ -#endif - -#ifdef __GNUC__ - #define GPORT_MAC 0 // Assume gcc implies X windows - #define GPORT_WINDOWS 0 -#endif - -#endif - diff --git a/src/Tracker/graph/gml2dot.cpp b/src/Tracker/graph/gml2dot.cpp deleted file mode 100644 index 5720300e1..000000000 --- a/src/Tracker/graph/gml2dot.cpp +++ /dev/null @@ -1,63 +0,0 @@ -// $Id: gml2dot.cpp,v 1.1 2004/03/30 21:45:45 
rdmp1c Exp $ - -// -// Simple program to convert a GML format graph into a DOT format graph -// for display by GraphViz -// - - -#include -#include - -#include -#include - -#include "mygraph.h" - - - -int main (int argc, const char * argv[]) -{ - if (argc < 2) - { - cout << "Usage: gml2dot " << endl; - exit(1); - } - char filename[256]; - strcpy (filename, argv[1]); - - // --------------------------------------------------------- - // Read graph - - MyGraph G; - - GML_error err = G.load (filename); - if (err.err_num != GML_OK) - { - cerr << "Error (" << err.err_num << ") loading graph from file \"" << filename << "\""; - switch (err.err_num) - { - case GML_FILE_NOT_FOUND: cerr << "A file with that name doesn't exist."; break; - case GML_TOO_MANY_BRACKETS: cerr << "A mismatch of brackets was detected, i.e. there were too many closing brackets (])."; break; - case GML_OPEN_BRACKET: cerr << "Now, there were too many opening brackets ([)"; break; - case GML_TOO_MANY_DIGITS: cerr << "The number of digits a integer or floating point value can have is limited to 1024, this should be enough :-)"; break; - case GML_PREMATURE_EOF: cerr << "An EOF occured, where it wasn't expected, e.g. while scanning a string."; break; - case GML_SYNTAX: cerr << "The file isn't a valid GML file, e.g. a mismatch in the key-value pairs."; break; - case GML_UNEXPECTED: cerr << "A character occured, where it makes no sense, e.g. 
non-numerical characters"; break; - case GML_OK: break; - } - cerr << endl; - exit(1); - } - else - { - cout << "Graph read from file \"" << filename << "\" has " << G.number_of_nodes() << " nodes and " << G.number_of_edges() << " edges" << endl; - } - - char dotfilename[256]; - strcpy (dotfilename, filename); - strcat (dotfilename, ".dot"); - G.save_dot (dotfilename); - - return 0; -} diff --git a/src/Tracker/graph/gml2nestedsql.cpp b/src/Tracker/graph/gml2nestedsql.cpp deleted file mode 100644 index a0f768c19..000000000 --- a/src/Tracker/graph/gml2nestedsql.cpp +++ /dev/null @@ -1,288 +0,0 @@ -// $Id: gml2nestedsql.cpp,v 1.2 2006/03/24 15:06:57 rdmp1c Exp $ - - -#include -#include - -#include -#include - -#include "mygraph.h" -#include "mytree.h" - -#include - - -void Tokenise (std::string s, std::string delimiters, std::vector &tokens); - -void Tokenise (std::string s, std::string delimiters, std::vector &tokens) -{ - tokens.erase (tokens.begin(), tokens.end()); - int start, stop; - int n = s.length(); - start = s.find_first_not_of (delimiters); - while ((start >= 0) && (start < n)) - { - stop = s.find_first_of (delimiters, start); - if ((stop < 0) || (stop > n)) stop = n; - tokens.push_back (s.substr(start, stop - start)); - start = stop + delimiters.length(); - } -} - -string escape_string (string s); - -//------------------------------------------------- -// Escape string so it can be INSERTed into a SQL database -string escape_string (string s) -{ - if (s.size() == 0) return s; - - string result = ""; - for (unsigned int i = 0;i left_visitation; -node_map right_visitation; -node_map num_children; -node_map child_number; -node_map path; -node_map depth; - -// All purpose traversal of tree (ugly) -class mydfs : public dfs -{ -public: - mydfs () : dfs () { visit = 1; current_path = ""; height = 0; }; - virtual void entry_handler (graph &G, node &n, node &f) - { - // Depth of node - depth[n] = height; - height++; - - // SQL visitation number - left_visitation[n] 
= visit++; - - // Path as a string of child numbers - string p = current_path; - if (n.indeg() > 0) - { - char buf[32]; - sprintf (buf, "/%d", child_number[n]); - p += buf; - path[n] = p; - } - current_path = p; - - } - virtual void leave_handler (graph &G, node &n, node &f) - { - height--; - right_visitation[n] = visit++; - - // Parent - if (n.indeg() == 0) - current_path = ""; - else - { - edge e = (*n.in_edges_begin()); - current_path = path[e.source()]; - } - } -protected: - int visit; - int height; - string current_path; -}; - - -int main (int argc, const char * argv[]) -{ - if (argc < 2) - { - cout << "Usage: graph " << endl; - exit(1); - } - char filename[256]; - strcpy (filename, argv[1]); - - // --------------------------------------------------------- - // Read graph - - MyTree G; - - G.read_labels_as_weights(); - t0 = clock(); - GML_error err = G.load (filename); - t1 = clock(); - if (err.err_num != GML_OK) - { - cerr << "Error (" << err.err_num << ") loading graph from file \"" << filename << "\""; - switch (err.err_num) - { - case GML_FILE_NOT_FOUND: cerr << "A file with that name doesn't exist."; break; - case GML_TOO_MANY_BRACKETS: cerr << "A mismatch of brackets was detected, i.e. there were too many closing brackets (])."; break; - case GML_OPEN_BRACKET: cerr << "Now, there were too many opening brackets ([)"; break; - case GML_TOO_MANY_DIGITS: cerr << "The number of digits a integer or floating point value can have is limited to 1024, this should be enough :-)"; break; - case GML_PREMATURE_EOF: cerr << "An EOF occured, where it wasn't expected, e.g. while scanning a string."; break; - case GML_SYNTAX: cerr << "The file isn't a valid GML file, e.g. a mismatch in the key-value pairs."; break; - case GML_UNEXPECTED: cerr << "A character occured, where it makes no sense, e.g. 
non-numerical characters"; break; - case GML_OK: break; - } - cerr << endl; - exit(1); - } - else - { - cout << "Graph read from file \"" << filename << "\" has " << G.number_of_nodes() << " nodes and " << G.number_of_edges() << " edges" << endl; - } - ShowTimeUsed (t0, t1); - - // --------------------------------------------------------- - // Test that it is a tree - if (is_tree (G)) - { - cout << "Is a tree" << endl; - } - else - { - cout << "Graph is not a tree" << endl; - node v; - forall_nodes(v,G) - if ( v.indeg () < 1 ) cout << G.get_node_label(v) << " has no parent" << endl; - if (!G.is_connected() ) - cout << "Not connected"; - - exit(1); - } - - node root = G.root(); - - // --------------------------------------------------------- - // Assign ids to nodes - node_map id (G,0); - - // Extract id's from name - { - node v; - forall_nodes(v,G) - { - string s = G.get_node_label(v); - int n = atol (s.c_str()); - id[v] = n; - } - } - - // --------------------------------------------------------- - // Compute number of children for each node, and - // assign each node its child number - { - node v; - forall_nodes(v,G) - { - num_children[v] = 0; - node::adj_nodes_iterator it = v.adj_nodes_begin(); - node::adj_nodes_iterator end = v.adj_nodes_end(); - int j = 0; - while (it != end) - { - j++; - child_number[(*it)] = j; - num_children[v]++; - it++; - } - } - } - - // --------------------------------------------------------- - // Get visitation numbers for SQL queries - - mydfs d; - d.start_node (G.root()); - if (d.check(G) != algorithm::GTL_OK) - { - cerr << "dfs check failed at " << __LINE__ << " in " << __FILE__ << endl; - } - else - { - if (d.run(G) != algorithm::GTL_OK) - { - cerr << "dfs algorithm failed at " << __LINE__ << " in " << __FILE__ << endl; - } - } - - // --------------------------------------------------------- - // Ensure root path = / - path[G.root()] = "/"; - - - // --------------------------------------------------------- - // SQL Dump - - 
ofstream sql("tree.sql"); - node v; - forall_nodes(v,G) - { - sql << "INSERT INTO ncbi_tree (tax_id, parent_id, left_id, right_id, path) "; - sql << "VALUES (" << id[v] << ", "; - - // Ensure the node label is acceptable to SQL (NCBI names may have quotes, etc.) -// string s = G.get_node_label(v); -// sql << "'" << escape_string (s) << "', "; - // For Oracle to work we need to ensure the root of the tree has a NULL parent - if (G.parent(v) == v) - sql << "NULL"; - else - { - sql << id[G.parent(v)]; - } - - sql << ", " << left_visitation[v] << ", " << right_visitation[v] << ", '" << path[v] << "');" << endl; - } - sql.close(); - - return 0; -} diff --git a/src/Tracker/graph/gport.cpp b/src/Tracker/graph/gport.cpp deleted file mode 100644 index 97f1882e0..000000000 --- a/src/Tracker/graph/gport.cpp +++ /dev/null @@ -1,333 +0,0 @@ -#include "gport.h" - - -// System specific defines - -#if GPORT_MAC - // From MacTech 4(6) "Comments about PICTs" - #define picGrpBeg 140 - #define picGrpEnd 141 -#endif - - -// The global port -GBasePort *Port = NULL; - - -// A sensible default font -GBaseFont::GBaseFont () -{ - description = "Times-Roman"; - name = "Times-Roman"; - size = 10; - bold = false; - italic = true; -} - -GPostscriptPort::GPostscriptPort () -{ - PenWidth = 1; - DocumentFonts = ""; - Device = devPostscript; - DisplayRect.SetRect (0, 0, 595-144, 842-144); - fill_r = 1; - fill_g = fill_b = 0; -} - -void GPostscriptPort::DrawArc (const GPoint &pt, const int radius, - const double startAngleDegrees, const double endAngleDegrees) -{ - PostscriptStream << "newpath" << std::endl; - PostscriptStream << pt.GetX() << " " << -pt.GetY() - << " " << radius - << " " << (360.0 -startAngleDegrees) - << " " << (360.0 - endAngleDegrees) - << " arcn" << std::endl; - PostscriptStream << "stroke" << std::endl; - PostscriptStream << std::endl; -} - - -void GPostscriptPort::DrawLine (const int x1, const int y1, const int x2, const int y2) -{ - //PostscriptStream << x2 << " " << -y2 << " 
" << x1 << " " << -y1 << " " << PenWidth << " DrawLine" << endl; - - PostscriptStream << " gsave" << std::endl; - PostscriptStream << PenWidth << " setlinewidth" << std::endl; - // We may not always want to set this as it works best with rectangular trees... -// PostscriptStream << " 2 setlinecap" << endl; -// PostscriptStream << " 0 setgray" << endl; - //PostscriptStream << " 0.7 setgray" << endl; - PostscriptStream << fill_r << " " << fill_g << " " << fill_b << " setrgbcolor" << std::endl; - PostscriptStream << x2 << " " << -y2 << " moveto" << std::endl; - PostscriptStream << x1 << " " << -y1 << " lineto" << std::endl; - PostscriptStream << " stroke" << std::endl; - PostscriptStream << " grestore" << std::endl; - -} - -void GPostscriptPort::DrawCircle (const GPoint &pt, const int radius) -{ - PostscriptStream << "newpath" << std::endl; - PostscriptStream << pt.GetX() << " " << -pt.GetY() << " " << radius << " 0 360 arc" << std::endl; - PostscriptStream << "stroke" << std::endl; - PostscriptStream << std::endl; -} - -void GPostscriptPort::FillCircle (const GPoint &pt, const int radius) -{ - PostscriptStream << "newpath" << std::endl; - PostscriptStream << pt.GetX() << " " << -pt.GetY() << " " << radius << " 0 360 arc" << std::endl; - //PostscriptStream << "gsave" << endl; - //PostscriptStream << "0.90 setgray" << endl; - PostscriptStream << fill_r << " " << fill_g << " " << fill_b << " setrgbcolor" << std::endl; - - PostscriptStream << "fill" << std::endl; - //PostscriptStream << "grestore" << endl; - PostscriptStream << std::endl; -} - - -void GPostscriptPort::DrawRect (const GRect &r) -{ - PostscriptStream << r.GetLeft() << " " << -r.GetTop() << " moveto" << std::endl; - PostscriptStream << r.GetWidth() << " 0 rlineto" << std::endl; - PostscriptStream << "0 " << -r.GetHeight() << " rlineto" << std::endl; - PostscriptStream << -r.GetWidth() << " 0 rlineto" << std::endl; - PostscriptStream << "0 " << r.GetHeight() << " rlineto" << std::endl; - PostscriptStream << 
"closepath" << std::endl; - PostscriptStream << "stroke" << std::endl; - PostscriptStream << std::endl; -} - -void GPostscriptPort::DrawText (const int x, const int y, const char *text) -{ - PostscriptStream << "(" << text << ") " << x << " " << -y << " DrawText" << std::endl; -} - - -void GPostscriptPort::GetPrintingRect (GRect &r) -{ - // A4, with 1" margin - r.SetRect (0, 0, 595-144, 842-144); -} - - -void GPostscriptPort::SetCurrentFont (GBaseFont &font) -{ - std::string face = font.GetName(); - if (font.IsBold() || font.IsItalic()) - { - face += "-"; - if (font.IsBold()) - face += "Bold"; - if (font.IsItalic()) - face += "Italic"; - } -/* - // Duh -- need to do this earlier, perhaps scan the list of - // fonts already created and output those... - // Store this font in the list of fonts we need for our document - int found = DocumentFonts.find_first_of (face, 0); - if ((found < 0) || (found > DocumentFonts.length())) - { - if (DocumentFonts.length() > 0) - DocumentFonts += ", "; - DocumentFonts += face; - } -*/ - PostscriptStream << std::endl; - PostscriptStream << "/" << face << " findfont" << std::endl; - PostscriptStream << font.GetSize () << " scalefont" << std::endl; - PostscriptStream << "setfont" << std::endl; - PostscriptStream << std::endl; -} - - -// Mac -// Win -// Postscript - -void GPostscriptPort::SetPenWidth (int w) -{ - PenWidth = w; - PostscriptStream << w << " setlinewidth" << std::endl; - PostscriptStream << std::endl; -} - -void GPostscriptPort::StartPicture (char *pictFileName) -{ - PostscriptStream.open (pictFileName); - - - // Postscript header - PostscriptStream << "%!PS-Adobe-2.0" << std::endl; - PostscriptStream << "%%Creator: Roderic D. M. 
Page" << std::endl; - PostscriptStream << "%%DocumentFonts: Times-Roman" << std::endl; - PostscriptStream << "%%Title:" << pictFileName << std::endl; - PostscriptStream << "%%BoundingBox: 0 0 595 842" << std::endl; // A4 - PostscriptStream << "%%Pages: 1" << std::endl; - PostscriptStream << "%%EndComments" << std::endl; - PostscriptStream << std::endl; - - // Move origin to top left corner - PostscriptStream << "0 842 translate" << std::endl; - PostscriptStream << "72 -72 translate" << std::endl; // one inch margin - - // Some definitions for drawing lines, etc. - - // Drawline draws text with encaps that project... - PostscriptStream << "% Encapsulate drawing a line" << std::endl; - PostscriptStream << "% arguments x1 y1 x2 xy2 width" << std::endl; - PostscriptStream << "/DrawLine {" << std::endl; - PostscriptStream << " gsave" << std::endl; - PostscriptStream << " setlinewidth" << std::endl; - // We may not always want to set this as it works best with rectangular trees... -// PostscriptStream << " 2 setlinecap" << endl; - PostscriptStream << " 0 setgray" << std::endl; - //PostscriptStream << " 0.7 setgray" << endl; - PostscriptStream << " moveto" << std::endl; - PostscriptStream << " lineto" << std::endl; - PostscriptStream << " stroke" << std::endl; - PostscriptStream << " grestore" << std::endl; - PostscriptStream << " } bind def" << std::endl; - PostscriptStream << std::endl; - - PostscriptStream << "% Encapsulate drawing text" << std::endl; - PostscriptStream << "% arguments x y text" << std::endl; - PostscriptStream << "/DrawText {" << std::endl; - PostscriptStream << " gsave 1 setlinewidth 0 setgray" << std::endl; - PostscriptStream << " moveto" << std::endl; - PostscriptStream << " show grestore" << std::endl; - PostscriptStream << "} bind def" << std::endl; - PostscriptStream << std::endl; - -} - -void GPostscriptPort::EndPicture () -{ - PostscriptStream << "showpage" << std::endl; - PostscriptStream << "%%Trailer" << std::endl; - PostscriptStream << 
"%%end" << std::endl; - PostscriptStream << "%%EOF" << std::endl; - PostscriptStream.close (); -} - - - -#if GPORT_MAC -// Macintosh -void GMacPort::BeginGroup () -{ -// ::PicComment (picGrpBeg, 0, NULL); -} - -void GMacPort::EndGroup () -{ -// ::PicComment (picGrpEnd, 0, NULL); -} -#endif - -SVGPort::SVGPort () -{ - fontString = "font-family:Times;font-size:12"; - DisplayRect.SetRect (0, 0, 400, 400); -} - -void SVGPort::DrawLine (const int x1, const int y1, const int x2, const int y2) -{ - svgStream << ""; - - -// svgStream << ""; -// svgStream << "" << endl; -} - -void SVGPort::DrawCircle (const GPoint &/*pt*/, const int /*radius*/) -{ -/* PostscriptStream << "newpath" << endl; - PostscriptStream << pt.GetX() << " " << -pt.GetY() << " " << radius << " 0 360 arc" << endl; - PostscriptStream << "stroke" << endl; - PostscriptStream << endl; -*/ -} - - -void SVGPort::DrawRect (const GRect &/*r*/) -{ -/* PostscriptStream << r.GetLeft() << " " << -r.GetTop() << " moveto" << endl; - PostscriptStream << r.GetWidth() << " 0 rlineto" << endl; - PostscriptStream << "0 " << -r.GetHeight() << " rlineto" << endl; - PostscriptStream << -r.GetWidth() << " 0 rlineto" << endl; - PostscriptStream << "0 " << r.GetHeight() << " rlineto" << endl; - PostscriptStream << "closepath" << endl; - PostscriptStream << "stroke" << endl; - PostscriptStream << endl; -*/ -} - -void SVGPort::DrawText (const int x, const int y, const char *text) -{ - svgStream << "" - << text << "" << std::endl; -} - - -void SVGPort::StartPicture (char *pictFileName) -{ - svgStream.open (pictFileName); - - svgStream << "" << std::endl; - svgStream << " " << std::endl; - - svgStream << "" << std::endl; - -// test -// test -} - -void SVGPort::EndPicture () -{ - svgStream << "" << std::endl; - svgStream.close (); -} - - -void SVGPort::GetPrintingRect (GRect &r) -{ - r = DisplayRect; -} - -void SVGPort::SetCurrentFont (GBaseFont &font) -{ - fontString = ""; - fontString += "font-family:"; - fontString += 
font.GetName(); - - char buf[32]; - sprintf (buf, ";font-size:%d", font.GetSize()); - fontString += buf; - - if (font.IsItalic()) - { - fontString += ";font-style:italic"; - } - if (font.IsBold()) - { - fontString += ";font-weight:bold"; - } - -} - - - diff --git a/src/Tracker/graph/gport.h b/src/Tracker/graph/gport.h deleted file mode 100644 index 7f4ae1b46..000000000 --- a/src/Tracker/graph/gport.h +++ /dev/null @@ -1,323 +0,0 @@ -#ifndef GPORTH -#define GPORTH - -#ifdef __BORLANDC__ - // Undefine __MINMAX_DEFINED so that min and max are correctly defined - #ifdef __MINMAX_DEFINED - #undef __MINMAX_DEFINED - #endif - // Ignore "Cannot create precompiled header: code in header" message - // generated when compiling string.cc - #pragma warn -pch -#endif - -#include -#include -#include - -#ifdef __BORLANDC__ - #pragma warn .pch -#endif - - -#include "gdefs.h" - -// System specific includes here -#if GPORT_WINDOWS -#endif - -#if GPORT_MAC -// #include -#endif - -enum GPortDevice {devScreen, devPrinter, devPicture, devPostscript}; - - - - -/* Note that we always draw using the following coordinate system: - -(0,0)--------->(+x,0) - | - | - | - \/ -(+y,0) - - Hence the origin is the top left hand corner, and y goes down rather than up. - This is typical for drawing to a window. Some systems have other coordinate - systems (such as Postscript). We make the translation internally. 
- -*/ - -// A point -class GPoint -{ -public: - GPoint () { SetPoint (0, 0); }; - GPoint (const GPoint &p) { X = p.X; Y = p.Y; }; - GPoint (const int x, const int y) { SetPoint (x, y); }; - virtual int GetX () const { return X; }; - virtual int GetY () const { return Y; }; - virtual void Offset (const int xoff, const int yoff) { X += xoff; Y += yoff; }; - virtual void SetPoint (const int x, const int y) { X = x; Y = y; }; - virtual void SetX (int x) { X = x; }; - virtual void SetY (int y) { Y = y; }; - - int operator== (const GPoint &p) { return (int) ( (X == p.X) && ( Y == p.Y)); }; - int operator!= (const GPoint &p) { return (int) ( (X != p.X) || ( Y != p.Y)); }; -protected: - int X; - int Y; -}; - -// A rectangle -class GRect -{ -public: - GRect () { left = top = right = bottom = 0; }; - GRect (const int l, const int t, const int r, const int b) { SetRect (l, t, r, b); }; - virtual int GetLeft () const { return left; }; - virtual int GetTop () const { return top; }; - virtual int GetRight () const { return right; }; - virtual int GetBottom () const { return bottom; }; - virtual int GetWidth () const { return right - left; }; - virtual int GetHeight () const { return bottom - top; }; - - virtual void Inset (const int dx, const int dy) { left += dx; right -= dx; top += dy; bottom -= dy; }; - virtual void Offset (const int dx, const int dy) { left += dx; right += dx; top += dy; bottom += dy; }; - virtual bool PointInRect (GPoint &pt) - { - return (((pt.GetX() >= left) && (pt.GetX() <= right)) && - ((pt.GetY() >= top) && (pt.GetY() <= bottom))); - } - - virtual void SetLeft (const int l) {left = l; }; - virtual void SetTop (const int t) {top = t; }; - virtual void SetRight (const int r) {right = r; }; - virtual void SetBottom (const int b) {bottom = b; }; - virtual void SetRect (const int l, const int t, const int r, const int b) - { left = l; top = t; right = r; bottom = b; }; - virtual void SetRectWH (const int l, const int t, const int w, const int h) - { left = 
l; top = t; right = l + w; bottom = t + h; }; - -protected: - int left, top, right, bottom; -}; - -// Base class for system specific fonts -class GBaseFont -{ -public: - GBaseFont (); - virtual ~GBaseFont () {}; - virtual std::string GetName () { return description; }; - virtual std::string GetDescription () { return description; }; - virtual int GetSize () { return size; }; - virtual bool IsBold () { return bold; }; - virtual bool IsItalic () { return italic; }; -private: - std::string description; - std::string name; - int size; - bool bold; - bool italic; -}; -typedef GBaseFont *GBaseFontPtr; - -typedef GBaseFont GFont ; // for now -typedef GFont *GFontPtr; - - - -// Windows needs the two handles for screen and printer fonts -// Mac just sets things -// Postscript writes to the postscript stream - - -// Virtual class to encapsulate printing -class GBasePrinter -{ -public: - GBasePrinter () {}; - virtual ~GBasePrinter () {}; - virtual void PrinterSetup () = 0; - virtual void AbortPrinting () = 0; - virtual void EndDoc (); - virtual bool EndPage (); - virtual void GetPrintingRect (GRect &r) = 0; - virtual void GetPhysicalPageRect (GRect &r) = 0; - virtual bool StartPage () = 0; - virtual bool StartDoc (char *jobname) = 0; // "GBasePrinter" -}; - -// Windows port VPort -// Mac port VPort -// Postscript - just write to file - - - -// Encapsulates the complete graphics system (screen drawing, picture files, -// printing, clipboard). 
-class GBasePort -{ -public: - GBasePort () { Device = devScreen; PenWidth = 1;}; - virtual ~GBasePort() {}; - virtual void DrawArc (const GPoint &pt, const int radius, - const double startAngleDegrees, const double endAngleDegrees) = 0; - virtual void DrawCircle (const GPoint &pt, const int radius) = 0; - virtual void DrawLine (const int x1, const int y1, const int x2, const int y2) = 0; - virtual void DrawLinePts (const GPoint &pt1, const GPoint &pt2) - { DrawLine (pt1.GetX(), pt1.GetY(), pt2.GetX(), pt2.GetY()); }; - virtual void DrawRect (const GRect &r) = 0; - virtual void DrawText (const int x, const int y, const char *s) = 0; - - // Display - virtual GPortDevice GetCurrentDevice () { return Device; }; - virtual void GetDisplayRect (GRect &r) { r = DisplayRect; }; - virtual void SetDisplayRect (GRect &r) { DisplayRect = r; }; - - // Pen - virtual int GetPenWidth () { return PenWidth; }; - virtual void SetPenWidth (int w) { PenWidth = w; }; - - // Fonts - virtual void SetCurrentFont (GBaseFont &font) = 0; - - // Pictures - virtual void StartPicture (char *pictFileName) = 0; - virtual void EndPicture () = 0; - - // Groups - virtual void BeginGroup () = 0; - virtual void EndGroup () = 0; - - // Printing - virtual void GetPrintingRect (GRect &r) = 0; - - // Colour - virtual void SetFillColorRGB (int /*r*/, int /*g*/, int /*b*/) {}; - - -protected: - // list of fonts - // printer class - //pens - - int PenWidth; - - // Device info - GPortDevice Device; - GRect DisplayRect; -}; - -// Mac -// Win -// Postscript - -class GPostscriptPort : public GBasePort -{ -public: - GPostscriptPort (); - virtual ~GPostscriptPort () {}; - virtual void DrawArc (const GPoint &pt, const int radius, - const double startAngleDegrees, const double endAngleDegrees); - virtual void DrawCircle (const GPoint &pt, const int radius); - virtual void DrawLine (const int x1, const int y1, const int x2, const int y2); - virtual void DrawRect (const GRect &r); - virtual void DrawText (const int x, 
const int y, const char *text); - - - virtual void FillCircle (const GPoint &pt, const int radius); - - - - // Pen - virtual void SetPenWidth (int w); - - - // Fonts - virtual void SetCurrentFont (GBaseFont &font); - - // Pictures - virtual void StartPicture (char *pictFileName); - virtual void EndPicture (); - - // Groups - virtual void BeginGroup () {}; - virtual void EndGroup () {}; - - // Printing - virtual void GetPrintingRect (GRect &r); - - virtual void SetFillColorRGB (int r, int g, int b) - { - fill_r = (double)r/255.0; - fill_g = (double)g/255.0; - fill_b = (double)b/255.0; - }; - -protected: - std::ofstream PostscriptStream; - std::string DocumentFonts; - - double fill_r, fill_g, fill_b; -}; - -class GMacPort : public GBasePort -{ -public: - // Groups - virtual void BeginGroup (); - virtual void EndGroup (); -protected: -}; - -class SVGPort : public GBasePort -{ -public: - SVGPort (); - virtual void DrawArc (const GPoint &/*pt*/, const int /*radius*/, - const double /*startAngleDegrees*/, const double /*endAngleDegrees*/) {}; - virtual void DrawCircle (const GPoint &pt, const int radius); - virtual void DrawLine (const int x1, const int y1, const int x2, const int y2); - virtual void DrawRect (const GRect &r); - virtual void DrawText (const int x, const int y, const char *text); - - // Pen - virtual void SetPenWidth (int /*w*/) {}; - - - // Fonts - virtual void SetCurrentFont (GBaseFont &font); - - // Pictures - virtual void StartPicture (char *pictFileName); - virtual void EndPicture (); - - // Groups - virtual void BeginGroup () {}; - virtual void EndGroup () {}; - - // Printing - virtual void GetPrintingRect (GRect &r); -protected: - std::ofstream svgStream; - std::string fontString; -}; - - -#ifndef USE_VC2 -extern GBasePort *Port; // for now -#endif - -#ifdef __BORLANDC__ - // Redefine __MINMAX_DEFINED so Windows header files compile - #ifndef __MINMAX_DEFINED - #define __MINMAX_DEFINED - #endif -#endif - - -#endif diff --git 
a/src/Tracker/graph/mincut.cpp b/src/Tracker/graph/mincut.cpp deleted file mode 100644 index 4a85c7c92..000000000 --- a/src/Tracker/graph/mincut.cpp +++ /dev/null @@ -1,219 +0,0 @@ -// $Id: mincut.cpp,v 1.1.1.1 2003/11/05 15:19:13 rdmp1c Exp $ - -#include "mincut.h" - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "fheap.h" - -mincut::mincut () : GTL::algorithm () -{ - set_vars_executed = false; -} - - -mincut::~mincut () -{ -} - -void mincut::set_vars(const GTL::edge_map& edge_weight) -{ - this->edge_weight = edge_weight; - min_cut = 0; - set_vars_executed = true; -} - -int mincut::check (GTL::graph& G) -{ - if (!set_vars_executed) - return(GTL_ERROR); - - if ((G.number_of_nodes() <= 1) || (!G.is_connected()) || (G.is_directed())) - return(GTL_ERROR); - - return GTL_OK; -} - -void mincut::reset () -{ - st_list.erase (st_list.begin(), st_list.end()); -} - -int mincut::run(GTL::graph& G) -{ - GTL::graph g; - g.make_undirected(); - - // Make a local copy of the graph as mincut modifies the original graph - - // List of nodes in the original graph - GTL::node_map partner (G); - GTL::node_map orig (g); - - GTL::node x; - forall_nodes (x, G) - { - partner[x] = g.new_node(); - orig[partner[x]] = x; // so we can look up original node - } - - // Create edges and associated weights - GTL::edge_map w(g, 0); - GTL::edge e; - forall_edges (e, G) - { - if (e.source() != e.target()) - { - GTL::edge ec = g.new_edge (partner[e.source()], partner[e.target()]); - w[ec] = edge_weight[e]; - } - } - - // Start of algorithm. $a$ is an arbitrary single node in $g$. 
The set $A$ - // of nodes initially comprises $a$ - GTL::graph::node_iterator na = g.nodes_begin(); - GTL::node a = *na; - int n = g.number_of_nodes(); - int cut_weight = std::numeric_limits::max(); - int best_value = std::numeric_limits::max(); - while (n >= 2 ) - { - GTL::node t = a; - GTL::node s, v; - GTL::edge e; - GTL::node::adj_edges_iterator it; - GTL::node::adj_edges_iterator end; - - fheap_t *pq = fh_alloc (n); - GTL::node_map vertex_number (g, 0); - std::map > nv; - int vertex_count = 0; - - // Nodes in $A$ are not in the queue - GTL::node_map in_PQ(g, false); - forall_nodes (v, g) - { - vertex_number[v] = vertex_count; - nv[vertex_count] = v; - vertex_count++; - if (v != a) - { - in_PQ[v] = true; - fh_insert (pq, vertex_number[v], 0); - } - } - GTL::node_map inf (g, 0); - // Get weight of edges adjacent to $a$ - it = a.adj_edges_begin(); - end = a.adj_edges_end(); - while (it != end) - { - v = a.opposite (*it); - inf[v] += w[*it]; - it++; - } - // Store weights in a queue - it = a.adj_edges_begin(); - end = a.adj_edges_end(); - while (it != end) - { - v = a.opposite (*it); - fh_decrease_key (pq, vertex_number[v], -inf[v]); - it++; - } - - while (pq->n > 0) - { - s = t; - - // Get the node that is most tightly connected to $A$ - t = nv[fh_delete_min (pq)]; - cut_weight = inf[t]; - in_PQ[t] = false; - - // Increase the key of nodes adjacent to t and not in $A$ by adding the - // weights of edges connecting t with nodes not in $A$ - it = t.adj_edges_begin(); - end = t.adj_edges_end(); - while (it != end) - { - v = t.opposite (*it); - if (in_PQ[v]) - { - inf[v] += w[*it]; - fh_decrease_key (pq, vertex_number[v], -inf[v]); - } - it++; - } - } - fh_free (pq); - - //cout << " cut-of-the-phase = " << cut_weight << endl; - - if (cut_weight <= best_value) - { - if (cut_weight < best_value) - { - // Clear list of (s,t) pairs - st_list.erase (st_list.begin(), st_list.end()); - best_value = cut_weight; - } - st_list.push_back (node_pair (orig[s], orig[t])); - } - - 
// Nodes s and t are the last two nodes to be added to A - //cout << "s=" << s << " t=" << t << endl; - - // Get list of edges adjacent to s - GTL::edge dummy; - GTL::node_map s_edge(g, dummy); - it = s.adj_edges_begin(); - end = s.adj_edges_end(); - while (it != end) - { - s_edge[s.opposite(*it)] = *it; - it++; - } - - // Merge s and t - it = t.adj_edges_begin(); - end = t.adj_edges_end(); - - // Iterate over edges adjacent to t. If a node v adjacent to - // t is also adjacent to s, then add w(it) to e(s,v) - // otherwise make a new edge e(s,v) - while (it != end) - { - v = t.opposite (*it); - - if (s_edge[v] != dummy) - { - w[s_edge[v]] += w[*it]; - } - else if (s != v) - { - GTL::edge ne = g.new_edge (s, v); - w[ne] = w[*it]; - } - ++it; - } - - // Delete node t from graph - g.del_node(t); - n--; - } - - min_cut = best_value; - - return(GTL_OK); -} diff --git a/src/Tracker/graph/mincut.h b/src/Tracker/graph/mincut.h deleted file mode 100644 index 6fed1b5b4..000000000 --- a/src/Tracker/graph/mincut.h +++ /dev/null @@ -1,87 +0,0 @@ -// $Id: mincut.h,v 1.1.1.1 2003/11/05 15:19:13 rdmp1c Exp $ - -#ifndef MINCUT_H -#define MINCUT_H - - -#include - -/** - * @typedef node_pair - * A pair of nodes - */ - -typedef std::pair node_pair; - -class GTL_EXTERN mincut : public GTL::algorithm -{ -public: - mincut (); - virtual ~mincut(); - - /** - * Sets weight of every edge for mincut calculation. - * - * @param edge_weight weight of every edge. - */ - void set_vars(const GTL::edge_map& edge_weight); - - /** - * Finds a mincut of G. - * - * @param G graph. - * @return algorithm::GTL_OK on success, - * algorithm::GTL_ERROR otherwise. - * @see algorithm#run - */ - int run (GTL::graph& G); - - - /** - * Checks whether the preconditions for mincut are satisfied. - * - * @param G graph. - * @return algorithm::GTL_OK on success, - * algorithm::GTL_ERROR otherwise. - * @see algorithm#check - */ - virtual int check (GTL::graph& G); - - /** - * Reset. 
- * - * @see algorithm#reset - */ - virtual void reset (); - - /** - * Returns the mincut for the graph G. - * - * @return mincut value - * - */ - int get_mincut() const { return min_cut; }; - -protected: - /** - * @internal - */ - int min_cut = 0; - - /** - * @internal - */ - bool set_vars_executed = false; - - /** - * @internal - */ - GTL::edge_map edge_weight; - - /** - * @internal - */ - std::list st_list; -}; - -#endif diff --git a/src/Tracker/graph/mwbmatching.cpp b/src/Tracker/graph/mwbmatching.cpp deleted file mode 100644 index 3aedf8066..000000000 --- a/src/Tracker/graph/mwbmatching.cpp +++ /dev/null @@ -1,300 +0,0 @@ -// $Id: mwbmatching.cpp,v 1.3 2007/10/28 08:47:20 rdmp1c Exp $ - -#include "mwbmatching.h" - - -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef __GNUC__ -#include -#endif - -#ifdef __BORLANDC__ - #include -#endif -#if (defined __MWERKS__) || (defined __GNUC__) - #include - #define MAXINT INT_MAX -#endif - -mwbmatching::mwbmatching () - : - algorithm(), - mwbm(0), - set_vars_executed(false), - pq(NULL) -{ -} - -mwbmatching::~mwbmatching () -{ -} - -void mwbmatching::set_vars(const GTL::edge_map& edge_weight) -{ - this->edge_weight = edge_weight; - mwbm = 0; - set_vars_executed = true; -} - -int mwbmatching::check (GTL::graph& G) -{ - if (!set_vars_executed) - return(GTL_ERROR); - - if ((G.number_of_nodes() <= 1) || (!G.is_connected()) || (!G.is_directed())) - return(GTL_ERROR); - - return GTL_OK; -} - -int mwbmatching::run(GTL::graph& G) -{ - // Initialise - pot.init (G, 0); - free.init (G, true); - dist.init (G, 0); - - GTL::nodes_t A; - GTL::nodes_t B; - GTL::node n; - // Partition graph based on direction of edges - forall_nodes (n, G) - { - if (n.outdeg() == 0) - B.push_back (n); - else - A.push_back(n); - - node_from_id[n.id()] = n; - } - - // Simple heuristic - int C = 0; - GTL::edge e; - forall_edges (e, G) - { - edge_from_id[e.id()] = e; - if (edge_weight[e] > C) - C = edge_weight[e]; - } - - 
GTL::nodes_t::iterator it = A.begin(); - GTL::nodes_t::iterator end = A.end(); - while (it != end) - { - pot[*it] = C; - it++; - } - - it = A.begin(); - - while (it != end) - { - if (free[*it]) - augment (G, *it); - - it++; - } - - // Get edges in matching - it = B.begin(); - end = B.end(); - while (it != end) - { - GTL::edge e; - forall_out_edges (e, *it) - { - result.push_back (e); - mwbm += edge_weight[e]; - } - it++; - } - return(GTL_OK); -} - -int mwbmatching::augment(GTL::graph& G, GTL::node a) -{ - // Initialise - pred.init(G, -1); - pq = fh_alloc(G.number_of_nodes()); - - dist[a] = 0; - GTL::node best_node_in_A = a; - long minA = pot[a]; - long delta; - - std::stack > RA; - RA.push(a); - std::stack > RB; - - GTL::node a1 = a; - GTL::edge e; - - // Relax - forall_adj_edges (e, a1) - { - const GTL::node& b = e.target_(); - long db = dist[a1] + (pot[a1] + pot[b] - edge_weight[e]); - if (pred[b] == -1) - { - dist[b] = db; - pred[b] = e.id(); - RB.push(b); - - fh_insert (pq, b.id(), db); - } - else - { - if (db < dist[b]) - { - dist[b] = db; - pred[b] = e.id(); - - fh_decrease_key (pq, b.id(), db); - } - } - } - - for (;;) - { - // Find node with minimum distance db - int node_id = -1; - long db = 0; - if (pq->n != 0) - { - node_id = fh_delete_min (pq); - db = dist[node_from_id[node_id]]; - } - - if (node_id == -1 || db >= minA) - { - delta = minA; - // augmentation by best node in A - augment_path_to (G, best_node_in_A); - free[a] = false; - free[best_node_in_A] = true; - break; - } - else - { - GTL::node b = node_from_id[node_id]; - if (free[b]) - { - delta = db; - // augmentation by path to b, so a and b are now matched - augment_path_to (G, b); - free[a] = false; - free[b] = false; - break; - } - else - { - // continue shortest path computation - GTL::edge e = (*b.adj_edges_begin()); - const GTL::node& a1 = e.target_(); - pred[a1] = e.id(); - RA.push(a1); - dist[a1] = db; - - if (db + pot[a1] < minA) - { - best_node_in_A = a1; - minA = db + pot[a1]; - } - - 
// Relax - forall_adj_edges (e, a1) - { - const GTL::node& b = e.target_(); - long db = dist[a1] + (pot[a1] + pot[b] - edge_weight[e]); - if (pred[b] == -1) - { - dist[b] = db; - pred[b] = e.id(); - RB.push(b); - - fh_insert (pq, b.id(), db); - } - else - { - if (db < dist[b]) - { - dist[b] = db; - pred[b] = e.id(); - - fh_decrease_key (pq, b.id(), db); - } - } - } - } - } - } - - while (!RA.empty()) - { - GTL::node a = RA.top(); - RA.pop(); - pred[a] = -1; - long pot_change = delta - dist[a]; - if (pot_change <= 0) continue; - pot[a] = pot[a] - pot_change; - } - while (!RB.empty()) - { - GTL::node b = RB.top(); - RB.pop(); - pred[b] = -1; - - long pot_change = delta - dist[b]; - if (pot_change <= 0) continue; - pot[b] = pot[b] + pot_change; - } - - // Clean up - fh_free(pq); - return 0; -} - -void mwbmatching::augment_path_to (GTL::graph &/*G*/, GTL::node v) -{ - int i = pred[v]; - while (i != -1) - { - GTL::edge e = edge_from_id[i]; - e.reverse(); - i = pred[e.target()]; - } -} - -GTL::edges_t MAX_WEIGHT_BIPARTITE_MATCHING(GTL::graph &G, GTL::edge_map weights) -{ - GTL::edges_t L; - - mwbmatching mwbm; - mwbm.set_vars(weights); - - //if (mwbm.check(G) != algorithm::GTL_OK) - //{ - // cout << "Maximum weight bipartite matching algorithm check failed" << endl; - //exit(1); - //} - //else - { - if (mwbm.run(G) != GTL::algorithm::GTL_OK) - { - std::cout << "Error running maximum weight bipartite matching algorithm" << std::endl; - //exit(1); - } - else - L = mwbm.get_match(); - } - return L; -} diff --git a/src/Tracker/graph/mwbmatching.h b/src/Tracker/graph/mwbmatching.h deleted file mode 100644 index d66cfe18d..000000000 --- a/src/Tracker/graph/mwbmatching.h +++ /dev/null @@ -1,113 +0,0 @@ -// $Id: mwbmatching.h,v 1.3 2007/10/28 08:47:21 rdmp1c Exp $ - -#ifndef MWBMATCHING_H -#define MWBMATCHING_H - -#include -#include -#include "fheap.h" - -class GTL_EXTERN mwbmatching : public GTL::algorithm -{ -public: - mwbmatching (); - virtual ~mwbmatching(); - - /** - * Sets 
weight of every edge for maximum weight bipartite matching calculation. - * - * @param edge_weight weight of every edge. - */ - void set_vars(const GTL::edge_map& edge_weight); - - /** - * Finds a maximum weight bipartite matching of G. - * - * @param G graph. - * @return algorithm::GTL_OK on success, - * algorithm::GTL_ERROR otherwise. - * @see algorithm#run - */ - int run (GTL::graph& G); - - - /** - * Checks whether the preconditions for maximum weight bipartite matching are satisfied. - * - * @param G graph. - * @return algorithm::GTL_OK on success, - * algorithm::GTL_ERROR otherwise. - * @see algorithm#check - */ - virtual int check (GTL::graph& G); - - /** - * Reset. - * - * @see algorithm#reset - */ - virtual void reset () {}; - - /** - * Returns the value of the maximum weight bipartite matching for the graph G. - * - * @return maximum weight bipartite matching value - * - */ - int get_mwbm() const { return mwbm; }; - - /** - * Returns the maximum weight bipartite matching for the graph G as a list of - * edges. - * - * @return list of edges in maximum weight bipartite matching - * - */ - GTL::edges_t get_match() { return result; }; - -protected: - /** - * @internal - */ - long mwbm; - - /** - * @internal - */ - bool set_vars_executed; - - /** - * @internal - */ - GTL::edge_map edge_weight; - - GTL::edges_t result; - - GTL::node_map pot; - GTL::node_map free; - GTL::node_map dist; - GTL::node_map pred; - std::map > node_from_id; - std::map > edge_from_id; - - fheap_t *pq; - - int augment(GTL::graph& G, GTL::node a); - inline void augment_path_to (GTL::graph &G, GTL::node v); -}; - - -/** - * Wrapper around the maximum weight bipartite matching algorithm to simplify - * it's use. Note that the algorithm expects a directed graph where nodes in one - * partition are sources and nodes in the other are targets. It uses this to determine - * how to partition the nodes. 
- * - */ -GTL::edges_t MAX_WEIGHT_BIPARTITE_MATCHING(GTL::graph &G, GTL::edge_map weights); - - - - -#endif - diff --git a/src/Tracker/graph/mygraph.cpp b/src/Tracker/graph/mygraph.cpp deleted file mode 100644 index 94f38f968..000000000 --- a/src/Tracker/graph/mygraph.cpp +++ /dev/null @@ -1,393 +0,0 @@ -// $Id: mygraph.cpp,v 1.6 2005/03/16 09:51:26 rdmp1c Exp $ - -#include "mygraph.h" - -#ifdef __GNUC__ - #if __GNUC__ == 3 - #include - #endif -#endif -#include -#include - -void MyGraph::load_edge_info_handler (GTL::edge e, GTL::GML_pair* list) -{ - if (list) - { - // Iterate over the list of GML_pair values - struct GTL::GML_pair *p = list; - while (p) - { - switch (p->kind) - { - case GTL::GML_STRING: - store_edge_string (e, p->key, p->value.str); - break; - case GTL::GML_INT: - store_edge_integer (e, p->key, p->value.integer); - break; - case GTL::GML_DOUBLE: - store_edge_double (e, p->key, p->value.floating); - break; - default: - break; - } - p = p->next; - } - } -} - -void MyGraph::store_edge_double (GTL::edge /*e*/, char * /*key*/, double /*value*/) -{ -} - - -void MyGraph::store_edge_integer (GTL::edge e, char *key, int value) -{ - if (strcmp (key, "weight") == 0) - { - weight[e] = value; - } - -} - -void MyGraph::store_edge_string (GTL::edge e, char *key, char *value) -{ - if (strcmp (key, "label") == 0) - { - if (labels_as_weights) - { - // Treat edge label as a weight - std::istringstream iss(value); - iss >> weight[e]; - } - else - { - // Store edge label as a label - edge_label[e] = value; - } - } -} - - -void MyGraph::load_node_info_handler(GTL::node n, GTL::GML_pair* list ) -{ - if (list) - { - // Iterate over the list of GML_pair values - struct GTL::GML_pair *p = list; - while (p) - { - switch (p->kind) - { - case GTL::GML_STRING: - store_node_string (n, p->key, p->value.str); - break; - case GTL::GML_INT: - store_node_integer (n, p->key, p->value.integer); - break; - case GTL::GML_DOUBLE: - store_node_double (n, p->key, p->value.floating); - 
break; - default: - break; - } - p = p->next; - } - } -} - -void MyGraph::store_node_double (GTL::node /*n*/, char * /*key*/, double /*value*/) -{ -} - - -void MyGraph::store_node_integer (GTL::node /*n*/, char * /*key*/, int /*value*/) -{ -} - -void MyGraph::store_node_string (GTL::node n, char *key, char *value) -{ - if (strcmp (key, "label") == 0) - { - label[n] = value; - } -} - - -//------------------------------------------------------------------------------ -void MyGraph::save_edge_info_handler(std::ostream *os, GTL::edge e) const -{ - graph::save_edge_info_handler (os, e); - *os << "weight " << weight[e] << std::endl; - *os << "label \"" << edge_label[e] << "\"" << std::endl; - - // Line width 1 pt - *os << "graphics [" << std::endl; - *os << "width 1.0" << std::endl; - *os << "]" << std::endl; - - // Use standard Postscript font - *os << "LabelGraphics [" << std::endl; - *os << "type \"text\"" << std::endl; - *os << "font \"Helvetica\"" << std::endl; - *os << "]" << std::endl; -} - -//------------------------------------------------------------------------------ -void MyGraph::save_node_info_handler(std::ostream *os, GTL::node n) const -{ - graph::save_node_info_handler (os, n); - if (label[n] != "") - *os << "label \"" << label[n] << "\"" << std::endl; - - // Use standard Postscript font - *os << "LabelGraphics [" << std::endl; - *os << "type \"text\"" << std::endl; - *os << "font \"Helvetica\"" << std::endl; - *os << "]" << std::endl; - -} - - -//------------------------------------------------------------------------------ -void MyGraph::save_dot (char *fname, bool weights) -{ - std::ofstream f (fname); - save_dot (f, weights); - f.close (); -} - -//------------------------------------------------------------------------------ -void MyGraph::save_dot(std::ostream &f, bool weights) -{ - GTL::node_map index; - graph::node_iterator nit = nodes_begin(); - graph::node_iterator nend = nodes_end(); - int count = 0; - while (nit != nend) - { - index[*nit] = 
count++; - nit++; - } - - if (is_directed()) - f << "digraph"; - else - f << "graph"; - - f << " G {" << std::endl; - - // Try and make the graph look nice - f << " node [width=.2,height=.2,fontsize=10];" << std::endl; - f << " edge [fontsize=10,len=2];" << std::endl; - - // Write node labels - nit = nodes_begin(); - while (nit != nend) - { - f << " " << index[*nit] << " [label=\"" << label[*nit] << "\""; - - if (node_colour[*nit] != "white") - { - f << ", color=" << node_colour[*nit] << ", style=filled"; - } - f << "];" << std::endl; - - nit++; - } - - - // Write edges - graph::edge_iterator it = edges_begin(); - graph::edge_iterator end = edges_end(); - while (it != end) - { - f << " " << index[it->source()]; - if (is_directed()) - f << " -> "; - else - f << " -- "; - f << index[it->target()]; - - f << " ["; - - if (weights) - { - f << "label=\"" << weight[*it] << "\", "; - } - else - { - std::string s = edge_label[*it]; - if (s != "") - f << "label=\"" << s << "\", "; - } - - f << " color=" << edge_colour[*it] << "];" << std::endl; - - - - it++; - } - - f << "}" << std::endl; -} - -//------------------------------------------------------------------------------ -bool MyGraph::edge_exists (GTL::node n1, GTL::node n2) -{ - bool result = false; - - if (is_undirected ()) - { - // Visit all edges adjacent to n1 and ask whether any - // is connect to n2 - GTL::node::adj_edges_iterator eit = n1.adj_edges_begin(); - GTL::node::adj_edges_iterator eend = n1.adj_edges_end(); - GTL::node found = n1; - while ((found == n1) && (eit != eend)) - { - if (n1.opposite (*eit) == n2) - found = n2; - else - eit++; - } - if (found == n2) - { - result = true; - } - } - else - { - // Visit all edges that have n1 as their source and ask - // whether any is connected to n2 - GTL::node::out_edges_iterator eit = n1.out_edges_begin(); - GTL::node::out_edges_iterator eend = n1.out_edges_end(); - GTL::node found = n1; - while ((found == n1) && (eit != eend)) - { - if (n1.opposite (*eit) == n2) 
- found = n2; - else - eit++; - } - if (found == n2) - { - result = true; - } - } - - return result; -} - -//------------------------------------------------------------------------------ -void MyGraph::delete_edge (GTL::node n1, GTL::node n2) -{ - GTL::edge e; - bool exists = false; - - if (is_undirected ()) - { - // Visit all edges adjacent to n1 and ask whether any - // is connect to n2 - GTL::node::adj_edges_iterator eit = n1.adj_edges_begin(); - GTL::node::adj_edges_iterator eend = n1.adj_edges_end(); - GTL::node found = n1; - while ((found == n1) && (eit != eend)) - { - if (n1.opposite (*eit) == n2) - { - found = n2; - e = *eit; - } - else - eit++; - } - exists = (found == n2); - } - else - { - // Visit all edges that have n1 as their source and ask - // whether any is connected to n2 - GTL::node::out_edges_iterator eit = n1.out_edges_begin(); - GTL::node::out_edges_iterator eend = n1.out_edges_end(); - GTL::node found = n1; - while ((found == n1) && (eit != eend)) - { - if (n1.opposite (*eit) == n2) - { - found = n2; - e = *eit; - } - else - eit++; - } - exists = (found == n2); - } - - if (exists) - del_edge(e); -} - - -double MyGraph::node_cliqueishness (GTL::node &n) -{ - double c = 1.0; - - int numneighbours = n.degree(); - int possconnections = numneighbours * (numneighbours - 1) / 2; - int actualconnections = 0; - if (possconnections > 0) - { - // Build list of all nodes adjacent to n (n's neighbours) - GTL::nodes_t neighbours; - - GTL::node::adj_nodes_iterator nit = n.adj_nodes_begin (); - GTL::node::adj_nodes_iterator nend = n.adj_nodes_end (); - while (nit != nend) - { - GTL::node ne = (*nit); - neighbours.push_back (ne); - nit++; - } - - // Count number of edges between neighbours - GTL::nodes_t::iterator i = neighbours.begin(); - GTL::nodes_t::iterator iend = neighbours.end(); - while (i != iend) - { - GTL::nodes_t::iterator j = i; - j++; - - while (j != iend) - { - if (edge_exists (*i, *j)) - actualconnections++; - j++; - } - i++; - } - c = 
(double) actualconnections / (double) possconnections; - } - return c; -} - -double MyGraph::cliqueishness () -{ - double sum = 0.0; - graph::node_iterator nit = nodes_begin(); - graph::node_iterator nend = nodes_end(); - - while (nit != nend) - { - GTL::node n = (*nit); - sum += node_cliqueishness (n); - nit++; - } - - return ( sum / (double)number_of_nodes() ); -} - - - - diff --git a/src/Tracker/graph/mygraph.h b/src/Tracker/graph/mygraph.h deleted file mode 100644 index e2b87c674..000000000 --- a/src/Tracker/graph/mygraph.h +++ /dev/null @@ -1,215 +0,0 @@ - // $Id: mygraph.h,v 1.6 2006/01/02 16:03:14 rdmp1c Exp $ - -#ifndef MYGRAPH_H -#define MYGRAPH_H - -// STL -#include -#include -#include -#include - -// GTL -#include - - - -/** - * @class MyGraph - * MyGrap extends the GTL class graph to provide support for graphs - * with weighted edges. - * - */ -class MyGraph : public GTL::graph -{ -public: - MyGraph () { labels_as_weights = false; }; - - int get_edge_weight (GTL::edge e) const { return weight[e]; }; - void set_edge_weight (GTL::edge e, int w) { weight[e] = w; }; - - - std::string get_edge_label(GTL::edge e) const { return edge_label[e]; }; - void set_edge_label(GTL::edge e, std::string s) { edge_label[e] = s; }; - - /** - * Returns true if an edge exists between a pair of nodes. - * - * @param n1 a node. - * @param n2 another node. - */ - virtual bool edge_exists (GTL::node n1, GTL::node n2); - - - /** - * Delete the edge (if it exists) between a pair of nodes. - * - * @param n1 a node. - * @param n2 another node. - */ - virtual void delete_edge (GTL::node n1, GTL::node n2); - - - /** - * Sets the labels_as_weights flag to true (the default is false). - * If this flag is set the load_edge_info_handler will - * read any labels associated with an edge in a GML file an integer - * weight. This is useful if you want to import a GML graph generated - * by LEDA. 
- * - */ - void read_labels_as_weights () { labels_as_weights = true; }; - - /** - * Extends graph::load_edge_info_handler to read in edge weights. These are - * stored in the list of key-value-pairs, where the key is the sttring "weight" - * and the value is the integer weight of that edge. - * - * @param e edge parsed - * @param list pointer to the list of key-value-pairs of - * this edge. - * @see graph#load_edge_info_handler - */ - - virtual void load_edge_info_handler (GTL::edge e, GTL::GML_pair* list); - - virtual void store_edge_double (GTL::edge e, char *key, double value); - - /** - * Handles an integer value associated with an edge. By default, it - * process the "weight" key by storing value in the - * weight edge map. This method is called by - * load_edge_info_handler. - * @param n the node - * @param key the name of the item (e.g., "weight") - * @param value the contents of the key (e.g., "5") - * - */ - virtual void store_edge_integer (GTL::edge e, char *key, int value); - /** - * Handles a string value associated with an edge. By default, it - * process the "label" key by storing value in the - * label edge map. If labels_as_weights - * is true, then converts label to integer and - * sets the edge weight to that value. This method is called by - * load_edge_info_handler. - * @param e the edge - * @param key the name of the item (e.g., "label") - * @param value the contents of the key (e.g., "branch") - * - */ - virtual void store_edge_string (GTL::edge e, char *key, char *value); - - - /** - * Extends graph::post_new_edge_handler to ensure by default edge has weight 1, - * and edge_label is "". - * - * @param e created edge - * @see graph#new_edge - */ - virtual void post_new_edge_handler(GTL::edge /*e*/) { - //weight[e] = 1; - //edge_label[e] = ""; - //edge_colour[e] = "black"; - } - - /** - * Extends graph::save_edge_info_handler to write the weight of the edge - * as a label when saving the graph to a GML file. 
- * @param ostream the stream being written to - * @param e the edge - * - */ - virtual void save_edge_info_handler(std::ostream *os, GTL::edge e) const; - - - /** - * Extends graph::load_node_info_handler iterator over the list - * of values associated with node n (if any) in the GML file. After - * determining the type of the associated value (integer, floating - * point, or string), the method calls the appropriate handler from - * store_node_double, store_node_double, or store_node_string. - * @param n the node being read - * @param list pointer to the list of paired values - * - */ - virtual void load_node_info_handler(GTL::node n, GTL::GML_pair* list ); - - - virtual void store_node_double (GTL::node n, char *key, double value); - - virtual void store_node_integer (GTL::node n, char *key, int value); - /** - * Handles a string value associated with a node. By default, it - * process the "label" key by storing value in the - * label node map. This method is called by - * load_node_info_handler. - * @param n the node - * @param key the name of the item (e.g., "label") - * @param value the contents of the key (e.g., "root") - * - */ - virtual void store_node_string (GTL::node n, char *key, char *value); - - /** - * Extends graph::post_new_node_handler to ensure by default node label is "". - * - * @param n created node - * @see graph#new_node - */ - virtual void post_new_node_handler(GTL::node /*n*/) - { - //label[n] = ""; - //node_colour[n] = "white"; - } - - virtual void save_node_info_handler(std::ostream *os, GTL::node n) const; - - /** - * @param f output stream - * - * Write the graph in dot format (as used by programs in the - * GraphViz - * package. - */ - virtual void save_dot(std::ostream &f, bool weights = false); - - /** - * @param fname output file name - * - * Write the graph in dot format (as used by programs in the - * GraphViz - * package. 
- */ - virtual void save_dot (char *fname, bool weights = false); - - std::string get_node_label(GTL::node n) { return label[n]; }; - void set_node_label(GTL::node n, std::string s) { label[n] = s; }; - - void set_edge_colour(GTL::edge e, std::string colour) { edge_colour[e] = colour; }; - void set_node_colour(GTL::node n, std::string colour) { node_colour[n] = colour; }; - - - double node_cliqueishness (GTL::node &n); - double cliqueishness (); - - -protected: - /** - * A map between edges and an integer weight, being the weight of that edge - * in the graph. - * - */ - GTL::edge_map weight; - GTL::edge_map edge_label; - GTL::node_map label; - - bool labels_as_weights; - - // Styles - GTL::node_map node_colour; - GTL::edge_map edge_colour; -}; - -#endif diff --git a/src/Tracker/graph/mytree.cpp b/src/Tracker/graph/mytree.cpp deleted file mode 100644 index f5371c0f8..000000000 --- a/src/Tracker/graph/mytree.cpp +++ /dev/null @@ -1,90 +0,0 @@ -// $Id: mytree.cpp,v 1.2 2005/08/16 12:22:52 rdmp1c Exp $ - - -#include "mytree.h" - -#include -#include - -bool is_tree (const GTL::graph& G) -{ - GTL::node v; - forall_nodes(v,G) - if ( v.indeg () > 1 ) return false; // nonunique parent - return ( G.number_of_nodes() == G.number_of_edges() + 1 - && G.is_connected() ); -} - -bool MyTree::is_root( const GTL::node v ) const -{ - return (v.indeg() == 0); -} - -bool MyTree::is_leaf( const GTL::node v ) const -{ - return (v.outdeg() == 0); -} - -GTL::node MyTree::parent( const GTL::node v ) const -{ - if (v.indeg() == 0) return v; - GTL::edge e = (*v.in_edges_begin()); - return e.source(); -} - -GTL::node MyTree::root() const -{ - GTL::node v = (*nodes_begin()); - while (!is_root(v)) - v = parent(v); - return v; -} - -void MyTree::postorder_traversal() -{ - std::stack < GTL::node, std::vector > S; - S.push (root()); - int num = 1; - do { - GTL::node v = S.top(); - S.pop(); - int n = number_of_nodes() - num++ + 1; // order in which node is visited in postorder - order[v] = n; - 
number[n] = v; - -// cout << label[v] << " " << order[v] << endl; - - // store info about order here... - - - - GTL::node::adj_nodes_iterator it = v.adj_nodes_begin(); - GTL::node::adj_nodes_iterator end = v.adj_nodes_end(); - while (it != end) - { - S.push (*it); - it++; - } - } while ( !S.empty() ); -} - - - -GTL::node MyTree::get_left_child(const GTL::node v) const -{ - return (*(v.adj_nodes_begin())); -} - -GTL::node MyTree::get_right_child(const GTL::node v) const -{ - GTL::node right; - GTL::node::adj_nodes_iterator it = v.adj_nodes_begin(); - GTL::node::adj_nodes_iterator end = v.adj_nodes_end(); - while (it != end) - { - right = *it; - it++; - } - return right; -} - diff --git a/src/Tracker/graph/mytree.h b/src/Tracker/graph/mytree.h deleted file mode 100644 index 89b493955..000000000 --- a/src/Tracker/graph/mytree.h +++ /dev/null @@ -1,60 +0,0 @@ -// $Id: mytree.h,v 1.2 2005/08/16 12:22:53 rdmp1c Exp $ - -#ifndef MYTREE_H -#define MYTREE_H - - -/* This code is heavily based on the code provided by Gabriel Valiente for - his book "Algorithms on Trees and Graphs" (Berlin: Springer-Verlag, 2002). - I've modified it to call GTL rather than LEDA functions. -*/ - - - -/* - To do: - - LCA functions - Preorder - Inorder - Visitation numbers - Bottom up - Height - Depth - etc. 
- -*/ - -#include "mygraph.h" - - -// Test whether graph is a tree -bool is_tree (const GTL::graph& G); - -class MyTree : public MyGraph -{ -public: - MyTree () { }; - - GTL::node parent( const GTL::node v ) const; - GTL::node root() const; - - bool is_root( const GTL::node v ) const; - bool is_leaf( const GTL::node v ) const; - - GTL::node get_left_child(const GTL::node v) const; - GTL::node get_right_child(const GTL::node v) const; - - void postorder_traversal(); - - int postorder (const GTL::node v) const { return order[v]; }; - -protected: - GTL::node_map order; - std::map > number; -}; - - - -#endif - diff --git a/src/Tracker/graph/rings.cpp b/src/Tracker/graph/rings.cpp deleted file mode 100644 index c9093c8aa..000000000 --- a/src/Tracker/graph/rings.cpp +++ /dev/null @@ -1,422 +0,0 @@ -// $Id: rings.cpp,v 1.1 2005/01/07 23:01:20 rdmp1c Exp $ - -// Output RINGS representation of a GML tree -// See "RINGS: A Technique for Visualizing Large Hierarchies" -// http://www.cs.ucdavis.edu/~ma/papers/graph2.pdf - - -#include -#include -#include - -#include -#include -#include - -#include "gport.h" - -#include "mygraph.h" -#include "mytree.h" - -#include - -GPostscriptPort g; - - -clock_t t0; -clock_t t1; - -void ShowTimeUsed (clock_t &t0, clock_t &t1); - -void ShowTimeUsed (clock_t &t0, clock_t &t1) -{ - cout << "CPU time used = " << (float)(t1 - t0)/(float)CLOCKS_PER_SEC << " seconds" << endl; -} - -node_map num_children; -node_map num_grandchildren; -node_map R1; -node_map pt; -node_map outer_ring; - - -class CompareNodes { -public: - bool operator() (node x, node y) - { - return num_children[x] < num_children[y]; - } -}; - - -// Get numbers of children and grandchildren -class mydfs : public dfs -{ -public: - mydfs () : dfs () { }; - virtual void entry_handler (graph &G, node &n, node &f) - { - num_children[n] = n.outdeg(); - num_grandchildren[n] = 0; - } - virtual void leave_handler (graph &G, node &n, node &f) - { - if (n.outdeg() > 0) - { - 
node::adj_nodes_iterator it = n.adj_nodes_begin(); - while (it != n.adj_nodes_end()) - { - num_grandchildren[n] += num_children[(*it)]; - it++; - } - } - } -}; - -#ifndef M_PI - #define M_PI 3.14159265358979323846 // define pi -#endif - -#define SQR(X) ((X) * (X)) - - -double f (double &R1, double &R2, int n); - -double f (double &R1, double &R2, int n) -{ - double theta = M_PI/double(n); - double fn = SQR(1 - sin(theta)) / SQR (1 + sin(theta)); - R2 = sqrt (fn * SQR(R1)); - return fn; -} - - -// Compute and draw layout ussing a breadth first search -class mybfs : public bfs -{ -public: - mybfs () : bfs () { }; - virtual void popped_node_handler (graph &G, node &n) - { - if ((n.outdeg() > 0) && (num_grandchildren[n] > 0)) - { - // Colour map by level in graph - switch (level(n)) - { - case 0: g.SetFillColorRGB (125,126,190); break; - case 1: g.SetFillColorRGB (202,154,152); break; - case 2: g.SetFillColorRGB (178,219,178); break; - case 3: g.SetFillColorRGB (255,179,179); break; - case 4: g.SetFillColorRGB (225,224,179); break; - case 5: g.SetFillColorRGB (255,178,255); break; - default: g.SetFillColorRGB (192,192,192); break; - } - - - // Create a list of the children sorted by their number of children - double total_grandchildren = 0.0; - priority_queue , CompareNodes> p; - node::adj_nodes_iterator it = n.adj_nodes_begin(); - while (it != n.adj_nodes_end()) - { - p.push(*it); - outer_ring[*it] = false; - total_grandchildren += (double)num_children[*it]; - it++; - } - - - // Find how many children to put in the two rings - double R2; - double sum_grandchildren = 0.0; - int count = 0; - int outer = num_children[n]; // number in outer ring - int inner = 0; // number in inner ring - - // We look at the sorted list of children and - // put the split between inner and outer rings at the point where - // the fraction of children yet to be included is less than the - // amount of space available in an inner ring if count rings are - // in the outer ring. 
It is possible that there won't be an inner ring, - // in which case inner is 0. - // We use the node map outer_ring to classify children by which ring they - // are assigned to. - while (!p.empty()) - { - - node x = p.top(); - outer_ring[x] = (inner == 0); - count++; - - sum_grandchildren += (double)num_children[x]; - - double fraction = 1.0 - (sum_grandchildren / total_grandchildren); - - double fk = f(R1[n], R2, count); - - if ((count > 2) && (inner == 0)) - { - if (fraction < fk) - { - inner = outer - count; - outer = count; - } - } - p.pop(); - } - - // Compute radius of children in outer ring - double fn = f(R1[n], R2, outer); - double r_outer = (R1[n] - R2)/2.0; - double theta_outer = M_PI/double(outer); - - - // Compute radius of children in inner ring (if any) - double r_inner = 0.0; - double R3 = 0.0; - double theta_inner = 0.0; - if (inner > 0) - { - fn = f(R2, R3, inner); - r_inner = (R2 - R3)/2.0; - theta_inner = M_PI/double(inner); - } - int inner_count = 0; - int outer_count = 0; - it = n.adj_nodes_begin(); - while (it != n.adj_nodes_end()) - { - if (outer_ring[*it]) - { - R1[*it] = r_outer; - int offset_x = (int)((R1[*it] + R2) * cos(2 * theta_outer * outer_count)); - int offset_y = (int)((R1[*it] + R2) * sin(2 * theta_outer * outer_count)); - outer_count++; - - // Draw!!! - GPoint p = pt[n]; - p.Offset (offset_x, offset_y); - pt[*it] = p; - g.DrawLinePts (pt[n], pt[*it]); - //g.DrawCircle (pt[*it], R1[*it]); - - // Centre - pt[*it] = p; - - - - } - else - { - - R1[*it] = r_inner; - int offset_x = (int)((R1[*it] + R3) * cos(2 * theta_inner * inner_count)); - int offset_y = (int)((R1[*it] + R3) * sin(2 * theta_inner * inner_count)); - inner_count++; - - // Draw!!! 
- GPoint p = pt[n]; - p.Offset (offset_x, offset_y); - pt[*it] = p; - g.DrawLinePts (pt[n], pt[*it]); - //g.DrawCircle (pt[*it], R1[*it]); - - // Centre - pt[*it] = p; - - } - - - it++; - } - - - - } - else - { - // leaf, or node with no grandchildren (i.e., a star) - if (n.outdeg() > 0) - { - //g.DrawCircle (pt[n], R1[n]); - - - double theta = (2 * M_PI)/double(n.outdeg()); - - double radius = R1[n] * 0.9; - - double gap = fabs (sin(theta) * radius); - - // If the gap between two edges is too small to be easily visible - // we draw a filled circle - if ((gap < 2.0) && (n.outdeg() > 1)) - { - g.FillCircle (pt[n], (int)radius); - } - else - { - int count = 0; - node::adj_nodes_iterator it = n.adj_nodes_begin(); - while (it != n.adj_nodes_end()) - { - int offset_x = (int)(radius * cos(theta*count)); - int offset_y = (int)(radius * sin(theta*count)); - GPoint p = pt[n]; - p.Offset (offset_x, offset_y); - pt[*it] = p; - g.DrawLinePts (pt[n], pt[*it]); - count++; - it++; - } - - } - } - - } - } - virtual void finished_handler (graph &G, node &n) - { - } -}; - - - - -int main (int argc, const char * argv[]) -{ - if (argc < 2) - { - cout << "Usage: graph " << endl; - exit(1); - } - char filename[256]; - strcpy (filename, argv[1]); - - // --------------------------------------------------------- - // Read graph - - MyTree G; - - G.read_labels_as_weights(); - t0 = clock(); - GML_error err = G.load (filename); - t1 = clock(); - if (err.err_num != GML_OK) - { - cerr << "Error (" << err.err_num << ") loading graph from file \"" << filename << "\""; - switch (err.err_num) - { - case GML_FILE_NOT_FOUND: cerr << "A file with that name doesn't exist."; break; - case GML_TOO_MANY_BRACKETS: cerr << "A mismatch of brackets was detected, i.e. 
there were too many closing brackets (])."; break; - case GML_OPEN_BRACKET: cerr << "Now, there were too many opening brackets ([)"; break; - case GML_TOO_MANY_DIGITS: cerr << "The number of digits a integer or floating point value can have is limited to 1024, this should be enough :-)"; break; - case GML_PREMATURE_EOF: cerr << "An EOF occured, where it wasn't expected, e.g. while scanning a string."; break; - case GML_SYNTAX: cerr << "The file isn't a valid GML file, e.g. a mismatch in the key-value pairs."; break; - case GML_UNEXPECTED: cerr << "A character occured, where it makes no sense, e.g. non-numerical characters"; break; - case GML_OK: break; - } - cerr << endl; - exit(1); - } - else - { - cout << "Graph read from file \"" << filename << "\" has " << G.number_of_nodes() << " nodes and " << G.number_of_edges() << " edges" << endl; - } - ShowTimeUsed (t0, t1); - - // --------------------------------------------------------- - // Test that it is a tree - if (is_tree (G)) - { - cout << "Is a tree" << endl; - } - else - { - cout << "Graph is not a tree" << endl; - node v; - forall_nodes(v,G) - if ( v.indeg () < 1 ) cout << G.get_node_label(v) << " has no parent" << endl; - if (!G.is_connected() ) cout << "Not connected"; - - exit(1); - } - - node root = G.root(); - cout << "Root = " << root << " " << "\"" << G.get_node_label (root) << "\"" << endl; - - cout << "Computing layout..." 
<< endl; - t0 = clock(); - - bfs b; - b.start_node (G.root()); - b.calc_level(true); - if (b.check(G) != algorithm::GTL_OK) - { - cerr << "bfs check failed at " << __LINE__ << " in " << __FILE__ << endl; - exit(1); - } - else - { - if (b.run(G) != algorithm::GTL_OK) - { - cerr << "bfs algorithm failed at " << __LINE__ << " in " << __FILE__ << endl; - exit(1); - } - } - - // dfs - mydfs d; - d.start_node (G.root()); - if (d.check(G) != algorithm::GTL_OK) - { - cerr << "dfs check failed at " << __LINE__ << " in " << __FILE__ << endl; - exit(1); - } - else - { - if (d.run(G) != algorithm::GTL_OK) - { - cerr << "dfs algorithm failed at " << __LINE__ << " in " << __FILE__ << endl; - exit(1); - } - } - - char picture_filename[256]; - strcpy (picture_filename, filename); - strcat (picture_filename, ".ps"); - g.StartPicture (picture_filename); - g.SetPenWidth(1); - - - - R1[G.root()] = 200.0; - GPoint centre(200,200); - pt[G.root()] = centre; - - mybfs layout; - layout.start_node (G.root()); - layout.calc_level(true); - if (layout.check(G) != algorithm::GTL_OK) - { - cerr << "bfs check failed at " << __LINE__ << " in " << __FILE__ << endl; - exit(1); - } - else - { - if (layout.run(G) != algorithm::GTL_OK) - { - cerr << "bfs algorithm failed at " << __LINE__ << " in " << __FILE__ << endl; - exit(1); - } - } - - g.EndPicture (); - - t1 = clock(); - ShowTimeUsed (t0, t1); - - return 0; -} - - - diff --git a/src/Tracker/graph/script.cpp b/src/Tracker/graph/script.cpp deleted file mode 100644 index 02a41804c..000000000 --- a/src/Tracker/graph/script.cpp +++ /dev/null @@ -1,168 +0,0 @@ -// $Id: script.cpp,v 1.1 2005/03/16 09:52:54 rdmp1c Exp $ - -// -// Simple program to apply an edit script to aa graph -// - - -#include -#include -#include - -#include -#include - -#include "mygraph.h" -#include "tokenise.h" - - - -int main (int argc, const char * argv[]) -{ - if (argc < 2) - { - cout << "Usage: script " << endl; - exit(1); - } - char filename[256]; - strcpy (filename, 
argv[1]); - - // --------------------------------------------------------- - // Read graph - - MyGraph G; - - GML_error err = G.load (filename); - if (err.err_num != GML_OK) - { - cerr << "Error (" << err.err_num << ") loading graph from file \"" << filename << "\""; - switch (err.err_num) - { - case GML_FILE_NOT_FOUND: cerr << "A file with that name doesn't exist."; break; - case GML_TOO_MANY_BRACKETS: cerr << "A mismatch of brackets was detected, i.e. there were too many closing brackets (])."; break; - case GML_OPEN_BRACKET: cerr << "Now, there were too many opening brackets ([)"; break; - case GML_TOO_MANY_DIGITS: cerr << "The number of digits a integer or floating point value can have is limited to 1024, this should be enough :-)"; break; - case GML_PREMATURE_EOF: cerr << "An EOF occured, where it wasn't expected, e.g. while scanning a string."; break; - case GML_SYNTAX: cerr << "The file isn't a valid GML file, e.g. a mismatch in the key-value pairs."; break; - case GML_UNEXPECTED: cerr << "A character occured, where it makes no sense, e.g. non-numerical characters"; break; - case GML_OK: break; - } - cerr << endl; - exit(1); - } - else - { - cout << "Graph read from file \"" << filename << "\" has " << G.number_of_nodes() << " nodes and " << G.number_of_edges() << " edges" << endl; - } - - - // Output starting graph - G.save_dot ("start.dot"); - - - // 1. 
Get map between labels and nodes - std::map < std::string, GTL::node, std::less > l; - node n; - forall_nodes (n, G) - { - l[G.get_node_label(n)] = n; - } - - // Read edit script - { - ifstream f ("script.txt"); - char buf[512]; - - while (f.good()) - { - f.getline (buf, sizeof(buf)); - - if (f.good()) - { - - // Break line into tokens - std::vector tokens; - std::string s = buf; - std::string delimiters = "|"; - Tokenise (s, delimiters, tokens); - - if (tokens[0] == "delete") - { - if (tokens[1] == "node") - { - string name = tokens[2]; - if (l.find(name) == l.end()) - { - cout << "Node labelled \"" << name << "\" not found" << endl; - exit(1); - } - else - { - cout << "Node \"" << name << "\" deleted" << endl; - G.del_node(l[name]); - l.erase(name); - } - } - else if (tokens[1] == "branch") - { - string source = tokens[2]; - string target = tokens[3]; - if (l.find(source) == l.end()) - { - cout << "Node labelled \"" << source << "\" not found" << endl; - exit(1); - } - if (l.find(target) == l.end()) - { - cout << "Node labelled \"" << target << "\" not found" << endl; - exit(1); - } - cout << "Edge \"" << source << "\"-->\"" << target << "\" deleted" << endl; - G.delete_edge(l[source], l[target]); - } - } - if (tokens[0] == "insert") - { - if (tokens[1] == "node") - { - string name = tokens[2]; - node n = G.new_node(); - G.set_node_label (n, name); - G.set_node_colour (n, "green"); - l[name] = n; - cout << "Node \"" << name << "\" inserted" << endl; - } - else if (tokens[1] == "branch") - { - string source = tokens[2]; - string target = tokens[3]; - if (l.find(source) == l.end()) - { - cout << "Node labelled \"" << source << "\" not found" << endl; - exit(1); - } - if (l.find(target) == l.end()) - { - cout << "Node labelled \"" << target << "\" not found" << endl; - exit(1); - } - - edge e = G.new_edge (l[source], l[target]); - G.set_edge_colour (e, "green"); - - cout << "Edge \"" << source << "\"-->\"" << target << "\" added" << endl; - } - } - - } - - } - 
f.close(); - } - - G.save_dot ("end.dot"); - - - - return 0; -} diff --git a/src/Tracker/graph/tokenise.cpp b/src/Tracker/graph/tokenise.cpp deleted file mode 100644 index f252ba28c..000000000 --- a/src/Tracker/graph/tokenise.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/* - * tokenise.cpp - * taxonnames - * - * Created by Roderic Page on Fri Apr 04 2003. - * Copyright (c) 2001 __MyCompanyName__. All rights reserved. - * - */ - -#include "tokenise.h" - -void Tokenise (std::string s, std::string delimiters, std::vector &tokens) -{ - tokens.erase (tokens.begin(), tokens.end()); - int start, stop; - int n = s.length(); - start = s.find_first_not_of (delimiters); - while ((start >= 0) && (start < n)) - { - stop = s.find_first_of (delimiters, start); - if ((stop < 0) || (stop > n)) stop = n; - tokens.push_back (s.substr(start, stop - start)); - start = stop + delimiters.length(); - } -} - - diff --git a/src/Tracker/graph/tokenise.h b/src/Tracker/graph/tokenise.h deleted file mode 100644 index eedfd9f0f..000000000 --- a/src/Tracker/graph/tokenise.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - * tokenise.h - * taxonnames - * - * Created by Roderic Page on Fri Apr 04 2003. - * Copyright (c) 2001 __MyCompanyName__. All rights reserved. 
- * - */ - -#ifndef TOKENISE_H -#define TOKENISE_H - -#include -#include -#include - -void Tokenise (std::string s, std::string delimiters, std::vector &tokens); - -#endif diff --git a/src/Tracker/ldes/correlation.cpp b/src/Tracker/ldes/correlation.cpp deleted file mode 100644 index 0e51d4a49..000000000 --- a/src/Tracker/ldes/correlation.cpp +++ /dev/null @@ -1,109 +0,0 @@ -#include "correlation.h" -#include "fft_functions.h" - -cv::Mat gaussianCorrelation(cv::Mat& x1, cv::Mat& x2, int h, int w, int channel, float sigma) { - cv::Mat xy = cv::Mat(cv::Size(w, h), CV_32F, cv::Scalar(0)); - cv::Mat xy_temp; - cv::Mat x; - cv::Mat y; - double xx=0, yy=0; - for (int i = 0; i < channel; i++) { - x = x1.row(i).reshape(1, h); // Procedure do deal with cv::Mat multichannel bug - y = x2.row(i).reshape(1, h); - xx +=cv::norm(x)*cv::norm(x) ; - yy += cv::norm(y)*cv::norm(y) ; - cv::mulSpectrums(fftd(x), fftd(y), xy_temp, 0, true); - xy_temp = fftd(xy_temp, true); - rearrange(xy_temp); //rearange or not? Doesn't matter - xy_temp.convertTo(xy_temp, CV_32F); - xy += xy_temp; - } - cv::Mat d; - cv::max(((xx + yy) - 2. * xy) / (w * h * channel), 0, d); - - cv::Mat k; - cv::exp((-d / (sigma * sigma)), k); - return fftd(k); - - //cv::Mat c = cv::Mat(cv::Size(w,h), CV_32F, cv::Scalar(0)); - //cv::Mat caux; - //cv::Mat x1aux; - //cv::Mat x2aux; - //for (int i = 0; i < channel; i++) { - // x1aux = x1.row(i); // Procedure do deal with cv::Mat multichannel bug - // x1aux = x1aux.reshape(1, h); - // x2aux = x2.row(i).reshape(1, h); - // cv::mulSpectrums(fftd(x1aux), fftd(x2aux), caux, 0, true); - // caux = fftd(caux, true); - // rearrange(caux); - // caux.convertTo(caux, CV_32F); - // c = c + real(caux); - //} - // - //cv::Mat d; - //cv::max(((cv::sum(x1.mul(x1))[0] + cv::sum(x2.mul(x2))[0]) - 2. 
* c) / (h*w*channel), 0, d); - - //cv::Mat k; - //cv::exp((-d / (sigma * sigma)), k); - //return fftd(k); -} - -cv::Mat linearCorrelation(cv::Mat& x1, cv::Mat& x2, int h, int w, int channel) { - cv::Mat xy = cv::Mat(cv::Size(w, h), CV_32FC2, cv::Scalar(0)); - cv::Mat xy_temp; - cv::Mat x; - cv::Mat y; - for (int i = 0; i < channel; i++) { - x = x1.row(i).reshape(1, h);; - y = x2.row(i).reshape(1, h); - cv::mulSpectrums(fftd(x), fftd(y), xy_temp, 0, true); - xy = xy + xy_temp; - } - xy.convertTo(xy, CV_32F); - return xy / (h*w*channel); -} - -cv::Mat polynomialCorrelation(cv::Mat& x1, cv::Mat& x2, int h, int w, int channel) { - cv::Mat xy = cv::Mat(cv::Size(w, h), CV_32F, cv::Scalar(0)); - cv::Mat xy_temp; - cv::Mat x; - cv::Mat y; - for (int i = 0; i < channel; i++) { - x = x1.row(i).reshape(1, h);; - y = x2.row(i).reshape(1, h); - cv::mulSpectrums(fftd(x), fftd(y), xy_temp, 0, true); - //rearrange(caux); //rearange or not? - //caux.convertTo(caux, CV_32F); - xy_temp = fftd(xy_temp, true); - xy_temp.convertTo(xy_temp, CV_32F); - xy = xy + xy_temp; - } - cv::Mat k; - cv::pow(xy / (h*w*channel) + 1, 9, k); //polynomal - return fftd(k); -} - -cv::Mat phaseCorrelation(cv::Mat& x1, cv::Mat& x2, int h, int w, int channel) { - cv::Mat xy = cv::Mat(h, w, CV_32FC2, cv::Scalar(0)); - cv::Mat xy_temp; - cv::Mat x; - cv::Mat y; - cv::Mat d; - cv::Mat d2; - for (int i = 0; i < channel; i++) { - x = x1.row(i).reshape(1, h);; - y = x2.row(i).reshape(1, h); - cv::mulSpectrums(fftd(x), fftd(y), xy_temp, 0, true); - cv::mulSpectrums(xy_temp, xy_temp, d, 0, true); - cv::sqrt(real(d), d); - d += 2.2204e-16; - //d = complexDivision(xy_temp, d); - std::vector planes = { d,d }; - cv::merge(planes, d2); - cv::divide(xy_temp, d2, xy_temp); - //xy_temp = complexDivision(xy_temp, d2); - xy_temp.convertTo(xy_temp, CV_32F); - xy += xy_temp; - } - return xy; -} \ No newline at end of file diff --git a/src/Tracker/ldes/correlation.h b/src/Tracker/ldes/correlation.h deleted file mode 100644 index 
20ed45532..000000000 --- a/src/Tracker/ldes/correlation.h +++ /dev/null @@ -1,12 +0,0 @@ -#pragma once -#include -#include - -cv::Mat gaussianCorrelation(cv::Mat& x1, cv::Mat& x2, int h, int w, int channel, float sigma); - -cv::Mat linearCorrelation(cv::Mat& x1, cv::Mat& x2, int h, int w, int channel); - -cv::Mat polynomialCorrelation(cv::Mat& x1, cv::Mat& x2, int h, int w, int channel); - -cv::Mat phaseCorrelation(cv::Mat& x1, cv::Mat& x2, int h, int w, int channel); - diff --git a/src/Tracker/ldes/fft_functions.cpp b/src/Tracker/ldes/fft_functions.cpp deleted file mode 100644 index 16007edbc..000000000 --- a/src/Tracker/ldes/fft_functions.cpp +++ /dev/null @@ -1,90 +0,0 @@ -#include "fft_functions.h" - -cv::Mat fftd(const cv::Mat& img, bool reverse) { - cv::Mat tmp; - if (img.channels() == 1) - { - std::vector planes = { cv::Mat_(img), cv::Mat_::zeros(img.size()) }; - //cv::Mat planes[] = {cv::Mat_ (img), cv::Mat_::zeros(img.size())}; - cv::merge(planes, tmp); - } - else - tmp = img; - cv::dft(tmp, tmp, reverse ? 
(cv::DFT_INVERSE | cv::DFT_SCALE | cv::DFT_REAL_OUTPUT) : cv::DFT_COMPLEX_OUTPUT); - - return tmp; -} - -cv::Mat real(const cv::Mat& img) { - std::vector planes; - cv::split(img, planes); - return planes[0]; -} - -cv::Mat imag(const cv::Mat& img) { - std::vector planes; - cv::split(img, planes); - return planes[1]; -} - -cv::Mat magnitude(const cv::Mat& img) { - cv::Mat res; - std::vector planes; - cv::split(img, planes); // planes[0] = Re(DFT(I), planes[1] = Im(DFT(I)) - if (planes.size() == 1) res = cv::abs(img); - else if (planes.size() == 2) cv::magnitude(planes[0], planes[1], res); // planes[0] = magnitude - else assert(0); - return res; -} - -cv::Mat complexMultiplication(const cv::Mat& a, const cv::Mat& b) { - std::vector pa; - std::vector pb; - cv::split(a, pa); - cv::split(b, pb); - - std::vector pres; - pres.push_back(pa[0].mul(pb[0]) - pa[1].mul(pb[1])); - pres.push_back(pa[0].mul(pb[1]) + pa[1].mul(pb[0])); - - cv::Mat res; - cv::merge(pres, res); - - return res; -} - -cv::Mat complexDivision(const cv::Mat& a, const cv::Mat& b) { - std::vector pa; - std::vector pb; - cv::split(a, pa); - cv::split(b, pb); - - cv::Mat divisor = 1. 
/ (pb[0].mul(pb[0]) + pb[1].mul(pb[1])); - - std::vector pres; - - pres.push_back((pa[0].mul(pb[0]) + pa[1].mul(pb[1])).mul(divisor)); - pres.push_back((pa[1].mul(pb[0]) + pa[0].mul(pb[1])).mul(divisor)); - - cv::Mat res; - cv::merge(pres, res); - return res; -} - -void rearrange(cv::Mat& img) { - int cx = img.cols / 2; - int cy = img.rows / 2; - - cv::Mat q0(img, cv::Rect(0, 0, cx, cy)); // Top-Left - Create a ROI per quadrant - cv::Mat q1(img, cv::Rect(cx, 0, cx, cy)); // Top-Right - cv::Mat q2(img, cv::Rect(0, cy, cx, cy)); // Bottom-Left - cv::Mat q3(img, cv::Rect(cx, cy, cx, cy)); // Bottom-Right - - cv::Mat tmp; // swap quadrants (Top-Left with Bottom-Right) - q0.copyTo(tmp); - q3.copyTo(q0); - tmp.copyTo(q3); - q1.copyTo(tmp); // swap quadrant (Top-Right with Bottom-Left) - q2.copyTo(q1); - tmp.copyTo(q2); -} \ No newline at end of file diff --git a/src/Tracker/ldes/fft_functions.h b/src/Tracker/ldes/fft_functions.h deleted file mode 100644 index c8f2359ef..000000000 --- a/src/Tracker/ldes/fft_functions.h +++ /dev/null @@ -1,17 +0,0 @@ -#pragma once -#include -#include - -cv::Mat fftd(const cv::Mat& img, bool reverse = false); - -cv::Mat real(const cv::Mat& img); - -cv::Mat imag(const cv::Mat& img); - -cv::Mat magnitude(const cv::Mat& img); - -cv::Mat complexMultiplication(const cv::Mat& a, const cv::Mat& b); - -cv::Mat complexDivision(const cv::Mat& a, const cv::Mat& b); - -void rearrange(cv::Mat& img); \ No newline at end of file diff --git a/src/Tracker/ldes/fhog.cpp b/src/Tracker/ldes/fhog.cpp deleted file mode 100644 index b2445264f..000000000 --- a/src/Tracker/ldes/fhog.cpp +++ /dev/null @@ -1,485 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. 
-// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2010-2013, University of Nizhny Novgorod, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - - -//Modified from latentsvm module's "lsvmc_featurepyramid.cpp". 
- -//#include "precomp.hpp" -//#include "_lsvmc_latentsvm.h" -//#include "_lsvmc_resizeimg.h" - -#include "fhog.hpp" - - -#ifdef HAVE_TBB -#include -#include "tbb/parallel_for.h" -#include "tbb/blocked_range.h" -#endif - -#ifndef max -#define max(a,b) (((a) > (b)) ? (a) : (b)) -#endif - -#ifndef min -#define min(a,b) (((a) < (b)) ? (a) : (b)) -#endif - - -/* -// Getting feature map for the selected subimage -// -// API -// int getFeatureMaps(const cv::Mat& image, const int k, featureMap **map); -// INPUT -// image - selected subimage -// k - size of cells -// OUTPUT -// map - feature map -// RESULT -// Error status -*/ -int getFeatureMaps(const cv::Mat& image, const int k, CvLSVMFeatureMapCaskade **map) -{ - float kernel[3] = {-1.f, 0.f, 1.f}; - cv::Mat kernel_dx(1, 3, CV_32F, kernel); - cv::Mat kernel_dy(3, 1, CV_32F, kernel); - - float boundary_x[NUM_SECTOR + 1]; - float boundary_y[NUM_SECTOR + 1]; - - int height = image.rows; - int width = image.cols; - - int numChannels = image.channels(); - - int sizeX = width / k; - int sizeY = height / k; - int px = 3 * NUM_SECTOR; - int p = px; - int stringSize = sizeX * p; - allocFeatureMapObject(map, sizeX, sizeY, p); - - cv::Mat dx; - cv::Mat dy; - cv::filter2D(image, dx, CV_32FC3, kernel_dx, cv::Point(-1, 0)); - cv::filter2D(image, dy, CV_32FC3, kernel_dy, cv::Point(0, -1)); - - for (int i = 0; i <= NUM_SECTOR; i++) - { - float arg_vector = ( (float) i ) * ( (float)(PI) / (float)(NUM_SECTOR) ); - boundary_x[i] = cosf(arg_vector); - boundary_y[i] = sinf(arg_vector); - }/*for(i = 0; i <= NUM_SECTOR; i++) */ - - float* r = (float *)malloc( sizeof(float) * (width * height)); - int* alfa = (int *)malloc( sizeof(int ) * (width * height * 2)); - - for (int j = 1; j < height - 1; j++) - { - const float* datadx = dx.ptr(j); - const float* datady = dy.ptr(j); - for (int i = 1; i < width - 1; i++) - { - int c = 0; - float x = (datadx[i * numChannels + c]); - float y = (datady[i * numChannels + c]); - - r[j * width + i] =sqrtf(x * 
x + y * y); - for(int ch = 1; ch < numChannels; ch++) - { - float tx = (datadx[i * numChannels + ch]); - float ty = (datady[i * numChannels + ch]); - float magnitude = sqrtf(tx * tx + ty * ty); - if(magnitude > r[j * width + i]) - { - r[j * width + i] = magnitude; - c = ch; - x = tx; - y = ty; - } - }/*for(ch = 1; ch < numChannels; ch++)*/ - - float max = boundary_x[0] * x + boundary_y[0] * y; - int maxi = 0; - for (int kk = 0; kk < NUM_SECTOR; kk++) - { - float dotProd = boundary_x[kk] * x + boundary_y[kk] * y; - if (dotProd > max) - { - max = dotProd; - maxi = kk; - } - else - { - if (-dotProd > max) - { - max = -dotProd; - maxi = kk + NUM_SECTOR; - } - } - } - alfa[j * width * 2 + i * 2 ] = maxi % NUM_SECTOR; - alfa[j * width * 2 + i * 2 + 1] = maxi; - }/*for(i = 0; i < width; i++)*/ - }/*for(j = 0; j < height; j++)*/ - - int* nearest = (int *)malloc(sizeof(int ) * k); - float* w = (float*)malloc(sizeof(float) * (k * 2)); - - for (int i = 0; i < k / 2; i++) - { - nearest[i] = -1; - }/*for(i = 0; i < k / 2; i++)*/ - for (int i = k / 2; i < k; i++) - { - nearest[i] = 1; - }/*for(i = k / 2; i < k; i++)*/ - - for (int j = 0; j < k / 2; j++) - { - float b_x = k / 2 + j + 0.5f; - float a_x = k / 2 - j - 0.5f; - w[j * 2 ] = 1.0f/a_x * ((a_x * b_x) / ( a_x + b_x)); - w[j * 2 + 1] = 1.0f/b_x * ((a_x * b_x) / ( a_x + b_x)); - }/*for(j = 0; j < k / 2; j++)*/ - for (int j = k / 2; j < k; j++) - { - float a_x = j - k / 2 + 0.5f; - float b_x =-j + k / 2 - 0.5f + k; - w[j * 2 ] = 1.0f/a_x * ((a_x * b_x) / ( a_x + b_x)); - w[j * 2 + 1] = 1.0f/b_x * ((a_x * b_x) / ( a_x + b_x)); - }/*for(j = k / 2; j < k; j++)*/ - - for (int i = 0; i < sizeY; i++) - { - for (int j = 0; j < sizeX; j++) - { - for (int ii = 0; ii < k; ii++) - { - for (int jj = 0; jj < k; jj++) - { - if ((i * k + ii > 0) && - (i * k + ii < height - 1) && - (j * k + jj > 0) && - (j * k + jj < width - 1)) - { - int d = (k * i + ii) * width + (j * k + jj); - (*map)->map[ i * stringSize + j * (*map)->numFeatures + 
alfa[d * 2 ]] += - r[d] * w[ii * 2] * w[jj * 2]; - (*map)->map[ i * stringSize + j * (*map)->numFeatures + alfa[d * 2 + 1] + NUM_SECTOR] += - r[d] * w[ii * 2] * w[jj * 2]; - if ((i + nearest[ii] >= 0) && - (i + nearest[ii] <= sizeY - 1)) - { - (*map)->map[(i + nearest[ii]) * stringSize + j * (*map)->numFeatures + alfa[d * 2 ] ] += - r[d] * w[ii * 2 + 1] * w[jj * 2 ]; - (*map)->map[(i + nearest[ii]) * stringSize + j * (*map)->numFeatures + alfa[d * 2 + 1] + NUM_SECTOR] += - r[d] * w[ii * 2 + 1] * w[jj * 2 ]; - } - if ((j + nearest[jj] >= 0) && - (j + nearest[jj] <= sizeX - 1)) - { - (*map)->map[i * stringSize + (j + nearest[jj]) * (*map)->numFeatures + alfa[d * 2 ] ] += - r[d] * w[ii * 2] * w[jj * 2 + 1]; - (*map)->map[i * stringSize + (j + nearest[jj]) * (*map)->numFeatures + alfa[d * 2 + 1] + NUM_SECTOR] += - r[d] * w[ii * 2] * w[jj * 2 + 1]; - } - if ((i + nearest[ii] >= 0) && - (i + nearest[ii] <= sizeY - 1) && - (j + nearest[jj] >= 0) && - (j + nearest[jj] <= sizeX - 1)) - { - (*map)->map[(i + nearest[ii]) * stringSize + (j + nearest[jj]) * (*map)->numFeatures + alfa[d * 2 ] ] += - r[d] * w[ii * 2 + 1] * w[jj * 2 + 1]; - (*map)->map[(i + nearest[ii]) * stringSize + (j + nearest[jj]) * (*map)->numFeatures + alfa[d * 2 + 1] + NUM_SECTOR] += - r[d] * w[ii * 2 + 1] * w[jj * 2 + 1]; - } - } - }/*for(jj = 0; jj < k; jj++)*/ - }/*for(ii = 0; ii < k; ii++)*/ - }/*for(j = 1; j < sizeX - 1; j++)*/ - }/*for(i = 1; i < sizeY - 1; i++)*/ - - free(w); - free(nearest); - - free(r); - free(alfa); - - return LATENT_SVM_OK; -} - -/* -// Feature map Normalization and Truncation -// -// API -// int normalizeAndTruncate(featureMap *map, const float alfa); -// INPUT -// map - feature map -// alfa - truncation threshold -// OUTPUT -// map - truncated and normalized feature map -// RESULT -// Error status -*/ -int normalizeAndTruncate(CvLSVMFeatureMapCaskade *map, const float alfa) -{ - int i,j, ii; - int sizeX, sizeY, p, pos, pp, xp, pos1, pos2; - float * partOfNorm; // norm of C(i, 
j) - float * newData; - float valOfNorm; - - sizeX = map->sizeX; - sizeY = map->sizeY; - partOfNorm = (float *)malloc (sizeof(float) * (sizeX * sizeY)); - - p = NUM_SECTOR; - xp = NUM_SECTOR * 3; - pp = NUM_SECTOR * 12; - - for(i = 0; i < sizeX * sizeY; i++) - { - valOfNorm = 0.0f; - pos = i * map->numFeatures; - for(j = 0; j < p; j++) - { - valOfNorm += map->map[pos + j] * map->map[pos + j]; - }/*for(j = 0; j < p; j++)*/ - partOfNorm[i] = valOfNorm; - }/*for(i = 0; i < sizeX * sizeY; i++)*/ - - sizeX -= 2; - sizeY -= 2; - - newData = (float *)malloc (sizeof(float) * (sizeX * sizeY * pp)); -//normalization - for(i = 1; i <= sizeY; i++) - { - for(j = 1; j <= sizeX; j++) - { - valOfNorm = sqrtf( - partOfNorm[(i )*(sizeX + 2) + (j )] + - partOfNorm[(i )*(sizeX + 2) + (j + 1)] + - partOfNorm[(i + 1)*(sizeX + 2) + (j )] + - partOfNorm[(i + 1)*(sizeX + 2) + (j + 1)]) + FLT_EPSILON; - pos1 = (i ) * (sizeX + 2) * xp + (j ) * xp; - pos2 = (i-1) * (sizeX ) * pp + (j-1) * pp; - for(ii = 0; ii < p; ii++) - { - newData[pos2 + ii ] = map->map[pos1 + ii ] / valOfNorm; - }/*for(ii = 0; ii < p; ii++)*/ - for(ii = 0; ii < 2 * p; ii++) - { - newData[pos2 + ii + p * 4] = map->map[pos1 + ii + p] / valOfNorm; - }/*for(ii = 0; ii < 2 * p; ii++)*/ - valOfNorm = sqrtf( - partOfNorm[(i )*(sizeX + 2) + (j )] + - partOfNorm[(i )*(sizeX + 2) + (j + 1)] + - partOfNorm[(i - 1)*(sizeX + 2) + (j )] + - partOfNorm[(i - 1)*(sizeX + 2) + (j + 1)]) + FLT_EPSILON; - for(ii = 0; ii < p; ii++) - { - newData[pos2 + ii + p ] = map->map[pos1 + ii ] / valOfNorm; - }/*for(ii = 0; ii < p; ii++)*/ - for(ii = 0; ii < 2 * p; ii++) - { - newData[pos2 + ii + p * 6] = map->map[pos1 + ii + p] / valOfNorm; - }/*for(ii = 0; ii < 2 * p; ii++)*/ - valOfNorm = sqrtf( - partOfNorm[(i )*(sizeX + 2) + (j )] + - partOfNorm[(i )*(sizeX + 2) + (j - 1)] + - partOfNorm[(i + 1)*(sizeX + 2) + (j )] + - partOfNorm[(i + 1)*(sizeX + 2) + (j - 1)]) + FLT_EPSILON; - for(ii = 0; ii < p; ii++) - { - newData[pos2 + ii + p * 2] = 
map->map[pos1 + ii ] / valOfNorm; - }/*for(ii = 0; ii < p; ii++)*/ - for(ii = 0; ii < 2 * p; ii++) - { - newData[pos2 + ii + p * 8] = map->map[pos1 + ii + p] / valOfNorm; - }/*for(ii = 0; ii < 2 * p; ii++)*/ - valOfNorm = sqrtf( - partOfNorm[(i )*(sizeX + 2) + (j )] + - partOfNorm[(i )*(sizeX + 2) + (j - 1)] + - partOfNorm[(i - 1)*(sizeX + 2) + (j )] + - partOfNorm[(i - 1)*(sizeX + 2) + (j - 1)]) + FLT_EPSILON; - for(ii = 0; ii < p; ii++) - { - newData[pos2 + ii + p * 3 ] = map->map[pos1 + ii ] / valOfNorm; - }/*for(ii = 0; ii < p; ii++)*/ - for(ii = 0; ii < 2 * p; ii++) - { - newData[pos2 + ii + p * 10] = map->map[pos1 + ii + p] / valOfNorm; - }/*for(ii = 0; ii < 2 * p; ii++)*/ - }/*for(j = 1; j <= sizeX; j++)*/ - }/*for(i = 1; i <= sizeY; i++)*/ -//truncation - for(i = 0; i < sizeX * sizeY * pp; i++) - { - if(newData [i] > alfa) newData [i] = alfa; - }/*for(i = 0; i < sizeX * sizeY * pp; i++)*/ -//swop data - - map->numFeatures = pp; - map->sizeX = sizeX; - map->sizeY = sizeY; - - free (map->map); - free (partOfNorm); - - map->map = newData; - - return LATENT_SVM_OK; -} -/* -// Feature map reduction -// In each cell we reduce dimension of the feature vector -// according to original paper special procedure -// -// API -// int PCAFeatureMaps(featureMap *map) -// INPUT -// map - feature map -// OUTPUT -// map - feature map -// RESULT -// Error status -*/ -int PCAFeatureMaps(CvLSVMFeatureMapCaskade *map) -{ - int i,j, ii, jj, k; - int sizeX, sizeY, p, pp, xp, yp, pos1, pos2; - float * newData; - float val; - float nx, ny; - - sizeX = map->sizeX; - sizeY = map->sizeY; - p = map->numFeatures; - pp = NUM_SECTOR * 3 + 4; - yp = 4; - xp = NUM_SECTOR; - - nx = 1.0f / sqrtf((float)(xp * 2)); - ny = 1.0f / sqrtf((float)(yp )); - - newData = (float *)malloc (sizeof(float) * (sizeX * sizeY * pp)); - - for(i = 0; i < sizeY; i++) - { - for(j = 0; j < sizeX; j++) - { - pos1 = ((i)*sizeX + j)*p; - pos2 = ((i)*sizeX + j)*pp; - k = 0; - for(jj = 0; jj < xp * 2; jj++) - { - val = 0; 
- for(ii = 0; ii < yp; ii++) - { - val += map->map[pos1 + yp * xp + ii * xp * 2 + jj]; - }/*for(ii = 0; ii < yp; ii++)*/ - newData[pos2 + k] = val * ny; - k++; - }/*for(jj = 0; jj < xp * 2; jj++)*/ - for(jj = 0; jj < xp; jj++) - { - val = 0; - for(ii = 0; ii < yp; ii++) - { - val += map->map[pos1 + ii * xp + jj]; - }/*for(ii = 0; ii < yp; ii++)*/ - newData[pos2 + k] = val * ny; - k++; - }/*for(jj = 0; jj < xp; jj++)*/ - for(ii = 0; ii < yp; ii++) - { - val = 0; - for(jj = 0; jj < 2 * xp; jj++) - { - val += map->map[pos1 + yp * xp + ii * xp * 2 + jj]; - }/*for(jj = 0; jj < xp; jj++)*/ - newData[pos2 + k] = val * nx; - k++; - } /*for(ii = 0; ii < yp; ii++)*/ - }/*for(j = 0; j < sizeX; j++)*/ - }/*for(i = 0; i < sizeY; i++)*/ -//swop data - - map->numFeatures = pp; - - free (map->map); - - map->map = newData; - - return LATENT_SVM_OK; -} - - -//modified from "lsvmc_routine.cpp" - -int allocFeatureMapObject(CvLSVMFeatureMapCaskade **obj, const int sizeX, - const int sizeY, const int numFeatures) -{ - int i; - (*obj) = (CvLSVMFeatureMapCaskade *)malloc(sizeof(CvLSVMFeatureMapCaskade)); - (*obj)->sizeX = sizeX; - (*obj)->sizeY = sizeY; - (*obj)->numFeatures = numFeatures; - (*obj)->map = (float *) malloc(sizeof (float) * - (sizeX * sizeY * numFeatures)); - for(i = 0; i < sizeX * sizeY * numFeatures; i++) - { - (*obj)->map[i] = 0.0f; - } - return LATENT_SVM_OK; -} - -int freeFeatureMapObject (CvLSVMFeatureMapCaskade **obj) -{ - if(*obj == NULL) return LATENT_SVM_MEM_NULL; - free((*obj)->map); - free(*obj); - (*obj) = NULL; - return LATENT_SVM_OK; -} diff --git a/src/Tracker/ldes/fhog.hpp b/src/Tracker/ldes/fhog.hpp deleted file mode 100644 index 223f93e7e..000000000 --- a/src/Tracker/ldes/fhog.hpp +++ /dev/null @@ -1,177 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 
-// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2010-2013, University of Nizhny Novgorod, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - - -//Modified from latentsvm module's "_lsvmc_latentsvm.h". 
- - -/*****************************************************************************/ -/* Latent SVM prediction API */ -/*****************************************************************************/ - -#ifndef _FHOG_H_ -#define _FHOG_H_ - -#include -//#include "_lsvmc_types.h" -//#include "_lsvmc_error.h" -//#include "_lsvmc_routine.h" - -#include - - -//modified from "_lsvmc_types.h" - -// DataType: STRUCT featureMap -// FEATURE MAP DESCRIPTION -// Rectangular map (sizeX x sizeY), -// every cell stores feature vector (dimension = numFeatures) -// map - matrix of feature vectors -// to set and get feature vectors (i,j) -// used formula map[(j * sizeX + i) * p + k], where -// k - component of feature vector in cell (i, j) -typedef struct{ - int sizeX; - int sizeY; - int numFeatures; - float *map; -} CvLSVMFeatureMapCaskade; - - -#include "float.h" - -#define PI CV_PI - -#define EPS 0.000001 - -#define F_MAX FLT_MAX -#define F_MIN -FLT_MAX - -// The number of elements in bin -// The number of sectors in gradient histogram building -#define NUM_SECTOR 9 - -// The number of levels in image resize procedure -// We need Lambda levels to resize image twice -#define LAMBDA 10 - -// Block size. 
Used in feature pyramid building procedure -#define SIDE_LENGTH 8 - -#define VAL_OF_TRUNCATE 0.2f - - -//modified from "_lsvm_error.h" -#define LATENT_SVM_OK 0 -#define LATENT_SVM_MEM_NULL 2 -#define DISTANCE_TRANSFORM_OK 1 -#define DISTANCE_TRANSFORM_GET_INTERSECTION_ERROR -1 -#define DISTANCE_TRANSFORM_ERROR -2 -#define DISTANCE_TRANSFORM_EQUAL_POINTS -3 -#define LATENT_SVM_GET_FEATURE_PYRAMID_FAILED -4 -#define LATENT_SVM_SEARCH_OBJECT_FAILED -5 -#define LATENT_SVM_FAILED_SUPERPOSITION -6 -#define FILTER_OUT_OF_BOUNDARIES -7 -#define LATENT_SVM_TBB_SCHEDULE_CREATION_FAILED -8 -#define LATENT_SVM_TBB_NUMTHREADS_NOT_CORRECT -9 -#define FFT_OK 2 -#define FFT_ERROR -10 -#define LSVM_PARSER_FILE_NOT_FOUND -11 - - - -/* -// Getting feature map for the selected subimage -// -// API -// int getFeatureMaps(const cv::Mat& image, const int k, featureMap **map); -// INPUT -// image - selected subimage -// k - size of cells -// OUTPUT -// map - feature map -// RESULT -// Error status -*/ -int getFeatureMaps(const cv::Mat& image, const int k, CvLSVMFeatureMapCaskade **map); - - -/* -// Feature map Normalization and Truncation -// -// API -// int normalizationAndTruncationFeatureMaps(featureMap *map, const float alfa); -// INPUT -// map - feature map -// alfa - truncation threshold -// OUTPUT -// map - truncated and normalized feature map -// RESULT -// Error status -*/ -int normalizeAndTruncate(CvLSVMFeatureMapCaskade *map, const float alfa); - -/* -// Feature map reduction -// In each cell we reduce dimension of the feature vector -// according to original paper special procedure -// -// API -// int PCAFeatureMaps(featureMap *map) -// INPUT -// map - feature map -// OUTPUT -// map - feature map -// RESULT -// Error status -*/ -int PCAFeatureMaps(CvLSVMFeatureMapCaskade *map); - - -//modified from "lsvmc_routine.h" - -int allocFeatureMapObject(CvLSVMFeatureMapCaskade **obj, const int sizeX, const int sizeY, - const int p); - -int freeFeatureMapObject (CvLSVMFeatureMapCaskade 
**obj); - - -#endif diff --git a/src/Tracker/ldes/hann.cpp b/src/Tracker/ldes/hann.cpp deleted file mode 100644 index 43561d4e2..000000000 --- a/src/Tracker/ldes/hann.cpp +++ /dev/null @@ -1,31 +0,0 @@ -#include "hann.h" - -cv::Mat hann1D(int len) { - cv::Mat hann1t = cv::Mat::zeros(1, len, CV_32F); - float* ptr = (float*)hann1t.data; - for (int i = 0; i < len; i++) - ptr[i] = static_cast(0.5 * (1 - std::cos(CV_2PI * i / (len - 1)))); - - return hann1t; -} -cv::Mat hann2D(const cv::Size& sz) { - int w = sz.width, h = sz.height; - cv::Mat hann_w = hann1D(w); - cv::Mat hann_h = hann1D(h); - - cv::transpose(hann_h, hann_h); - cv::Mat hann_hw = hann_h * hann_w; - - return hann_hw.reshape(1, 1); -} - -cv::Mat hann3D(const cv::Size& sz, int chns) { - int col = sz.width*sz.height; - cv::Mat hanns(chns, col, CV_32F); - cv::Mat hann_hw = hann2D(sz); - - for (int i = 0; i < chns; ++i) { - hann_hw.copyTo(hanns(cv::Rect(0, i, col, 1))); - } - return hanns; -} \ No newline at end of file diff --git a/src/Tracker/ldes/hann.h b/src/Tracker/ldes/hann.h deleted file mode 100644 index 46ab55e6e..000000000 --- a/src/Tracker/ldes/hann.h +++ /dev/null @@ -1,6 +0,0 @@ -#pragma once -#include - -cv::Mat hann1D(int len); -cv::Mat hann2D(const cv::Size& sz); -cv::Mat hann3D(const cv::Size& sz, int chns); \ No newline at end of file diff --git a/src/Tracker/ldes/ldes_tracker.cpp b/src/Tracker/ldes/ldes_tracker.cpp deleted file mode 100644 index 7d5caaa85..000000000 --- a/src/Tracker/ldes/ldes_tracker.cpp +++ /dev/null @@ -1,552 +0,0 @@ -#include "ldes_tracker.h" - -/// -LDESTracker::LDESTracker() -{ - lambda = 0.0001; - padding = 2.5; - scale_padding = 2.5; - output_sigma_factor = 0.125; - _hogfeatures = true; - _rotation = true; - _scale_hog = true; - interp_factor = 0.012; - sigma = 0.6; - cell_size = 4; - template_size = 96; - scale_template_size = 120; -} - -/// -LDESTracker::~LDESTracker() -{ -} - -/// -void LDESTracker::Initialize(const cv::Mat &im, cv::Rect region) -{ - cell_size = 
4; - cell_size_scale = _scale_hog ? 4 : 1; - target_sz = region.size(); - cur_rot_degree = 0.; - template_size = 96; - scale_sz0 = 120; - - assert(region.width >= 0 && region.height >= 0); - - cur_pos.x = region.x + region.width / 2; - cur_pos.y = region.y + region.height / 2; - cur_roi = region; - cur_scale = 1.0; - - //for cropping, then resize to window_sz0 - //int padded_sz = static_cast(sqrt(target_sz.area())*padding); - float padded_sz = MAX(target_sz.width, target_sz.height)*padding; - _scale = padded_sz / template_size; - window_sz0 = cvRound(padded_sz / _scale); - window_sz0 = window_sz0 / (2 * cell_size)*(2 * cell_size) + 2 * cell_size; - - scale_sz0 = scale_template_size / (2 * cell_size_scale)*(2 * cell_size_scale) + 2 * cell_size_scale; - _scale2 = padded_sz / scale_sz0; - - mag = 30; - train_interp_factor = 0.012; - interp_factor_scale = 0.015; - - getTemplates(im); -} - -/// -void LDESTracker::getSubWindow(const cv::Mat& image, const char* type) -{ - if (strcmp(type, "loc") == 0) { - if (_rotation) { - patch = cropImageAffine(image, cur_pos, cvRound(window_sz0*_scale), cur_rot_degree); - } - else { - int win = (int)(window_sz0*_scale); - patch = cropImage(image, cur_pos, win); - } - //cv::imshow("patch", patch); - cv::resize(patch, patch, cv::Size(window_sz0, window_sz0), cv::INTER_LINEAR); - } - else if (strcmp(type, "scale") == 0) { - if (_rotation) { - patchL = cropImageAffine(image, cur_pos, cvRound(scale_sz0*_scale2), cur_rot_degree); - } - else { - patchL = cropImage(image, cur_pos, cvRound(scale_sz0*_scale2)); - } - //cv::imshow("rot_patch", patchL); - cv::resize(patchL, patchL, cv::Size(scale_sz0, scale_sz0), cv::INTER_LINEAR); - cv::logPolar(patchL, patchL, cv::Point2f(0.5f*patchL.cols, 0.5f*patchL.rows), mag, cv::INTER_LINEAR); - } - else - assert(0); -} - -/// -void LDESTracker::getTemplates(const cv::Mat& image) -{ - getSubWindow(image, "loc"); - getSubWindow(image, "scale"); - - cv::Mat empty_; - cv::Mat x = getFeatures(patch, hann, 
size_patch, true); - cv::Mat xl; - if (!_scale_hog) - xl = getPixFeatures(patchL, size_scale); - else - xl = getFeatures(patchL, empty_, size_scale); - - createGaussianPeak(size_patch[0], size_patch[1]); - - _alphaf = cv::Mat(size_patch[0], size_patch[1], CV_32FC2, float(0)); - _z = cv::Mat(size_patch[2], size_patch[0] * size_patch[1], CV_32F, float(0)); - modelPatch=cv::Mat(size_scale[2], size_scale[0]*size_scale[1], CV_32F, float(0)); - - trainLocation(x, 1.0); - trainScale(xl, 1.0); -} - -/// -void LDESTracker::trainLocation(cv::Mat& x, float train_interp_factor_) -{ - cv::Mat k = gaussianCorrelation(x, x, size_patch[0], size_patch[1], size_patch[2], sigma); - cv::Mat alphaf = complexDivision(_yf, (k + lambda)); - - _z = (1 - train_interp_factor_) * _z + (train_interp_factor_)* x; - _alphaf = (1 - train_interp_factor_) * _alphaf + (train_interp_factor_)* alphaf; -} - -/// -void LDESTracker::trainScale(cv::Mat& x, float interp_factor_) -{ - modelPatch = (1 - interp_factor_)*modelPatch + interp_factor_ * x; -} - -/// -cv::Mat LDESTracker::padImage(const cv::Mat& image, int& x1, int& y1, int& x2, int& y2) -{ - cv::Mat padded; - - int im_h = image.rows, im_w = image.cols; - int left, top, right, bottom; - - left = MAX(0, -x1); - right = MAX(0, x2 - (im_w - 1)); - top = MAX(0, -y1); - bottom = MAX(0, y2 - (im_h - 1)); - - x1 = left; - x2 = right; - y1 = top; - y2 = bottom; - - cv::copyMakeBorder(image, padded, top, bottom, left, right, cv::BORDER_REPLICATE); - - return padded; -} - -/// -cv::Mat LDESTracker::cropImage(const cv::Mat& image, const cv::Point2i& pos, int sz) -{ - int x1 = pos.x - sz / 2; - int y1 = pos.y - sz / 2; - int x2 = x1 + sz - 1; - int y2 = y1 + sz - 1; - if (x1 < 0 && x2 < 0) { - x2 -= x1; - x1 = 0; - } - if (y1 < 0 && y2 < 0) { - y2 -= y1; - } - - int tx1 = MAX(0, x1), ty1 = MAX(0, y1), tx2 = MIN(x2, image.cols - 1), ty2 = MIN(y2, image.rows - 1); - x1 -= tx1; - x2 -= tx1; - y1 -= ty1; - y2 -= ty1; - cv::Rect rec(tx1, ty1, tx2 - tx1 + 1, ty2 - 
ty1 + 1); - cv::Mat patchl; - image(rec).copyTo(patchl); - patchl = padImage(patchl, x1, y1, x2, y2); - return patchl; -} - -/// -cv::Mat LDESTracker::cropImageAffine(const cv::Mat& image, const cv::Point2i& pos, int win_sz, float rot) -{ - //cv::Mat rot_matrix = cv::getRotationMatrix2D(pos, -rot, scale); - cv::Mat rot_matrix = cv::getRotationMatrix2D(pos, -rot, 1); - rot_matrix.convertTo(rot_matrix, CV_32F); - cv::transpose(rot_matrix, rot_matrix); - - float corners_ptr[12] = { - pos.x - win_sz * 0.5f, pos.y - win_sz * 0.5f, 1.0f,\ - pos.x - win_sz * 0.5f, pos.y + win_sz * 0.5f, 1.0f,\ - pos.x + win_sz * 0.5f, pos.y + win_sz * 0.5f, 1.0f,\ - pos.x + win_sz * 0.5f, pos.y - win_sz * 0.5f, 1.0f - }; - cv::Mat corners(4, 3, CV_32F, corners_ptr); - - cv::Mat wcorners = corners * rot_matrix; - - double x1_, y1_, x2_, y2_; - cv::minMaxLoc(wcorners.col(0).clone(), &x1_, &x2_, NULL, NULL); - cv::minMaxLoc(wcorners.col(1).clone(), &y1_, &y2_, NULL, NULL); - int x1 = cvRound(x1_), y1 = cvRound(y1_), x2 = cvRound(x2_), y2 = cvRound(y2_); - int tx1 = MAX(0, x1), tx2 = MIN(image.cols - 1, x2), ty1 = MAX(0, y1), ty2 = MIN(image.rows-1, y2); - int ix1 = x1-tx1, ix2 = x2-tx1, iy1 = y1-ty1, iy2 = y2-ty1; - - cv::Mat patchl; - cv::Rect rec(tx1, ty1, tx2 - tx1 + 1, ty2 - ty1 + 1); - - image(rec).copyTo(patchl); - cv::Mat padded = padImage(patchl, ix1, iy1, ix2, iy2); - - cv::Point2i p(pos.x - tx1 + ix1, pos.y - ty1 + iy1); - - rot_matrix = cv::getRotationMatrix2D(p, -rot, 1); - - rot_matrix.convertTo(rot_matrix, CV_32F); - rot_matrix.at(0, 2) += win_sz * 0.5f - p.x; - rot_matrix.at(1, 2) += win_sz * 0.5f - p.y; - - cv::warpAffine(padded, patchl, rot_matrix, cv::Size(win_sz, win_sz)); - return patchl; -} - -/// -void LDESTracker::estimateLocation(cv::Mat& z, cv::Mat x) -{ - cv::Mat kf = gaussianCorrelation(x, z, size_patch[0], size_patch[1], size_patch[2], sigma); - cv::Mat res = fftd(complexMultiplication(_alphaf, kf), true); - - res.copyTo(resmap_location); - - cv::Point2i pi; - 
double pv; - cv::minMaxLoc(res, NULL, &pv, NULL, &pi); - float peak_value = (float)pv; - //cscore=calcPSR(); - peak_loc = cv::Point2f((float)pi.x, (float)pi.y); - - if (pi.x > 0 && pi.x < res.cols - 1) - { - peak_loc.x += subPixelPeak(res.at(pi.y, pi.x - 1), peak_value, res.at(pi.y, pi.x + 1)); - } - if (pi.y > 0 && pi.y < res.rows - 1) - { - peak_loc.y += subPixelPeak(res.at(pi.y - 1, pi.x), peak_value, res.at(pi.y + 1, pi.x)); - } - - //weightedPeak(res, p, 2); - float px = peak_loc.x - res.cols / 2; - float py = peak_loc.y - res.rows / 2; - cur_pos.x = MIN(cvRound(cur_pos.x + px * cell_size*_scale), im_width - 1); - cur_pos.y = MIN(cvRound(cur_pos.y + py * cell_size*_scale), im_height - 1); -} - -/// -void LDESTracker::estimateScale(cv::Mat& z, cv::Mat& x) -{ - cv::Mat rf = phaseCorrelation(x, z, size_scale[0], size_scale[1], size_scale[2]); - cv::Mat res = fftd(rf, true); - rearrange(res); - - cv::Rect center(5, 5, size_scale[1] - 10, size_scale[0] - 10); - - res = res(center).clone(); - - cv::Point2i pi; - double pv_; - cv::minMaxLoc(res, NULL, &pv_, NULL, &pi); - float pv = static_cast(pv_); - - cv::Point2f pf(pi.x + 5.f, pi.y + 5.f); - //weightedPeak(res, pf, 1); - if (pi.x > 0 && pi.x < res.cols - 1) { - pf.x += subPixelPeak(res.at(pi.y, pi.x - 1), pv, res.at(pi.y, pi.x + 1)); - } - - if (pi.y > 0 && pi.y < res.rows - 1) { - pf.y += subPixelPeak(res.at(pi.y - 1, pi.x), pv, res.at(pi.y + 1, pi.x)); - } - - float px = pf.x, py = pf.y; - - px -= size_scale[1] * 0.5f; - py -= size_scale[0] * 0.5f; - //px *= cell_size_scale; - //py *= cell_size_scale; - - float rot = -(py) * 180.0f / (size_scale[0] * 0.5f); - float scale = exp((px) / mag); - - sscore = static_cast(pv); - - delta_rot = rot; - delta_scale = scale; - if (abs(delta_rot) > 5) - delta_rot = 0; - delta_scale = MIN(MAX(delta_scale, 0.6f), 1.4f); -} - -/* -*Update BGD(Block Gradient Descend, original AAAI Paper MATLAB Code) -*If BGD, more precise but slower -*/ -cv::RotatedRect LDESTracker::Update(const 
cv::Mat &im, float& confidence) -{ - float tmp_scale = 1.0, tmp_scale2 = 1.0; - float mscore = 0.0; - - updateModel(im, 0); - tmp_scale = _scale; - tmp_scale2 = _scale2; - mscore = calcPSR(); - for (int i = 1; i <= 5; ++i) { //BGD iterations, <=5, you can have a test - if (floor(tmp_scale*window_sz0) < 5) - tmp_scale = 1.0; - if (floor(tmp_scale2*scale_sz0) < 5) - tmp_scale2 = 1.0; - _scale = tmp_scale; - _scale2 = tmp_scale2; - updateModel(im, i); - float psr = calcPSR(); - - if (psr > mscore) { - mscore = psr; - tmp_scale = _scale; - tmp_scale2 = _scale2; - } - else { - _scale = tmp_scale; - _scale2 = tmp_scale2; - break; - } - } - conf = mscore; - confidence = conf; - return cv::RotatedRect(cv::Point2f(cur_roi.x + 0.5f * cur_roi.width, cur_roi.y + 0.5f * cur_roi.height), - cv::Size2f(cur_roi.width, cur_roi.height), cur_rot_degree); -} - -/// -void LDESTracker::updateModel(const cv::Mat& image, int /*polish*/) -{ - cv::Mat _han, empty_; - im_height = image.rows; - im_width = image.cols; - //if(polish>=0){ - getSubWindow(image, "loc"); - - cv::Mat x = getFeatures(patch, hann, size_patch, false); - estimateLocation(_z, x); - - getSubWindow(image, "scale"); - cv::Mat xl; - if(!_scale_hog) - xl= getPixFeatures(patchL, size_scale); - else - xl = getFeatures(patchL, empty_, size_scale); - - estimateScale(modelPatch, xl); - - if (_rotation) { - cur_rot_degree += delta_rot; - } - cur_scale *= delta_scale; - _scale *= delta_scale; - _scale2 *= delta_scale; - //cout << "Cur scale: " << cur_scale << " cur rotation: " << cur_rot_degree << endl; - cur_roi.width = cvRound(target_sz.width*cur_scale); - cur_roi.height = cvRound(target_sz.height*cur_scale); - //cur_roi.width = round(cur_roi.width*delta_scale); - //cur_roi.height = round(cur_roi.height*delta_scale); - cur_roi.x = cvRound(cur_pos.x - cur_roi.width / 2); - cur_roi.y = cvRound(cur_pos.y - cur_roi.height / 2); - - getSubWindow(image, "loc"); - getSubWindow(image, "scale"); - - x = getFeatures(patch, hann, size_patch, 
false); - - if (!_scale_hog) - xl = getPixFeatures(patchL, size_scale); - else - xl = getFeatures(patchL, empty_, size_scale); - - trainLocation(x, train_interp_factor); - trainScale(xl, interp_factor_scale); - //} -} - -/// -void LDESTracker::createGaussianPeak(int sizey, int sizex) -{ - cv::Mat_ res(sizey, sizex); - - int syh = (sizey) / 2; - int sxh = (sizex) / 2; - - //float output_sigma = std::sqrt((float)sizex * sizey) / cell_size * output_sigma_factor; - float output_sigma = std::sqrt((float)sizex * sizey) / padding * output_sigma_factor; - float mult = -0.5f / (output_sigma * output_sigma); - - for (int i = 0; i < sizey; i++) - for (int j = 0; j < sizex; j++) - { - int ih = i - syh; - int jh = j - sxh; - res(i, j) = std::exp(mult * (float)(ih * ih + jh * jh)); - } - - res.copyTo(_y); - _yf = fftd(_y); -} - -/// -cv::Mat LDESTracker::getFeatures(const cv::Mat & patchl, cv::Mat& han, int* sizes, bool inithann) -{ - cv::Mat FeaturesMap; - // HOG features - CvLSVMFeatureMapCaskade *map; - getFeatureMaps(patchl, cell_size, &map); - normalizeAndTruncate(map, 0.2f); - PCAFeatureMaps(map); - sizes[0] = map->sizeY; - sizes[1] = map->sizeX; - sizes[2] = map->numFeatures; - - FeaturesMap = cv::Mat(cv::Size(map->numFeatures, map->sizeX*map->sizeY), CV_32F, map->map); // Procedure do deal with cv::Mat multichannel bug - FeaturesMap = FeaturesMap.t(); - freeFeatureMapObject(&map); - - if (inithann) { - cv::Size hannSize(sizes[1], sizes[0]); - cv::Mat hannsMat = hann3D(hannSize, sizes[2]); - hannsMat.copyTo(han); - FeaturesMap = han.mul(FeaturesMap); - } - else if (!han.empty()) - FeaturesMap = han.mul(FeaturesMap); - return FeaturesMap; -} - -/// -cv::Mat LDESTracker::getPixFeatures(const cv::Mat& patchl, int* size) -{ - int h = patchl.rows, w = patchl.cols; - cv::Mat features(patchl.channels(), w*h, CV_32F); - std::vector planes(3); - cv::split(patchl, planes); - planes[0].reshape(1, 1).copyTo(features.row(0)); - planes[1].reshape(1, 1).copyTo(features.row(1)); - 
planes[2].reshape(1, 1).copyTo(features.row(2)); - size[0] = h; - size[1] = w; - size[2] = patchl.channels(); - return features; -} - -/// -float LDESTracker::subPixelPeak(float left, float center, float right) -{ - float divisor = 2 * center - right - left; - if (divisor == 0) - return 0; - return 0.5f * (right - left) / divisor; -} - -/// -void LDESTracker::weightedPeak(cv::Mat& resmap, cv::Point2f& peak, int pad) -{ - cv::copyMakeBorder(resmap, resmap, pad, pad, pad, pad, cv::BORDER_REFLECT); - cv::Rect slobe(cvRound(peak.x), cvRound(peak.y), 2*pad+1, 2*pad+1); - cv::Mat patchl; - resmap(slobe).copyTo(patchl); - - int sz = 2 * pad + 1; - int N = sz * sz; - - std::vector xoffset, yoffset; - for (int i = 0; i < N; ++i) { - xoffset.push_back((i%sz) - pad); - yoffset.push_back(i / sz - pad); - } - float* data = (float*)patchl.data; - float xsum = 0, ysum = 0, sum = 0; - for (int i = 0; i < N; ++i) { - sum += data[i]; - xsum += data[i] * (peak.x + xoffset[i]); - ysum += data[i] * (peak.y + yoffset[i]); - } - peak.x = xsum / sum; - peak.y = ysum / sum; -} - -/// -float LDESTracker::calcPSR() -{ - int px = cvRound(peak_loc.x); - int py = cvRound(peak_loc.y); - - cv::Mat res = resmap_location.clone(); - float peak = pv_location; - - const float rate = 0.6f / (1 + padding); - int range = (int)(sqrt(res.cols*res.rows)*rate); - - cv::Rect peak_rect = cv::Rect(px - range / 2, py - range / 2, range, range); - - cv::Mat peakBuff = cv::Mat::zeros(range, range, CV_32FC1); - peakBuff.copyTo(res(peak_rect)); - - float numel = static_cast(res.cols*res.rows); - float mu = static_cast(cv::sum(res)[0]);// / (); - mu /= numel; - cv::Mat subs; - cv::subtract(res, mu, subs); - cv::multiply(subs, subs, subs); - float var = static_cast(cv::sum(subs)[0]); - var /= (numel - 1); //sample variance - float stdev = sqrt(var); - - float psr = (peak - mu) / stdev; - - cscore = psr; - psr = 0.1f*cscore + 0.9f*sscore; - - return psr; -} - -/// -cv::Rect LDESTracker::testKCFTracker(const cv::Mat& 
image, cv::Rect& rect, bool init) -{ - im_width = image.cols; - im_height = image.rows; - if (init) { - _rotation = false; - this->Initialize(image, rect); - return rect; - } - else { - getSubWindow(image, "loc"); - cv::Rect rec(cur_pos.x - window_sz / 2, cur_pos.y - window_sz / 2, window_sz, window_sz); - - cv::Mat x = getFeatures(patch, hann, size_patch, false); - estimateLocation(_z, x); - x = getFeatures(patch, hann, size_patch, false); - - cur_roi.width = cvRound(target_sz.width*cur_scale); - cur_roi.height = cvRound(target_sz.height*cur_scale); - cur_roi.x = cvRound(cur_pos.x - cur_roi.width / 2); - cur_roi.y = cvRound(cur_pos.y - cur_roi.height / 2); - - trainLocation(x, train_interp_factor); - return cur_roi; - } -} diff --git a/src/Tracker/ldes/ldes_tracker.h b/src/Tracker/ldes/ldes_tracker.h deleted file mode 100644 index ad0c8114b..000000000 --- a/src/Tracker/ldes/ldes_tracker.h +++ /dev/null @@ -1,131 +0,0 @@ -#pragma once -#include -#include "fft_functions.h" -#include "correlation.h" -#include "fhog.hpp" -#include "hann.h" - -#include "../VOTTracker.hpp" - -class LDESTracker : public VOTTracker -{ -public: - LDESTracker(); - ~LDESTracker(); - - void Initialize(const cv::Mat &im, cv::Rect region); - cv::RotatedRect Update(const cv::Mat &im, float& confidence); - void Train(const cv::Mat &/*im*/, bool /*first*/) - { - } - -protected: - float interp_n; - float interp_factor; // linear interpolation factor for adaptation - float sigma; // gaussian kernel bandwidth - float lambda; // regularization - int cell_size; // HOG cell size - int cell_sizeQ; // cell size^2, to avoid repeated operations - int cell_size_scale; - float padding; // extra area surrounding the target - float scale_padding; - float output_sigma_factor; // bandwidth of gaussian target - int template_size; // template size - int scale_template_size; - - float scale_step; // scale step for multi-scale estimation - float scale_weight; // to downweight detection scores of other scales for 
added stability - - float train_interp_factor; - float interp_factor_scale; - - float cscore; - float sscore; - - cv::Size target_sz; - cv::Size target_sz0; - int window_sz; - int window_sz0; - - int scale_sz; - int scale_sz0; - float scale_base; - - cv::Mat hann; - - cv::Mat patch; - cv::Mat patchL; - - cv::Point2i cur_pos; - cv::Rect cur_roi; - std::vector rotated_roi; - - int im_width; - int im_height; - - const float min_area = 100 * 100; - const float max_area = 350 * 350; - - cv::Rect cur_position; - - float cur_rot_degree; - float cur_scale; - float _scale; - float _scale2; - float delta_rot; - float delta_scale; - float mag; - float conf; - - cv::Mat resmap_location; - float pv_location; - cv::Point2f peak_loc; - - - cv::Rect testKCFTracker(const cv::Mat& image, cv::Rect& rect, bool init = false); - cv::Mat getFeatures(const cv::Mat & patch, cv::Mat& han, int* sizes, bool inithann = false); - cv::Mat getPixFeatures(const cv::Mat& patch, int* size); - float subPixelPeak(float left, float center, float right); - void weightedPeak(cv::Mat& resmap, cv::Point2f& peak, int pad=2); - float calcPSR(); - void updateModel(const cv::Mat& image, int polish); //MATLAB code - - void estimateLocation(cv::Mat& z, cv::Mat x); - void estimateScale(cv::Mat& z, cv::Mat& x); - - void trainLocation(cv::Mat& x, float train_interp_factor_); - void trainScale(cv::Mat& x, float train_interp_factor_); - - void createGaussianPeak(int sizey, int sizex); - - void getTemplates(const cv::Mat& image); - - void getSubWindow(const cv::Mat& image, const char* type="loc"); - - cv::Mat padImage(const cv::Mat& image, int& x1, int& y1, int& x2, int& y2); - cv::Mat cropImage(const cv::Mat& image, const cv::Point2i& pos, int sz); - cv::Mat cropImageAffine(const cv::Mat& image, const cv::Point2i& pos, int win_sz, float rot); - - - cv::Mat hogFeatures; - cv::Mat _alphaf; - cv::Mat _y; - cv::Mat _yf; //alphaf on f domain - cv::Mat _z; //template on time domain - cv::Mat _z_srch; - cv::Mat modelPatch; 
- cv::Mat _num; - cv::Mat _den; - cv::Mat _labCentroids; - cv::Rect _roi; - -private: - int size_patch[3]; - int size_scale[3]; - int size_search[3]; - int _gaussian_size; - bool _hogfeatures; - bool _labfeatures; - bool _rotation; - bool _scale_hog; -}; \ No newline at end of file diff --git a/src/Tracker/staple/fhog.cpp b/src/Tracker/staple/fhog.cpp deleted file mode 100644 index 8e43c868e..000000000 --- a/src/Tracker/staple/fhog.cpp +++ /dev/null @@ -1,651 +0,0 @@ -#include -#include - -#include "fhog.h" -#undef MIN - -// platform independent aligned memory allocation (see also alFree) -void* alMalloc( size_t size, int alignment ) { - const size_t pSize = sizeof(void*), a = alignment-1; - void *raw = wrMalloc(size + a + pSize); - void *aligned = (void*) (((size_t) raw + pSize + a) & ~a); - *(void**) ((size_t) aligned-pSize) = raw; - return aligned; -} - -// platform independent alignned memory de-allocation (see also alMalloc) -void alFree(void* aligned) { - void* raw = *(void**)((char*)aligned-sizeof(void*)); - wrFree(raw); -} - -/******************************************************************************* -* Piotr's Computer Vision Matlab Toolbox Version 3.30 -* Copyright 2014 Piotr Dollar & Ron Appel. 
[pdollar-at-gmail.com] -* Licensed under the Simplified BSD License [see external/bsd.txt] -*******************************************************************************/ -// #include "wrappers.hpp" - -#define PI 3.14159265f - -// compute x and y gradients for just one column (uses sse) -void grad1( float *I, float *Gx, float *Gy, int h, int w, int x ) { - int y, y1; float *Ip, *In, r; __m128 *_Ip, *_In, *_G, _r; - // compute column of Gx - Ip=I-h; In=I+h; r=.5f; - if(x==0) { r=1; Ip+=h; } else if(x==w-1) { r=1; In-=h; } - if( h<4 || h%4>0 || (size_t(I)&15) || (size_t(Gx)&15) ) { - for( y=0; yh-1) y1=h-1; - GRADY(1); Ip--; for(y=1; y PI-1e-6f ) a1[i]=PI-1e-6f; - init=true; return a1; -} - -// compute gradient magnitude and orientation at each location (uses sse) -void gradMag( float *I, float *M, float *O, int h, int w, int d, bool full ) { - int y; - __m128 *_Gx, *_Gy, *_M2, _m; - float *acost = acosTable(), acMult=10000.0f; - // allocate memory for storing one column of output (padded so h4%4==0) - int h4=(h%4==0) ? 
h : h-(h%4)+4; - int s = static_cast(d) * static_cast(h4) * sizeof(float); - float* M2=(float*) alMalloc(s,16); _M2=(__m128*) M2; - float* Gx=(float*) alMalloc(s,16); _Gx=(__m128*) Gx; - float* Gy=(float*) alMalloc(s,16); _Gy=(__m128*) Gy; - // compute gradient magnitude and orientation for each column - for(int x=0; x=oMax) o0=0; O0[i]=o0; - o1=o0+nb; if(o1==oMax) o1=0; O1[i]=o1; - m=M[i]*norm; M1[i]=od*m; M0[i]=m-M1[i]; - } else for(; i=oMax) o0=0; O0[i]=o0; - M0[i]=M[i]*norm; M1[i]=0; O1[i]=0; - } -} - -// compute nOrients gradient histograms per bin x bin block of pixels -void gradHist( float *M, float *O, float *H, int h, int w, - int bin, int nOrients, int softBin, bool full ) -{ - const int hb=h/bin, wb=w/bin, h0=hb*bin, w0=wb*bin, nb=wb*hb; - const float s=(float)bin, sInv=1/s, sInv2=1/s/s; - float *H0, *H1, *M0, *M1; int x, y; int *O0, *O1; float xb = 0, init = 0; - O0=(int*)alMalloc(h*sizeof(int),16); M0=(float*) alMalloc(h*sizeof(float),16); - O1=(int*)alMalloc(h*sizeof(int),16); M1=(float*) alMalloc(h*sizeof(float),16); - // main loop - for( x=0; x=0); - - if( softBin<0 && softBin%2==0 ) { - // no interpolation w.r.t. 
either orienation or spatial bin - H1=H+(x/bin)*hb; -#define GH H1[O0[y]]+=M0[y]; y++; - if( bin==1 ) for(y=0; y=0; - int xb0 = hasLf?(int)xb:-1; - bool hasRt = xb0 < wb-1; - xd=xb-xb0; - xb+=sInv; - yb=init; - y=0; - int yb0 = -1; - // macros for code conciseness -#define GHinit yd=yb-yb0; yb+=sInv; H0=H+xb0*hb+yb0; xyd=xd*yd; \ - ms[0]=1-xd-yd+xyd; ms[1]=yd-xyd; ms[2]=xd-xyd; ms[3]=xyd; -#define GH(H,ma,mb) H1=H; sse::STRu(*H1,sse::ADD(sse::LDu(*H1),sse::MUL(ma,mb))); - // leading rows, no top bin - for( ; y=hb-1) break; GHinit; _m0=sse::SET(M0[y]); - if(hasLf) { _m=sse::SET(0,0,ms[1],ms[0]); GH(H0+O0[y],_m,_m0); } - if(hasRt) { _m=sse::SET(0,0,ms[3],ms[2]); GH(H0+O0[y]+hb,_m,_m0); } - } else for( ; ; y++ ) { - yb0 = (int) yb; if(yb0>=hb-1) break; GHinit; - _m0=sse::SET(M0[y]); _m1=sse::SET(M1[y]); - if(hasLf) { _m=sse::SET(0,0,ms[1],ms[0]); - GH(H0+O0[y],_m,_m0); GH(H0+O1[y],_m,_m1); } - if(hasRt) { _m=sse::SET(0,0,ms[3],ms[2]); - GH(H0+O0[y]+hb,_m,_m0); GH(H0+O1[y]+hb,_m,_m1); } - } - // final rows, no bottom bin - for( ; y(hb1) * static_cast(wb1), sizeof(float)); - float* N1=N+hb1+1; - for( o=0; oclip) t=clip; c++; - const float r=.2357f; int o, x, y, c; float t; - const int nb=wb*hb, nbo=nOrients*nb, hb1=hb+1; - for( o=0; o(wb) * static_cast(hb) * static_cast(nOrients) * 2 + 2, sizeof(float)); - gradHist( M, O, R1, h, w, binSize, nOrients*2, softBin, true ); - // compute unnormalized contrast insensitive histograms - float* R2 = (float*) wrCalloc(static_cast(wb) * static_cast(hb) * static_cast(nOrients), sizeof(float)); - for(int o=0; onl1 ) mexErrMsgTxt("Incorrect number of outputs."); - if( nrnr1 ) mexErrMsgTxt("Incorrect number of inputs."); - nDims = mxGetNumberOfDimensions(pr[0]); dims = mxGetDimensions(pr[0]); - *h=dims[0]; *w=dims[1]; *d=(nDims==2) ? 
1 : dims[2]; *I = mxGetPr(pr[0]); - if( nDims!=2 && nDims!=3 ) mexErrMsgTxt("I must be a 2D or 3D array."); - if( mxGetClassID(pr[0])!=id ) mexErrMsgTxt("I has incorrect type."); -} - -// [Gx,Gy] = grad2(I) - see gradient2.m -void mGrad2( int nl, mxArray *pl[], int nr, const mxArray *pr[] ) { - int h, w, d; float *I, *Gx, *Gy; - checkArgs(nl,pl,nr,pr,1,2,1,1,&h,&w,&d,mxSINGLE_CLASS,(void**)&I); - if(h<2 || w<2) mexErrMsgTxt("I must be at least 2x2."); - pl[0]= mxCreateMatrix3( h, w, d, mxSINGLE_CLASS, 0, (void**) &Gx ); - pl[1]= mxCreateMatrix3( h, w, d, mxSINGLE_CLASS, 0, (void**) &Gy ); - grad2( I, Gx, Gy, h, w, d ); -} - -// [M,O] = gradMag( I, channel, full ) - see gradientMag.m -void mGradMag( int nl, mxArray *pl[], int nr, const mxArray *pr[] ) { - int h, w, d, c, full; float *I, *M, *O=0; - checkArgs(nl,pl,nr,pr,1,2,3,3,&h,&w,&d,mxSINGLE_CLASS,(void**)&I); - if(h<2 || w<2) mexErrMsgTxt("I must be at least 2x2."); - c = (int) mxGetScalar(pr[1]); full = (int) mxGetScalar(pr[2]); - if( c>0 && c<=d ) { I += h*w*(c-1); d=1; } - pl[0] = mxCreateMatrix3(h,w,1,mxSINGLE_CLASS,0,(void**)&M); - if(nl>=2) pl[1] = mxCreateMatrix3(h,w,1,mxSINGLE_CLASS,0,(void**)&O); - gradMag(I, M, O, h, w, d, full>0 ); -} - -// gradMagNorm( M, S, norm ) - operates on M - see gradientMag.m -void mGradMagNorm( int nl, mxArray *pl[], int nr, const mxArray *pr[] ) { - int h, w, d; float *M, *S, norm; - checkArgs(nl,pl,nr,pr,0,0,3,3,&h,&w,&d,mxSINGLE_CLASS,(void**)&M); - if( mxGetM(pr[1])!=h || mxGetN(pr[1])!=w || d!=1 || - mxGetClassID(pr[1])!=mxSINGLE_CLASS ) mexErrMsgTxt("M or S is bad."); - S = (float*) mxGetPr(pr[1]); norm = (float) mxGetScalar(pr[2]); - gradMagNorm(M,S,h,w,norm); -} - -// H=gradHist(M,O,[...]) - see gradientHist.m -void mGradHist( int nl, mxArray *pl[], int nr, const mxArray *pr[] ) { - int h, w, d, hb, wb, nChns, binSize, nOrients, softBin, useHog; - bool full; float *M, *O, *H, clipHog; - checkArgs(nl,pl,nr,pr,1,3,2,8,&h,&w,&d,mxSINGLE_CLASS,(void**)&M); - O = 
(float*) mxGetPr(pr[1]); - if( mxGetM(pr[1])!=h || mxGetN(pr[1])!=w || d!=1 || - mxGetClassID(pr[1])!=mxSINGLE_CLASS ) mexErrMsgTxt("M or O is bad."); - binSize = (nr>=3) ? (int) mxGetScalar(pr[2]) : 8; - nOrients = (nr>=4) ? (int) mxGetScalar(pr[3]) : 9; - softBin = (nr>=5) ? (int) mxGetScalar(pr[4]) : 1; - useHog = (nr>=6) ? (int) mxGetScalar(pr[5]) : 0; - clipHog = (nr>=7) ? (float) mxGetScalar(pr[6]) : 0.2f; - full = (nr>=8) ? (bool) (mxGetScalar(pr[7])>0) : false; - hb = h/binSize; wb = w/binSize; - nChns = useHog== 0 ? nOrients : (useHog==1 ? nOrients*4 : nOrients*3+5); - pl[0] = mxCreateMatrix3(hb,wb,nChns,mxSINGLE_CLASS,1,(void**)&H); - if( nOrients==0 ) return; - if( useHog==0 ) { - gradHist( M, O, H, h, w, binSize, nOrients, softBin, full ); - } else if(useHog==1) { - hog( M, O, H, h, w, binSize, nOrients, softBin, full, clipHog ); - } else { - fhog( M, O, H, h, w, binSize, nOrients, softBin, clipHog ); - } -} - -// inteface to various gradient functions (see corresponding Matlab functions) -void mexFunction( int nl, mxArray *pl[], int nr, const mxArray *pr[] ) { - int f; char action[1024]; f=mxGetString(pr[0],action,1024); nr--; pr++; - if(f) mexErrMsgTxt("Failed to get action."); - else if(!strcmp(action,"gradient2")) mGrad2(nl,pl,nr,pr); - else if(!strcmp(action,"gradientMag")) mGradMag(nl,pl,nr,pr); - else if(!strcmp(action,"gradientMagNorm")) mGradMagNorm(nl,pl,nr,pr); - else if(!strcmp(action,"gradientHist")) mGradHist(nl,pl,nr,pr); - else mexErrMsgTxt("Invalid action."); -} -#endif - - -float* crop_H(float *H,int* h_height,int* h_width,int depth,int dh,int dw) { - int crop_h = *h_height-dh-1; - int crop_w = *h_width-dw-1; - float* crop_H = new float[crop_h*crop_w*depth]; - - for(int i = 1;i < *h_height-dh;i ++) - for(int j = 1;j < *h_width-dw;j ++) - for(int k = 0;k < depth;k ++) - crop_H[i-1 + (j-1)*(crop_h) + k*(crop_h*crop_w)] = H[k*(*h_width * *h_height) + j*(*h_height) + i]; - delete []H; - *h_height = crop_h;*h_width = crop_w; - return 
crop_H; -} - -float* fhog(float *M,float* O,int height,int width,int /*channel*/,int *h,int *w,int *d,int binSize, int nOrients, float clip, bool crop) { - *h = height/binSize; - *w = width/binSize; - *d = nOrients*3+5; - const size_t allSize = static_cast(*h) * static_cast(*w) * static_cast(*d); - - float* H = new float[allSize]; - memset(H, 0, allSize * sizeof(float)); - - fhog( M, O, H, height, width, binSize, nOrients, -1, clip ); - - if(!crop) - return H; - return crop_H(H,h,w,*d,height%binSize < binSize/2,width%binSize < binSize/2); -} - -void fhog(cv::MatND &fhog_feature, const cv::Mat& input, int binSize, int nOrients, float clip, bool crop) { - int HEIGHT = input.rows; - int WIDTH = input.cols; - int DEPTH = input.channels(); - - float *II = new float[HEIGHT*WIDTH*DEPTH]; - int count=0; - - // MatLab:: RGB, OpenCV: BGR - - for (int i = 0; i < WIDTH; i++) { - for (int j = 0; j < HEIGHT; j++) { - cv::Vec3b p = input.at(j,i); - II[count+2] = p[0]; // B->R - II[count+1] = p[1]; // G->G - II[count+0] = p[2]; // R->B - count += 3; - } - } - - float *I = new float[HEIGHT*WIDTH*DEPTH]; - - // channel x width x height - for (int i = 0; i < WIDTH; i++) { - for (int j = 0; j < HEIGHT; j++) { - for (int k = 0; k < DEPTH; k++) { - I[k*WIDTH*HEIGHT+i*HEIGHT+j] = II[i*HEIGHT*DEPTH+j*DEPTH+k]; - } - } - } - - float *M = new float[HEIGHT*WIDTH], *O = new float[HEIGHT*WIDTH]; - gradMag(I, M, O, HEIGHT, WIDTH, DEPTH, true); - - int h,w,d; - float* HH = fhog(M,O,HEIGHT,WIDTH,DEPTH,&h,&w,&d,binSize,nOrients,clip,crop); - float* H = new float[w*h*d]; - - for(int i = 0;i < w; i++) - for(int j = 0;j < h; j++) - for(int k = 0;k < d; k++) - //H[i*h*d+j*d+k] = HH[k*w*h+i*h+j]; // ->hwd - H[j*w*d+i*d+k] = HH[k*w*h+i*h+j]; // ->whd - - fhog_feature = cv::MatND(h,w,CV_32FC(32),H).clone(); - - delete[] H; - - delete[] M; delete[] O; - delete[] II;delete[] I;delete[] HH; -} - -void fhog28(cv::MatND &fhog_feature, const cv::Mat& input, int binSize, int nOrients, float clip, bool crop) { - 
int HEIGHT = input.rows; - int WIDTH = input.cols; - int DEPTH = input.channels(); - - float *II = new float[WIDTH*HEIGHT*DEPTH]; - int count=0; - - // MatLab:: RGB, OpenCV: BGR - - for (int i = 0; i < WIDTH; i++) { - for (int j = 0; j < HEIGHT; j++) { - cv::Vec3b p = input.at(j,i); - II[count+2] = p[0]; // B->R - II[count+1] = p[1]; // G->G - II[count+0] = p[2]; // R->B - count += 3; - } - } - - float *I = new float[HEIGHT*WIDTH*DEPTH]; - - // channel x width x height - for (int i = 0; i < WIDTH; i++) { - for (int j = 0; j < HEIGHT; j++) { - for (int k = 0; k < DEPTH; k++) { - I[k*WIDTH*HEIGHT+i*HEIGHT+j] = II[i*HEIGHT*DEPTH+j*DEPTH+k]; - } - } - } - - float *M = new float[HEIGHT*WIDTH], *O = new float[HEIGHT*WIDTH]; - gradMag(I, M, O, HEIGHT, WIDTH, DEPTH, true); - - int h,w,d; - float* HH = fhog(M,O,HEIGHT,WIDTH,DEPTH,&h,&w,&d,binSize,nOrients,clip,crop); - -#undef CHANNELS -#define CHANNELS 28 - - assert(d >= CHANNELS); - - // out = zeros(h, w, 28, 'single'); - // out(:,:,2:28) = temp(:,:,1:27); - - float* H = new float[w*h*CHANNELS]; - - for(int i = 0;i < w; i++) - for(int j = 0;j < h; j++) { - //H[i*h*CHANNELS+j*CHANNELS+0] = 0.0; - H[j*w*CHANNELS+i*CHANNELS+0] = 0.0; - for(int k = 0;k < CHANNELS-1;k++) { - //H[i*h*CHANNELS+j*CHANNELS+k+1] = HH[k*w*h+i*h+j]; // ->hwd - H[j*w*CHANNELS+i*CHANNELS+k+1] = HH[k*w*h+i*h+j]; // ->whd - } - } - - fhog_feature = cv::MatND(h,w,CV_32FC(CHANNELS),H).clone(); - - delete[] H; - - delete[] M; delete[] O; - delete[] II;delete[] I;delete[] HH; -} - -void fhog31(cv::MatND &fhog_feature, const cv::Mat& input, int binSize, int nOrients, float clip, bool crop) { - int HEIGHT = input.rows; - int WIDTH = input.cols; - int DEPTH = input.channels(); - - float *II = new float[WIDTH*HEIGHT*DEPTH]; - int count=0; - - // MatLab:: RGB, OpenCV: BGR - - for (int i = 0; i < WIDTH; i++) { - for (int j = 0; j < HEIGHT; j++) { - cv::Vec3b p = input.at(j,i); - II[count+2] = p[0]; // B->R - II[count+1] = p[1]; // G->G - II[count+0] = p[2]; // 
R->B - count += 3; - } - } - - float *I = new float[HEIGHT*WIDTH*DEPTH]; - - // channel x width x height - for (int i = 0; i < WIDTH; i++) { - for (int j = 0; j < HEIGHT; j++) { - for (int k = 0; k < DEPTH; k++) { - I[k*WIDTH*HEIGHT+i*HEIGHT+j] = II[i*HEIGHT*DEPTH+j*DEPTH+k]; - } - } - } - - float *M = new float[HEIGHT*WIDTH], *O = new float[HEIGHT*WIDTH]; - gradMag(I, M, O, HEIGHT, WIDTH, DEPTH, true); - - int h,w,d; - float* HH = fhog(M,O,HEIGHT,WIDTH,DEPTH,&h,&w,&d,binSize,nOrients,clip,crop); - -#undef CHANNELS -#define CHANNELS 31 - - assert(d >= CHANNELS); - - // out = zeros(h, w, 31, 'single'); - // out(:,:,1:31) = temp(:,:,1:31); - - float* H = new float[w*h*CHANNELS]; - - for(int i = 0;i < w; i++) - for(int j = 0;j < h; j++) { - for(int k = 0;k < CHANNELS;k++) { - //H[i*h*CHANNELS+j*CHANNELS+k+1] = HH[k*w*h+i*h+j]; // ->hwd - H[j*w*CHANNELS+i*CHANNELS+k] = HH[k*w*h+i*h+j]; // ->whd - } - } - - fhog_feature = cv::MatND(h,w,CV_32FC(CHANNELS),H).clone(); - - delete[] H; - - delete[] M; delete[] O; - delete[] II;delete[] I;delete[] HH; -} diff --git a/src/Tracker/staple/fhog.h b/src/Tracker/staple/fhog.h deleted file mode 100644 index 34d7dcb79..000000000 --- a/src/Tracker/staple/fhog.h +++ /dev/null @@ -1,40 +0,0 @@ -#ifndef FHOG_H -#define FHOG_H - -#include -#include -#include -#include "sse.hpp" - -#include - -/** - Inputs: - float* I - a gray or color image matrix with shape = channel x width x height - int *h, *w, *d - return the size of the returned hog features - int binSize -[8] spatial bin size - int nOrients -[9] number of orientation bins - float clip -[.2] value at which to clip histogram bins - bool crop -[false] if true crop boundaries - - Return: - float* H - computed hog features with shape: (nOrients*3+5) x (w/binSize) x (h/binSize), if not crop - - Author: - Sophia - Date: - 2015-01-15 -**/ - -float* fhog(float* I,int height,int width,int channel,int *h,int *w,int *d,int binSize = 4,int nOrients = 9,float clip=0.2f,bool crop = false); -void 
fhog(cv::MatND &fhog_feature, const cv::Mat& input, int binSize = 4,int nOrients = 9,float clip=0.2f,bool crop = false); -void fhog28(cv::MatND &fhog_feature, const cv::Mat& input, int binSize = 4,int nOrients = 9,float clip=0.2f,bool crop = false); -void fhog31(cv::MatND &fhog_feature, const cv::Mat& input, int binSize = 4,int nOrients = 9,float clip=0.2f,bool crop = false); - -// wrapper functions if compiling from C/C++ -inline void wrError(const char *errormsg) { throw errormsg; } -inline void* wrCalloc( size_t num, size_t size ) { return calloc(num,size); } -inline void* wrMalloc( size_t size ) { return malloc(size); } -inline void wrFree( void * ptr ) { free(ptr); } - -#endif diff --git a/src/Tracker/staple/sse.hpp b/src/Tracker/staple/sse.hpp deleted file mode 100644 index 41768f7fd..000000000 --- a/src/Tracker/staple/sse.hpp +++ /dev/null @@ -1,66 +0,0 @@ -/******************************************************************************* -* Piotr's Computer Vision Matlab Toolbox Version 3.23 -* Copyright 2014 Piotr Dollar. 
[pdollar-at-gmail.com] -* Licensed under the Simplified BSD License [see external/bsd.txt] -*******************************************************************************/ -#ifndef _SSE_HPP_ -#define _SSE_HPP_ -#include // SSE2:, SSE3:, SSE4: - -namespace sse{ - -#define RETf inline __m128 -#define RETi inline __m128i - -// set, load and store values -RETf SET( const float &x ) { return _mm_set1_ps(x); } -RETf SET( float x, float y, float z, float w ) { return _mm_set_ps(x,y,z,w); } -RETi SET( const int &x ) { return _mm_set1_epi32(x); } -RETf LD( const float &x ) { return _mm_load_ps(&x); } -RETf LDu( const float &x ) { return _mm_loadu_ps(&x); } -RETf STR( float &x, const __m128 y ) { _mm_store_ps(&x,y); return y; } -RETf STR1( float &x, const __m128 y ) { _mm_store_ss(&x,y); return y; } -RETf STRu( float &x, const __m128 y ) { _mm_storeu_ps(&x,y); return y; } -RETf STR( float &x, const float y ) { return STR(x,SET(y)); } - -// arithmetic operators -RETi ADD( const __m128i x, const __m128i y ) { return _mm_add_epi32(x,y); } -RETf ADD( const __m128 x, const __m128 y ) { return _mm_add_ps(x,y); } -RETf ADD( const __m128 x, const __m128 y, const __m128 z ) { - return ADD(ADD(x,y),z); } -RETf ADD( const __m128 a, const __m128 b, const __m128 c, const __m128 &d ) { - return ADD(ADD(ADD(a,b),c),d); } -RETf SUB( const __m128 x, const __m128 y ) { return _mm_sub_ps(x,y); } -RETf MUL( const __m128 x, const __m128 y ) { return _mm_mul_ps(x,y); } -RETf MUL( const __m128 x, const float y ) { return MUL(x,SET(y)); } -RETf MUL( const float x, const __m128 y ) { return MUL(SET(x),y); } -RETf INC( __m128 &x, const __m128 y ) { return x = ADD(x,y); } -RETf INC( float &x, const __m128 y ) { __m128 t=ADD(LD(x),y); return STR(x,t); } -RETf DEC( __m128 &x, const __m128 y ) { return x = SUB(x,y); } -RETf DEC( float &x, const __m128 y ) { __m128 t=SUB(LD(x),y); return STR(x,t); } -RETf MIN( const __m128 x, const __m128 y ) { return _mm_min_ps(x,y); } -RETf RCP( const __m128 x ) { 
return _mm_rcp_ps(x); } -RETf RCPSQRT( const __m128 x ) { return _mm_rsqrt_ps(x); } - -// logical operators -RETf AND( const __m128 x, const __m128 y ) { return _mm_and_ps(x,y); } -RETi AND( const __m128i x, const __m128i y ) { return _mm_and_si128(x,y); } -RETf ANDNOT( const __m128 x, const __m128 y ) { return _mm_andnot_ps(x,y); } -RETf OR( const __m128 x, const __m128 y ) { return _mm_or_ps(x,y); } -RETf XOR( const __m128 x, const __m128 y ) { return _mm_xor_ps(x,y); } - -// comparison operators -RETf CMPGT( const __m128 x, const __m128 y ) { return _mm_cmpgt_ps(x,y); } -RETf CMPLT( const __m128 x, const __m128 y ) { return _mm_cmplt_ps(x,y); } -RETi CMPGT( const __m128i x, const __m128i y ) { return _mm_cmpgt_epi32(x,y); } -RETi CMPLT( const __m128i x, const __m128i y ) { return _mm_cmplt_epi32(x,y); } - -// conversion operators -RETf CVT( const __m128i x ) { return _mm_cvtepi32_ps(x); } -RETi CVT( const __m128 x ) { return _mm_cvttps_epi32(x); } - -#undef RETf -#undef RETi - -} -#endif diff --git a/src/Tracker/staple/staple_tracker.cpp b/src/Tracker/staple/staple_tracker.cpp deleted file mode 100644 index 6c672ee09..000000000 --- a/src/Tracker/staple/staple_tracker.cpp +++ /dev/null @@ -1,1525 +0,0 @@ -/* - * cv::Size(width, height) - * cv::Point(y, x) - * cv::Mat(height, width, channels, ... 
) - * cv::Mat save by row after row - * 2d: address = j * width + i - * 3d: address = j * width * channels + i * channels + k - * ------------------------------------------------------------ - * row == heigh == Point.y - * col == width == Point.x - * Mat::at(Point(x, y)) == Mat::at(y,x) - */ - -#include "fhog.h" -#include "staple_tracker.hpp" -#include - -/// -/// \brief STAPLE_TRACKER::STAPLE_TRACKER -/// -STAPLE_TRACKER::STAPLE_TRACKER() -{ - m_cfg = default_parameters_staple(); - frameno = 0; -} - -/// -/// \brief STAPLE_TRACKER::~STAPLE_TRACKER -/// -STAPLE_TRACKER::~STAPLE_TRACKER() -{ - -} - -/// -/// \brief STAPLE_TRACKER::mexResize -/// mexResize got different results using different OpenCV, it's not trustable -/// I found this bug by running vot2015/tunnel, it happened when frameno+1==22 after frameno+1==21 -/// \param im -/// \param output -/// \param newsz -/// \param method -/// -void STAPLE_TRACKER::mexResize(const cv::Mat &im, cv::Mat &output, cv::Size newsz, const char* /*method*/) -{ - int interpolation = cv::INTER_LINEAR; - -#if 0 - if(!strcmp(method, "antialias")){ - interpolation = cv::INTER_AREA; - } else if (!strcmp(method, "linear")){ - interpolation = cv::INTER_LINEAR; - } else if (!strcmp(method, "auto")){ - if(newsz.width > im.cols){ // xxx - interpolation = cv::INTER_LINEAR; - }else{ - interpolation = cv::INTER_AREA; - } - } else { - assert(0); - return; - } -#endif - - cv::resize(im, output, newsz, 0, 0, interpolation); -} - -/// -/// \brief STAPLE_TRACKER::default_parameters_staple -/// \return -/// -staple_cfg STAPLE_TRACKER::default_parameters_staple() -{ - staple_cfg cfg; - return cfg; -} - -/// -/// \brief STAPLE_TRACKER::initializeAllAreas -/// \param im -/// -void STAPLE_TRACKER::initializeAllAreas(const cv::Mat &im) -{ - // we want a regular frame surrounding the object - double avg_dim = (m_cfg.target_sz.width + m_cfg.target_sz.height) / 2.0; - - bg_area.width = cvRound(m_cfg.target_sz.width + avg_dim); - bg_area.height = 
cvRound(m_cfg.target_sz.height + avg_dim); - - // pick a "safe" region smaller than bbox to avoid mislabeling - fg_area.width = cvRound(m_cfg.target_sz.width - avg_dim * m_cfg.inner_padding); - fg_area.height = cvRound(m_cfg.target_sz.height - avg_dim * m_cfg.inner_padding); - - // saturate to image size - cv::Size imsize = im.size(); - - bg_area.width = std::min(bg_area.width, imsize.width - 1); - bg_area.height = std::min(bg_area.height, imsize.height - 1); - - // make sure the differences are a multiple of 2 (makes things easier later in color histograms) - bg_area.width = bg_area.width - (bg_area.width - m_cfg.target_sz.width) % 2; - bg_area.height = bg_area.height - (bg_area.height - m_cfg.target_sz.height) % 2; - - fg_area.width = fg_area.width + (bg_area.width - fg_area.width) % 2; - fg_area.height = fg_area.height + (bg_area.height - fg_area.width) % 2; - - //std::cout << "bg_area.width " << bg_area.width << " bg_area.height " << bg_area.height << std::endl; - //std::cout << "fg_area.width " << fg_area.width << " fg_area.height " << fg_area.height << std::endl; - - // Compute the rectangle with (or close to) params.fixedArea - // and same aspect ratio as the target bbox - - area_resize_factor = sqrt(m_cfg.fixed_area / float(bg_area.width * bg_area.height)); - norm_bg_area.width = cvRound(bg_area.width * area_resize_factor); - norm_bg_area.height = cvRound(bg_area.height * area_resize_factor); - - //std::cout << "area_resize_factor " << area_resize_factor << " norm_bg_area.width " << norm_bg_area.width << " norm_bg_area.height " << norm_bg_area.height << std::endl; - - // Correlation Filter (HOG) feature space - // It smaller that the norm bg area if HOG cell size is > 1 - cf_response_size.width = norm_bg_area.width / m_cfg.hog_cell_size; - cf_response_size.height = norm_bg_area.height / m_cfg.hog_cell_size; - - // given the norm BG area, which is the corresponding target w and h? 
- double norm_target_sz_w = 0.75*norm_bg_area.width - 0.25*norm_bg_area.height; - double norm_target_sz_h = 0.75*norm_bg_area.height - 0.25*norm_bg_area.width; - - // norm_target_sz_w = params.target_sz(2) * params.norm_bg_area(2) / bg_area(2); - // norm_target_sz_h = params.target_sz(1) * params.norm_bg_area(1) / bg_area(1); - norm_target_sz.width = cvRound(norm_target_sz_w); - norm_target_sz.height = cvRound(norm_target_sz_h); - - //std::cout << "norm_target_sz.width " << norm_target_sz.width << " norm_target_sz.height " << norm_target_sz.height << std::endl; - - // distance (on one side) between target and bg area - cv::Size norm_pad; - - norm_pad.width = (norm_bg_area.width - norm_target_sz.width) / 2; - norm_pad.height = (norm_bg_area.height - norm_target_sz.height) / 2; - - int radius = std::min(norm_pad.width, norm_pad.height); - - // norm_delta_area is the number of rectangles that are considered. - // it is the "sampling space" and the dimension of the final merged resposne - // it is squared to not privilege any particular direction - norm_delta_area = cv::Size((2*radius+1), (2*radius+1)); - - // Rectangle in which the integral images are computed. - // Grid of rectangles ( each of size norm_target_sz) has size norm_delta_area. - norm_pwp_search_area.width = norm_target_sz.width + norm_delta_area.width - 1; - norm_pwp_search_area.height = norm_target_sz.height + norm_delta_area.height - 1; - - //std::cout << "norm_pwp_search_area.width " << norm_pwp_search_area.width << " norm_pwp_search_area.height " << norm_pwp_search_area.height << std::endl; -} - -/// -/// \brief STAPLE_TRACKER::getSubwindow -/// GET_SUBWINDOW Obtain image sub-window, padding is done by replicating border values. -/// Returns sub-window of image IM centered at POS ([y, x] coordinates), -/// with size MODEL_SZ ([height, width]). 
If any pixels are outside of the image, -/// they will replicate the values at the borders -/// \param im -/// \param centerCoor -/// \param model_sz -/// \param scaled_sz -/// \param output -/// -void STAPLE_TRACKER::getSubwindow(const cv::Mat &im, cv::Point_ centerCoor, cv::Size model_sz, cv::Size scaled_sz, cv::Mat &output) -{ - cv::Size sz = scaled_sz; // scale adaptation - - // make sure the size is not to small - sz.width = std::max(sz.width, 2); - sz.height = std::max(sz.height, 2); - - cv::Mat subWindow; - - // xs = round(pos(2) + (1:sz(2)) - sz(2)/2); - // ys = round(pos(1) + (1:sz(1)) - sz(1)/2); - - cv::Point lefttop( - std::min(im.cols - 1, std::max(-sz.width + 1, int(centerCoor.x + 1 - sz.width/2.0+0.5))), - std::min(im.rows - 1, std::max(-sz.height + 1, int(centerCoor.y + 1 - sz.height/2.0+0.5))) - ); - - cv::Point rightbottom( - std::max(0, int(lefttop.x + sz.width - 1)), - std::max(0, int(lefttop.y + sz.height - 1)) - ); - - cv::Point lefttopLimit( - std::max(lefttop.x, 0), - std::max(lefttop.y, 0) - ); - cv::Point rightbottomLimit( - std::min(rightbottom.x, im.cols - 1), - std::min(rightbottom.y, im.rows - 1) - ); - - rightbottomLimit.x += 1; - rightbottomLimit.y += 1; - cv::Rect roiRect(lefttopLimit, rightbottomLimit); - - im(roiRect).copyTo(subWindow); - - int top = lefttopLimit.y - lefttop.y; - int bottom = rightbottom.y - rightbottomLimit.y + 1; - int left = lefttopLimit.x - lefttop.x; - int right = rightbottom.x - rightbottomLimit.x + 1; - - cv::copyMakeBorder(subWindow, subWindow, top, bottom, left, right, cv::BORDER_REPLICATE); - - // imresize(subWindow, output, model_sz, 'bilinear', 'AntiAliasing', false) - mexResize(subWindow, output, model_sz, "auto"); -} - -/// -/// \brief STAPLE_TRACKER::updateHistModel -/// UPDATEHISTMODEL create new models for foreground and background or update the current ones -/// \param new_model -/// \param patch -/// \param learning_rate_pwp -/// -void STAPLE_TRACKER::updateHistModel(bool new_model, cv::Mat 
&patch, float learning_rate_pwp) -{ - // Get BG (frame around target_sz) and FG masks (inner portion of target_sz) - - //////////////////////////////////////////////////////////////////////// - cv::Size pad_offset1; - - // we constrained the difference to be mod2, so we do not have to round here - pad_offset1.width = (bg_area.width - target_sz.width) / 2; - pad_offset1.height = (bg_area.height - target_sz.height) / 2; - - // difference between bg_area and target_sz has to be even - if ( - ( - (pad_offset1.width == round(pad_offset1.width)) && - (pad_offset1.height != round(pad_offset1.height)) - ) || - ( - (pad_offset1.width != round(pad_offset1.width)) && - (pad_offset1.height == round(pad_offset1.height)) - )) { - assert(0); - } - - pad_offset1.width = std::max(pad_offset1.width, 1); - pad_offset1.height = std::max(pad_offset1.height, 1); - - //std::cout << "pad_offset1 " << pad_offset1 << std::endl; - - cv::Mat bg_mask(bg_area, CV_8UC1, cv::Scalar(1)); // init bg_mask - - // xxx: bg_mask(pad_offset1(1)+1:end-pad_offset1(1), pad_offset1(2)+1:end-pad_offset1(2)) = false; - - cv::Rect pad1_rect( - pad_offset1.width, - pad_offset1.height, - bg_area.width - 2 * pad_offset1.width, - bg_area.height - 2 * pad_offset1.height - ); - - bg_mask(pad1_rect) = false; - - //////////////////////////////////////////////////////////////////////// - - // we constrained the difference to be mod2, so we do not have to round here - cv::Size pad_offset2((bg_area.width - fg_area.width) / 2, (bg_area.height - fg_area.height) / 2); - - // difference between bg_area and fg_area has to be even - if ( - ( - (pad_offset2.width == round(pad_offset2.width)) && - (pad_offset2.height != round(pad_offset2.height)) - ) || - ( - (pad_offset2.width != round(pad_offset2.width)) && - (pad_offset2.height == round(pad_offset2.height)) - )) { - assert(0); - } - - pad_offset2.width = std::max(pad_offset2.width, 1); - pad_offset2.height = std::max(pad_offset2.height, 1); - - cv::Mat fg_mask(bg_area, 
CV_8UC1, cv::Scalar(0)); // init fg_mask - - // xxx: fg_mask(pad_offset2(1)+1:end-pad_offset2(1), pad_offset2(2)+1:end-pad_offset2(2)) = true; - - auto Clamp = [](int& v, int& size, int hi) -> int - { - int res = 0; - - if (size < 2) - { - size = 2; - } - if (v < 0) - { - res = v; - v = 0; - return res; - } - else if (v + size > hi - 1) - { - v = hi - 1 - size; - if (v < 0) - { - size += v; - v = 0; - } - res = v; - return res; - } - return res; - }; - - cv::Rect pad2_rect( - pad_offset2.width, - pad_offset2.height, - bg_area.width - 2 * pad_offset2.width, - bg_area.height - 2 * pad_offset2.height - ); - - if (!Clamp(pad2_rect.x, pad2_rect.width, fg_mask.cols) && !Clamp(pad2_rect.y, pad2_rect.height, fg_mask.rows)) - { - fg_mask(pad2_rect) = true; - } - //////////////////////////////////////////////////////////////////////// - - cv::Mat fg_mask_new; - cv::Mat bg_mask_new; - - mexResize(fg_mask, fg_mask_new, norm_bg_area, "auto"); - mexResize(bg_mask, bg_mask_new, norm_bg_area, "auto"); - - int imgCount = 1; - int dims = 3; - const int sizes[] = { m_cfg.n_bins, m_cfg.n_bins, m_cfg.n_bins }; - const int channels[] = { 0, 1, 2 }; - float bRange[] = { 0, 256 }; - float gRange[] = { 0, 256 }; - float rRange[] = { 0, 256 }; - const float *ranges[] = { bRange, gRange, rRange }; - - if (m_cfg.grayscale_sequence) - dims = 1; - - // (TRAIN) BUILD THE MODEL - if (new_model) - { - cv::calcHist(&patch, imgCount, channels, bg_mask_new, bg_hist, dims, sizes, ranges); - cv::calcHist(&patch, imgCount, channels, fg_mask_new, fg_hist, dims, sizes, ranges); - - int bgtotal = std::max(1, cv::countNonZero(bg_mask_new)); - bg_hist = bg_hist / bgtotal; - - int fgtotal = std::max(1, cv::countNonZero(fg_mask_new)); - fg_hist = fg_hist / fgtotal; - } - else - { // update the model - cv::MatND bg_hist_tmp; - cv::MatND fg_hist_tmp; - - cv::calcHist(&patch, imgCount, channels, bg_mask_new, bg_hist_tmp, dims, sizes, ranges); - cv::calcHist(&patch, imgCount, channels, fg_mask_new, fg_hist_tmp, 
dims, sizes, ranges); - - int bgtotal = std::max(1, cv::countNonZero(bg_mask_new)); - bg_hist_tmp = bg_hist_tmp / bgtotal; - - int fgtotal = std::max(1, cv::countNonZero(fg_mask_new)); - fg_hist_tmp = fg_hist_tmp / fgtotal; - - // xxx - bg_hist = (1 - learning_rate_pwp)*bg_hist + learning_rate_pwp*bg_hist_tmp; - fg_hist = (1 - learning_rate_pwp)*fg_hist + learning_rate_pwp*fg_hist_tmp; - } -} - -/// -/// \brief STAPLE_TRACKER::CalculateHann -/// \param sz -/// \param output -/// -void STAPLE_TRACKER::CalculateHann(cv::Size sz, cv::Mat &output) -{ - cv::Mat temp1(cv::Size(sz.width, 1), CV_32FC1); - cv::Mat temp2(cv::Size(sz.height, 1), CV_32FC1); - - float *p1 = temp1.ptr(0); - float *p2 = temp2.ptr(0); - - for (int i = 0; i < sz.width; ++i) - p1[i] = static_cast(0.5 * (1 - cos(CV_2PI*i / (sz.width - 1)))); - - for (int i = 0; i < sz.height; ++i) - p2[i] = static_cast(0.5 * (1 - cos(CV_2PI*i / (sz.height - 1)))); - - output = temp2.t()*temp1; -} - -/// -/// \brief meshgrid -/// \param xr -/// \param yr -/// \param outX -/// \param outY -/// -void meshgrid(const cv::Range xr, const cv::Range yr, cv::Mat &outX, cv::Mat &outY) -{ - std::vector x; - x.reserve(xr.end - xr.start + 1); - std::vector y; - y.reserve(yr.end - yr.start + 1); - - for (int i = xr.start; i <= xr.end; i++) - x.push_back(i); - for (int i = yr.start; i <= yr.end; i++) - y.push_back(i); - - repeat(cv::Mat(x).t(), static_cast(y.size()), 1, outX); - repeat(cv::Mat(y), 1, static_cast(x.size()), outY); -} - -/// -/// \brief STAPLE_TRACKER::gaussianResponse -/// GAUSSIANRESPONSE create the (fixed) target response of the correlation filter response -/// \param rect_size -/// \param sigma -/// \param output -/// -void STAPLE_TRACKER::gaussianResponse(cv::Size rect_size, float sigma, cv::Mat &output) -{ - // half = floor((rect_size-1) / 2); - // i_range = -half(1):half(1); - // j_range = -half(2):half(2); - // [i, j] = ndgrid(i_range, j_range); - cv::Size half; - - half.width = (rect_size.width - 1) / 2; - 
half.height = (rect_size.height - 1) / 2; - - cv::Range i_range(-half.width, rect_size.width - (1 + half.width)); - cv::Range j_range(-half.height, rect_size.height - (1 + half.height)); - cv::Mat i, j; - - meshgrid(i_range, j_range, i, j); - - // i_mod_range = mod_one(i_range, rect_size(1)); - // j_mod_range = mod_one(j_range, rect_size(2)); - - std::vector i_mod_range; - i_mod_range.reserve(i_range.end - i_range.start + 1); - std::vector j_mod_range; - i_mod_range.reserve(j_range.end - j_range.start + 1); - - for (int k = i_range.start; k <= i_range.end; k++) { - int val = (int)(k - 1 + rect_size.width) % (int)rect_size.width; - i_mod_range.push_back(val); - } - - for (int k = j_range.start; k <= j_range.end; k++) { - int val = (int)(k - 1 + rect_size.height) % (int)rect_size.height; - j_mod_range.push_back(val); - } - - // y = zeros(rect_size); - // y(i_mod_range, j_mod_range) = exp(-(i.^2 + j.^2) / (2 * sigma^2)); - - output = cv::Mat(rect_size.height, rect_size.width, CV_32FC2); - - for (int jj = 0; jj < rect_size.height; jj++) - { - int j_idx = j_mod_range[jj]; - assert(j_idx < rect_size.height); - - for (int ii = 0; ii < rect_size.width; ii++) - { - int i_idx = i_mod_range[ii]; - assert(i_idx < rect_size.width); - - cv::Vec2f val(exp(-(i.at(jj, ii)*i.at(jj, ii) + j.at(jj, ii)*j.at(jj, ii)) / (2 * sigma*sigma)), 0); - output.at(j_idx, i_idx) = val; - } - } -} - -/// -/// \brief STAPLE_TRACKER::tracker_staple_initialize -/// \param im -/// \param region -/// -void STAPLE_TRACKER::Initialize(const cv::Mat &im, cv::Rect region) -{ - int n = im.channels(); - if (n == 1) - m_cfg.grayscale_sequence = true; - - // xxx: only support 3 channels, TODO: fix updateHistModel - //assert(!cfg.grayscale_sequence); - - m_cfg.init_pos.x = region.x + region.width / 2.0f; - m_cfg.init_pos.y = region.y + region.height / 2.0f; - - m_cfg.target_sz.width = region.width; - m_cfg.target_sz.height = region.height; - - initializeAllAreas(im); - - pos = m_cfg.init_pos; - target_sz = 
m_cfg.target_sz; - - // patch of the target + padding - cv::Mat patch_padded; - getSubwindow(im, pos, norm_bg_area, bg_area, patch_padded); - - // initialize hist model - updateHistModel(true, patch_padded); - - CalculateHann(cf_response_size, hann_window); - - // gaussian-shaped desired response, centred in (1,1) - // bandwidth proportional to target size - float output_sigma = sqrt(static_cast(norm_target_sz.width * norm_target_sz.height)) * m_cfg.output_sigma_factor / m_cfg.hog_cell_size; - - cv::Mat y; - gaussianResponse(cf_response_size, output_sigma, y); - cv::dft(y, yf); - - // SCALE ADAPTATION INITIALIZATION - if (m_cfg.scale_adaptation) - { - // Code from DSST - scale_factor = 1; - base_target_sz = target_sz; // xxx - float scale_sigma = sqrt(static_cast(m_cfg.num_scales)) * m_cfg.scale_sigma_factor; - - cv::Mat ys = cv::Mat(1, m_cfg.num_scales, CV_32FC2); - for (int i = 0; i < m_cfg.num_scales; i++) - { - cv::Vec2f val((i + 1) - ceil(m_cfg.num_scales/2.0f), 0.f); - val[0] = exp(-0.5f * (val[0] * val[0]) / (scale_sigma * scale_sigma)); - ys.at(i) = val; - - // SS = (1:p.num_scales) - ceil(p.num_scales/2); - // ys = exp(-0.5 * (ss.^2) / scale_sigma^2); - } - - cv::dft(ys, ysf, cv::DFT_ROWS); - //std::cout << ysf << std::endl; - - scale_window = cv::Mat(1, m_cfg.num_scales, CV_32FC1); - if (m_cfg.num_scales % 2 == 0) - { - for (int i = 0; i < m_cfg.num_scales + 1; ++i) - { - if (i > 0) - scale_window.at(i - 1) = 0.5f * (1 - cos(static_cast(CV_2PI) * i / (m_cfg.num_scales + 1 - 1))); - } - } - else - { - for (int i = 0; i < m_cfg.num_scales; ++i) - { - scale_window.at(i) = 0.5f * (1 - cos(static_cast(CV_2PI) * i / (m_cfg.num_scales - 1))); - } - } - - - scale_factors = cv::Mat(1, m_cfg.num_scales, CV_32FC1); - for (int i = 0; i < m_cfg.num_scales; i++) - { - scale_factors.at(i) = pow(m_cfg.scale_step, (ceil(m_cfg.num_scales/2.0f) - (i+1))); - } - - //std::cout << scale_factors << std::endl; - - //ss = 1:p.num_scales; - //scale_factors = 
p.scale_step.^(ceil(p.num_scales/2) - ss); - - if ((m_cfg.scale_model_factor * m_cfg.scale_model_factor) * (norm_target_sz.width * norm_target_sz.height) > m_cfg.scale_model_max_area) - m_cfg.scale_model_factor = sqrt(m_cfg.scale_model_max_area / (norm_target_sz.width * norm_target_sz.height)); - - //std::cout << cfg.scale_model_factor << std::endl; - - scale_model_sz.width = static_cast(norm_target_sz.width * m_cfg.scale_model_factor); - scale_model_sz.height = static_cast(norm_target_sz.height * m_cfg.scale_model_factor); - //scale_model_sz = floor(p.norm_target_sz * p.scale_model_factor); - - //std::cout << scale_model_sz << std::endl; - - // find maximum and minimum scales - min_scale_factor = pow(m_cfg.scale_step, ceil(log(std::max(5.0f / bg_area.width, 5.0f / bg_area.height)) / log(m_cfg.scale_step))); - max_scale_factor = pow(m_cfg.scale_step, floor(log(std::min(im.cols / (float)target_sz.width, im.rows / (float)target_sz.height)) / log(m_cfg.scale_step))); - - //min_scale_factor = p.scale_step ^ ceil(log(max(5 ./ bg_area)) / log(p.scale_step)); - //max_scale_factor = p.scale_step ^ floor(log(min([size(im,1) size(im,2)] ./ target_sz)) / log(p.scale_step)); - - //std::cout << min_scale_factor << " " << max_scale_factor << std::endl; - } -} - -/// -/// \brief STAPLE_TRACKER::getFeatureMap -/// code from DSST -/// \param im_patch -/// \param feature_type -/// \param output -/// -void STAPLE_TRACKER::getFeatureMap(cv::Mat &im_patch, const char* feature_type, cv::MatND &output) -{ - assert(!strcmp(feature_type, "fhog")); - - // allocate space -#if 0 - cv::Mat tmp_image; - im_patch.convertTo(tmp_image, CV_32FC1); - fhog28(output, tmp_image, cfg.hog_cell_size, 9); -#else - fhog28(output, im_patch, m_cfg.hog_cell_size, 9); -#endif - int w = cf_response_size.width; - int h = cf_response_size.height; - - // hog28 already generate this matrix of (w,h,28) - // out = zeros(h, w, 28, 'single'); - // out(:,:,2:28) = temp(:,:,1:27); - - cv::Mat new_im_patch; - - if 
(m_cfg.hog_cell_size > 1) - mexResize(im_patch, new_im_patch, cv::Size(w, h), "auto"); - else - new_im_patch = im_patch; - - cv::Mat grayimg; - - if (new_im_patch.channels() > 1) - cv::cvtColor(new_im_patch, grayimg, cv::COLOR_BGR2GRAY); - else - grayimg = new_im_patch; - - // out(:,:,1) = single(im_patch)/255 - 0.5; - - float alpha = 1.f / 255.0f; - float betta = 0.5f; - - typedef cv::Vec Vecf28; - - for (int j = 0; j < h; ++j) - { - Vecf28* pDst = output.ptr(j); - const float* pHann = hann_window.ptr(j); - const uchar* pGray = grayimg.ptr(j); - - for (int i = 0; i < w; ++i) - { - // apply Hann window - Vecf28& val = pDst[0]; - - val = val * pHann[0]; - val[0] = (alpha * pGray[0] - betta) * pHann[0]; - - ++pDst; - ++pHann; - ++pGray; - } - } -} - -/// -/// \brief matsplit -/// \param xt -/// \param xtsplit -/// -void matsplit(const cv::MatND &xt, std::vector &xtsplit) -{ - int w = xt.cols; - int h = xt.rows; - int cn = xt.channels(); - - assert(cn == 28); - - for (int k = 0; k < cn; k++) - { - cv::Mat dim = cv::Mat(h, w, CV_32FC2); - - for (int j = 0; j < h; ++j) - { - float* pDst = dim.ptr(j); - const float* pSrc = xt.ptr(j); - - for (int i = 0; i < w; ++i) - { - pDst[0] = pSrc[k]; - pDst[1] = 0.0f; - - pSrc += cn; - pDst += 2; - } - } - - xtsplit.push_back(dim); - } -} - -/// -/// \brief STAPLE_TRACKER::getSubwindowFloor -/// GET_SUBWINDOW Obtain image sub-window, padding is done by replicating border values. -/// Returns sub-window of image IM centered at POS ([y, x] coordinates), -/// with size MODEL_SZ ([height, width]). 
If any pixels are outside of the image, -/// they will replicate the values at the borders -/// \param im -/// \param centerCoor -/// \param model_sz -/// \param scaled_sz -/// \param output -/// -void STAPLE_TRACKER::getSubwindowFloor(const cv::Mat &im, cv::Point_ centerCoor, cv::Size model_sz, cv::Size scaled_sz, cv::Mat &output) -{ - cv::Size sz = scaled_sz; // scale adaptation - - // make sure the size is not to small - sz.width = std::max(sz.width, 2); - sz.height = std::max(sz.height, 2); - - cv::Mat subWindow; - - // xs = floor(pos(2)) + (1:patch_sz(2)) - floor(patch_sz(2)/2); - // ys = floor(pos(1)) + (1:patch_sz(1)) - floor(patch_sz(1)/2); - - cv::Point lefttop( - std::min(im.cols - 1, std::max(-sz.width + 1, int(centerCoor.x + 1) - int(sz.width/2.0))), - std::min(im.rows - 1, std::max(-sz.height + 1, int(centerCoor.y + 1) - int(sz.height/2.0))) - ); - - cv::Point rightbottom( - std::max(0, int(lefttop.x + sz.width - 1)), - std::max(0, int(lefttop.y + sz.height - 1)) - ); - - cv::Point lefttopLimit( - std::max(lefttop.x, 0), - std::max(lefttop.y, 0) - ); - cv::Point rightbottomLimit( - std::min(rightbottom.x, im.cols - 1), - std::min(rightbottom.y, im.rows - 1) - ); - - rightbottomLimit.x += 1; - rightbottomLimit.y += 1; - cv::Rect roiRect(lefttopLimit, rightbottomLimit); - - im(roiRect).copyTo(subWindow); - - // imresize(subWindow, output, model_sz, 'bilinear', 'AntiAliasing', false) - mexResize(subWindow, output, model_sz, "auto"); -} - -/// -/// \brief STAPLE_TRACKER::getScaleSubwindow -/// code from DSST -/// \param im -/// \param centerCoor -/// \param output -/// -void STAPLE_TRACKER::getScaleSubwindow(const cv::Mat &im, cv::Point_ centerCoor, cv::Mat &output) -{ - int ch = 0; - int total = 0; - - for (int s = 0; s < m_cfg.num_scales; s++) - { - cv::Size_ patch_sz; - - patch_sz.width = floor(base_target_sz.width * scale_factor * scale_factors.at(s)); - patch_sz.height = floor(base_target_sz.height * scale_factor * scale_factors.at(s)); - - cv::Mat 
im_patch_resized; - getSubwindowFloor(im, centerCoor, scale_model_sz, patch_sz, im_patch_resized); - - // extract scale features - cv::MatND temp; - fhog31(temp, im_patch_resized, m_cfg.hog_cell_size, 9); - - if (s == 0) - { - ch = temp.channels(); - total = temp.cols * temp.rows * ch; - - output = cv::Mat(total, m_cfg.num_scales, CV_32FC2); - } - - int tempw = temp.cols; - int temph = temp.rows; - int tempch = temp.channels(); - - int count = 0; - - float scaleWnd = scale_window.at(s); - - float* outData = (float*)output.data; - - // window - for (int j = 0; j < temph; ++j) - { - const float* tmpData = temp.ptr(j); - - for (int i = 0; i < tempw; ++i) - { - for (int k = 0; k < tempch; ++k) - { - outData[(count * m_cfg.num_scales + s) * 2 + 0] = tmpData[k] * scaleWnd; - outData[(count * m_cfg.num_scales + s) * 2 + 1] = 0.0; - - ++count; - } - tmpData += ch; - } - } - } -} - -/// -/// \brief STAPLE_TRACKER::tracker_staple_train -/// TRAINING -/// \param im -/// \param first -/// -void STAPLE_TRACKER::Train(const cv::Mat &im, bool first) -{ - // extract patch of size bg_area and resize to norm_bg_area - cv::Mat im_patch_bg; - getSubwindow(im, pos, norm_bg_area, bg_area, im_patch_bg); - - // compute feature map, of cf_response_size - cv::MatND xt; - getFeatureMap(im_patch_bg, m_cfg.feature_type, xt); - - // apply Hann window in getFeatureMap - // xt = bsxfun(@times, hann_window, xt); - - // compute FFT - // cv::MatND xtf; - std::vector xtsplit; - std::vector xtf; // xtf is splits of xtf - - matsplit(xt, xtsplit); - - for (int i = 0; i < xt.channels(); i++) { - cv::Mat dimf; - cv::dft(xtsplit[i], dimf); - xtf.push_back(dimf); - } - - // FILTER UPDATE - // Compute expectations over circular shifts, - // therefore divide by number of pixels. 
- // new_hf_num = bsxfun(@times, conj(yf), xtf) / prod(p.cf_response_size); - // new_hf_den = (conj(xtf) .* xtf) / prod(p.cf_response_size); - - { - std::vector new_hf_num; - std::vector new_hf_den; - - int w = xt.cols; - int h = xt.rows; - float invArea = 1.f / (cf_response_size.width * cf_response_size.height); - - for (int ch = 0; ch < xt.channels(); ch++) - { - cv::Mat dim = cv::Mat(h, w, CV_32FC2); - - for (int j = 0; j < h; ++j) - { - const float* pXTF = xtf[ch].ptr(j); - const float* pYF = yf.ptr(j); - cv::Vec2f* pDst = dim.ptr(j); - - for (int i = 0; i < w; ++i) - { - cv::Vec2f val(pYF[1] * pXTF[1] + pYF[0] * pXTF[0], pYF[0] * pXTF[1] - pYF[1] * pXTF[0]); - *pDst = invArea * val; - - pXTF += 2; - pYF += 2; - ++pDst; - } - } - new_hf_num.push_back(dim); - } - - for (int ch = 0; ch < xt.channels(); ch++) - { - cv::Mat dim = cv::Mat(h, w, CV_32FC1); - - for (int j = 0; j < h; ++j) - { - const float* pXTF = xtf[ch].ptr(j); - float* pDst = dim.ptr(j); - - for (int i = 0; i < w; ++i) - { - *pDst = invArea * (pXTF[0]*pXTF[0] + pXTF[1]*pXTF[1]); - - pXTF += 2; - ++pDst; - } - } - new_hf_den.push_back(dim); - } - - if (first) { - // first frame, train with a single image - hf_den.assign(new_hf_den.begin(), new_hf_den.end()); - hf_num.assign(new_hf_num.begin(), new_hf_num.end()); - } else { - // subsequent frames, update the model by linear interpolation - for (int ch = 0; ch < xt.channels(); ch++) { - hf_den[ch] = (1 - m_cfg.learning_rate_cf) * hf_den[ch] + m_cfg.learning_rate_cf * new_hf_den[ch]; - hf_num[ch] = (1 - m_cfg.learning_rate_cf) * hf_num[ch] + m_cfg.learning_rate_cf * new_hf_num[ch]; - } - - updateHistModel(false, im_patch_bg, m_cfg.learning_rate_pwp); - - // BG/FG MODEL UPDATE - // patch of the target + padding - // [bg_hist, fg_hist] = updateHistModel(new_pwp_model, im_patch_bg, bg_area, fg_area, target_sz, p.norm_bg_area, p.n_bins, p.grayscale_sequence, bg_hist, fg_hist, p.learning_rate_pwp); - } - } - - // SCALE UPDATE - if (m_cfg.scale_adaptation) { 
- cv::Mat im_patch_scale; - - getScaleSubwindow(im, pos, im_patch_scale); - - cv::Mat xsf; - cv::dft(im_patch_scale, xsf, cv::DFT_ROWS); - - // new_sf_num = bsxfun(@times, ysf, conj(xsf)); - // new_sf_den = sum(xsf .* conj(xsf), 1); - - cv::Mat new_sf_num; - cv::Mat new_sf_den; - - int w = xsf.cols; - int h = xsf.rows; - - new_sf_num = cv::Mat(h, w, CV_32FC2); - - for (int j = 0; j < h; ++j) // xxx - { - float* pDst = new_sf_num.ptr(j); - - const float* pXSF = xsf.ptr(j); - const float* pYSF = ysf.ptr(0); - - for (int i = 0; i < w; ++i) - { - pDst[0] = (pYSF[1] * pXSF[1] + pYSF[0] * pXSF[0]); - pDst[1] = (pYSF[1] * pXSF[0] - pYSF[0] * pXSF[1]); - - pXSF += 2; - pYSF += 2; - pDst += 2; - } - } - - new_sf_den = cv::Mat(1, w, CV_32FC1, cv::Scalar(0, 0, 0)); - float* pDst = new_sf_den.ptr(0); - - for (int j = 0; j < h; ++j) - { - const float* pSrc = xsf.ptr(j); - - for (int i = 0; i < w; ++i) - { - pDst[i] += (pSrc[0] * pSrc[0] + pSrc[1] * pSrc[1]); - pSrc += 2; - } - } - - if (first) { - // first frame, train with a single image - new_sf_den.copyTo(sf_den); - new_sf_num.copyTo(sf_num); - } else { - sf_den = (1 - m_cfg.learning_rate_scale) * sf_den + m_cfg.learning_rate_scale * new_sf_den; - sf_num = (1 - m_cfg.learning_rate_scale) * sf_num + m_cfg.learning_rate_scale * new_sf_num; - } - } - - // update bbox position - if (first) { - rect_position.x = cvRound(pos.x - target_sz.width / 2); - rect_position.y = cvRound(pos.y - target_sz.height / 2); - rect_position.width = target_sz.width; - rect_position.height = target_sz.height; - } - - frameno += 1; -} - -/// -/// \brief ensure_real -/// xxx: improve later -/// \param complex -/// \return -/// -cv::Mat ensure_real(const cv::Mat &complex) -{ - int w = complex.cols; - int h = complex.rows; - - cv::Mat real = cv::Mat(h, w, CV_32FC1); - - for (int j = 0; j < h; ++j) - { - float* pDst = real.ptr(j); - const float* pSrc = complex.ptr(j); - - for (int i = 0; i < w; ++i) - { - *pDst = *pSrc; - ++pDst; - pSrc += 2; - } - } - 
return real; -} - -/// -/// \brief STAPLE_TRACKER::cropFilterResponse -/// \param response_cf -/// \param response_size -/// \param output -/// -void STAPLE_TRACKER::cropFilterResponse(const cv::Mat &response_cf, cv::Size response_size, cv::Mat& output) -{ - int w = response_cf.cols; - int h = response_cf.rows; - - // newh and neww must be odd, as we want an exact center - assert(((response_size.width % 2) == 1) && ((response_size.height % 2) == 1)); - - int half_width = response_size.width / 2; - int half_height = response_size.height / 2; - - cv::Range i_range(-half_width, response_size.width - (1 + half_width)); - cv::Range j_range(-half_height, response_size.height - (1 + half_height)); - - std::vector i_mod_range; - i_mod_range.reserve(i_range.end - i_range.start + 1); - std::vector j_mod_range; - i_mod_range.reserve(j_range.end - j_range.start + 1); - - for (int k = i_range.start; k <= i_range.end; k++) { - int val = (k - 1 + w) % w; - i_mod_range.push_back(val); - } - - for (int k = j_range.start; k <= j_range.end; k++) { - int val = (k - 1 + h) % h; - j_mod_range.push_back(val); - } - - cv::Mat tmp = cv::Mat(response_size.height, response_size.width, CV_32FC1, cv::Scalar(0, 0, 0)); - - for (int j = 0; j < response_size.height; j++) - { - int j_idx = j_mod_range[j]; - assert(j_idx < h); - - float* pDst = tmp.ptr(j); - const float* pSrc = response_cf.ptr(j_idx); - - for (int i = 0; i < response_size.width; i++) - { - int i_idx = i_mod_range[i]; - assert(i_idx < w); - - *pDst = pSrc[i_idx]; - ++pDst; - } - } - output = tmp; -} - -/// -/// \brief STAPLE_TRACKER::getColourMap -/// GETCOLOURMAP computes pixel-wise probabilities (PwP) given PATCH and models BG_HIST and FG_HIST -/// \param patch -/// \param output -/// -void STAPLE_TRACKER::getColourMap(const cv::Mat &patch, cv::Mat& output) -{ - // check whether the patch has 3 channels - int h = patch.rows; - int w = patch.cols; - int d = patch.channels(); - - // figure out which bin each pixel falls into - int 
bin_width = 256 / m_cfg.n_bins; - - // convert image to d channels array - //patch_array = reshape(double(patch), w*h, d); - - output = cv::Mat(h, w, CV_32FC1); - - if (!m_cfg.grayscale_sequence) - { - for (int j = 0; j < h; ++j) - { - const uchar* pSrc = patch.ptr(j); - float* pDst = output.ptr(j); - - for (int i = 0; i < w; ++i) - { - int b1 = pSrc[0] / bin_width; - int b2 = pSrc[1] / bin_width; - int b3 = pSrc[2] / bin_width; - - float* histd = (float*)bg_hist.data; - float probg = histd[b1 * m_cfg.n_bins * m_cfg.n_bins + b2 * m_cfg.n_bins + b3]; - - histd = (float*)fg_hist.data; - float profg = histd[b1 * m_cfg.n_bins * m_cfg.n_bins + b2 * m_cfg.n_bins + b3]; - - // xxx - *pDst = profg / (profg + probg); - if (std::isnan(*pDst)) - *pDst = 0.0; - - pSrc += d; - ++pDst; - - // (TODO) in theory it should be at 0.5 (unseen colors shoud have max entropy) - //likelihood_map(isnan(likelihood_map)) = 0; - } - } - } - else - { - for (int j = 0; j < h; j++) - { - const uchar* pSrc = patch.ptr(j); - float* pDst = output.ptr(j); - - for (int i = 0; i < w; i++) - { - int b = *pSrc; - - float* histd = (float*)bg_hist.data; - float probg = histd[b]; - - histd = (float*)fg_hist.data; - float profg = histd[b]; - - // xxx - *pDst = profg / (profg + probg); - if (std::isnan(*pDst)) - *pDst = 0.0; - - pSrc += d; - ++pDst; - - // (TODO) in theory it should be at 0.5 (unseen colors shoud have max entropy) - //likelihood_map(isnan(likelihood_map)) = 0; - } - } - } - - // to which bin each pixel (for all d channels) belongs to - //bin_indices = floor(patch_array/bin_width) + 1; - - // Get pixel-wise posteriors (PwP) - // P_bg = getP(bg_hist, h, w, bin_indices, grayscale_sequence); - // P_fg = getP(fg_hist, h, w, bin_indices, grayscale_sequence); - - // Object-likelihood map - //P_O = P_fg ./ (P_fg + P_bg); -} - -/// -/// \brief STAPLE_TRACKER::getCenterLikelihood -/// GETCENTERLIKELIHOOD computes the sum over rectangles of size M. 
-/// \param object_likelihood -/// \param m -/// \param center_likelihood -/// -void STAPLE_TRACKER::getCenterLikelihood(const cv::Mat &object_likelihood, cv::Size m, cv::Mat& center_likelihood) -{ - // CENTER_LIKELIHOOD is the 'colour response' - int h = object_likelihood.rows; - int w = object_likelihood.cols; - int n1 = w - m.width + 1; - int n2 = h - m.height + 1; - float invArea = 1.f / (m.width * m.height); - - cv::Mat temp; - - // integral images - cv::integral(object_likelihood, temp); - - center_likelihood = cv::Mat(n2, n1, CV_32FC1); - - for (int j = 0; j < n2; ++j) - { - float* pLike = reinterpret_cast(center_likelihood.ptr(j)); - - for (int i = 0; i < n1; ++i) - { - *pLike = invArea * static_cast(temp.at(j, i) + temp.at(j+m.height, i+m.width) - temp.at(j, i+m.width) - temp.at(j+m.height, i)); - ++pLike; - } - } - - // SAT = integralImage(object_likelihood); - // i = 1:n1; - // j = 1:n2; - // center_likelihood = (SAT(i,j) + SAT(i+m(1), j+m(2)) - SAT(i+m(1), j) - SAT(i, j+m(2))) / prod(m); -} - -/// -/// \brief STAPLE_TRACKER::mergeResponses -/// \param response_cf -/// \param response_pwp -/// \param response -/// -void STAPLE_TRACKER::mergeResponses(const cv::Mat &response_cf, const cv::Mat &response_pwp, cv::Mat &response) -{ - auto alpha = m_cfg.merge_factor; - //const char *merge_method = cfg.merge_method; - - // MERGERESPONSES interpolates the two responses with the hyperparameter ALPHA - response = (1 - alpha) * response_cf + alpha * response_pwp; - - // response = (1 - alpha) * response_cf + alpha * response_pwp; -} - -/// -/// \brief STAPLE_TRACKER::tracker_staple_update -/// TESTING step -/// \param im -/// \param confidence -/// \return -/// -cv::RotatedRect STAPLE_TRACKER::Update(const cv::Mat &im, float& confidence) -{ - confidence = 0; - - // extract patch of size bg_area and resize to norm_bg_area - cv::Mat im_patch_cf; - getSubwindow(im, pos, norm_bg_area, bg_area, im_patch_cf); - - cv::Size pwp_search_area; - - pwp_search_area.width = 
static_cast(norm_pwp_search_area.width / area_resize_factor); - pwp_search_area.height = static_cast(norm_pwp_search_area.height / area_resize_factor); - - // extract patch of size pwp_search_area and resize to norm_pwp_search_area - getSubwindow(im, pos, norm_pwp_search_area, pwp_search_area, im_patch_pwp); - - // compute feature map - cv::MatND xt_windowed; - getFeatureMap(im_patch_cf, m_cfg.feature_type, xt_windowed); - - // apply Hann window in getFeatureMap - - // compute FFT - // cv::MatND xtf; - std::vector xtsplit; - std::vector xtf; // xtf is splits of xtf - - matsplit(xt_windowed, xtsplit); - - for (int i = 0; i < xt_windowed.channels(); i++) { - cv::Mat dimf; - cv::dft(xtsplit[i], dimf); - xtf.push_back(dimf); - } - - const int w = xt_windowed.cols; - const int h = xt_windowed.rows; - std::vector hf(xt_windowed.channels(), cv::Mat(h, w, CV_32FC2)); - - // Correlation between filter and test patch gives the response - // Solve diagonal system per pixel. - if (m_cfg.den_per_channel) - { - for (int ch = 0; ch < xt_windowed.channels(); ++ch) - { - for (int j = 0; j < h; ++j) - { - const cv::Vec2f* pSrc = hf_num[ch].ptr(j); - const float* pDen = hf_den[ch].ptr(j); - cv::Vec2f* pDst = hf[ch].ptr(j); - - for (int i = 0; i < w; ++i) - { - pDst[i] = pSrc[i] / (pDen[i] + m_cfg.lambda); - } - } - } - } - else - { - //hf = bsxfun(@rdivide, hf_num, sum(hf_den, 3)+p.lambda); - - std::vector DIM1(static_cast(w) * static_cast(h), m_cfg.lambda); - - for (int ch = 0; ch < xt_windowed.channels(); ++ch) - { - float* pDim1 = &DIM1[0]; - for (int j = 0; j < h; ++j) - { - const float* pDen = hf_den[ch].ptr(j); - for (int i = 0; i < w; ++i) - { - *pDim1 += pDen[i]; - ++pDim1; - } - } - } - - for (int ch = 0; ch < xt_windowed.channels(); ++ch) - { - const float* pDim1 = &DIM1[0]; - for (int j = 0; j < h; ++j) - { - const cv::Vec2f* pSrc = hf_num[ch].ptr(j); - cv::Vec2f* pDst = hf[ch].ptr(j); - - for (int i = 0; i < w; ++i) - { - *pDst = *pSrc / *pDim1; - ++pDim1; - ++pDst; - 
++pSrc; - } - } - } - } - - cv::Mat response_cff = cv::Mat(h, w, CV_32FC2); - - for (int j = 0; j < h; j++) - { - cv::Vec2f* pDst = response_cff.ptr(j); - - for (int i = 0; i < w; i++) - { - float sum = 0.0; - float sumi = 0.0; - - for (size_t ch = 0; ch < hf.size(); ch++) - { - cv::Vec2f pHF = hf[ch].at(j,i); - cv::Vec2f pXTF = xtf[ch].at(j,i); - - sum += (pHF[0] * pXTF[0] + pHF[1] * pXTF[1]); - sumi += (pHF[0] * pXTF[1] - pHF[1] * pXTF[0]); - // assert(norm(imag(x(:))) <= 1e-5 * norm(real(x(:)))); - } - - *pDst = cv::Vec2f(sum, sumi); - ++pDst; - } - } - - cv::Mat response_cfi; - cv::dft(response_cff, response_cfi, cv::DFT_SCALE|cv::DFT_INVERSE); - cv::Mat response_cf = ensure_real(response_cfi); - - // response_cf = ensure_real(ifft2(sum(conj(hf) .* xtf, 3))); - - // Crop square search region (in feature pixels). - cv::Size newsz = norm_delta_area; - newsz.width = (newsz.width / m_cfg.hog_cell_size); - newsz.height = (newsz.height / m_cfg.hog_cell_size); - - if (newsz.width % 2 == 0) - newsz.width -= 1; - if (newsz.height % 2 == 0) - newsz.height -= 1; - - cropFilterResponse(response_cf, newsz, response_cf); - - if (m_cfg.hog_cell_size > 1) - { - cv::Mat temp; - mexResize(response_cf, temp, norm_delta_area, "auto"); - response_cf = temp; // xxx: low performance - } - - cv::Mat likelihood_map; - getColourMap(im_patch_pwp, likelihood_map); - //[likelihood_map] = getColourMap(im_patch_pwp, bg_hist, fg_hist, p.n_bins, p.grayscale_sequence); - - // each pixel of response_pwp loosely represents the likelihood that - // the target (of size norm_target_sz) is centred on it - cv::Mat response_pwp; - getCenterLikelihood(likelihood_map, norm_target_sz, response_pwp); - - // ESTIMATION - cv::Mat response; - mergeResponses(response_cf, response_pwp, response); - - double maxVal = 0; - cv::Point maxLoc; - - cv::minMaxLoc(response, nullptr, &maxVal, nullptr, &maxLoc); - //[row, col] = find(response == max(response(:)), 1); - - //std::cout << "maxLoc = " << maxLoc << ", maxVal 
= " << maxVal << std::endl; - confidence = static_cast(maxVal); - - float centerx = static_cast((1 + norm_delta_area.width) / 2 - 1); - float centery = static_cast((1 + norm_delta_area.height) / 2 - 1); - - pos.x += (maxLoc.x - centerx) / area_resize_factor; - pos.y += (maxLoc.y - centery) / area_resize_factor; - - // Report current location - cv::Rect_ location; - - location.x = pos.x - target_sz.width / 2.0f; - location.y = pos.y - target_sz.height / 2.0f; - location.width = static_cast(target_sz.width); - location.height = static_cast(target_sz.height); - - //std::cout << location << std::endl; - - // center = (1+p.norm_delta_area) / 2; - // pos = pos + ([row, col] - center) / area_resize_factor; - // rect_position = [pos([2,1]) - target_sz([2,1])/2, target_sz([2,1])]; - - // SCALE SPACE SEARCH - if (m_cfg.scale_adaptation) - { - cv::Mat im_patch_scale; - - getScaleSubwindow(im, pos, im_patch_scale); - - cv::Mat xsf; - cv::dft(im_patch_scale, xsf, cv::DFT_ROWS); - - // im_patch_scale = getScaleSubwindow(im, pos, base_target_sz, scale_factor * scale_factors, scale_window, scale_model_sz, p.hog_scale_cell_size); - // xsf = fft(im_patch_scale,[],2); - - const int cols = xsf.cols; - const int rows = xsf.rows; - - cv::Mat scale_responsef = cv::Mat(1, cols, CV_32FC2, cv::Scalar(0, 0, 0)); - - for (int j = 0; j < rows; ++j) - { - const float* pXSF = xsf.ptr(j); - const float* pXSFNUM = sf_num.ptr(j); - const float* pDen = sf_den.ptr(0); - float* pscale = scale_responsef.ptr(0); - - for (int i = 0; i < cols; ++i) - { - float invDen = 1.f / (*pDen + m_cfg.lambda); - - pscale[0] += invDen * (pXSFNUM[0]*pXSF[0] - pXSFNUM[1]*pXSF[1]); - pscale[1] += invDen * (pXSFNUM[0]*pXSF[1] + pXSFNUM[1]*pXSF[0]); - - pscale += 2; - pXSF += 2; - pXSFNUM += 2; - ++pDen; - } - } - - cv::Mat scale_response; - cv::dft(scale_responsef, scale_response, cv::DFT_SCALE|cv::DFT_INVERSE|cv::DFT_REAL_OUTPUT); - - //scale_response = real(ifft(sum(sf_num .* xsf, 1) ./ (sf_den + p.lambda) )); - 
cv::minMaxLoc(scale_response, nullptr, &maxVal, nullptr, &maxLoc); - - //recovered_scale = ind2sub(size(scale_response),find(scale_response == max(scale_response(:)), 1)); - - int recovered_scale = maxLoc.x; - - // set the scale - scale_factor = scale_factor * scale_factors.at(recovered_scale); - - if (scale_factor < min_scale_factor) { - scale_factor = min_scale_factor; - } else if (scale_factor > max_scale_factor) { - scale_factor = max_scale_factor; - } - - // use new scale to update bboxes for target, filter, bg and fg models - target_sz.width = cvRound(base_target_sz.width * scale_factor); - target_sz.height = cvRound(base_target_sz.height * scale_factor); - - float avg_dim = (target_sz.width + target_sz.height) / 2.0f; - - bg_area.width = std::min(im.cols - 1, cvRound(target_sz.width + avg_dim)); - bg_area.height = std::min(im.rows - 1, cvRound(target_sz.height + avg_dim)); - - bg_area.width = bg_area.width - (bg_area.width - target_sz.width) % 2; - bg_area.height = bg_area.height - (bg_area.height - target_sz.height) % 2; - - fg_area.width = cvRound(target_sz.width - avg_dim * m_cfg.inner_padding); - fg_area.height = cvRound(target_sz.height - avg_dim * m_cfg.inner_padding); - - fg_area.width = fg_area.width + int(bg_area.width - fg_area.width) % 2; - fg_area.height = fg_area.height + int(bg_area.height - fg_area.height) % 2; - - // Compute the rectangle with (or close to) params.fixed_area and - // same aspect ratio as the target bboxgetScaleSubwindow - area_resize_factor = sqrt(m_cfg.fixed_area / (float)(bg_area.width * bg_area.height)); - } - - return cv::RotatedRect(cv::Point2f(location.x + 0.5f * location.width, location.y + 0.5f * location.height), - cv::Size2f(location.width, location.height), 0.f); -} diff --git a/src/Tracker/staple/staple_tracker.hpp b/src/Tracker/staple/staple_tracker.hpp deleted file mode 100644 index 00382b0ac..000000000 --- a/src/Tracker/staple/staple_tracker.hpp +++ /dev/null @@ -1,126 +0,0 @@ -#pragma once - -#include 
-#include -#include -#include - -#include -#include -#include -#include -#include - -#include "../VOTTracker.hpp" - -/// -/// \brief The staple_cfg struct -/// -struct staple_cfg -{ - bool grayscale_sequence = false; // suppose that sequence is colour - int hog_cell_size = 4; - int fixed_area = 150*150; // standard area to which we resize the target - int n_bins = 2*2*2*2*2; // number of bins for the color histograms (bg and fg models) - float learning_rate_pwp = 0.04f; // bg and fg color models learning rate - const char * feature_type = "fhog"; // "fhog", ""gray"" - float inner_padding = 0.2f; // defines inner area used to sample colors from the foreground - float output_sigma_factor = 1/16.0f; // standard deviation for the desired translation filter output - float lambda = 1e-3f; // egularization weight - float learning_rate_cf = 0.01f; // HOG model learning rate - float merge_factor = 0.3f; // fixed interpolation factor - how to linearly combine the two responses - const char * merge_method = "const_factor"; - bool den_per_channel = false; - - // scale related - bool scale_adaptation = true; - int hog_scale_cell_size = 4; // Default DSST=4 - float learning_rate_scale = 0.025f; - float scale_sigma_factor = 1/4.0f; - int num_scales = 33; - float scale_model_factor = 1.0f; - float scale_step = 1.02f; - float scale_model_max_area = 32*16; - - // debugging stuff - int visualization = 0; // show output bbox on frame - int visualization_dbg = 0; // show also per-pixel scores, desired response and filter output - - cv::Point_ init_pos; - cv::Size target_sz; -}; - -/// -/// \brief The STAPLE_TRACKER class -/// -class STAPLE_TRACKER : public VOTTracker -{ -public: - STAPLE_TRACKER(); - ~STAPLE_TRACKER(); - - void Initialize(const cv::Mat &im, cv::Rect region); - cv::RotatedRect Update(const cv::Mat &im, float& confidence); - void Train(const cv::Mat &im, bool first); - -protected: - staple_cfg default_parameters_staple(); - void initializeAllAreas(const cv::Mat &im); - - 
void getSubwindow(const cv::Mat &im, cv::Point_ centerCoor, cv::Size model_sz, cv::Size scaled_sz, cv::Mat &output); - void getSubwindowFloor(const cv::Mat &im, cv::Point_ centerCoor, cv::Size model_sz, cv::Size scaled_sz, cv::Mat &output); - void updateHistModel(bool new_model, cv::Mat &patch, float learning_rate_pwp=0.0f); - void CalculateHann(cv::Size sz, cv::Mat &output); - void gaussianResponse(cv::Size rect_size, float sigma, cv::Mat &output); - void getFeatureMap(cv::Mat &im_patch, const char *feature_type, cv::MatND &output); - void cropFilterResponse(const cv::Mat &response_cf, cv::Size response_size, cv::Mat& output); - void getColourMap(const cv::Mat &patch, cv::Mat& output); - void getCenterLikelihood(const cv::Mat &object_likelihood, cv::Size m, cv::Mat& center_likelihood); - void mergeResponses(const cv::Mat &response_cf, const cv::Mat &response_pwp, cv::Mat &response); - void getScaleSubwindow(const cv::Mat &im, cv::Point_ centerCoor, cv::Mat &output); - - void mexResize(const cv::Mat &im, cv::Mat &output, cv::Size newsz, const char *method); - -private: - staple_cfg m_cfg; - - cv::Point_ pos; - cv::Size target_sz; - - cv::Size bg_area; - cv::Size fg_area; - float area_resize_factor; - cv::Size cf_response_size; - - cv::Size norm_bg_area; - cv::Size norm_target_sz; - cv::Size norm_delta_area; - cv::Size norm_pwp_search_area; - - cv::Mat im_patch_pwp; - - cv::MatND bg_hist; - cv::MatND fg_hist; - - cv::Mat hann_window; - cv::Mat yf; - - std::vector hf_den; - std::vector hf_num; - - cv::Rect rect_position; - - float scale_factor; - cv::Mat scale_window; - cv::Mat scale_factors; - cv::Size scale_model_sz; - float min_scale_factor; - float max_scale_factor; - cv::Size base_target_sz; - - cv::Mat ysf; - cv::Mat sf_den; - cv::Mat sf_num; - - int frameno = 0; -}; diff --git a/src/Tracker/track.cpp b/src/Tracker/track.cpp index 0b455900a..fa5c167e3 100644 --- a/src/Tracker/track.cpp +++ b/src/Tracker/track.cpp @@ -1,10 +1,7 @@ #include "track.h" -#include 
"dat/dat_tracker.hpp" -#ifdef USE_STAPLE_TRACKER -#include "staple/staple_tracker.hpp" -#include "ldes/ldes_tracker.h" -#endif +#include "Circular_Code/CircVal.h" +#include "Circular_Code/CircStat.h" /// /// \brief CTrack @@ -21,25 +18,40 @@ CTrack::CTrack(const CRegion& region, track_t deltaTime, track_t accelNoiseMag, bool useAcceleration, - size_t trackID, - bool filterObjectSize, - tracking::LostTrackType externalTrackerForLost) + track_id_t trackID, + tracking::FilterGoal filterGoal, + tracking::LostTrackType externalTrackerForLost, + time_point_t currTime) : m_kalman(kalmanType, useAcceleration, deltaTime, accelNoiseMag), m_lastRegion(region), m_predictionRect(region.m_rrect), m_predictionPoint(region.m_rrect.center), m_trackID(trackID), + m_lastDetectionTime(currTime), + m_currType(region.m_type), + m_lastType(region.m_type), m_externalTrackerForLost(externalTrackerForLost), - m_filterObjectSize(filterObjectSize) + m_filterGoal(filterGoal) { - if (filterObjectSize) - m_kalman.Update(region.m_brect, true); - else - m_kalman.Update(m_predictionPoint, true); + //std::cout << "CTrack::CTrack1: m_predictionRect: " << m_predictionRect.center << ", " << m_predictionRect.angle << ", " << m_predictionRect.size << std::endl; + + switch (filterGoal) + { + case tracking::FilterGoal::FilterCenter: + m_kalman.Update(m_predictionPoint, true); + break; + case tracking::FilterGoal::FilterRect: + m_kalman.Update(region.m_brect, true); + break; + case tracking::FilterGoal::FilterRRect: + m_kalman.Update(region.m_rrect, true); + break; + }; Point_t pt(m_predictionPoint.x, m_predictionPoint.y + region.m_brect.height / 2); - m_trace.push_back(pt, pt); + m_trace.push_back(pt, pt, currTime); + ResetLostTime(currTime); } /// @@ -60,25 +72,38 @@ CTrack::CTrack(const CRegion& region, track_t deltaTime, track_t accelNoiseMag, bool useAcceleration, - size_t trackID, - bool filterObjectSize, - tracking::LostTrackType externalTrackerForLost) + track_id_t trackID, + tracking::FilterGoal 
filterGoal, + tracking::LostTrackType externalTrackerForLost, + time_point_t currTime) : m_kalman(kalmanType, useAcceleration, deltaTime, accelNoiseMag), m_lastRegion(region), m_predictionRect(region.m_rrect), m_predictionPoint(region.m_rrect.center), m_trackID(trackID), + m_lastDetectionTime(currTime), + m_currType(region.m_type), + m_lastType(region.m_type), m_externalTrackerForLost(externalTrackerForLost), m_regionEmbedding(regionEmbedding), - m_filterObjectSize(filterObjectSize) + m_filterGoal(filterGoal) { - if (filterObjectSize) - m_kalman.Update(region.m_brect, true); - else - m_kalman.Update(m_predictionPoint, true); + //std::cout << "CTrack::CTrack2: m_predictionRect: " << m_predictionRect.center << ", " << m_predictionRect.angle << ", " << m_predictionRect.size << std::endl; - m_trace.push_back(m_predictionPoint, m_predictionPoint); + switch (filterGoal) + { + case tracking::FilterGoal::FilterCenter: + m_kalman.Update(m_predictionPoint, true); + break; + case tracking::FilterGoal::FilterRect: + m_kalman.Update(region.m_brect, true); + break; + case tracking::FilterGoal::FilterRRect: + m_kalman.Update(region.m_rrect, true); + break; + }; + m_trace.push_back(m_predictionPoint, m_predictionPoint, currTime); } /// @@ -89,7 +114,10 @@ CTrack::CTrack(const CRegion& region, track_t CTrack::CalcDistCenter(const CRegion& reg) const { Point_t diff = m_predictionPoint - reg.m_rrect.center; - return sqrtf(sqr(diff.x) + sqr(diff.y)); + if constexpr (sizeof(track_t) == sizeof(float)) + return sqrtf(sqr(diff.x) + sqr(diff.y)); + else + return sqrt(sqr(diff.x) + sqr(diff.y)); } /// @@ -111,7 +139,10 @@ track_t CTrack::CalcDistRect(const CRegion& reg) const { dist += sqr(diff[i]); } - return sqrtf(dist); + if constexpr (sizeof(track_t) == sizeof(float)) + return sqrtf(dist); + else + return sqrt(dist); } /// @@ -122,39 +153,48 @@ track_t CTrack::CalcDistRect(const CRegion& reg) const track_t CTrack::CalcDistJaccard(const CRegion& reg) const { track_t intArea = 
static_cast((reg.m_brect & m_lastRegion.m_brect).area()); - track_t unionArea = static_cast(reg.m_brect.area() + m_lastRegion.m_brect.area() - intArea); + track_t unionArea = static_cast(reg.m_brect.area() + m_lastRegion.m_brect.area() - intArea + 1e-6); - return 1 - intArea / unionArea; + return std::fabs(1 - intArea / unionArea); } /// -/// \brief CTrack::CalcDistHist +/// \brief CTrack::CalcMahalanobisDist /// \param reg /// \return /// -track_t CTrack::CalcDistHist(const CRegion& reg, RegionEmbedding& embedding, cv::UMat currFrame) const +track_t CTrack::CalcMahalanobisDist(const cv::RotatedRect& rrect) const { - track_t res = 1; + cv::Mat res1, predictPoint; + // res1 = Hn * Pn+1|n+1 * Hn^T + Rn+1 error covariance + // res2 = Hn * Xn+1|n + m_kalman.GetPtStateAndResCov(res1, predictPoint); - if (embedding.m_hist.empty()) - { - int bins = 64; - std::vector histSize; - std::vector ranges; - std::vector channels; + double mahaDist = 0.0; + if (!res1.empty() && !predictPoint.empty()) + { + cv::Mat icovar_Pn; + cv::invert(res1, icovar_Pn, cv::DECOMP_SVD); + cv::Mat measurePoint; + if (predictPoint.rows == 2) // PointUpdate + measurePoint = (cv::Mat_(2, 1) << rrect.center.x, rrect.center.y); // detection + else + measurePoint = (cv::Mat_(4, 1) << rrect.center.x, rrect.center.y, rrect.size.width, rrect.size.height); // predict + mahaDist = cv::Mahalanobis(measurePoint, predictPoint, icovar_Pn); + mahaDist += std::log(cv::determinant(res1)); + } + return static_cast(mahaDist); +} - for (int i = 0, stop = currFrame.channels(); i < stop; ++i) - { - histSize.push_back(bins); - ranges.push_back(0); - ranges.push_back(255); - channels.push_back(i); - } +/// +/// \brief CTrack::CalcDistHist +/// \param embedding +/// \return +/// +track_t CTrack::CalcDistHist(const RegionEmbedding& embedding) const +{ + track_t res = 1; - std::vector regROI = { currFrame(reg.m_brect) }; - cv::calcHist(regROI, channels, cv::Mat(), embedding.m_hist, histSize, ranges, false); - 
cv::normalize(embedding.m_hist, embedding.m_hist, 0, 1, cv::NORM_MINMAX, -1, cv::Mat()); - } if (!embedding.m_hist.empty() && !m_regionEmbedding.m_hist.empty()) { #if (((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR < 1)) || (CV_VERSION_MAJOR == 3)) @@ -164,6 +204,12 @@ track_t CTrack::CalcDistHist(const CRegion& reg, RegionEmbedding& embedding, cv: res = static_cast(cv::compareHist(embedding.m_hist, m_regionEmbedding.m_hist, cv::HISTCMP_BHATTACHARYYA)); #endif } + else + { + assert(0); + CV_Assert(!embedding.m_hist.empty()); + CV_Assert(!m_regionEmbedding.m_hist.empty()); + } return res; } @@ -172,55 +218,107 @@ track_t CTrack::CalcDistHist(const CRegion& reg, RegionEmbedding& embedding, cv: /// \param embedding /// \return /// -track_t CTrack::CalcCosine(RegionEmbedding& embedding, cv::UMat currFrame) const +std::pair CTrack::CalcCosine(const RegionEmbedding& embedding) const { track_t res = 1; if (!embedding.m_embedding.empty() && !m_regionEmbedding.m_embedding.empty()) { - double xy = embedding.m_embedding.dot(m_regionEmbedding.m_embedding); - double norm = sqrt(embedding.m_embDot * m_regionEmbedding.m_embDot) + 1e-6; - res = 0.5f * static_cast(1.0 - xy / norm); + cv::Mat mul = embedding.m_embedding * m_regionEmbedding.m_embedding.t(); + res = static_cast(1.f - mul.at(0, 0)); + if (res < 0) + res = 0; + //std::cout << "CTrack::CalcCosine: " << embedding.m_embedding.size() << " - " << m_regionEmbedding.m_embedding.size() << " = " << res << std::endl; + return { res, true }; } - return res; + else + { + //assert(0); + //CV_Assert(!embedding.m_embedding.empty()); + //CV_Assert(!m_regionEmbedding.m_embedding.empty()); + return { (track_t)0, false }; + } } /// /// \brief CTrack::Update /// \param region /// \param dataCorrect -/// \param max_trace_length +/// \param maxTraceLength /// \param prevFrame /// \param currFrame /// \param trajLen /// void CTrack::Update(const CRegion& region, bool dataCorrect, - size_t max_trace_length, + double maxTraceLength, cv::UMat 
prevFrame, cv::UMat currFrame, - int trajLen, int maxSpeedForStatic) + int trajLen, int maxSpeedForStatic, + time_point_t currTime) { - if (m_filterObjectSize) // Kalman filter for object coordinates and size - RectUpdate(region, dataCorrect, prevFrame, currFrame); - else // Kalman filter only for object center - PointUpdate(region.m_rrect.center, region.m_rrect.size, dataCorrect, currFrame.size()); + //std::cout << "CTrack::Update: dataCorrect = " << dataCorrect << ", m_predictionRect: " << m_predictionRect.center << ", " << m_predictionRect.angle << ", " << m_predictionRect.size << std::endl; + + if (dataCorrect) + { + if (region.m_type == m_currType) + { + m_anotherTypeCounter = 0; + m_lastType = region.m_type; + } + else + { + if (region.m_type == m_lastType) + { + ++m_anotherTypeCounter; + if (m_anotherTypeCounter > m_changeTypeThreshold) + { + m_currType = region.m_type; + m_anotherTypeCounter = 0; + } + } + else + { + m_lastType = region.m_type; + m_anotherTypeCounter = 0; + } + } + } + + switch (m_filterGoal) + { + case tracking::FilterGoal::FilterCenter: + PointUpdate(region.m_rrect.center, region.m_rrect.size, region.m_rrect.angle, dataCorrect, currFrame.size()); + break; + case tracking::FilterGoal::FilterRect: + RectUpdate(region, dataCorrect, prevFrame, currFrame); + break; + case tracking::FilterGoal::FilterRRect: + RectUpdate(region, dataCorrect, prevFrame, currFrame); + break; + }; if (dataCorrect) { //std::cout << m_lastRegion.m_brect << " - " << region.m_brect << std::endl; m_lastRegion = region; - m_trace.push_back(m_predictionPoint, region.m_rrect.center); + m_trace.push_back(m_predictionPoint, region.m_rrect.center, currTime); - CheckStatic(trajLen, currFrame, region, maxSpeedForStatic); + CheckStatic(trajLen, currFrame, region, maxSpeedForStatic, currTime); } else { - m_trace.push_back(m_predictionPoint); + m_trace.push_back(m_predictionPoint, currTime); } - if (m_trace.size() > max_trace_length) - m_trace.pop_front(m_trace.size() - 
max_trace_length); + for (;;) + { + std::chrono::duration period = currTime - m_trace.at(0).m_frameTime; + if (period.count() > maxTraceLength) + m_trace.pop_front(1); + else + break; + } } /// @@ -228,7 +326,7 @@ void CTrack::Update(const CRegion& region, /// \param region /// \param regionEmbedding /// \param dataCorrect -/// \param max_trace_length +/// \param maxTraceLength /// \param prevFrame /// \param currFrame /// \param trajLen @@ -236,34 +334,49 @@ void CTrack::Update(const CRegion& region, void CTrack::Update(const CRegion& region, const RegionEmbedding& regionEmbedding, bool dataCorrect, - size_t max_trace_length, + double maxTraceLength, cv::UMat prevFrame, cv::UMat currFrame, - int trajLen, int maxSpeedForStatic) + int trajLen, int maxSpeedForStatic, + time_point_t currTime) { m_regionEmbedding = regionEmbedding; - if (m_filterObjectSize) // Kalman filter for object coordinates and size - RectUpdate(region, dataCorrect, prevFrame, currFrame); - else // Kalman filter only for object center - PointUpdate(region.m_rrect.center, region.m_rrect.size, dataCorrect, currFrame.size()); + switch (m_filterGoal) + { + case tracking::FilterGoal::FilterCenter: + PointUpdate(region.m_rrect.center, region.m_rrect.size, region.m_rrect.angle, dataCorrect, currFrame.size()); + break; + case tracking::FilterGoal::FilterRect: + RectUpdate(region, dataCorrect, prevFrame, currFrame); + break; + case tracking::FilterGoal::FilterRRect: + RectUpdate(region, dataCorrect, prevFrame, currFrame); + break; + }; if (dataCorrect) { //std::cout << m_lastRegion.m_brect << " - " << region.m_brect << std::endl; m_lastRegion = region; - m_trace.push_back(m_predictionPoint, m_lastRegion.m_rrect.center); + m_trace.push_back(m_predictionPoint, m_lastRegion.m_rrect.center, currTime); - CheckStatic(trajLen, currFrame, region, maxSpeedForStatic); + CheckStatic(trajLen, currFrame, region, maxSpeedForStatic, currTime); } else { - m_trace.push_back(m_predictionPoint); + 
m_trace.push_back(m_predictionPoint, currTime); } - if (m_trace.size() > max_trace_length) - m_trace.pop_front(m_trace.size() - max_trace_length); + for (;;) + { + std::chrono::duration period = currTime - m_trace.at(0).m_frameTime; + if (period.count() > maxTraceLength) + m_trace.pop_front(1); + else + break; + } } /// @@ -280,9 +393,17 @@ bool CTrack::IsStatic() const /// \param framesTime /// \return /// -bool CTrack::IsStaticTimeout(int framesTime) const +bool CTrack::IsStaticTimeout(time_point_t currTime, double staticPeriod) const { - return (m_staticFrames > framesTime); + if (m_isStatic) + { + std::chrono::duration period = currTime - m_staticStartTime; + return period.count() > staticPeriod; + } + else + { + return false; + } } /// @@ -372,19 +493,32 @@ track_t CTrack::HeightDist(const CRegion& reg) const /// \param trajLen /// \return /// -bool CTrack::CheckStatic(int trajLen, cv::UMat currFrame, const CRegion& region, int maxSpeedForStatic) +bool CTrack::CheckStatic(int trajLen, cv::UMat currFrame, const CRegion& region, int maxSpeedForStatic, time_point_t currTime) { if (!trajLen || static_cast(m_trace.size()) < trajLen) { m_isStatic = false; - m_staticFrames = 0; + m_staticStartTime = currTime; m_staticFrame = cv::UMat(); } else { auto velocity = m_kalman.GetVelocity(); track_t speed = sqrt(sqr(velocity[0]) + sqr(velocity[1])); - if (speed < maxSpeedForStatic) + + bool inCenter = true; + cv::Rect centerROI(cvRound(m_trace[m_trace.size() - trajLen].x) - region.m_brect.width / 2, + cvRound(m_trace[m_trace.size() - trajLen].y) - region.m_brect.height / 2, region.m_brect.width, region.m_brect.height); + for (size_t i = m_trace.size() - trajLen; i < m_trace.size() - 1; ++i) + { + if (!centerROI.contains(m_trace[i])) + { + inCenter = false; + break; + } + } + + if (inCenter/*speed < maxSpeedForStatic*/) { if (!m_isStatic) { @@ -413,13 +547,12 @@ bool CTrack::CheckStatic(int trajLen, cv::UMat currFrame, const CRegion& region, #endif } - ++m_staticFrames; 
m_isStatic = true; } else { m_isStatic = false; - m_staticFrames = 0; + m_staticStartTime = currTime; m_staticFrame = cv::UMat(); } } @@ -432,7 +565,7 @@ bool CTrack::CheckStatic(int trajLen, cv::UMat currFrame, const CRegion& region, /// cv::RotatedRect CTrack::GetLastRect() const { - if (m_filterObjectSize) + if (m_filterGoal != tracking::FilterGoal::FilterCenter) return m_predictionRect; else return cv::RotatedRect(cv::Point2f(m_predictionPoint.x, m_predictionPoint.y), m_predictionRect.size, m_predictionRect.angle); @@ -447,32 +580,79 @@ const CRegion& CTrack::LastRegion() const return m_lastRegion; } +/// +/// \brief CTrack::GetCurrType +/// \return +/// +objtype_t CTrack::GetCurrType() const +{ + return m_currType; +} + /// /// \brief CTrack::ConstructObject /// \return /// -TrackingObject CTrack::ConstructObject() const +TrackingObject CTrack::ConstructObject(time_point_t frameTime) const +{ + std::chrono::duration period = frameTime - m_staticStartTime; + return TrackingObject(GetLastRect(), m_trackID, m_trace, IsStatic(), cvRound(period.count()), IsOutOfTheFrame(), + m_currType, m_lastRegion.m_confidence, m_kalman.GetVelocity()); +} + +/// +/// \brief CTrack::GetID +/// \return +/// +track_id_t CTrack::GetID() const { - return TrackingObject(GetLastRect(), m_trackID, m_trace, IsStatic(), IsOutOfTheFrame(), - m_lastRegion.m_type, m_lastRegion.m_confidence, m_kalman.GetVelocity()); + return m_trackID; } /// -/// \brief CTrack::SkippedFrames +/// \brief CTrack::GetLostPeriod /// \return /// -size_t CTrack::SkippedFrames() const +double CTrack::GetLostPeriod(time_point_t currTime) const { - return m_skippedFrames; + std::chrono::duration period = currTime - m_lastDetectionTime; + return period.count(); } /// -/// \brief CTrack::SkippedFrames +/// \brief CTrack::ResetLostTime /// \return /// -size_t& CTrack::SkippedFrames() +void CTrack::ResetLostTime(time_point_t currTime) { - return m_skippedFrames; + m_lastDetectionTime = currTime; +} + +/// +/// \brief 
CTrack::GetFilterGoal +/// \return +/// +tracking::FilterGoal CTrack::GetFilterGoal() const +{ + return m_filterGoal; +} + +/// +/// \brief CTrack::KalmanPredictRect +/// \return +/// +void CTrack::KalmanPredictRect() +{ + m_kalman.GetRectPrediction(); +} + +/// +/// \brief CTrack::KalmanPredictPoint +/// \return +/// +void CTrack::KalmanPredictPoint() +{ + m_kalman.GetPointPrediction(); } /// @@ -487,9 +667,8 @@ void CTrack::RectUpdate(const CRegion& region, cv::UMat prevFrame, cv::UMat currFrame) { - m_kalman.GetRectPrediction(); - - bool recalcPrediction = true; + bool wasTracked = false; + cv::RotatedRect trackedRRect; auto Clamp = [](int& v, int& size, int hi) -> int { @@ -527,207 +706,211 @@ void CTrack::RectUpdate(const CRegion& region, m_predictionRect.size.height *= newRect.height / static_cast(prevRect.height); }; - switch (m_externalTrackerForLost) + auto InitTracker = [&](cv::Rect& roiRect, bool reinit) { - case tracking::TrackNone: - break; - - case tracking::TrackKCF: - case tracking::TrackMIL: - case tracking::TrackMedianFlow: - case tracking::TrackGOTURN: - case tracking::TrackMOSSE: - case tracking::TrackCSRT: -#ifdef USE_OCV_KCF - if (!dataCorrect) + bool inited = false; + cv::Rect brect = dataCorrect ? 
region.m_brect : m_predictionRect.boundingRect(); + roiRect.x = 0; + roiRect.y = 0; + roiRect.width = currFrame.cols; + roiRect.height = currFrame.rows; + + switch (m_externalTrackerForLost) { - cv::Rect brect = m_predictionRect.boundingRect(); - - cv::Size roiSize(std::max(3 * brect.width, currFrame.cols / 4), std::max(3 * brect.height, currFrame.rows / 4)); - if (roiSize.width > currFrame.cols) - roiSize.width = currFrame.cols; - - if (roiSize.height > currFrame.rows) - roiSize.height = currFrame.rows; + case tracking::TrackNone: + break; + + case tracking::TrackKCF: + case tracking::TrackCSRT: + case tracking::TrackDaSiamRPN: + case tracking::TrackNano: + case tracking::TrackVit: +#ifdef USE_OCV_KCF + { + roiRect.width = std::max(3 * brect.width, currFrame.cols / 4); + roiRect.height = std::max(3 * brect.height, currFrame.rows / 4); + if (roiRect.width > currFrame.cols) + roiRect.width = currFrame.cols; - cv::Point roiTL(brect.x + brect.width / 2 - roiSize.width / 2, brect.y + brect.height / 2 - roiSize.height / 2); - cv::Rect roiRect(roiTL, roiSize); - Clamp(roiRect.x, roiRect.width, currFrame.cols); - Clamp(roiRect.y, roiRect.height, currFrame.rows); + if (roiRect.height > currFrame.rows) + roiRect.height = currFrame.rows; - bool inited = false; - if (!m_tracker || m_tracker.empty()) - { - CreateExternalTracker(currFrame.channels()); + roiRect.x = brect.x + brect.width / 2 - roiRect.width / 2; + roiRect.y = brect.y + brect.height / 2 - roiRect.height / 2; + Clamp(roiRect.x, roiRect.width, currFrame.cols); + Clamp(roiRect.y, roiRect.height, currFrame.rows); - cv::Rect2d lastRect; - if (m_staticFrame.empty()) - { - int dx = 1;//m_predictionRect.width / 8; - int dy = 1;//m_predictionRect.height / 8; - lastRect = cv::Rect2d(brect.x - roiRect.x - dx, brect.y - roiRect.y - dy, brect.width + 2 * dx, brect.height + 2 * dy); - } - else + if (!m_tracker || m_tracker.empty() || reinit) { - lastRect = cv::Rect2d(m_staticRect.x - roiRect.x, m_staticRect.y - roiRect.y, 
m_staticRect.width, m_staticRect.height); - } + CreateExternalTracker(currFrame.channels()); - if (lastRect.x >= 0 && - lastRect.y >= 0 && - lastRect.x + lastRect.width < roiRect.width && - lastRect.y + lastRect.height < roiRect.height && - lastRect.area() > 0) - { - if (m_staticFrame.empty()) - m_tracker->init(cv::UMat(prevFrame, roiRect), lastRect); - else - m_tracker->init(cv::UMat(m_staticFrame, roiRect), lastRect); + int dx = 0;//m_predictionRect.width / 8; + int dy = 0;//m_predictionRect.height / 8; + cv::Rect2d lastRect(brect.x - roiRect.x - dx, brect.y - roiRect.y - dy, brect.width + 2 * dx, brect.height + 2 * dy); + + if (lastRect.x >= 0 && + lastRect.y >= 0 && + lastRect.x + lastRect.width < roiRect.width && + lastRect.y + lastRect.height < roiRect.height && + lastRect.area() > 0) + { + m_tracker->init(cv::UMat(currFrame, roiRect), lastRect); #if 0 #ifndef SILENT_WORK - cv::Mat tmp; - if (m_staticFrame.empty()) - tmp = cv::UMat(prevFrame, roiRect).getMat(cv::ACCESS_READ).clone(); - else - tmp = cv::UMat(m_staticFrame, roiRect).getMat(cv::ACCESS_READ).clone(); - cv::rectangle(tmp, lastRect, cv::Scalar(255, 255, 255), 2); - cv::imshow("init " + std::to_string(m_trackID), tmp); + cv::Mat tmp = cv::UMat(currFrame, roiRect).getMat(cv::ACCESS_READ).clone(); + cv::rectangle(tmp, lastRect, cv::Scalar(255, 255, 255), 2); + cv::imshow("init " + std::to_string(m_trackID), tmp); #endif #endif - - inited = true; - m_outOfTheFrame = false; - } - else - { - m_tracker.release(); - m_outOfTheFrame = true; + inited = true; + m_outOfTheFrame = false; + } + else + { + m_tracker.release(); + m_outOfTheFrame = true; + } } } +#else + std::cerr << "KCF tracker was disabled in CMAKE! Set lostTrackType = TrackNone in constructor." 
<< std::endl; +#endif + break; + } + return inited; + }; + + if (m_externalTrackerForLost != tracking::TrackNone) + { +#ifdef USE_OCV_KCF + cv::Rect roiRect; + bool inited = InitTracker(roiRect, false); #if (((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR < 5)) || ((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR == 5) && (CV_VERSION_REVISION < 1)) || (CV_VERSION_MAJOR == 3)) - cv::Rect2d newRect; + cv::Rect2d newRect; #else - cv::Rect newRect; + cv::Rect newRect; #endif - if (!inited && !m_tracker.empty() && m_tracker->update(cv::UMat(currFrame, roiRect), newRect)) - { + if (!inited && !m_tracker.empty() && m_tracker->update(cv::UMat(currFrame, roiRect), newRect)) + { #if 0 #ifndef SILENT_WORK - cv::Mat tmp2 = cv::UMat(currFrame, roiRect).getMat(cv::ACCESS_READ).clone(); - cv::rectangle(tmp2, newRect, cv::Scalar(255, 255, 255), 2); - cv::imshow("track " + std::to_string(m_trackID), tmp2); + cv::Mat tmp2 = cv::UMat(currFrame, roiRect).getMat(cv::ACCESS_READ).clone(); + cv::rectangle(tmp2, newRect, cv::Scalar(255, 255, 255), 2); + cv::imshow("track " + std::to_string(m_trackID), tmp2); #endif #endif #if (((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR < 5)) || ((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR == 5) && (CV_VERSION_REVISION < 1)) || (CV_VERSION_MAJOR == 3)) - cv::Rect prect(cvRound(newRect.x) + roiRect.x, cvRound(newRect.y) + roiRect.y, cvRound(newRect.width), cvRound(newRect.height)); + cv::Rect prect(cvRound(newRect.x) + roiRect.x, cvRound(newRect.y) + roiRect.y, cvRound(newRect.width), cvRound(newRect.height)); #else - cv::Rect prect(newRect.x + roiRect.x, newRect.y + roiRect.y, newRect.width, newRect.height); + cv::Rect prect(newRect.x + roiRect.x, newRect.y + roiRect.y, newRect.width, newRect.height); #endif + trackedRRect = cv::RotatedRect(cv::Point2f(prect.x + prect.width / 2.f, prect.y + prect.height / 2.f), cv::Size2f(static_cast(prect.width), static_cast(prect.height)), 0); + wasTracked = true; + } +#else + std::cerr << "KCF tracker was disabled in 
CMAKE! Set lostTrackType = TrackNone in constructor." << std::endl; +#endif + } - UpdateRRect(brect, m_kalman.Update(prect, true)); + cv::Rect brect = m_predictionRect.boundingRect(); - recalcPrediction = false; + if (dataCorrect) + { + if (wasTracked) + { +#if 0 + if (trackedRRect.angle > 0.5f) + { + m_predictionRect = trackedRRect; + m_kalman.Update(trackedRRect.boundingRect(), true); + } + else + { + UpdateRRect(brect, m_kalman.Update(trackedRRect.boundingRect(), true)); + } +#else + auto IoU = [](cv::Rect r1, cv::Rect r2) + { + return (r1 & r2).area() / static_cast((r1 | r2).area()); + }; + auto iou = IoU(trackedRRect.boundingRect(), region.m_brect); + if (iou < 0.5f) + { + cv::Rect roiRect; + InitTracker(roiRect, true); + //std::cout << "Reinit tracker with iou = " << iou << std::endl; + } + +#if 0 +#ifndef SILENT_WORK + { + auto rrr = trackedRRect.boundingRect() | region.m_brect; + cv::Mat tmpFrame = cv::UMat(currFrame, rrr).getMat(cv::ACCESS_READ).clone(); + cv::Rect r1(trackedRRect.boundingRect()); + cv::Rect r2(region.m_brect); + r1.x -= rrr.x; + r1.y -= rrr.y; + r2.x -= rrr.x; + r2.y -= rrr.y; + cv::rectangle(tmpFrame, r1, cv::Scalar(0, 255, 0), 1); + cv::rectangle(tmpFrame, r2, cv::Scalar(255, 0, 255), 1); + cv::imshow("reinit " + std::to_string(m_trackID), tmpFrame); } +#endif +#endif + if (m_filterGoal == tracking::FilterGoal::FilterRect) + UpdateRRect(brect, m_kalman.Update(region.m_brect, dataCorrect)); + else + m_predictionRect = m_kalman.Update(region.m_rrect, dataCorrect); +#endif } else { - if (m_tracker) - m_tracker = nullptr; + if (m_filterGoal == tracking::FilterGoal::FilterRect) + UpdateRRect(brect, m_kalman.Update(region.m_brect, dataCorrect)); + else + m_predictionRect = m_kalman.Update(region.m_rrect, dataCorrect); } -#else - std::cerr << "KCF tracker was disabled in CMAKE! Set lostTrackType = TrackNone in constructor." 
<< std::endl; -#endif - break; - - case tracking::TrackDAT: - case tracking::TrackSTAPLE: - case tracking::TrackLDES: - if (!dataCorrect) + } + else + { + if (wasTracked) { - bool inited = false; - cv::Rect brect = m_predictionRect.boundingRect(); - if (!m_VOTTracker) + if (trackedRRect.angle > 0.5f) { - CreateExternalTracker(currFrame.channels()); - - cv::Rect2d lastRect(brect.x, brect.y, brect.width, brect.height); - if (!m_staticFrame.empty()) - lastRect = cv::Rect2d(m_staticRect.x, m_staticRect.y, m_staticRect.width, m_staticRect.height); - - if (lastRect.x >= 0 && - lastRect.y >= 0 && - lastRect.x + lastRect.width < prevFrame.cols && - lastRect.y + lastRect.height < prevFrame.rows && - lastRect.area() > 0) - { - if (m_staticFrame.empty()) - { - cv::Mat mat = prevFrame.getMat(cv::ACCESS_READ); - m_VOTTracker->Initialize(mat, lastRect); - m_VOTTracker->Train(mat, true); - } - else - { - cv::Mat mat = m_staticFrame.getMat(cv::ACCESS_READ); - m_VOTTracker->Initialize(mat, lastRect); - m_VOTTracker->Train(mat, true); - } - - inited = true; - m_outOfTheFrame = false; - } - else - { - m_VOTTracker = nullptr; - m_outOfTheFrame = true; - } + m_predictionRect = trackedRRect; + m_kalman.Update(trackedRRect.boundingRect(), true); } - if (!inited && m_VOTTracker) + else { - constexpr float confThresh = 0.3f; - cv::Mat mat = currFrame.getMat(cv::ACCESS_READ); - float confidence = 0; - cv::RotatedRect newRect = m_VOTTracker->Update(mat, confidence); - if (confidence > confThresh) - { - m_VOTTracker->Train(mat, false); - - if (newRect.angle > 0.5f) - { - m_predictionRect = newRect; - m_kalman.Update(newRect.boundingRect(), true); - } - else - { - UpdateRRect(brect, m_kalman.Update(newRect.boundingRect(), true)); - } - recalcPrediction = false; - } + if (m_filterGoal == tracking::FilterGoal::FilterRect) + UpdateRRect(brect, m_kalman.Update(trackedRRect.boundingRect(), true)); + else + m_predictionRect = m_kalman.Update(trackedRRect, true); } } else { - if (m_VOTTracker) - 
m_VOTTracker = nullptr; + if (m_filterGoal == tracking::FilterGoal::FilterRect) + UpdateRRect(brect, m_kalman.Update(region.m_brect, dataCorrect)); + else + m_predictionRect = m_kalman.Update(region.m_rrect, dataCorrect); } - break; } - if (recalcPrediction) - UpdateRRect(m_predictionRect.boundingRect(), m_kalman.Update(region.m_brect, dataCorrect)); - - cv::Rect brect = m_predictionRect.boundingRect(); + brect = m_predictionRect.boundingRect(); int dx = Clamp(brect.x, brect.width, currFrame.cols); int dy = Clamp(brect.y, brect.height, currFrame.rows); #if 0 m_predictionRect.center.x += dx; m_predictionRect.center.y += dy; #endif - m_outOfTheFrame = (dx != 0) || (dy != 0) || (brect.width < 2) || (brect.height < 2); + m_outOfTheFrame = (dx != 0) || (dy != 0) || (brect.width < 1) || (brect.height < 1); m_predictionPoint = m_predictionRect.center; - //std::cout << "brect = " << brect << ", dx = " << dx << ", dy = " << dy << ", outOfTheFrame = " << m_outOfTheFrame << ", predictionPoint = " << m_predictionPoint << std::endl; + //std::cout << GetID().ID2Str() << ": brect = " << brect << ", dx = " << dx << ", dy = " << dy << ", outOfTheFrame = " << m_outOfTheFrame << ", predictionPoint = " << m_predictionPoint << std::endl; } /// @@ -738,9 +921,6 @@ void CTrack::CreateExternalTracker(int channels) switch (m_externalTrackerForLost) { case tracking::TrackNone: - if (m_VOTTracker) - m_VOTTracker = nullptr; - #ifdef USE_OCV_KCF if (m_tracker && !m_tracker.empty()) m_tracker.release(); @@ -773,84 +953,6 @@ void CTrack::CreateExternalTracker(int channels) #endif } #endif - if (m_VOTTracker) - m_VOTTracker = nullptr; - break; - - case tracking::TrackMIL: -#ifdef USE_OCV_KCF - if (!m_tracker || m_tracker.empty()) - { - cv::TrackerMIL::Params params; - -#if (((CV_VERSION_MAJOR == 3) && (CV_VERSION_MINOR >= 3)) || (CV_VERSION_MAJOR > 3)) - m_tracker = cv::TrackerMIL::create(params); -#else - m_tracker = cv::TrackerMIL::createTracker(params); -#endif - } -#endif - if (m_VOTTracker) 
- m_VOTTracker = nullptr; - break; - - case tracking::TrackMedianFlow: -#ifdef USE_OCV_KCF - if (!m_tracker || m_tracker.empty()) - { -#if (((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 4)) || (CV_VERSION_MAJOR > 4)) - std::cerr << "TrackMedianFlow not supported in OpenCV 4.5 and newer!" << std::endl; - CV_Assert(0); -#else - cv::TrackerMedianFlow::Params params; - -#if (((CV_VERSION_MAJOR == 3) && (CV_VERSION_MINOR >= 3)) || (CV_VERSION_MAJOR > 3)) - m_tracker = cv::TrackerMedianFlow::create(params); -#else - m_tracker = cv::TrackerMedianFlow::createTracker(params); -#endif -#endif - } -#endif - if (m_VOTTracker) - m_VOTTracker = nullptr; - break; - - case tracking::TrackGOTURN: -#ifdef USE_OCV_KCF - if (!m_tracker || m_tracker.empty()) - { - cv::TrackerGOTURN::Params params; - -#if (((CV_VERSION_MAJOR == 3) && (CV_VERSION_MINOR >= 3)) || (CV_VERSION_MAJOR > 3)) - m_tracker = cv::TrackerGOTURN::create(params); -#else - m_tracker = cv::TrackerGOTURN::createTracker(params); -#endif - } -#endif - if (m_VOTTracker) - m_VOTTracker = nullptr; - break; - - case tracking::TrackMOSSE: -#ifdef USE_OCV_KCF - if (!m_tracker || m_tracker.empty()) - { -#if (((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 4)) || (CV_VERSION_MAJOR > 4)) - std::cerr << "TrackMOSSE not supported in OpenCV 4.5 and newer!" 
<< std::endl; - CV_Assert(0); -#else -#if (((CV_VERSION_MAJOR == 3) && (CV_VERSION_MINOR > 3)) || (CV_VERSION_MAJOR > 3)) - m_tracker = cv::TrackerMOSSE::create(); -#else - m_tracker = cv::TrackerMOSSE::createTracker(); -#endif -#endif - } -#endif - if (m_VOTTracker) - m_VOTTracker = nullptr; break; case tracking::TrackCSRT: @@ -874,45 +976,100 @@ void CTrack::CreateExternalTracker(int channels) #endif } #endif - if (m_VOTTracker) - m_VOTTracker = nullptr; break; - - case tracking::TrackDAT: + + case tracking::TrackDaSiamRPN: #ifdef USE_OCV_KCF - if (m_tracker && !m_tracker.empty()) - m_tracker.release(); + if (!m_tracker || m_tracker.empty()) + { +#if (((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 5)) || ((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR == 5) && (CV_VERSION_REVISION > 2)) || (CV_VERSION_MAJOR > 4)) + cv::TrackerDaSiamRPN::Params params; + params.model = "dasiamrpn_model.onnx"; + params.kernel_cls1 = "dasiamrpn_kernel_cls1.onnx"; + params.kernel_r1 = "dasiamrpn_kernel_r1.onnx"; + // backend + // 0: automatically (by default) + // 1: Halide language + // 2: Intel's Deep Learning Inference Engine + // 3: OpenCV implementation + // 4: VKCOM + // 5: CUDA + params.backend = 0; + // target + // 0: CPU target (by default) + // 1: OpenCL + // 2: OpenCL fp16 (half-float precision) + // 3: VPU + // 4: Vulkan + // 6: CUDA + // 7: CUDA fp16 (half-float preprocess) + params.target = 0; + m_tracker = cv::TrackerDaSiamRPN::create(params); #endif - if (!m_VOTTracker) - m_VOTTracker = std::unique_ptr(new DAT_TRACKER()); - break; + } +#endif + break; - case tracking::TrackSTAPLE: + case tracking::TrackNano: #ifdef USE_OCV_KCF - if (m_tracker && !m_tracker.empty()) - m_tracker.release(); + if (!m_tracker || m_tracker.empty()) + { +#if (((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 6)) || (CV_VERSION_MAJOR > 4)) + cv::TrackerNano::Params params; + params.backbone = "nanotrack_backbone_sim.onnx"; + params.neckhead = "nanotrack_head_sim.onnx"; + // backend + // 0: 
automatically (by default) + // 1: Halide language + // 2: Intel's Deep Learning Inference Engine + // 3: OpenCV implementation + // 4: VKCOM + // 5: CUDA + params.backend = 0; + // target + // 0: CPU target (by default) + // 1: OpenCL + // 2: OpenCL fp16 (half-float precision) + // 3: VPU + // 4: Vulkan + // 6: CUDA + // 7: CUDA fp16 (half-float preprocess) + params.target = 0; + m_tracker = cv::TrackerNano::create(params); #endif -#ifdef USE_STAPLE_TRACKER - if (!m_VOTTracker) - m_VOTTracker = std::unique_ptr(new STAPLE_TRACKER()); -#else - std::cerr << "Project was compiled without STAPLE tracking!" << std::endl; + } #endif break; -#if 1 - case tracking::TrackLDES: + + case tracking::TrackVit: #ifdef USE_OCV_KCF - if (m_tracker && !m_tracker.empty()) - m_tracker.release(); -#endif -#ifdef USE_STAPLE_TRACKER - if (!m_VOTTracker) - m_VOTTracker = std::unique_ptr(new LDESTracker()); -#else - std::cerr << "Project was compiled without STAPLE tracking!" << std::endl; + if (!m_tracker || m_tracker.empty()) + { +#if (((CV_VERSION_MAJOR == 4) && (CV_VERSION_MINOR > 8)) || (CV_VERSION_MAJOR > 4)) + cv::TrackerVit::Params params; + params.net = "vitTracker.onnx"; + // backend + // 0: automatically (by default) + // 1: Halide language + // 2: Intel's Deep Learning Inference Engine + // 3: OpenCV implementation + // 4: VKCOM + // 5: CUDA + params.backend = 0; + // target + // 0: CPU target (by default) + // 1: OpenCL + // 2: OpenCL fp16 (half-float precision) + // 3: VPU + // 4: Vulkan + // 6: CUDA + // 7: CUDA fp16 (half-float preprocess) + params.target = 0; + m_tracker = cv::TrackerVit::create(params); #endif - break; + } #endif + break; } } @@ -923,11 +1080,10 @@ void CTrack::CreateExternalTracker(int channels) /// void CTrack::PointUpdate(const Point_t& pt, const cv::Size& newObjSize, + float newAngle, bool dataCorrect, const cv::Size& frameSize) { - m_kalman.GetPointPrediction(); - m_predictionPoint = m_kalman.Update(pt, dataCorrect); if (dataCorrect) @@ -936,6 
+1092,19 @@ void CTrack::PointUpdate(const Point_t& pt, const int a2 = 9; m_predictionRect.size.width = (a1 * newObjSize.width + a2 * m_predictionRect.size.width) / (a1 + a2); m_predictionRect.size.height = (a1 * newObjSize.height + a2 * m_predictionRect.size.height) / (a1 + a2); + + std::vector, double>> angles; + + angles.push_back(std::make_pair(static_cast(m_predictionRect.angle), 0.1)); + angles.push_back(std::make_pair(static_cast(newAngle), 0.9)); + auto vals = WeightedCircAverage(angles); + //std::cout << "[" << m_predictionRect.angle << ", " << newAngle << "] -> "; + //for (auto v : vals) + //{ + // std::cout << v.operator double() << " "; + //} + //std::cout << std::endl; + m_predictionRect.angle = static_cast(vals.begin()->operator double()); } auto Clamp = [](track_t& v, int hi) -> bool @@ -953,7 +1122,8 @@ void CTrack::PointUpdate(const Point_t& pt, return false; }; auto p = m_predictionPoint; - m_outOfTheFrame = Clamp(p.x, frameSize.width) || Clamp(p.y, frameSize.height) || (m_predictionRect.size.width < 2) || (m_predictionRect.size.height < 2); + m_outOfTheFrame = Clamp(p.x, frameSize.width) || Clamp(p.y, frameSize.height) || (m_predictionRect.size.width < 1) || (m_predictionRect.size.height < 1); - //std::cout << "predictionRect = " << m_predictionRect.boundingRect() << ", outOfTheFrame = " << m_outOfTheFrame << ", predictionPoint = " << m_predictionPoint << std::endl; + //std::cout << "CTrack::PointUpdate: m_predictionRect: " << m_predictionRect.center << ", " << m_predictionRect.angle << ", " << m_predictionRect.size << std::endl; + //std::cout << GetID().ID2Str() << ": predictionRect = " << m_predictionRect.boundingRect() << ", outOfTheFrame = " << m_outOfTheFrame << ", predictionPoint = " << m_predictionPoint << ", newAngle = " << newAngle << std::endl; } diff --git a/src/Tracker/track.h b/src/Tracker/track.h index 2e5c8cbc2..5556c54b4 100644 --- a/src/Tracker/track.h +++ b/src/Tracker/track.h @@ -10,229 +10,9 @@ #endif #include "defines.h" 
+#include "trajectory.h" #include "object_types.h" #include "Kalman.h" -#include "VOTTracker.hpp" - -/// -/// \brief The TrajectoryPoint struct -/// -struct TrajectoryPoint -{ - /// - /// \brief TrajectoryPoint - /// - TrajectoryPoint() = default; - - /// - /// \brief TrajectoryPoint - /// \param prediction - /// - TrajectoryPoint(const Point_t& prediction) - : m_prediction(prediction) - { - } - - /// - /// \brief TrajectoryPoint - /// \param prediction - /// \param raw - /// - TrajectoryPoint(const Point_t& prediction, const Point_t& raw) - : - m_prediction(prediction), - m_raw(raw), - m_hasRaw(true) - { - } - - /// - TrajectoryPoint(const TrajectoryPoint& tp) noexcept - : m_prediction(tp.m_prediction), m_raw(tp.m_raw), m_hasRaw(tp.m_hasRaw) - { - } - - /// - TrajectoryPoint& operator=(const TrajectoryPoint& tp) noexcept - { - m_prediction = tp.m_prediction; - m_raw = tp.m_raw; - m_hasRaw = tp.m_hasRaw; - return *this; - } - - /// - TrajectoryPoint(TrajectoryPoint&&) = default; - - Point_t m_prediction; - Point_t m_raw; - bool m_hasRaw = false; -}; - -/// -/// \brief The Trace class -/// -class Trace -{ -public: - /// - Trace() = default; - /// - Trace(const Trace&) = default; - /// - Trace(Trace&&) = default; - - /// - /// \brief operator [] - /// \param i - /// \return - /// - const Point_t& operator[](size_t i) const - { - return m_trace[i].m_prediction; - } - - /// - /// \brief operator [] - /// \param i - /// \return - /// - Point_t& operator[](size_t i) - { - return m_trace[i].m_prediction; - } - - /// - /// \brief at - /// \param i - /// \return - /// - const TrajectoryPoint& at(size_t i) const - { - return m_trace[i]; - } - - /// - /// \brief size - /// \return - /// - size_t size() const - { - return m_trace.size(); - } - - /// - /// \brief push_back - /// \param prediction - /// - void push_back(const Point_t& prediction) - { - m_trace.emplace_back(prediction); - } - void push_back(const Point_t& prediction, const Point_t& raw) - { - 
m_trace.emplace_back(prediction, raw); - } - - /// - /// \brief pop_front - /// \param count - /// - void pop_front(size_t count) - { - if (count < size()) - m_trace.erase(m_trace.begin(), m_trace.begin() + count); - else - m_trace.clear(); - } - - /// - /// \brief GetRawCount - /// \param lastPeriod - /// \return - /// - size_t GetRawCount(size_t lastPeriod) const - { - size_t res = 0; - - size_t i = 0; - if (lastPeriod < m_trace.size()) - i = m_trace.size() - lastPeriod; - - for (; i < m_trace.size(); ++i) - { - if (m_trace[i].m_hasRaw) - ++res; - } - - return res; - } - - /// - /// \brief Reserve - /// \param capacity - /// \return - /// - void Reserve(size_t capacity) - { - m_trace.reserve(capacity); - } - -private: - std::vector m_trace; -}; - -/// -/// \brief The TrackingObject class -/// -struct TrackingObject -{ - Trace m_trace; // Trajectory - size_t m_ID = 0; // Objects ID - cv::RotatedRect m_rrect; // Coordinates - cv::Vec m_velocity; // pixels/sec - objtype_t m_type = bad_type; // Objects type name or empty - float m_confidence = -1; // From Detector with score (YOLO or SSD) - bool m_isStatic = false; // Object is abandoned - bool m_outOfTheFrame = false; // Is object out of freme - mutable bool m_lastRobust = false; // saved latest robust value - - /// - TrackingObject(const cv::RotatedRect& rrect, size_t ID, const Trace& trace, - bool isStatic, bool outOfTheFrame, objtype_t type, float confidence, cv::Vec velocity) - : - m_trace(trace), m_ID(ID), m_rrect(rrect), m_velocity(velocity), m_type(type), m_confidence(confidence), m_isStatic(isStatic), m_outOfTheFrame(outOfTheFrame) - { - } - - /// - TrackingObject(TrackingObject&&) = default; - - /// - /// \brief IsRobust - /// \param minTraceSize - /// \param minRawRatio - /// \param sizeRatio - /// \return - /// - bool IsRobust(int minTraceSize, float minRawRatio, cv::Size2f sizeRatio) const - { - m_lastRobust = m_trace.size() > static_cast(minTraceSize); - m_lastRobust &= 
m_trace.GetRawCount(m_trace.size() - 1) / static_cast(m_trace.size()) > minRawRatio; - if (sizeRatio.width + sizeRatio.height > 0) - { - float sr = m_rrect.size.width / m_rrect.size.height; - if (sizeRatio.width > 0) - m_lastRobust &= (sr > sizeRatio.width); - - if (sizeRatio.height > 0) - m_lastRobust &= (sr < sizeRatio.height); - } - if (m_outOfTheFrame) - m_lastRobust = false; - return m_lastRobust; - } -}; /// /// \brief The RegionEmbedding struct @@ -241,10 +21,8 @@ struct RegionEmbedding { cv::Mat m_hist; cv::Mat m_embedding; - double m_embDot = 0.; }; - /// /// \brief The CTrack class /// @@ -256,9 +34,10 @@ class CTrack track_t deltaTime, track_t accelNoiseMag, bool useAcceleration, - size_t trackID, - bool filterObjectSize, - tracking::LostTrackType externalTrackerForLost); + track_id_t trackID, + tracking::FilterGoal filterGoal, + tracking::LostTrackType externalTrackerForLost, + time_point_t currTime); CTrack(const CRegion& region, const RegionEmbedding& regionEmbedding, @@ -266,9 +45,10 @@ class CTrack track_t deltaTime, track_t accelNoiseMag, bool useAcceleration, - size_t trackID, - bool filterObjectSize, - tracking::LostTrackType externalTrackerForLost); + track_id_t trackID, + tracking::FilterGoal filterGoal, + tracking::LostTrackType externalTrackerForLost, + time_point_t currTime); /// /// \brief CalcDistCenter @@ -291,51 +71,60 @@ class CTrack /// \return /// track_t CalcDistJaccard(const CRegion& reg) const; - /// - /// \brief CalcDistHist - /// Distance from 0 to 1 between objects histogramms on two N and N+1 frames - /// \param reg - /// \param currFrame - /// \return - /// - track_t CalcDistHist(const CRegion& reg, RegionEmbedding& embedding, cv::UMat currFrame) const; + /// + /// \brief CTrack::CalcMahalanobisDist + /// \param reg + /// \return + /// + track_t CalcMahalanobisDist(const cv::RotatedRect& rrect) const; + /// + /// \brief CalcDistHist + /// Distance from 0 to 1 between objects histogramms on two N and N+1 frames + /// \param 
embedding + /// \return + /// + track_t CalcDistHist(const RegionEmbedding& embedding) const; /// /// \brief CalcCosine /// Distance from 0 to 1 between objects embeddings on two N and N+1 frames /// \param embedding - /// \param currFrame /// \return /// - track_t CalcCosine(RegionEmbedding& embedding, cv::UMat currFrame) const; + std::pair CalcCosine(const RegionEmbedding& embedding) const; + + cv::RotatedRect CalcPredictionEllipse(cv::Size_ minRadius) const; + /// + /// \brief IsInsideArea + /// Test point inside in prediction area: prediction area + object velocity + /// \param pt + /// \param minVal + /// \return + /// + track_t IsInsideArea(const Point_t& pt, const cv::RotatedRect& rrect) const; - cv::RotatedRect CalcPredictionEllipse(cv::Size_ minRadius) const; - /// - /// \brief IsInsideArea - /// Test point inside in prediction area: prediction area + object velocity - /// \param pt - /// \param minVal - /// \return - /// - track_t IsInsideArea(const Point_t& pt, const cv::RotatedRect& rrect) const; track_t WidthDist(const CRegion& reg) const; track_t HeightDist(const CRegion& reg) const; - void Update(const CRegion& region, bool dataCorrect, size_t max_trace_length, cv::UMat prevFrame, cv::UMat currFrame, int trajLen, int maxSpeedForStatic); - void Update(const CRegion& region, const RegionEmbedding& regionEmbedding, bool dataCorrect, size_t max_trace_length, cv::UMat prevFrame, cv::UMat currFrame, int trajLen, int maxSpeedForStatic); + void Update(const CRegion& region, bool dataCorrect, double maxTraceLength, cv::UMat prevFrame, cv::UMat currFrame, int trajLen, int maxSpeedForStatic, time_point_t currTime); + void Update(const CRegion& region, const RegionEmbedding& regionEmbedding, bool dataCorrect, double maxTraceLength, cv::UMat prevFrame, cv::UMat currFrame, int trajLen, int maxSpeedForStatic, time_point_t currTime); bool IsStatic() const; - bool IsStaticTimeout(int framesTime) const; + bool IsStaticTimeout(time_point_t currTime, double 
staticPeriod) const; bool IsOutOfTheFrame() const; cv::RotatedRect GetLastRect() const; - const Point_t& AveragePoint() const; - Point_t& AveragePoint(); const CRegion& LastRegion() const; - size_t SkippedFrames() const; - size_t& SkippedFrames(); + objtype_t GetCurrType() const; + double GetLostPeriod(time_point_t currTime) const; + void ResetLostTime(time_point_t currTime); + + TrackingObject ConstructObject(time_point_t frameTime) const; + track_id_t GetID() const; - TrackingObject ConstructObject() const; + tracking::FilterGoal GetFilterGoal() const; + void KalmanPredictRect(); + void KalmanPredictPoint(); private: TKalmanFilter m_kalman; @@ -344,14 +133,18 @@ class CTrack cv::RotatedRect m_predictionRect; Point_t m_predictionPoint; - size_t m_trackID = 0; - size_t m_skippedFrames = 0; + track_id_t m_trackID; + time_point_t m_lastDetectionTime; + + objtype_t m_currType = bad_type; + objtype_t m_lastType = bad_type; + size_t m_anotherTypeCounter = 0; + static constexpr size_t m_changeTypeThreshold = 25; - tracking::LostTrackType m_externalTrackerForLost; + tracking::LostTrackType m_externalTrackerForLost = tracking::TrackNone; #ifdef USE_OCV_KCF cv::Ptr m_tracker; #endif - std::unique_ptr m_VOTTracker; /// void RectUpdate(const CRegion& region, bool dataCorrect, cv::UMat prevFrame, cv::UMat currFrame); @@ -360,18 +153,18 @@ class CTrack void CreateExternalTracker(int channels); /// - void PointUpdate(const Point_t& pt, const cv::Size& newObjSize, bool dataCorrect, const cv::Size& frameSize); + void PointUpdate(const Point_t& pt, const cv::Size& newObjSize, float newAngle, bool dataCorrect, const cv::Size& frameSize); RegionEmbedding m_regionEmbedding; /// - bool CheckStatic(int trajLen, cv::UMat currFrame, const CRegion& region, int maxSpeedForStatic); + bool CheckStatic(int trajLen, cv::UMat currFrame, const CRegion& region, int maxSpeedForStatic, time_point_t currTime); cv::UMat m_staticFrame; cv::Rect m_staticRect; - int m_staticFrames = 0; + time_point_t 
m_staticStartTime; bool m_isStatic = false; - bool m_filterObjectSize = false; + tracking::FilterGoal m_filterGoal = tracking::FilterGoal::FilterCenter; bool m_outOfTheFrame = false; }; diff --git a/src/Tracker/trajectory.h b/src/Tracker/trajectory.h new file mode 100644 index 000000000..9b4dd22a9 --- /dev/null +++ b/src/Tracker/trajectory.h @@ -0,0 +1,404 @@ +#pragma once +#include +#include "defines.h" + +/// +/// \brief The TrajectoryPoint struct +/// +struct TrajectoryPoint +{ + /// + /// \brief TrajectoryPoint + /// + TrajectoryPoint() = default; + + /// + /// \brief TrajectoryPoint + /// \param prediction + /// + TrajectoryPoint(const Point_t& prediction, time_point_t frameTime) + : m_prediction(prediction), m_frameTime(frameTime) + { + } + + /// + /// \brief TrajectoryPoint + /// \param prediction + /// \param raw + /// + TrajectoryPoint(const Point_t& prediction, const Point_t& raw, time_point_t frameTime) + : + m_prediction(prediction), + m_raw(raw), + m_frameTime(frameTime), + m_hasRaw(true) + { + } + + /// + TrajectoryPoint(const TrajectoryPoint& tp) noexcept + : m_prediction(tp.m_prediction), m_raw(tp.m_raw), m_frameTime(tp.m_frameTime), m_hasRaw(tp.m_hasRaw) + { + } + + /// + TrajectoryPoint& operator=(const TrajectoryPoint& tp) noexcept + { + m_prediction = tp.m_prediction; + m_raw = tp.m_raw; + m_frameTime = tp.m_frameTime; + m_hasRaw = tp.m_hasRaw; + return *this; + } + + /// + TrajectoryPoint(TrajectoryPoint&&) = default; + + Point_t m_prediction; + Point_t m_raw; + time_point_t m_frameTime; + bool m_hasRaw = false; +}; + +/// +/// \brief The Trace class +/// +class Trace +{ +public: + /// + Trace() = default; + /// + Trace(const Trace&) = default; + /// + Trace(Trace&&) = default; + /// + Trace& operator=(const Trace& trace) + { + m_trace = trace.m_trace; + return *this; + } + + /// + /// \brief operator [] + /// \param i + /// \return + /// + const Point_t& operator[](size_t i) const + { + return m_trace[i].m_prediction; + } + + /// + /// \brief 
operator [] + /// \param i + /// \return + /// + Point_t& operator[](size_t i) + { + return m_trace[i].m_prediction; + } + + /// + /// \brief at + /// \param i + /// \return + /// + const TrajectoryPoint& at(size_t i) const + { + return m_trace[i]; + } + /// + /// \brief at + /// \param i + /// \return + /// + TrajectoryPoint& at(size_t i) + { + return m_trace[i]; + } + + /// + /// \brief size + /// \return + /// + size_t size() const + { + return m_trace.size(); + } + + /// + /// \brief push_back + /// \param prediction + /// + void push_back(const Point_t& prediction, time_point_t currTime) + { + m_trace.emplace_back(prediction, currTime); + } + void push_back(const Point_t& prediction, const Point_t& raw, time_point_t currTime) + { + m_trace.emplace_back(prediction, raw, currTime); + } + + /// + /// \brief pop_front + /// \param count + /// + void pop_front(size_t count) + { + if (count < size()) + m_trace.erase(m_trace.begin(), m_trace.begin() + count); + else + m_trace.clear(); + } + + /// + /// \brief pop_back + /// + void pop_back() + { + m_trace.pop_back(); + } + + /// + /// \brief GetRawCount + /// \param lastPeriod + /// \return + /// + size_t GetRawCount(size_t lastPeriod) const + { + size_t res = 0; + + size_t i = 0; + if (lastPeriod < m_trace.size()) + i = m_trace.size() - lastPeriod; + + for (; i < m_trace.size(); ++i) + { + if (m_trace[i].m_hasRaw) + ++res; + } + + return res; + } + + /// + /// \brief Reserve + /// \param capacity + /// \return + /// + void Reserve(size_t capacity) + { + m_trace.reserve(capacity); + } + +private: + std::vector m_trace; +}; + +/// +/// \brief The TrackingObject class +/// +struct TrackingObject +{ + Trace m_trace; // Trajectory + track_id_t m_ID; // Objects ID + cv::RotatedRect m_rrect; // Coordinates + cv::Vec m_velocity; // pixels/sec + objtype_t m_type = bad_type; // Objects type name or empty value + float m_confidence = -1; // From Detector with score (YOLO or SSD) + bool m_isStatic = false; // Object is 
abandoned + int m_staticTime = 0; // Object is abandoned, seconds + bool m_outOfTheFrame = false; // Is object out of the frame + mutable bool m_lastRobust = false; // saved latest robust value + + /// + TrackingObject(const cv::RotatedRect& rrect, track_id_t ID, const Trace& trace, + bool isStatic, int staticTime, bool outOfTheFrame, objtype_t type, float confidence, cv::Vec velocity) + : + m_trace(trace), m_ID(ID), m_rrect(rrect), m_velocity(velocity), m_type(type), m_confidence(confidence), + m_isStatic(isStatic), m_staticTime(staticTime), + m_outOfTheFrame(outOfTheFrame) + { + //std::cout << "TrackingObject.m_rrect: " << m_rrect.center << ", " << m_rrect.angle << ", " << m_rrect.size << std::endl; + } + + /// + TrackingObject() = default; + /// + TrackingObject(const TrackingObject&) = default; + /// + TrackingObject(TrackingObject&&) = default; + /// + TrackingObject & operator=(const TrackingObject& track) + { + m_trace = track.m_trace; + m_ID = track.m_ID; + m_rrect = track.m_rrect; + m_velocity = track.m_velocity; + m_type = track.m_type; + m_confidence = track.m_confidence; + m_isStatic = track.m_isStatic; + m_staticTime = track.m_staticTime; + m_outOfTheFrame = track.m_outOfTheFrame; + m_lastRobust = track.m_lastRobust; + + return *this; + } + /// + ~TrackingObject() = default; + + /// + /// \brief IsRobust + /// \param minTraceSize + /// \param minRawRatio + /// \param sizeRatio + /// \return + /// + bool IsRobust(int minTraceSize, float minRawRatio, cv::Size2f sizeRatio, size_t lastDetectsCount = 0) const + { + m_lastRobust = m_trace.size() > static_cast(minTraceSize); + if (lastDetectsCount) + { + size_t raws = m_trace.GetRawCount(lastDetectsCount); + m_lastRobust = (raws > 0); + } + m_lastRobust &= m_trace.GetRawCount(m_trace.size() - 1) / static_cast(m_trace.size()) > minRawRatio; + if (sizeRatio.width + sizeRatio.height > 0) + { + float sr = m_rrect.size.width / m_rrect.size.height; + if (sizeRatio.width > 0) + m_lastRobust &= (sr > 
sizeRatio.width); + + if (sizeRatio.height > 0) + m_lastRobust &= (sr < sizeRatio.height); + } + if (m_outOfTheFrame) + m_lastRobust = false; + + //std::cout << "lastRobust = " << m_lastRobust << ", outOfTheFrame = " << m_outOfTheFrame << ", raw count = " << m_trace.GetRawCount(m_trace.size() - 1) << ", trace = " << m_trace.size() << std::endl; + + return m_lastRobust; + } + + /// + /// \brief GetTrajectory + /// \return + /// + std::vector GetTrajectory() const + { + std::vector trajectory(m_trace.size()); + for (size_t i = 0; i < m_trace.size(); ++i) + { + trajectory[i] = m_trace.at(i).m_prediction; + } + return trajectory; + } + + /// + /// \brief LeastSquarespoly2 + /// \return + /// + void LeastSquarespoly2(size_t posFrom, size_t count, track_t& ax, track_t& v0x, track_t& x0, track_t& ay, track_t& v0y, track_t& y0) const + { + double b1_x(0), b2_x(0), b3_x(0); + double b1_y(0), b2_y(0), b3_y(0); + double t_0(0.), t_1(0.), t_2(0.), t_3(0.), t_4(0.); + double j = static_cast(posFrom); + for (size_t i = posFrom; i < count; ++i, j += 1.) + { + double sqr_j = sqr(j); + + t_0 += 1.; + t_1 += j; + t_2 += sqr_j; + t_3 += j * sqr_j; + t_4 += sqr(sqr_j); + + const auto& pt = m_trace.at(i).m_prediction; + + b1_x += pt.x; + b2_x += j * pt.x; + b3_x += sqr_j * pt.x; + + b1_y += pt.y; + b2_y += j * pt.y; + b3_y += sqr_j * pt.y; + } + + // Cramers rule for system of linear equations 3x3 + double a11(t_0), a12(t_1), a13(t_2), a21(t_1), a22(t_2), a23(t_3), a31(t_2), a32(t_3), a33(t_4); + + double det_1 = 1. 
/ (a11 * a22 * a33 + a21 * a32 * a13 + a12 * a23 * a31 - a31 * a22 * a13 - a11 * a23 * a32 - a12 * a21 * a33); + x0 = static_cast(det_1 * (b1_x * a22 * a33 + b2_x * a32 * a13 + a12 * a23 * b3_x - b3_x * a22 * a13 - b1_x * a23 * a32 - a12 * b2_x * a33)); + v0x = static_cast(det_1 * (a11 * b2_x * a33 + a21 * b3_x * a13 + b1_x * a23 * a31 - a31 * b2_x * a13 - a11 * a23 * b3_x - b1_x * a21 * a33)); + ax = static_cast(det_1 * (a11 * a22 * b3_x + a21 * a32 * b1_x + a12 * b2_x * a31 - a31 * a22 * b1_x - a11 * b2_x * a32 - a12 * a21 * b3_x)); + y0 = static_cast(det_1 * (b1_y * a22 * a33 + b2_y * a32 * a13 + a12 * a23 * b3_y - b3_y * a22 * a13 - b1_y * a23 * a32 - a12 * b2_y * a33)); + v0y = static_cast(det_1 * (a11 * b2_y * a33 + a21 * b3_y * a13 + b1_y * a23 * a31 - a31 * b2_y * a13 - a11 * a23 * b3_y - b1_y * a21 * a33)); + ay = static_cast(det_1 * (a11 * a22 * b3_y + a21 * a32 * b1_y + a12 * b2_y * a31 - a31 * a22 * b1_y - a11 * b2_y * a32 - a12 * a21 * b3_y)); + } + + /// + struct LSParams + { + track_t m_ax = 0; + track_t m_v0x = 0; + track_t m_x0 = 0; + track_t m_ay = 0; + track_t m_v0y = 0; + track_t m_y0 = 0; + + friend std::ostream& operator<<(std::ostream& os, const LSParams& lsParaml) + { + os << "(" << lsParaml.m_ax << ", " << lsParaml.m_v0x << ", " << lsParaml.m_x0 << "), (" << lsParaml.m_ay << ", " << lsParaml.m_v0y << ", " << lsParaml.m_y0 << ")"; + return os; + } + }; + + /// + /// \brief LeastSquares2 + /// \return + /// + bool LeastSquares2(size_t framesCount, track_t& mean, track_t& stddev, LSParams& lsParams) const + { + bool res = m_trace.size() > 3; + + if (res) + { + size_t startPos = 0; +#if 0 + if (framesCount < m_trace.size()) + startPos = m_trace.size() - framesCount; + else + framesCount = m_trace.size(); +#else + framesCount = m_trace.size(); +#endif + + LeastSquarespoly2(startPos, framesCount, lsParams.m_ax, lsParams.m_v0x, lsParams.m_x0, lsParams.m_ay, lsParams.m_v0y, lsParams.m_y0); + + track_t sum = 0; + track_t sum2 = 0; + for (size_t i = 
startPos; i < framesCount; ++i) + { + track_t t = static_cast(i); + track_t dist = distance(m_trace[i], Point_t(lsParams.m_ax * sqr(t) + lsParams.m_v0x * t + lsParams.m_x0, + lsParams.m_ay * sqr(t) + lsParams.m_v0y * t + lsParams.m_y0)); + sum += dist; + sum2 += sqr(dist); + } + mean = sum / static_cast(framesCount); + stddev = sqrt(sum2 / static_cast(framesCount) - sqr(mean)); + } + return res; + } + + /// + /// \brief GetTrajectory + /// \return + /// + cv::Rect GetBoundingRect() const + { + return m_rrect.boundingRect(); + } +}; diff --git a/src/common/defines.h b/src/common/defines.h deleted file mode 100644 index c71fa9398..000000000 --- a/src/common/defines.h +++ /dev/null @@ -1,166 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include "object_types.h" - -// --------------------------------------------------------------------------- -// -// --------------------------------------------------------------------------- -typedef float track_t; -typedef cv::Point_ Point_t; -#define El_t CV_32F -#define Mat_t CV_32FC - -typedef std::vector assignments_t; -typedef std::vector distMatrix_t; - -/// -/// \brief config_t -/// -typedef std::multimap config_t; - -/// -/// \brief The CRegion class -/// -class CRegion -{ -public: - CRegion() = default; - - CRegion(const cv::Rect& rect) - : m_brect(rect) - { - B2RRect(); - } - - CRegion(const cv::RotatedRect& rrect) - : m_rrect(rrect) - { - R2BRect(); - } - - CRegion(const cv::Rect& rect, objtype_t type, float confidence) - : m_type(type), m_brect(rect), m_confidence(confidence) - { - B2RRect(); - } - - objtype_t m_type = bad_type; - cv::RotatedRect m_rrect; - cv::Rect m_brect; - float m_confidence = -1; - -private: - /// - /// \brief R2BRect - /// \return - /// - cv::Rect R2BRect() - { - m_brect = m_rrect.boundingRect(); - return m_brect; - } - /// - /// \brief B2RRect - /// \return - /// - cv::RotatedRect B2RRect() - { - m_rrect = cv::RotatedRect(m_brect.tl(), cv::Point2f(static_cast(m_brect.x + 
m_brect.width), static_cast(m_brect.y)), m_brect.br()); - return m_rrect; - } -}; - -typedef std::vector regions_t; - -/// -/// -/// -namespace tracking -{ -/// -/// \brief The Detectors enum -/// -enum Detectors -{ - Motion_VIBE = 0, - Motion_MOG, - Motion_GMG, - Motion_CNT, - Motion_SuBSENSE, - Motion_LOBSTER, - Motion_MOG2, - Face_HAAR, - Pedestrian_HOG, - Pedestrian_C4, - Yolo_Darknet, - Yolo_TensorRT, - DNN_OCV, - DetectorsCount -}; - -/// -/// \brief The DistType enum -/// -enum DistType -{ - DistCenters, // Euclidean distance between centers, [0, 1] - DistRects, // Euclidean distance between bounding rectangles, [0, 1] - DistJaccard, // Intersection over Union, IoU, [0, 1] - DistHist, // Bhatacharia distance between histograms, [0, 1] - DistFeatureCos, // Cosine distance between embeddings, [0, 1] - DistsCount -}; - -/// -/// \brief The FilterGoal enum -/// -enum FilterGoal -{ - FilterCenter, - FilterRect, - FiltersCount -}; - -/// -/// \brief The KalmanType enum -/// -enum KalmanType -{ - KalmanLinear, - KalmanUnscented, - KalmanAugmentedUnscented, - KalmanCount -}; - -/// -/// \brief The MatchType enum -/// -enum MatchType -{ - MatchHungrian, - MatchBipart, - MatchCount -}; - -/// -/// \brief The LostTrackType enum -/// -enum LostTrackType -{ - TrackNone, - TrackKCF, - TrackMIL, - TrackMedianFlow, - TrackGOTURN, - TrackMOSSE, - TrackCSRT, - TrackDAT, - TrackSTAPLE, - TrackLDES, - SingleTracksCount -}; -} diff --git a/src/common/object_types.cpp b/src/common/object_types.cpp deleted file mode 100644 index 020dc5ef9..000000000 --- a/src/common/object_types.cpp +++ /dev/null @@ -1,87 +0,0 @@ -#include "object_types.h" - -std::vector TypeConverter::m_typeNames = -{ - "person", -"bicycle", -"car", -"motorbike", -"aeroplane", -"bus", -"train", -"truck", -"boat", -"traffic_light", -"fire_hydrant", -"stop_sign", -"parking_meter", -"bench", -"bird", -"cat", -"dog", -"horse", -"sheep", -"cow", -"elephant", -"bear", -"zebra", -"giraffe", -"backpack", -"umbrella", 
-"handbag", -"tie", -"suitcase", -"frisbee", -"skis", -"snowboard", -"sports_ball", -"kite", -"baseball_bat", -"baseball_glove", -"skateboard", -"surfboard", -"tennis_racket", -"bottle", -"wine_glass", -"cup", -"fork", -"knife", -"spoon", -"bowl", -"banana", -"apple", -"sandwich", -"orange", -"broccoli", -"carrot", -"hot_dog", -"pizza", -"donut", -"cake", -"chair", -"sofa", -"pottedplant", -"bed", -"diningtable", -"toilet", -"tvmonitor", -"laptop", -"mouse", -"remote", -"keyboard", -"cell_phone", -"microwave", -"oven", -"toaster", -"sink", -"refrigerator", -"book", -"clock", -"vase", -"scissors", -"teddy_bear", -"hair_drier", -"toothbrush" -}; - -std::string TypeConverter::m_badTypeName = "unknown"; \ No newline at end of file diff --git a/src/common/object_types.h b/src/common/object_types.h deleted file mode 100644 index 3d03841e2..000000000 --- a/src/common/object_types.h +++ /dev/null @@ -1,204 +0,0 @@ -#pragma once -#include -#include - -/// -enum class ObjectTypes -{ - obj_person, - obj_bicycle, - obj_car, - obj_motorbike, - obj_aeroplane, - obj_bus, - obj_train, - obj_truck, - obj_boat, - obj_traffic_light, - obj_fire_hydrant, - obj_stop_sign, - obj_parking_meter, - obj_bench, - obj_bird, - obj_cat, - obj_dog, - obj_horse, - obj_sheep, - obj_cow, - obj_elephant, - obj_bear, - obj_zebra, - obj_giraffe, - obj_backpack, - obj_umbrella, - obj_handbag, - obj_tie, - obj_suitcase, - obj_frisbee, - obj_skis, - obj_snowboard, - obj_sports_ball, - obj_kite, - obj_baseball_bat, - obj_baseball_glove, - obj_skateboard, - obj_surfboard, - obj_tennis_racket, - obj_bottle, - obj_wine_glass, - obj_cup, - obj_fork, - obj_knife, - obj_spoon, - obj_bowl, - obj_banana, - obj_apple, - obj_sandwich, - obj_orange, - obj_broccoli, - obj_carrot, - obj_hot_dog, - obj_pizza, - obj_donut, - obj_cake, - obj_chair, - obj_sofa, - obj_pottedplant, - obj_bed, - obj_diningtable, - obj_toilet, - obj_tvmonitor, - obj_laptop, - obj_mouse, - obj_remote, - obj_keyboard, - obj_cell_phone, - 
obj_microwave, - obj_oven, - obj_toaster, - obj_sink, - obj_refrigerator, - obj_book, - obj_clock, - obj_vase, - obj_scissors, - obj_teddy_bear, - obj_hair_drier, - obj_toothbrush, - TypesCount -}; - -/// -enum class CocoTypes -{ - coco_obj_person, - coco_obj_bicycle, - coco_obj_car, - coco_obj_motorbike, - coco_obj_aeroplane, - coco_obj_bus, - coco_obj_train, - coco_obj_truck, - coco_obj_boat, - coco_obj_traffic_light, - coco_obj_fire_hydrant, - coco_obj_stop_sign, - coco_obj_parking_meter, - coco_obj_bench, - coco_obj_bird, - coco_obj_cat, - coco_obj_dog, - coco_obj_horse, - coco_obj_sheep, - coco_obj_cow, - coco_obj_elephant, - coco_obj_bear, - coco_obj_zebra, - coco_obj_giraffe, - coco_obj_backpack, - coco_obj_umbrella, - coco_obj_handbag, - coco_obj_tie, - coco_obj_suitcase, - coco_obj_frisbee, - coco_obj_skis, - coco_obj_snowboard, - coco_obj_sports_ball, - coco_obj_kite, - coco_obj_baseball_bat, - coco_obj_baseball_glove, - coco_obj_skateboard, - coco_obj_surfboard, - coco_obj_tennis_racket, - coco_obj_bottle, - coco_obj_wine_glass, - coco_obj_cup, - coco_obj_fork, - coco_obj_knife, - coco_obj_spoon, - coco_obj_bowl, - coco_obj_banana, - coco_obj_apple, - coco_obj_sandwich, - coco_obj_orange, - coco_obj_broccoli, - coco_obj_carrot, - coco_obj_hot_dog, - coco_obj_pizza, - coco_obj_donut, - coco_obj_cake, - coco_obj_chair, - coco_obj_sofa, - coco_obj_pottedplant, - coco_obj_bed, - coco_obj_diningtable, - coco_obj_toilet, - coco_obj_tvmonitor, - coco_obj_laptop, - coco_obj_mouse, - coco_obj_remote, - coco_obj_keyboard, - coco_obj_cell_phone, - coco_obj_microwave, - coco_obj_oven, - coco_obj_toaster, - coco_obj_sink, - coco_obj_refrigerator, - coco_obj_book, - coco_obj_clock, - coco_obj_vase, - coco_obj_scissors, - coco_obj_teddy_bear, - coco_obj_hair_drier, - coco_obj_toothbrush -}; - -typedef int objtype_t; -constexpr objtype_t bad_type = -1; - -/// -class TypeConverter -{ -public: - /// - static std::string Type2Str(objtype_t type) - { - return (type == 
bad_type) ? m_badTypeName : m_typeNames[type]; - } - - /// - static objtype_t Str2Type(const std::string& str) - { - for (size_t i = 0; i < m_typeNames.size(); ++i) - { - if (str == m_typeNames[i]) - return static_cast(i); - } - m_typeNames.emplace_back(str); - return static_cast(m_typeNames.size()) - 1; - } - -private: - static std::vector m_typeNames; - static std::string m_badTypeName; -}; diff --git a/src/mtracking/defines.h b/src/mtracking/defines.h new file mode 100644 index 000000000..245fc0a2b --- /dev/null +++ b/src/mtracking/defines.h @@ -0,0 +1,461 @@ +#pragma once + +#include +#include +#include + +#ifdef HAVE_EXPERIMENTAL_FILESYSTEM +#include +namespace fs = std::experimental::filesystem; +#else +#include +namespace fs = std::filesystem; +#endif + +#include +#include "object_types.h" + +// --------------------------------------------------------------------------- +// +// --------------------------------------------------------------------------- +typedef float track_t; +typedef cv::Point_ Point_t; +#define El_t CV_32F +#define Mat_t CV_32FC + +typedef std::vector assignments_t; +typedef std::vector distMatrix_t; + +typedef std::chrono::time_point time_point_t; + +/// +template +class TrackID +{ +public: + typedef T value_type; + + TrackID() = default; + TrackID(value_type val) + : m_val(val) + { + } + + bool operator==(const TrackID& id) const + { + return m_val == id.m_val; + } + + std::string ID2Str() const + { + return std::to_string(m_val); + } + static TrackID Str2ID(const std::string& id) + { + return TrackID(std::stoi(id)); + } + TrackID NextID() const + { + return TrackID(m_val + 1); + } + size_t ID2Module(size_t module) const + { + return m_val % module; + } + + value_type m_val{ 0 }; +}; + +typedef TrackID track_id_t; +namespace std +{ + template <> + struct hash + { + std::size_t operator()(const track_id_t& k) const + { + return std::hash()(k.m_val); + } + }; + +} + +/// +/// \brief config_t +/// +typedef std::multimap config_t; + +/// 
+/// \brief The CRegion class +/// +class CRegion +{ +public: + /// + CRegion() = default; + + /// + CRegion(const cv::Rect& rect) noexcept + : m_brect(rect) + { + B2RRect(); + } + + /// + CRegion(const cv::RotatedRect& rrect) noexcept + : m_rrect(rrect) + { + if (m_rrect.size.width < 1) + m_rrect.size.width = 1; + if (m_rrect.size.height < 1) + m_rrect.size.height = 1; + R2BRect(); + } + + /// + CRegion(const cv::RotatedRect& rrect, objtype_t type, float confidence) noexcept + : m_type(type), m_rrect(rrect), m_confidence(confidence) + { + if (m_rrect.size.width < 1) + m_rrect.size.width = 1; + if (m_rrect.size.height < 1) + m_rrect.size.height = 1; + R2BRect(); + } + + /// + CRegion(const cv::RotatedRect& rrect, const cv::Rect& brect, objtype_t type, float confidence, const cv::Mat& boxMask) noexcept + : m_type(type), m_rrect(rrect), m_brect(brect), m_confidence(confidence) + { + m_boxMask = boxMask; + + if (m_rrect.size.width < 1) + m_rrect.size.width = 1; + if (m_rrect.size.height < 1) + m_rrect.size.height = 1; + + if (!m_boxMask.empty() && m_boxMask.size() != m_brect.size()) + { + m_brect.width = m_boxMask.cols; + m_brect.height = m_boxMask.rows; + } + } + + /// + CRegion(const cv::Rect& brect, objtype_t type, float confidence) noexcept + : m_type(type), m_brect(brect), m_confidence(confidence) + { + B2RRect(); + } + + objtype_t m_type = bad_type; + cv::RotatedRect m_rrect; + cv::Rect m_brect; + track_t m_confidence = -1; + cv::Mat m_boxMask; + +private: + /// + /// \brief R2BRect + /// \return + /// + cv::Rect R2BRect() noexcept + { + m_brect = m_rrect.boundingRect(); + return m_brect; + } + /// + /// \brief B2RRect + /// \return + /// + cv::RotatedRect B2RRect() noexcept + { + m_rrect = cv::RotatedRect(m_brect.tl(), cv::Point2f(static_cast(m_brect.x + m_brect.width), static_cast(m_brect.y)), m_brect.br()); + if (m_rrect.size.width < 1) + m_rrect.size.width = 1; + if (m_rrect.size.height < 1) + m_rrect.size.height = 1; + return m_rrect; + } +}; + +typedef 
std::vector regions_t; + +/// +/// \brief sqr +/// \param val +/// \return +/// +template inline +T sqr(T val) +{ + return val * val; +} + +/// +/// \brief get_lin_regress_params +/// \param in_data +/// \param start_pos +/// \param in_data_size +/// \param kx +/// \param bx +/// \param ky +/// \param by +/// +template +void get_lin_regress_params( + const CONT& in_data, + size_t start_pos, + size_t in_data_size, + T& kx, T& bx, T& ky, T& by) +{ + T m1(0.), m2(0.); + T m3_x(0.), m4_x(0.); + T m3_y(0.), m4_y(0.); + + const T el_count = static_cast(in_data_size - start_pos); + for (size_t i = start_pos; i < in_data_size; ++i) + { + m1 += i; + m2 += sqr(i); + + m3_x += in_data[i].x; + m4_x += i * in_data[i].x; + + m3_y += in_data[i].y; + m4_y += i * in_data[i].y; + } + T det_1 = 1 / (el_count * m2 - sqr(m1)); + + m1 *= -1; + + kx = det_1 * (m1 * m3_x + el_count * m4_x); + bx = det_1 * (m2 * m3_x + m1 * m4_x); + + ky = det_1 * (m1 * m3_y + el_count * m4_y); + by = det_1 * (m2 * m3_y + m1 * m4_y); +} + +/// +/// \brief sqr: Euclid distance between two points +/// \param val +/// \return +/// +template inline +T distance(const POINT_TYPE& p1, const POINT_TYPE& p2) +{ + return sqrt((T)(sqr(p2.x - p1.x) + sqr(p2.y - p1.y))); +} + +/// +/// \brief Clamp: Fit rectangle to frame +/// \param rect +/// \param size +/// \return +/// +inline cv::Rect Clamp(cv::Rect rect, const cv::Size& size) +{ + if (rect.x < 0) + { + rect.width = std::min(rect.width, size.width - 1); + rect.x = 0; + } + else if (rect.x + rect.width >= size.width) + { + rect.x = std::max(0, size.width - rect.width - 1); + rect.width = std::min(rect.width, size.width - 1); + } + if (rect.y < 0) + { + rect.height = std::min(rect.height, size.height - 1); + rect.y = 0; + } + else if (rect.y + rect.height >= size.height) + { + rect.y = std::max(0, size.height - rect.height - 1); + rect.height = std::min(rect.height, size.height - 1); + } + return rect; +} + +/// +/// \brief SaveMat +/// \param m +/// \param name 
+/// \param path +/// +inline bool SaveMat(const cv::Mat& m, std::string prefix, const std::string& ext, const std::string& savePath, bool compressToImage) +{ + bool res = true; + + std::map depthDict; + depthDict.emplace(CV_8U, "uint8"); + depthDict.emplace(CV_8S, "int8"); + depthDict.emplace(CV_16U, "uint16"); + depthDict.emplace(CV_16S, "int16"); + depthDict.emplace(CV_32S, "int32"); + depthDict.emplace(CV_32F, "float32"); + depthDict.emplace(CV_64F, "float64"); + depthDict.emplace(CV_16F, "float16"); + + auto depth = depthDict.find(m.depth()); + if (depth == std::end(depthDict)) + { + std::cout << "File " << prefix << " has a unknown depth: " << m.depth() << std::endl; + res = false; + return res; + } + assert(depth != std::end(depthDict)); + + fs::path fullPath(savePath); + fullPath.append(prefix + "_" + std::to_string(m.cols) + "x" + std::to_string(m.rows) + "_" + depth->second + "_C" + std::to_string(m.channels()) + ext); + prefix = fullPath.generic_string(); + + if (compressToImage) + { + res = cv::imwrite(prefix, m); + } + else + { + FILE* f = 0; +#ifdef _WIN32 + fopen_s(&f, prefix.c_str(), "wb"); +#else + f = fopen(prefix.c_str(), "wb"); +#endif // _WIN32 + res = f != 0; + if (res) + { + for (int y = 0; y < m.rows; ++y) + { + fwrite(m.ptr(y), 1, m.cols * m.elemSize(), f); + } + fclose(f); + std::cout << "File " << prefix << " was writed" << std::endl; + } + } + if (res) + std::cout << "File " << prefix << " was writed" << std::endl; + else + std::cout << "File " << prefix << " can not be opened!" 
<< std::endl; + return res; +} + +/// +/// \brief DrawFilledRect +/// +inline void DrawFilledRect(cv::Mat& frame, const cv::Rect& rect, cv::Scalar cl, int alpha) +{ + if (alpha) + { + const int alpha_1 = 255 - alpha; + const int nchans = frame.channels(); + int color[3] = { cv::saturate_cast(cl[0]), cv::saturate_cast(cl[1]), cv::saturate_cast(cl[2]) }; + for (int y = std::max(0, rect.y); y < std::min(rect.y + rect.height, frame.rows - 1); ++y) + { + uchar* ptr = frame.ptr(y) + nchans * rect.x; + for (int x = std::max(0, rect.x); x < std::min(rect.x + rect.width, frame.cols - 1); ++x) + { + for (int i = 0; i < nchans; ++i) + { + ptr[i] = cv::saturate_cast((alpha_1 * ptr[i] + alpha * color[i]) / 255); + } + ptr += nchans; + } + } + } + else + { + cv::rectangle(frame, rect, cl, cv::FILLED); + } +} + +/// +/// +/// +namespace tracking +{ +/// +/// \brief The Detectors enum +/// +enum Detectors +{ + Motion_VIBE = 0, + Motion_MOG = 1, + Motion_GMG = 2, + Motion_CNT = 3, + Motion_MOG2 = 4, + ONNX_TensorRT = 5, + DNN_OCV = 6, + DetectorsCount +}; + +/// +/// \brief The TrackerTemplate enum +/// +enum TrackerTemplate +{ + UniversalTracker = 0, + ByteTrack = 1 +}; + +/// +/// \brief The DistType enum +/// +enum DistType +{ + DistCenters, // Euclidean distance between centers, [0, 1] + DistRects, // Euclidean distance between bounding rectangles, [0, 1] + DistJaccard, // Intersection over Union, IoU, [0, 1] + DistHist, // Bhatacharia distance between histograms, [0, 1] + DistFeatureCos, // Cosine distance between embeddings, [0, 1] + DistMahalanobis, // Mahalanobis: https://ww2.mathworks.cn/help/vision/ug/motion-based-multiple-object-tracking.html + DistsCount +}; + +/// +/// \brief The FilterGoal enum +/// +enum FilterGoal +{ + FilterCenter, + FilterRect, + FilterRRect, + FiltersCount +}; + +/// +/// \brief The KalmanType enum +/// +enum KalmanType +{ + KalmanLinear, + KalmanUnscented, + KalmanAugmentedUnscented, + KalmanCount +}; + +/// +/// \brief The MatchType enum +/// 
+enum MatchType +{ + MatchHungrian, + MatchLAPJV, + MatchCount +}; + +/// +/// \brief The LostTrackType enum +/// +enum LostTrackType +{ + TrackNone, + TrackKCF, + TrackCSRT, + TrackDaSiamRPN, + TrackNano, + TrackVit, + SingleTracksCount +}; +} diff --git a/src/common/nms.h b/src/mtracking/nms.h similarity index 96% rename from src/common/nms.h rename to src/mtracking/nms.h index 4a3051fdd..08f28b2a5 100644 --- a/src/common/nms.h +++ b/src/mtracking/nms.h @@ -142,15 +142,15 @@ inline void nms2(const std::vector& srcRects, * @param thresh * @param neighbors */ -template +template inline void nms3(const std::vector& srcRects, std::vector& resRects, - float thresh, + T thresh, GET_RECT_FUNC GetRect, GET_SCORE_FUNC GetScore, GET_TYPE_FUNC GetType, int neighbors = 0, - float minScoresSum = 0.f) + T minScoresSum = 0) { resRects.clear(); @@ -159,7 +159,7 @@ inline void nms3(const std::vector& srcRects, return; // Sort the bounding boxes by the detection score - std::multimap idxs; + std::multimap idxs; for (size_t i = 0; i < size; ++i) { idxs.emplace(GetScore(srcRects[i]), i); @@ -175,7 +175,7 @@ inline void nms3(const std::vector& srcRects, auto type1 = GetType(srcRects[lastPos]); int neigborsCount = 0; - float scoresSum = lastElem->first; + T scoresSum = lastElem->first; idxs.erase(lastElem); diff --git a/src/mtracking/object_types.cpp b/src/mtracking/object_types.cpp new file mode 100644 index 000000000..8484723c8 --- /dev/null +++ b/src/mtracking/object_types.cpp @@ -0,0 +1,89 @@ +#include "object_types.h" + +std::vector TypeConverter::m_typeNames = +{ + "person", + "bicycle", + "car", + "motorbike", + "aeroplane", + "bus", + "train", + "truck", + "boat", + "traffic_light", + "fire_hydrant", + "stop_sign", + "parking_meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports_ball", + "kite", 
+ "baseball_bat", + "baseball_glove", + "skateboard", + "surfboard", + "tennis_racket", + "bottle", + "wine_glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot_dog", + "pizza", + "donut", + "cake", + "chair", + "sofa", + "pottedplant", + "bed", + "diningtable", + "toilet", + "tvmonitor", + "laptop", + "mouse", + "remote", + "keyboard", + "cell_phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy_bear", + "hair_drier", + "toothbrush", + "vehicle", + "face" +}; + +std::string TypeConverter::m_badTypeName = "unknown"; diff --git a/src/mtracking/object_types.h b/src/mtracking/object_types.h new file mode 100644 index 000000000..25f78a5e7 --- /dev/null +++ b/src/mtracking/object_types.h @@ -0,0 +1,58 @@ +#pragma once +#include +#include +#include + +typedef int objtype_t; +constexpr objtype_t bad_type = -1; + +/// +class TypeConverter +{ +public: + /// + static std::string Type2Str(objtype_t type) + { + return (type == bad_type) ? 
m_badTypeName : m_typeNames[(size_t)type]; + } + + /// + static objtype_t Str2Type(const std::string& typeName) + { + for (size_t i = 0; i < m_typeNames.size(); ++i) + { + if (typeName == m_typeNames[i]) + { + //std::cout << "Str2Type: " << typeName << " exist: " << i << std::endl; + return static_cast(i); + } + } + m_typeNames.emplace_back(typeName); + //std::cout << "Str2Type: " << typeName << " new: " << (m_typeNames.size()) - 1 << std::endl; + return static_cast(m_typeNames.size()) - 1; + } + + static bool AddNewType(const std::string& typeName) + { + for (size_t i = 0; i < m_typeNames.size(); ++i) + { + if (typeName == m_typeNames[i]) + { + //std::cout << "AddNewType: " << typeName << ": false" << std::endl; + return false; + } + } + m_typeNames.emplace_back(typeName); + //std::cout << "AddNewType: " << typeName << ": " << (m_typeNames.size() - 1) << std::endl; + return true; + } + + static size_t TypesCount() + { + return m_typeNames.size(); + } + +private: + static std::vector m_typeNames; + static std::string m_badTypeName; +}; diff --git a/src/python_bind/mtracker.cpp b/src/python_bind/mtracker.cpp new file mode 100644 index 000000000..c9aa731b2 --- /dev/null +++ b/src/python_bind/mtracker.cpp @@ -0,0 +1,431 @@ +#include +#include +#include +#include +#include +#include + +#include + +#include "../mtracking/defines.h" +#include "../Tracker/Ctracker.h" +#include "../Detector/BaseDetector.h" +#include "../Detector/MotionDetector.h" + +namespace py = pybind11; + +PYBIND11_MAKE_OPAQUE(std::multimap) +PYBIND11_MAKE_OPAQUE(std::map) +PYBIND11_MAKE_OPAQUE(std::map) + +#include +#include +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#include + +#if PY_VERSION_HEX >= 0x03000000 + #define PyInt_Check PyLong_Check + #define PyInt_AsLong PyLong_AsLong +#endif + +namespace pybind11 { namespace detail{ +template<> +struct type_caster{ +public: + PYBIND11_TYPE_CASTER(cv::Mat, _("numpy.ndarray")); + + //! 1. 
cast numpy.ndarray to cv::Mat + bool load(handle obj, bool) + { + array b = reinterpret_borrow(obj); + buffer_info info = b.request(); + + //const int ndims = (int)info.ndim; + int nh = 1; + int nw = 1; + int nc = 1; + int ndims = static_cast(info.ndim); + if(ndims == 2){ + nh = static_cast(info.shape[0]); + nw = static_cast(info.shape[1]); + } else if(ndims == 3){ + nh = static_cast(info.shape[0]); + nw = static_cast(info.shape[1]); + nc = static_cast(info.shape[2]); + }else{ + char msg[64]; + std::sprintf(msg, "Unsupported dim %d, only support 2d, or 3-d", ndims); + throw std::logic_error(msg); + return false; + } + + int dtype; + if(info.format == format_descriptor::format()){ + dtype = CV_8UC(nc); + }else if (info.format == format_descriptor::format()){ + dtype = CV_32SC(nc); + }else if (info.format == format_descriptor::format()){ + dtype = CV_32FC(nc); + }else{ + throw std::logic_error("Unsupported type, only support uchar, int32, float"); + return false; + } + + value = cv::Mat(nh, nw, dtype, info.ptr); + return true; + } + + //! 2. cast cv::Mat to numpy.ndarray + static handle cast(const cv::Mat& mat, return_value_policy, handle /*defval*/) + { + std::string format = format_descriptor::format(); + size_t elemsize = sizeof(unsigned char); + int nw = mat.cols; + int nh = mat.rows; + int nc = mat.channels(); + int depth = mat.depth(); + int type = mat.type(); + int dim = (depth == type)? 
2 : 3; + + if(depth == CV_8U){ + format = format_descriptor::format(); + elemsize = sizeof(unsigned char); + }else if(depth == CV_32S){ + format = format_descriptor::format(); + elemsize = sizeof(int); + }else if(depth == CV_32F){ + format = format_descriptor::format(); + elemsize = sizeof(float); + }else{ + throw std::logic_error("Unsupport type, only support uchar, int32, float"); + } + + std::vector bufferdim; + std::vector strides; + if (dim == 2) { + bufferdim = {(size_t) nh, (size_t) nw}; + strides = {elemsize * (size_t) nw, elemsize}; + } else if (dim == 3) { + bufferdim = {(size_t) nh, (size_t) nw, (size_t) nc}; + strides = {(size_t) elemsize * nw * nc, (size_t) elemsize * nc, (size_t) elemsize}; + } + return array(buffer_info( mat.data, elemsize, format, dim, bufferdim, strides )).release(); + } +}; + +template +struct type_caster>{ + + PYBIND11_TYPE_CASTER(cv::Size_, _("tuple_wh")); + + bool load(handle obj, bool) + { + if(!py::isinstance(obj)) + { + std::logic_error("Size(w, h) should be a tuple!"); + return false; + } + + py::tuple pt = reinterpret_borrow(obj); + if (pt.size() != 2) + { + std::logic_error("Size(w, h) tuple should be size of 2"); + return false; + } + + value = cv::Size(static_cast(pt[0].cast()), static_cast(pt[1].cast())); + return true; + } + + static handle cast(const cv::Size_& sz, return_value_policy, handle) + { + return py::make_tuple(sz.width, sz.height).release(); + } +}; + +template<> +struct type_caster{ + + PYBIND11_TYPE_CASTER(cv::Point, _("tuple_xy")); + + bool load(handle obj, bool) + { + if (!py::isinstance(obj)) + { + std::logic_error("Point(x,y) should be a tuple!"); + return false; + } + + py::tuple pt = reinterpret_borrow(obj); + if(pt.size()!=2) + { + std::logic_error("Point(x,y) tuple should be size of 2"); + return false; + } + + value = cv::Point(pt[0].cast(), pt[1].cast()); + return true; + } + + static handle cast(const cv::Point& pt, return_value_policy, handle) + { + return py::make_tuple(pt.x, 
pt.y).release(); + } +}; + + +}}//! end namespace pybind11::detail + + +/// +class PyDetector : public BaseDetector +{ +public: + using BaseDetector::BaseDetector; + + PyDetector() + { + std::cout << "PyDetector" << std::endl; + } + + bool Init(const config_t& config) override + { + PYBIND11_OVERLOAD_PURE(bool, BaseDetector, Init, config); + } + + void DetectMat(cv::Mat frame) override + { + PYBIND11_OVERLOAD(void, BaseDetector, DetectMat, frame); + } + + bool CanGrayProcessing() const override + { + PYBIND11_OVERLOAD_PURE(bool, BaseDetector, CanGrayProcessing, ); + } +}; + +/// +class PyMotionDetector : public MotionDetector +{ +public: + using MotionDetector::MotionDetector; + + bool Init(const config_t& config) override + { + PYBIND11_OVERLOAD(bool, MotionDetector, Init, config); + } + + bool CanGrayProcessing() const override + { + PYBIND11_OVERLOAD(bool, MotionDetector, CanGrayProcessing, ); + } +}; + +/// +cv::Mat read_image(std::string image_name) +{ + return cv::imread(image_name, cv::IMREAD_COLOR); +} + +/// +void show_image(cv::Mat image) +{ +#ifndef SILENT_WORK + cv::imshow("image_from_Cpp", image); + cv::waitKey(0); +#endif +} + +/// +cv::Mat passthru(cv::Mat image) +{ + return image; +} + +/// +cv::Mat cloneimg(cv::Mat image) +{ + return image.clone(); +} + +class PyBaseTracker : public BaseTracker +{ +public: + using BaseTracker::BaseTracker; + + void UpdateMat(const regions_t& regions, cv::Mat currFrame, float fps) override + { + PYBIND11_OVERLOAD_PURE(void, BaseTracker, Update, regions, currFrame, fps); + } + + std::vector GetTracksCopy() const override + { + PYBIND11_OVERLOAD(std::vector, BaseTracker, GetTracksCopy); + } + + void GetRemovedTracks(std::vector& trackIDs) const override + { + PYBIND11_OVERLOAD_PURE(void, BaseTracker, GetRemovedTracks, trackIDs); + } +}; + +/// +PYBIND11_MODULE(pymtracking, m) +{ + m.doc() = R"pbdoc( + mtracking library + ----------------------- + .. currentmodule:: pymtracking + .. 
autosummary:: + :toctree: _generate + add + subtract + )pbdoc"; + + py::class_(m, "KeyVal") + .def(py::init<>()) + .def("Add", &KeyVal::Add); + + py::bind_map>(m, "MapStringString"); + py::bind_map>(m, "MapStringDouble"); + + py::class_(m, "TrackerSettings") + .def(py::init<>()) + .def("CheckDistance", &TrackerSettings::CheckDistance) + .def("SetDistances", &TrackerSettings::SetDistances) + .def("SetDistance", &TrackerSettings::SetDistance) + .def("AddNearTypes", &TrackerSettings::AddNearTypes) + .def("CheckType", &TrackerSettings::CheckType) + .def_readwrite("kalmanType", &TrackerSettings::m_kalmanType) + .def_readwrite("filterGoal", &TrackerSettings::m_filterGoal) + .def_readwrite("lostTrackType", &TrackerSettings::m_lostTrackType) + .def_readwrite("matchType", &TrackerSettings::m_matchType) + .def_readwrite("dt", &TrackerSettings::m_dt) + .def_readwrite("accelNoiseMag", &TrackerSettings::m_accelNoiseMag) + .def_readwrite("useAcceleration", &TrackerSettings::m_useAcceleration) + .def_readwrite("distThres", &TrackerSettings::m_distThres) + .def_readwrite("minAreaRadiusPix", &TrackerSettings::m_minAreaRadiusPix) + .def_readwrite("minAreaRadiusK", &TrackerSettings::m_minAreaRadiusK) + .def_readwrite("maximumAllowedLostTime", &TrackerSettings::m_maximumAllowedLostTime) + .def_readwrite("maxTraceLength", &TrackerSettings::m_maxTraceLength) + .def_readwrite("useAbandonedDetection", &TrackerSettings::m_useAbandonedDetection) + .def_readwrite("minStaticTime", &TrackerSettings::m_minStaticTime) + .def_readwrite("maxStaticTime", &TrackerSettings::m_maxStaticTime); + + py::class_(m, "CvRect") + .def(py::init<>()) + .def_readwrite("x", &cv::Rect::x) + .def_readwrite("y", &cv::Rect::y) + .def_readwrite("width", &cv::Rect::width) + .def_readwrite("height", &cv::Rect::height); + + py::class_(m, "CvRRect") + .def(py::init<>()) + .def("brect", &cv::RotatedRect::boundingRect); + + py::class_(m, "CRegion") + .def(py::init<>()) + .def_readwrite("brect", &CRegion::m_brect) + 
.def_readwrite("type", &CRegion::m_type) + .def_readwrite("confidence", &CRegion::m_confidence); + + py::class_(m, "TrackingObject") + .def(py::init<>()) + .def("IsRobust", &TrackingObject::IsRobust) + .def("GetTrajectory", &TrackingObject::GetTrajectory) + .def("GetBoundingRect", &TrackingObject::GetBoundingRect) + .def_readwrite("rrect", &TrackingObject::m_rrect) + .def_readwrite("ID", &TrackingObject::m_ID) + .def_readwrite("isStatic", &TrackingObject::m_isStatic) + .def_readwrite("outOfTheFrame", &TrackingObject::m_outOfTheFrame) + .def_readwrite("type", &TrackingObject::m_type) + .def_readwrite("confidence", &TrackingObject::m_confidence) + .def_readwrite("velocity", &TrackingObject::m_velocity); + + py::class_ mtracker(m, "MTracker"); + mtracker.def(py::init(&BaseTracker::CreateTracker)); + mtracker.def("Update", &BaseTracker::UpdateMat); + mtracker.def("GetTracks", &BaseTracker::GetTracksCopy); + + py::enum_(mtracker, "DistType") + .value("DistCenters", tracking::DistType::DistCenters) + .value("DistRects", tracking::DistType::DistRects) + .value("DistJaccard", tracking::DistType::DistJaccard) + .export_values(); + + py::enum_(mtracker, "FilterGoal") + .value("FilterCenter", tracking::FilterGoal::FilterCenter) + .value("FilterRect", tracking::FilterGoal::FilterRect) + .export_values(); + + py::enum_(mtracker, "KalmanType") + .value("KalmanLinear", tracking::KalmanType::KalmanLinear) + .value("KalmanUnscented", tracking::KalmanType::KalmanUnscented) + .value("KalmanAugmentedUnscented", tracking::KalmanType::KalmanAugmentedUnscented) + .export_values(); + + py::enum_(mtracker, "MatchType") + .value("MatchHungrian", tracking::MatchType::MatchHungrian) + .value("MatchLAPJV", tracking::MatchType::MatchLAPJV) + .export_values(); + + py::enum_(mtracker, "LostTrackType") + .value("TrackNone", tracking::LostTrackType::TrackNone) + .value("TrackKCF", tracking::LostTrackType::TrackKCF) + .value("TrackCSRT", tracking::LostTrackType::TrackCSRT) + .export_values(); + + 
py::class_ base_detector(m, "BaseDetector"); + base_detector.def(py::init(&BaseDetector::CreateDetectorKV)); + base_detector.def("Init", &BaseDetector::Init); + base_detector.def("Detect", &BaseDetector::DetectMat); + base_detector.def("ResetModel", &BaseDetector::ResetModel); + base_detector.def("CanGrayProcessing", &BaseDetector::CanGrayProcessing); + base_detector.def("SetMinObjectSize", &BaseDetector::SetMinObjectSize); + base_detector.def("GetDetects", &BaseDetector::GetDetects); + base_detector.def("CalcMotionMap", &BaseDetector::CalcMotionMap); + + py::class_ mdetector(m, "MotionDetector"); + mdetector.def(py::init()); + mdetector.def("Init", &MotionDetector::Init); + mdetector.def("Detect", &MotionDetector::Detect); + mdetector.def("ResetModel", &MotionDetector::ResetModel); + mdetector.def("CanGrayProcessing", &MotionDetector::CanGrayProcessing); + mdetector.def("SetMinObjectSize", &MotionDetector::SetMinObjectSize); + mdetector.def("GetDetects", &MotionDetector::GetDetects); + mdetector.def("CalcMotionMap", &MotionDetector::CalcMotionMap); + + py::enum_(mdetector, "BGFG_ALGS") + .value("VIBE", BackgroundSubtract::BGFG_ALGS::ALG_VIBE) + .value("MOG", BackgroundSubtract::BGFG_ALGS::ALG_MOG) + .value("GMG", BackgroundSubtract::BGFG_ALGS::ALG_GMG) + .value("CNT", BackgroundSubtract::BGFG_ALGS::ALG_CNT) + .value("MOG2", BackgroundSubtract::BGFG_ALGS::ALG_MOG2) + .export_values(); + + py::enum_(base_detector, "Detectors") + .value("VIBE", tracking::Detectors::Motion_VIBE) + .value("MOG", tracking::Detectors::Motion_MOG) + .value("GMG", tracking::Detectors::Motion_GMG) + .value("CNT", tracking::Detectors::Motion_CNT) + .value("MOG2", tracking::Detectors::Motion_MOG2) + .value("Yolo_TensorRT", tracking::Detectors::ONNX_TensorRT) + .value("DNN_OCV", tracking::Detectors::DNN_OCV) + .export_values(); + + m.def("read_image", &read_image, "A function that read an image", + py::arg("image")); + + m.def("show_image", &show_image, "A function that show an image", + 
py::arg("image")); + + m.def("passthru", &passthru, "Passthru function", py::arg("image")); + m.def("clone", &cloneimg, "Clone function", py::arg("image")); + +#define VERSION_INFO "1.0.1" +#ifdef VERSION_INFO + m.attr("__version__") = VERSION_INFO; +#else + m.attr("__version__") = "dev"; +#endif +} diff --git a/thirdparty/CMakeLists.txt b/thirdparty/CMakeLists.txt index aad340af4..ddef494c4 100644 --- a/thirdparty/CMakeLists.txt +++ b/thirdparty/CMakeLists.txt @@ -1 +1,7 @@ add_subdirectory(inih) +#add_subdirectory(Circular_Code) +add_subdirectory(spdlog) + +if (USE_CLIP) + add_subdirectory(ruclip) +endif(USE_CLIP) diff --git a/thirdparty/Circular_Code/CircHelper.h b/thirdparty/Circular_Code/CircHelper.h new file mode 100644 index 000000000..4e23696c1 --- /dev/null +++ b/thirdparty/Circular_Code/CircHelper.h @@ -0,0 +1,62 @@ +// ========================================================================== +// Copyright (C) 2011 Lior Kogan (koganlior1@gmail.com) +// ========================================================================== + +#pragma once + +// ========================================================================== +static const double _2Pi= 6.2831853071795864769252867665590057683943387987502116419498891846156328125724179972560696; + +// ========================================================================== +// square (x*x) +template +T Sqr(const T& x) +{ + return x*x; +} + +// ========================================================================== +// Floating-point modulo +// The result (the remainder) has the same sign as the divisor. +// Similar to matlab's mod(); Not similar to fmod() - Mod(-3,4)= 1 fmod(-3,4)= -3 +template +T Mod(T x, T y) +{ + static_assert(!std::numeric_limits::is_exact , "Mod: floating-point type expected"); + + if (0. 
== y) + return x; + + double m= x - y * floor(x/y); + + // handle boundary cases resulting from floating-point limited accuracy: + + if (y > 0) // modulo range: [0..y) + { + if (m>=y) // Mod(-1e-16 , 360. ): m= 360. + return 0; + + if (m<0 ) + { + if (y+m == y) + return 0 ; // just in case... + else + return y+m; // Mod(106.81415022205296 , _TWO_PI ): m= -1.421e-14 + } + } + else // modulo range: (y..0] + { + if (m<=y) // Mod(1e-16 , -360. ): m= -360. + return 0; + + if (m>0 ) + { + if (y+m == y) + return 0 ; // just in case... + else + return y+m; // Mod(-106.81415022205296, -_TWO_PI): m= 1.421e-14 + } + } + + return m; +} diff --git a/thirdparty/Circular_Code/CircStat.h b/thirdparty/Circular_Code/CircStat.h new file mode 100644 index 000000000..bb413c29d --- /dev/null +++ b/thirdparty/Circular_Code/CircStat.h @@ -0,0 +1,427 @@ +// ========================================================================== +// Copyright (C) 2011 Lior Kogan (koganlior1@gmail.com) +// ========================================================================== +// classes defined here: +// CircAverage - calculate average set of circular-values +// WeightedCircAverage - calculate weighted-average set of circular-values +// CAvrgSampledCircSignal - estimate the average of a sampled continuous-time circular signal, using circular linear interpolation +// CircMedian - calculate median set of circular-values +// ========================================================================== + +#pragma once + +#include +#include +#include +#include +#include +#include // sort + +// ========================================================================== +// calculate average set of circular-values +// return set of average values +// T is a circular value type defined with the CircValTypeDef macro +template +std::set> CircAverage(std::vector> const& A) +{ + std::set> MinAvrgVals ; // results set + + // ---------------------------------------------- + // all vars: UnsignedDegRange [0,360) + 
double fSum = 0.; // of all elements of A + double fSumSqr = 0.; // of all elements of A + double fMinSumSqrDiff ; // minimal sum of squares of differences + std::vector LowerAngles ; // ascending [ 0,180) + std::vector UpperAngles ; // descending (360,180) + double fTestAvrg ; + + // ---------------------------------------------- + // local functions - implemented as lambdas + // ---------------------------------------------- + + // calc sum(dist(180, Bi)^2) - all values are in set B + // dist(180,Bi)= |180-Bi| + // sum(dist(x, Bi)^2) = sum((180-Bi)^2) = sum(180^2-2*180*Bi + Bi^2) = 180^2*A.size - 360*sum(Ai) + sum(Ai^2) + auto SumSqr = [&]() -> double + { + return 32400.*A.size() - 360.*fSum + fSumSqr; + }; + + // calc sum(dist(x, Ai)^2). A=B+C; set D is empty + // dist(x,Bi)= |x-Bi| + // dist(x,Ci)= 360-(Ci-x) + // sum(dist(x, Bi)^2)= sum( (x-Bi) ^2)= sum( Bi^2 + x^2 - 2*Bi*x) + // sum(dist(x, Ci)^2)= sum((360-(Ci-x))^2)= sum(360^2 + Ci^2 + x^2 - 2*360*Ci + 2*360*x - 2*Ci*x) + // sum(dist(x, Bi)^2) + sum(dist(x, Ci)^2) = nCountC*360^2 + sum(Ai^2) + nCountA*x^2 - 2*360*sum(Ci) + nCountC*2*360*x - 2*x*sum(Ai) + auto SumSqrC= [&](double x, size_t nCountC, double fSumC) -> double + { + return x*(A.size()*x - 2*fSum) + fSumSqr - 2*360.*fSumC + nCountC*( 2*360.*x + 360.*360.); + }; + + // calc sum(dist(x, Ai)^2). 
A=B+D; set C is empty + // dist(x,Bi)= |x-Bi| + // dist(x,Di)= 360-(x-Di) + // sum(dist(x,Bi)^2)= sum( (x-Bi)^2)= sum( Bi^2 + x^2 - 2*Bi*x) + // sum(dist(x,Di)^2)= sum(360-(x-Di)^2)= sum(360^2 + Di^2 + x^2 + 2*360*Di - 2*360*x - 2*Di*x) + // sum(dist(x, Bi)^2) + sum(dist(x, Di)^2) = nCountD*360^2 + sum(Ai^2) + nCountA*x^2 + 2*360*sum(Di) - nCountD*2*360*x - 2*x*sum(Ai) + auto SumSqrD= [&](double x, size_t nCountD, double fSumD) -> double + { + return x*(A.size()*x - 2*fSum) + fSumSqr + 2*360.*fSumD + nCountD*(-2*360.*x + 360.*360.); + }; + + // update MinAvrgAngles if lower/equal fMinSumSqrDiff found + auto TestSum= [&](double fTestAvrg, double fTestSumDiffSqr) -> void + { + if (fTestSumDiffSqr < fMinSumSqrDiff) + { + MinAvrgVals.clear(); + MinAvrgVals.insert(CircVal(fTestAvrg)); + fMinSumSqrDiff= fTestSumDiffSqr; + } + else if (fTestSumDiffSqr == fMinSumSqrDiff) + MinAvrgVals.insert(CircVal(fTestAvrg)); + }; + + // ---------------------------------------------- + for (const auto& a : A) + { + double v= CircVal(a); // convert to [0.360) + fSum += v ; + fSumSqr+= Sqr(v); + if (v < 180.) LowerAngles.push_back(v); + else if (v > 180.) 
UpperAngles.push_back(v); + } + + sort(LowerAngles.begin(), LowerAngles.end() ); // ascending [ 0,180) + sort(UpperAngles.begin(), UpperAngles.end(), std::greater()); // descending (360,180) + + // ---------------------------------------------- + // start with avrg= 180, sets c,d are empty + // ---------------------------------------------- + MinAvrgVals.clear(); + MinAvrgVals.insert(CircVal(180.)); + fMinSumSqrDiff= SumSqr(); + + // ---------------------------------------------- + // average in (180,360), set D: values in range [0,avrg-180) + // ---------------------------------------------- + double fLowerBound= 0.; // of current sector + double fSumD = 0.; // of elements of set D + + auto iter= LowerAngles.begin(); + for (size_t d= 0; d < LowerAngles.size(); ++d) + { + // 1st iteration : average in ( 180, lowerAngles[0]+180] + // next iterations: average in (lowerAngles[i-1]+180, lowerAngles[i]+180] + // set D : lowerAngles[0..d] + + fTestAvrg= (fSum + 360.*d)/A.size(); // average for sector, that minimizes SumDiffSqr + + if ((fTestAvrg > fLowerBound+180.) && (fTestAvrg <= *iter+180.)) // if fTestAvrg is within sector + TestSum(fTestAvrg, SumSqrD(fTestAvrg, d, fSumD)); // check if fTestAvrg generates lower SumSqr + + fLowerBound= *iter ; + fSumD += fLowerBound; + ++iter; + } + + // last sector : average in [lowerAngles[lastIdx]+180, 360) + fTestAvrg= (fSum + 360.*LowerAngles.size())/A.size(); // average for sector, that minimizes SumDiffSqr + + if ((fTestAvrg < 360.) 
&& (fTestAvrg > fLowerBound)) // if fTestAvrg is within sector + TestSum(fTestAvrg, SumSqrD(fTestAvrg, LowerAngles.size(), fSumD)); // check if fTestAvrg generates lower SumSqr + + // ---------------------------------------------- + // average in [0,180); set C: values in range (avrg+180, 360) + // ---------------------------------------------- + double fUpperBound= 360.; // of current sector + double fSumC = 0.; // of elements of set C + + iter= UpperAngles.begin(); + for (size_t c= 0; c < UpperAngles.size(); ++c) + { + // 1st iteration : average in [upperAngles[0]-180, 360 ) + // next iterations: average in [upperAngles[i]-180, upperAngles[i-1]-180) + // set C : upperAngles[0..c] (descendingly sorted) + + fTestAvrg= (fSum - 360.*c)/A.size(); // average for sector, that minimizes SumDiffSqr + + if ((fTestAvrg >= *iter-180.) && (fTestAvrg < fUpperBound-180.)) // if fTestAvrg is within sector + TestSum(fTestAvrg, SumSqrC(fTestAvrg, c, fSumC)); // check if fTestAvrg generates lower SumSqr + + fUpperBound= *iter ; + fSumC += fUpperBound; + ++iter; + } + + // last sector : average in [0, upperAngles[lastIdx]-180) + fTestAvrg= (fSum - 360.*UpperAngles.size())/A.size(); // average for sector, that minimizes SumDiffSqr + + if ((fTestAvrg >= 0.) 
&& (fTestAvrg < fUpperBound)) // if fTestAvrg is within sector + TestSum(fTestAvrg, SumSqrC(fTestAvrg, UpperAngles.size(), fSumC)); // check if fTestAvrg generates lower SumSqr + + // ---------------------------------------------- + return MinAvrgVals; +} + +// ========================================================================== +// calculate weighted-average set of circular-values +// return set of average values +// T is a circular value type defined with the CircValTypeDef macro +template +std::set> WeightedCircAverage(std::vector,double>> const& A) // vector +{ + std::set> MinAvrgVals ; // results set + + // ---------------------------------------------- + // all vars: UnsignedDegRange [0,360) + double fASumW = 0.; // sum(Wi ) of all elements of A + double fASumWA = 0.; // sum(Wi*Ai ) of all elements of A + double fASumWA2 = 0.; // sum(Wi*Ai^2) of all elements of A + double fMinSumSqrDiff ; // minimal sum of squares of differences + std::vector> LowerAngles; // ascending [ 0,180) + std::vector> UpperAngles; // descending (360,180) + double fTestAvrg ; + + // ---------------------------------------------- + // local functions - implemented as lambdas + // ---------------------------------------------- + + // calc sum(Wi*dist(180, Bi)^2) - all values are in set B + // dist(180,Bi)= |180-Bi| + // sum(Wi*dist(x, Bi)^2) = sum(Wi*(180-Bi)^2) = sum(Wi*(180^2-2*180*Bi + Bi^2)) = 180^2*fSumW - 360*sum(Wi*Ai) + sum(Wi*Ai^2) + auto SumSqr = [&]() -> double + { + return 32400.*fASumW - 360.*fASumWA + fASumWA2; + }; + + // calc sum(Wi*dist(x, Ai)^2). 
A=B+C; set D is empty + // dist(x,Bi)= |x-Bi| + // dist(x,Ci)= 360-(Ci-x) + // sum(Wi*dist(x,Bi)^2)= sum(Wi*( (x-Bi) ^2))= sum(Wi*( Bi^2 + x^2 - 2*Bi*x)) + + // sum(Wi*dist(x,Ci)^2)= sum(Wi*((360-(Ci-x))^2))= sum(Wi*(360^2 + Ci^2 + x^2 - 2*360*Ci + 2*360*x - 2*Ci*x)) + // ========================================================== + // sum(Wi*( Ai^2 + x^2 - 2*Ai*x)) + auto SumSqrC= [&](double x , + double fCSumW , // sum(Wi ) of all elements of C + double fCSumWC ) -> double // sum(Wi*Ci) of all elements of C + { + return fASumWA2 + x*x*fASumW -2*x*fASumWA - 720*fCSumWC + (129600+720*x)*fCSumW; + }; + + // calc sum(Wi*dist(x, Ai)^2). A=B+D; set C is empty + // dist(x,Bi)= |x-Bi| + // dist(x,Di)= 360-(x-Di) + // sum(Wi*dist(x,Bi)^2)= sum(Wi*( (x-Bi) ^2))= sum(Wi*( Bi^2 + x^2 - 2*Bi*x)) + // sum(Wi*dist(x,Di)^2)= sum(Wi*((360-(x-Di))^2))= sum(Wi*(360^2 + Di^2 + x^2 + 2*360*Di - 2*360*x - 2*Di*x)) + // ========================================================== + // sum(Wi*( Ai^2 + x^2 - 2*Ai*x)) + auto SumSqrD= [&](double x , + double fDSumW , // sum(Wi ) of all elements of D + double fDSumWD ) -> double // sum(Wi*Di) of all elements of D + { + return fASumWA2 + x*x*fASumW -2*x*fASumWA + 720*fDSumWD + (129600-720*x)*fDSumW; + }; + + // update MinAvrgAngles if lower/equal fMinSumSqrDiff found + auto TestSum= [&](double fTestAvrg, double fTestSumDiffSqr) -> void + { + if (fTestSumDiffSqr < fMinSumSqrDiff) + { + MinAvrgVals.clear(); + MinAvrgVals.insert(CircVal(fTestAvrg)); + fMinSumSqrDiff= fTestSumDiffSqr; + } + else if (fTestSumDiffSqr == fMinSumSqrDiff) + MinAvrgVals.insert(CircVal(fTestAvrg)); + }; + + // ---------------------------------------------- + for (const auto& a : A) + { + double v= CircVal(a.first); // convert to [0.360) + double w= a.second; // weight + fASumW += w ; + fASumWA+= w*v ; + fASumWA2= w*v*v; + + if (v < 180.) LowerAngles.push_back(std::pair(v,w)); + else if (v > 180.) 
UpperAngles.push_back(std::pair(v,w)); + } + + sort(LowerAngles.begin(), LowerAngles.end() ); // ascending [ 0,180) + sort(UpperAngles.begin(), UpperAngles.end(), std::greater>()); // descending (360,180) + + // ---------------------------------------------- + // start with avrg= 180, sets c,d are empty + // ---------------------------------------------- + MinAvrgVals.clear(); + MinAvrgVals.insert(CircVal(180.)); + fMinSumSqrDiff= SumSqr(); + + // ---------------------------------------------- + // average in (180,360), set D: values in range [0,avrg-180) + // ---------------------------------------------- + double fLowerBound= 0.; // of current sector + double fDSumW = 0.; // sum(Wi ) of all elements of D + double fDSumWD = 0.; // sum(Wi*Di) of all elements of D + + auto iter= LowerAngles.begin(); + for (size_t d= 0; d < LowerAngles.size(); ++d) + { + // 1st iteration : average in ( 180, lowerAngles[0]+180] + // next iterations: average in (lowerAngles[i-1]+180, lowerAngles[i]+180] + // set D : lowerAngles[0..d] + + fTestAvrg= (fASumWA + 360.*fDSumW)/fASumW; // average for sector, that minimizes SumDiffSqr + + if ((fTestAvrg > fLowerBound+180.) && (fTestAvrg <= (*iter).first+180.)) // if fTestAvrg is within sector + TestSum(fTestAvrg, SumSqrD(fTestAvrg, fDSumW, fDSumWD)); // check if fTestAvrg generates lower SumSqr + + fLowerBound= (*iter).first ; + fDSumW += (*iter).second ; + fDSumWD += (*iter).second * (*iter).first; + ++iter; + } + + // last sector : average in [lowerAngles[lastIdx]+180, 360) + fTestAvrg= (fASumWA + 360.*fDSumW)/fASumW; // average for sector, that minimizes SumDiffSqr + + if ((fTestAvrg < 360.) 
&& (fTestAvrg > fLowerBound)) // if fTestAvrg is within sector + TestSum(fTestAvrg, SumSqrD(fTestAvrg, fDSumW, fDSumWD)); // check if fTestAvrg generates lower SumSqr + + // ---------------------------------------------- + // average in [0,180); set C: values in range (avrg+180, 360) + // ---------------------------------------------- + double fUpperBound= 360.; // of current sector + double fCSumW = 0.; // sum(Wi ) of all elements of C + double fCSumWC = 0.; // sum(Wi*Ci) of all elements of C + + iter= UpperAngles.begin(); + for (size_t c= 0; c < UpperAngles.size(); ++c) + { + // 1st iteration : average in [upperAngles[0]-180, 360 ) + // next iterations: average in [upperAngles[i]-180, upperAngles[i-1]-180) + // set C : upperAngles[0..c] (descendingly sorted) + + fTestAvrg= (fASumWA - 360.*fCSumW)/fASumW; // average for sector, that minimizes SumDiffSqr + + if ((fTestAvrg >= (*iter).first-180.) && (fTestAvrg < fUpperBound-180.)) // if fTestAvrg is within sector + TestSum(fTestAvrg, SumSqrC(fTestAvrg, fCSumW, fCSumWC)); // check if fTestAvrg generates lower SumSqr + + fUpperBound= (*iter).first ; + fCSumW += (*iter).second ; + fCSumWC += (*iter).second * (*iter).first; + ++iter; + } + + // last sector : average in [0, upperAngles[lastIdx]-180) + fTestAvrg= (fASumWA - 360.*fCSumW)/fASumW; // average for sector, that minimizes SumDiffSqr + + if ((fTestAvrg >= 0.) 
&& (fTestAvrg < fUpperBound)) // if fTestAvrg is within sector + TestSum(fTestAvrg, SumSqrC(fTestAvrg, fCSumW, fCSumWC)); // check if fTestAvrg generates lower SumSqr + + // ---------------------------------------------- + return MinAvrgVals; +} + +// ========================================================================== +// estimate the average of a sampled continuous-time circular signal, using circular linear interpolation +// T is a circular value type defined with the CircValTypeDef macro +template +class CAvrgSampledCircSignal +{ + size_t m_nSamples ; + CircVal m_fPrevVal ; // previous value + double m_fPrevTime; // previous time + std::vector, double>> m_Intervals; // vector of (avrg,weight) for each interval + +public: + CAvrgSampledCircSignal() + { + m_nSamples= 0; + } + + void AddMeasurement(CircVal fVal, double fTime) + { + if (m_nSamples) + { + assert(fTime > m_fPrevTime); + + double fIntervalAvrg = CircVal::Wrap((double)m_fPrevVal + CircVal::Sdist(m_fPrevVal, fVal)/2.); + double fIntervalWeight= fTime-m_fPrevTime ; + m_Intervals.push_back(std::make_pair(fIntervalAvrg, fIntervalWeight)); + } + + m_fPrevVal = fVal ; + m_fPrevTime= fTime; + ++m_nSamples; + } + + // calculate the weighted average for all intervals + bool GetAvrg(CircVal& fAvrg) + { + switch (m_nSamples) + { + case 0: + fAvrg= CircVal::GetZ(); + return false; + + case 1: + fAvrg= m_fPrevVal; + return true; + + default: + fAvrg= *WeightedCircAverage(m_Intervals).begin(); + return true; + } + } +}; + +// ========================================================================== +// calculate median set of circular-values +// return set of median values +// T is a circular value type defined with the CircValTypeDef macro +template +std::set> CircMedian(std::vector> const& A) +{ + std::set> X; // results set + + // ---------------------------------------------- + std::set> B; + if (A.size() % 2 == 0) // even number of values + { + auto S= A; + std::sort(S.begin(), S.end()); // A, sorted + + 
for (size_t m= 0; m < S.size(); ++m) + { + size_t n= m+1; if (n==S.size()) n= 0; + double d= CircVal::Sdist(S[m], S[n]); + + // insert average set of each two circular-consecutive values + B.insert(((double)S[m] + d/2.)); + if (d == -CircVal::GetR()/2.) + B.insert(((double)S[n] + d/2.)); + } + } + else // odd number of values + for (size_t m= 0; m < A.size(); ++m) + B.insert(A[m]); // convert vector to set - remove duplicates + + // ---------------------------------------------- + double fMinSum= std::numeric_limits::max(); + + for (const auto& b : B) + { + double fSum= 0.; // sum(|Sdist(a, b)|) + for (const auto& a : A) + fSum+= std::abs(CircVal::Sdist(b, a)); + + if (fSum==fMinSum) X.insert(b); + else if (fSum< fMinSum) { X.clear(); X.insert(b); fMinSum= fSum; } + } + + // ---------------------------------------------- + return X; +} diff --git a/thirdparty/Circular_Code/CircVal.h b/thirdparty/Circular_Code/CircVal.h new file mode 100644 index 000000000..2cac9efb7 --- /dev/null +++ b/thirdparty/Circular_Code/CircVal.h @@ -0,0 +1,346 @@ +// ========================================================================== +// Copyright (C) 2011 Lior Kogan (koganlior1@gmail.com) +// ========================================================================== +// classes defined here: +// CircVal - circular-value +// CircValTester - tester for CircVal class +// ========================================================================== + +#pragma once + +//#define _USE_MATH_DEFINES // M_PI +#include +#include +#include +#include + +#include "FPCompare.h" +#include "CircHelper.h" + +// ========================================================================== +// macro for defining a circular-value type +#define CircValTypeDef(_Name, _L, _H, _Z) \ + struct _Name \ + { \ + static const double L ; /* range: [L,H) */ \ + static const double H ; \ + static const double Z ; /* zero-value */ \ + static const double R ; /* range */ \ + static const double R_2; /* half range */ \ + }; 
\ + \ + const double _Name::L = (_L) ; \ + const double _Name::H = (_H) ; \ + const double _Name::Z = (_Z) ; \ + const double _Name::R = ((_H)-(_L)) ; \ + const double _Name::R_2= ((_H)-(_L))/2.; + +// ========================================================================== +// basic circular-value types +CircValTypeDef(SignedDegRange , -180., 180., 0. ) +CircValTypeDef(UnsignedDegRange, 0., 360., 0. ) +CircValTypeDef(SignedRadRange , -M_PI, M_PI, 0. ) +CircValTypeDef(UnsignedRadRange, 0., 2*M_PI, 0. ) + +// some additional circular-value types - for testing +CircValTypeDef(TestRange0 , 3., 10., 5.3) +CircValTypeDef(TestRange1 , -3., 10., -3.0) +CircValTypeDef(TestRange2 , -3., 10., 9.9) +CircValTypeDef(TestRange3 , -13., -3., -5.3) + + +// ========================================================================== +// circular-value +// Type should be defined using the CircValTypeDef macro +template +class CircVal +{ + double val; // actual value [L,H) + + // --------------------------------------------- +public: + static double GetL() { return Type::L; } + static double GetH() { return Type::H; } + static double GetZ() { return Type::Z; } + static double GetR() { return Type::R; } + + // --------------------------------------------- + static bool IsInRange(double r) + { + return (r>=Type::L && r=Type::L) + { + if (r< Type::H ) return r ; + else if (r< Type::H+Type::R) return r-Type::R; + } + else + if (r>=Type::L-Type::R) return r+Type::R; + + // general case + return Mod(r - Type::L, Type::R) + Type::L; + } + + // --------------------------------------------- + // the length of shortest directed walk from c1 to c2 + // return value is in [-R/2, R/2) + static double Sdist(const CircVal& c1, const CircVal& c2) + { + double d= c2.val-c1.val; + if (d < -Type::R_2) return d + Type::R; + if (d >= Type::R_2) return d - Type::R; + return d ; + } + + // the length of the shortest increasing walk from c1 to c2 + // return value is in [0, R) + static double Pdist(const 
CircVal& c1, const CircVal& c2) + { + return c2.val>=c1.val ? c2.val-c1.val : Type::R-c1.val+c2.val; + } + + // --------------------------------------------- + CircVal() + { + val= Type::Z; + } + + // construction based on a floating-point value + // should only be called when the floating-point is a value in the range! + // to translate a floating-point such that 0 is mapped to Type::Z, call ToC() + CircVal(double r) + { + val= Wrap(r); + } + + // construction based on a circular value of the same type + CircVal(const CircVal& c) + { + val= c.val; + } + + // construction based on a circular value of another type + // sample use: CircVal c= c2; -or- CircVal c(c2); + template + CircVal(const CircVal2& c2) + { + double val2= Pdist(CircVal2::GetZ(), c2); + val= Wrap(val2*Type::R / CircVal2::GetR() + Type::Z); + } + + // --------------------------------------------- + operator double() const + { + return val; + } + + // --------------------------------------------- + // assignment from a floating-point value + // should only be called when the floating-point is a value in the range! + // to translate a floating-point such that 0 is mapped to Type::Z, call ToC() + CircVal& operator= (double r) + { + val= Wrap(r); + return *this; + } + + // assignment from another type of circular value + template + CircVal& operator= (const CircVal2& c2) + { + double val2= c2.Pdist(c2.GetZ(), c2); + val= Wrap(val2*Type::R/c2.GetR() + Type::Z); + return *this; + } + + // --------------------------------------------- + // convert circular-value c to real-value [L-Z,H-Z). 
.Z is converted to 0 + friend double ToR(const CircVal& c) { return c.val - Type::Z; } + + // --------------------------------------------- + const CircVal operator+ ( ) const { return val; } + const CircVal operator- ( ) const { return Wrap(Type::Z-Sdist(Type::Z,val)); } // return negative circular value + const CircVal operator~ ( ) const { return Wrap(val+Type::R_2 ); } // return opposite circular-value + + const CircVal operator+ (const CircVal& c) const { return Wrap(val+c.val - Type::Z); } + const CircVal operator- (const CircVal& c) const { return Wrap(val-c.val + Type::Z); } + const CircVal operator* (const double& r) const { return Wrap((val-Type::Z)*r + Type::Z); } + const CircVal operator/ (const double& r) const { return Wrap((val-Type::Z)/r + Type::Z); } + + CircVal& operator+=(const CircVal& c) { val= Wrap(val+c.val - Type::Z); return *this; } + CircVal& operator-=(const CircVal& c) { val= Wrap(val-c.val + Type::Z); return *this; } + CircVal& operator*=(const double& r) { val= Wrap((val-Type::Z)*r + Type::Z); return *this; } + CircVal& operator/=(const double& r) { val= Wrap((val-Type::Z)/r + Type::Z); return *this; } + + CircVal& operator =(const CircVal& c) { val= c.val ; return *this; } + + bool operator==(const CircVal& c) const { return val == c.val; } + bool operator!=(const CircVal& c) const { return val != c.val; } + + // note that two circular values can be compared in several different ways. + // check carefully if this is really what you need! 
+ bool operator> (const CircVal& c) const { return val > c.val; } + bool operator>=(const CircVal& c) const { return val >= c.val; } + bool operator< (const CircVal& c) const { return val < c.val; } + bool operator<=(const CircVal& c) const { return val <= c.val; } +}; + +// ========================================================================== +template static double sin (const CircVal& c) { return std::sin(ToR(CircVal(c))); } +template static double cos (const CircVal& c) { return std::cos(ToR(CircVal(c))); } +template static double tan (const CircVal& c) { return std::tan(ToR(CircVal(c))); } +template static CircVal asin (double r ) { return CircVal(std::asin (r )); } // calls copy ctor CircVal(CircVal) +template static CircVal acos (double r ) { return CircVal(std::acos (r )); } // calls copy ctor CircVal(CircVal) +template static CircVal atan (double r ) { return CircVal(std::atan (r )); } // calls copy ctor CircVal(CircVal) +template static CircVal atan2(double r1, double r2 ) { return CircVal(std::atan2(r1,r2)); } // calls copy ctor CircVal(CircVal) +template static CircVal ToC (double r ) { return CircVal::Wrap(r + Type::Z); } // convert real-value r to circular-value in the range. 
0 is converted to Type.Z + +// ========================================================================== +// tester for CircVal class +template +class CircValTester +{ + // check if 2 circular-values are almost equal + static bool IsCircAlmostEq(const CircVal& _f, const CircVal& _g) + { + double f= _f; + double g= _g; + + if (::IsAlmostEq(f, g)) + return true; + + if (f < g) + return IsAlmostEq(f, g - Type::R); + else + return IsAlmostEq(f, g + Type::R); + } + + // assert that 2 circular-values are almost equal + static void AssertCircAlmostEq(const CircVal& f, const CircVal& g) + { + assert(IsCircAlmostEq(f, g)); + } + + static void Test() + { + CircVal ZeroVal= Type::Z; + + // -------------------------------------------------------- + AssertCircAlmostEq(ZeroVal , -ZeroVal); + + AssertAlmostEq (sin(ZeroVal) , 0. ); + AssertAlmostEq (cos(ZeroVal) , 1. ); + AssertAlmostEq (tan(ZeroVal) , 0. ); + + AssertCircAlmostEq(asin(0.), ZeroVal ); + AssertCircAlmostEq(acos(1.), ZeroVal ); + AssertCircAlmostEq(atan(0.), ZeroVal ); + + AssertCircAlmostEq(ToC(0) , ZeroVal ); + AssertAlmostEq (ToR(ZeroVal) , 0. ); + + // -------------------------------------------------------- + std::default_random_engine rand_engine ; + std::uniform_real_distribution c_uni_dist(Type::L, Type::H); + std::uniform_real_distribution r_uni_dist(0. , 1000. ); // for multiplication,division by real-value + std::uniform_real_distribution t_uni_dist(-1. , 1. 
); // for inverse-trigonometric functions + + std::random_device rnd_device; + rand_engine.seed(rnd_device()); // reseed engine + + for (unsigned i= 10000; i--;) + { + CircVal c1(c_uni_dist(rand_engine)); // random circular value + CircVal c2(c_uni_dist(rand_engine)); // random circular value + CircVal c3(c_uni_dist(rand_engine)); // random circular value + double r (r_uni_dist(rand_engine)); // random real value [ 0, 1000) - for testing *,/ operators + double a1(t_uni_dist(rand_engine)); // random real value [ -1, 1) - for testing asin,acos + double a2(t_uni_dist(rand_engine)); // random real value [-1000, 1000) - for testing atan + + assert (c1 == CircVal((double)c1) ); + + AssertCircAlmostEq(+c1 , c1 ); // +c = c + AssertCircAlmostEq(-(-c1) , c1 ); // -(-c) = c + AssertCircAlmostEq(c1 + c2 , c2 + c1 ); // c1+c2 = c2+c1 + AssertCircAlmostEq(c1 + (c2 +c3) , (c1 + c2) + c3 ); // c1+(c2+c3) = (c1+c2)+c3 + AssertCircAlmostEq(c1 + -c1 , ZeroVal ); // c+(-c) = z + AssertCircAlmostEq(c1 + ZeroVal , c1 ); // c+z = c + + AssertCircAlmostEq(c1 - c1 , ZeroVal ); // c-c = z + AssertCircAlmostEq(c1 - ZeroVal , c1 ); // c-z = c + AssertCircAlmostEq(ZeroVal - c1 , -c1 ); // z-c = -c + AssertCircAlmostEq(c1 - c2 , -(c2 - c1) ); // c1-c2 = -(c2-c1) + + AssertCircAlmostEq(c1 * 0. , ZeroVal ); // c*0 = 0 + AssertCircAlmostEq(c1 * 1. , c1 ); // c*1 = c + AssertCircAlmostEq(c1 / 1. , c1 ); // c/1 = c + + AssertCircAlmostEq((c1 * (1./(r+1.))) / (1./(r+1.)) , c1 ); // (c*r)/r = c, 0=1 + + // -------------------------------------------------------- + AssertCircAlmostEq(~(~c1) , c1 ); // opposite(opposite(c) = c + AssertCircAlmostEq(c1 - (~c1) , ToC(Type::R/2.) 
); // c - ~c = r/2+z + + // -------------------------------------------------------- + AssertAlmostEq (sin(ToR(CircVal(c1))), sin(c1) ); // member func sin + AssertAlmostEq (cos(ToR(CircVal(c1))), cos(c1) ); // member func cos + AssertAlmostEq (tan(ToR(CircVal(c1))), tan(c1) ); // member func tan + + AssertAlmostEq (sin(-c1) , -sin(c1) ); // sin(-c) = -sin(c) + AssertAlmostEq (cos(-c1) , cos(c1) ); // cos(-c) = cos(c) + AssertAlmostEq (tan(-c1) , -tan(c1) ); // tan(-c1) = -tan(c) the error may be large + + AssertAlmostEq (sin(c1+ToC(Type::R/4.)) , cos(c1) ); // sin(c+r/4) = cos(c) + AssertAlmostEq (cos(c1+ToC(Type::R/4.)) , -sin(c1) ); // cos(c+r/4) = -sin(c) + AssertAlmostEq (sin(c1+ToC(Type::R/2.)) , -sin(c1) ); // sin(c+r/2) = -sin(c) + AssertAlmostEq (cos(c1+ToC(Type::R/2.)) , -cos(c1) ); // cos(c+r/2) = -cos(c) + + AssertAlmostEq (Sqr(sin(c1))+Sqr(cos(c1)) , 1. ); // sin(x)^2+cos(x)^2 = 1 + + AssertAlmostEq (sin(c1)/cos(c1) , tan(c1) ); // sin(x)/cos(x) = tan(x) + + // -------------------------------------------------------- + AssertCircAlmostEq(asin(a1) , CircVal(asin(a1))); // member func asin + AssertCircAlmostEq(acos(a1) , CircVal(acos(a1))); // member func acos + AssertCircAlmostEq(atan(a2) , CircVal(atan(a2))); // member func atan + + AssertCircAlmostEq(asin(a1) + asin(-a1) , ZeroVal ); // asin(r)+asin(-r) = z + AssertCircAlmostEq(acos(a1) + acos(-a1) , ToC(Type::R/2.) ); // acos(r)+acos(-r) = r/2+z + AssertCircAlmostEq(asin(a1) + acos( a1) , ToC(Type::R/4.) 
); // asin(r)+acos( r) = r/4+z + AssertCircAlmostEq(atan(a2) + atan(-a2) , ZeroVal ); // atan(r)+atan(-r) = z + + // -------------------------------------------------------- + assert (c1 > c2 == (c2 < c1) ); // c1> c2 <==> c2< c1 + assert (c1 >= c2 == (c2 <= c1) ); // c1>=c2 <==> c2<=c1 + assert (c1 >= c2 == ( (c1 > c2) || (c1 == c2)) ); // c1>=c2 <==> (c1> c2)|| (c1==c2) + assert (c1 <= c2 == ( (c1 < c2) || (c1 == c2)) ); // c1<=c2 <==> (c1< c2)|| (c1==c2) + assert (c1 > c2 == (!(c1 == c2) && !(c1 < c2)) ); // c1> c2 <==> !(c1==c2)&&!(c1< c2) + assert (c1 == c2 == (!(c1 > c2) && !(c1 < c2)) ); // c1= c2 <==> !(c1> c2)&&!(c1< c2) + assert (c1 < c2 == (!(c1 == c2) && !(c1 > c2)) ); // c1< c2 <==> !(c1==c2)&&!(c1> c2) + assert (!(c1>c2) || !(c2>c3) || (c1>c3) ); // (c1>c2)&&(c2>c3) ==> c1>c3 + + // -------------------------------------------------------- + AssertCircAlmostEq(c1 , ToC(ToR( c1) ) ); // c1 = ToC(ToR( c1) + AssertCircAlmostEq(-c1 , ToC(ToR(-c1) ) ); // -c1 = ToC(ToR(-c1) + AssertCircAlmostEq(c1 + c2 , ToC(ToR(c1)+ToR(c2)) ); // c1+c2 = ToC(ToR(c1)+ToR(c2)) + AssertCircAlmostEq(c1 - c2 , ToC(ToR(c1)-ToR(c2)) ); // c1-c2 = ToC(ToR(c1)-ToR(c2)) + AssertCircAlmostEq(c1 * r , ToC(ToR(c1)*r ) ); // c1*r = ToC(ToR(c1)*r ) + AssertCircAlmostEq(c1 / r , ToC(ToR(c1)/r ) ); // c1/r = ToC(ToR(c1)/r ) + + // -------------------------------------------------------- + } + } + +public: + CircValTester() + { + Test(); + } +}; + diff --git a/thirdparty/Circular_Code/FPCompare.h b/thirdparty/Circular_Code/FPCompare.h new file mode 100644 index 000000000..432aee8a8 --- /dev/null +++ b/thirdparty/Circular_Code/FPCompare.h @@ -0,0 +1,313 @@ +// ========================================================================== +// floating-point AlmostEquals checker +// Lior Kogan (koganlior1@gmail.com), 2011 +// based on code extracted from Google Test (http://code.google.com/p/googletest/) +// +// the following code can be used to compare two floating-point values. 
+// example: +// double f= // something +// double g= // something +// const FloatingPoint lhs(f), rhs(g); +// if (lhs.AlmostEquals(rhs)) { ... } +// ========================================================================== + +#pragma once + +#include +#include + +#define GTEST_OS_WINDOWS 1 + +// ========================================================================== +// Copyright 2005, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Authors: wan@google.com (Zhanyong Wan), eefacm@gmail.com (Sean Mcafee) +// +// The Google C++ Testing Framework (Google Test) + +// ========================================================================== +// from gtest-1.5.0\include\gtest\internal\gtest-port.h: + +// This template class serves as a compile-time function from size to +// type. It maps a size in bytes to a primitive type with that +// size. e.g. +// +// TypeWithSize<4>::UInt +// +// is typedef-ed to be unsigned int (unsigned integer made up of 4 bytes). +// +// Such functionality should belong to STL, but I cannot find it there. +// +// Google Test uses this class in the implementation of floating-point comparison. +// +// For now it only handles UInt (unsigned int) as that's all Google Test +// needs. Other types can be easily added in the future if need arises. +template +class TypeWithSize +{ +public: + // This prevents the user from using TypeWithSize with incorrect values of N. + typedef void UInt; +}; + +// The specialization for size 4. +template <> +class TypeWithSize<4> +{ +public: + // unsigned int has size 4 in both gcc and MSVC. + // As base/basictypes.h doesn't compile on Windows, we cannot use uint32, uint64, and etc here. + typedef int Int ; + typedef unsigned int UInt; +}; + +// The specialization for size 8. +template <> +class TypeWithSize<8> +{ +public: + typedef int64_t Int; + typedef uint64_t UInt; +}; + +// ========================================================================== +// from gtest-1.5.0.zip\gtest-1.5.0\include\gtest\internal\gtest-internal.h: + +// This template class represents an IEEE floating-point number +// (either single-precision or double-precision, depending on the +// template parameters). +// +// The purpose of this class is to do more sophisticated number +// comparison. (Due to round-off error, etc, it's very unlikely that +// two floating-points will be equal exactly. Hence a naive +// comparison by the == operation often doesn't work.) 
+//
+// Format of IEEE floating-point:
+//
+// The most-significant bit being the leftmost, an IEEE
+// floating-point looks like
+//
+// sign_bit exponent_bits fraction_bits
+//
+// Here, sign_bit is a single bit that designates the sign of the
+// number.
+//
+// For float, there are 8 exponent bits and 23 fraction bits.
+//
+// For double, there are 11 exponent bits and 52 fraction bits.
+//
+// More details can be found at
+// http://en.wikipedia.org/wiki/IEEE_floating-point_standard.
+//
+// Template parameter:
+//
+// RawType: the raw floating-point type (either float or double)
+template <typename RawType>
+class FloatingPoint
+{
+public:
+ // Defines the unsigned integer type that has the same size as the floating point number
+ typedef typename TypeWithSize<sizeof(RawType)>::UInt Bits;
+
+ // Constants.
+
+ // # of bits in a number.
+ static const size_t kBitCount = 8*sizeof(RawType);
+
+ // # of fraction bits in a number.
+ static const size_t kFractionBitCount =
+ std::numeric_limits<RawType>::digits - 1;
+
+ // # of exponent bits in a number.
+ static const size_t kExponentBitCount = kBitCount - 1 - kFractionBitCount;
+
+ // The mask for the sign bit.
+ static const Bits kSignBitMask = static_cast<Bits>(1) << (kBitCount - 1);
+
+ // The mask for the fraction bits.
+ static const Bits kFractionBitMask =
+ ~static_cast<Bits>(0) >> (kExponentBitCount + 1);
+
+ // The mask for the exponent bits.
+ static const Bits kExponentBitMask = ~(kSignBitMask | kFractionBitMask);
+
+ // How many ULP's (Units in the Last Place) we want to tolerate when
+ // comparing two numbers. The larger the value, the more error we
+ // allow. A 0 value means that two numbers must be exactly the same
+ // to be considered equal.
+ //
+ // The maximum error of a single floating-point operation is 0.5
+ // units in the last place. On Intel CPU's, all floating-point
+ // calculations are done with 80-bit precision, while double has 64
+ // bits.
+ // + // See the following article for more details on ULP: + // http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm. + static const size_t kMaxUlps= 5000000; + + // Constructs a FloatingPoint from a raw floating-point number. + // + // On an Intel CPU, passing a non-normalized NAN (Not a Number) + // around may change its bits, although the new value is guaranteed + // to be also a NAN. Therefore, don't expect this constructor to + // preserve the bits in x when x is a NAN. + explicit FloatingPoint(const RawType& x) { u_.value_ = x; } + + // Static methods + + // Reinterprets a bit pattern as a floating-point number. + // + // This function is needed to test the AlmostEquals() method. + static RawType ReinterpretBits(const Bits bits) + { + FloatingPoint fp(0); + fp.u_.bits_ = bits; + return fp.u_.value_; + } + + // Returns the floating-point number that represent positive infinity. + static RawType Infinity() + { + return ReinterpretBits(kExponentBitMask); + } + + // Non-static methods + + // Returns the bits that represents this number. + const Bits &bits() const { return u_.bits_; } + + // Returns the exponent bits of this number. + Bits exponent_bits() const { return kExponentBitMask & u_.bits_; } + + // Returns the fraction bits of this number. + Bits fraction_bits() const { return kFractionBitMask & u_.bits_; } + + // Returns the sign bit of this number. + Bits sign_bit() const { return kSignBitMask & u_.bits_; } + + // Returns true iff this is NAN (not a number). + bool is_nan() const + { + // It's a NAN if the exponent bits are all ones and the fraction + // bits are not entirely zeros. + return (exponent_bits() == kExponentBitMask) && (fraction_bits() != 0); + } + + // Returns true iff this number is at most kMaxUlps ULP's away from + // rhs. In particular, this function: + // + // - returns false if either number is (or both are) NAN. + // - treats really large numbers as almost equal to infinity. 
+ // - thinks +0.0 and -0.0 are 0 DLP's apart. + bool AlmostEquals(const FloatingPoint& rhs) const + { + // The IEEE standard says that any comparison operation involving + // a NAN must return false. + if (is_nan() || rhs.is_nan()) + return false; + + // Lior Kogan, 25/9/2010: e.g. for comparing 1e-13 with exact 0 + if (fabs(u_.value_ - rhs.u_.value_) < 1e-12) + return true; + + Bits bits= DistanceBetweenSignAndMagnitudeNumbers(u_.bits_, rhs.u_.bits_); + + //if (bits > kMaxUlps && bits<100000000) + // __debugbreak(); + + return bits <= kMaxUlps; + } + + private: + // The data type used to store the actual floating-point number. + union FloatingPointUnion + { + RawType value_; // The raw floating-point number. + Bits bits_; // The bits that represent the number. + }; + + // Converts an integer from the sign-and-magnitude representation to + // the biased representation. More precisely, let N be 2 to the + // power of (kBitCount - 1), an integer x is represented by the + // unsigned number x + N. + // + // For instance, + // + // -N + 1 (the most negative number representable using + // sign-and-magnitude) is represented by 1; + // 0 is represented by N; and + // N - 1 (the biggest number representable using + // sign-and-magnitude) is represented by 2N - 1. + // + // Read http://en.wikipedia.org/wiki/Signed_number_representations + // for more details on signed number representations. + static Bits SignAndMagnitudeToBiased(const Bits &sam) + { + if (kSignBitMask & sam) // sam represents a negative number. + return ~sam + 1; + else // sam represents a positive number. + return kSignBitMask | sam; + } + + // Given two numbers in the sign-and-magnitude representation, + // returns the distance between them as an unsigned number. + static Bits DistanceBetweenSignAndMagnitudeNumbers(const Bits &sam1, + const Bits &sam2) + { + const Bits biased1 = SignAndMagnitudeToBiased(sam1); + const Bits biased2 = SignAndMagnitudeToBiased(sam2); + Bits bits= (biased1 >= biased2) ? 
(biased1 - biased2) : (biased2 - biased1);
+ return bits;
+ }
+
+ FloatingPointUnion u_;
+};
+
+// ==========================================================================
+// Lior Kogan, 25/9/2010
+// ==========================================================================
+
+// check if two floating-points are almost equal
+template <typename T>
+static bool IsAlmostEq(T x, T y)
+{
+ static_assert(!std::numeric_limits<T>::is_exact , "IsAlmostEq: floating-point type expected");
+
+ FloatingPoint<T> f(x);
+ FloatingPoint<T> g(y);
+
+ return f.AlmostEquals(g);
+}
+
+// assert that 2 floating-points are almost equal
+static void AssertAlmostEq(const double f, const double g)
+{
+ assert(IsAlmostEq(f, g));
+}
diff --git a/thirdparty/Circular_Code/TruncNormalDist.h b/thirdparty/Circular_Code/TruncNormalDist.h
new file mode 100644
index 000000000..8da24ec65
--- /dev/null
+++ b/thirdparty/Circular_Code/TruncNormalDist.h
@@ -0,0 +1,300 @@
+// ==========================================================================
+// truncated normal distribution
+// Lior Kogan (koganlior1@gmail.com), 2012
+// based on VC 2012 std::normal_distribution (random) as a skeleton
+// and on C. H. Jackson's R's implementation of the following paper:
+// Robert, C. P. Simulation of truncated normal variables. Statistics and Computing (1995) 5, 121-125
+// ==========================================================================
+
+#pragma once
+
+// ==========================================================================
+// TEMPLATE CLASS truncated_normal_distribution
+template <class _Ty = double> // NOTE(review): template parameter list was stripped by markup-mangling; reconstructed from the VC std::normal_distribution skeleton this file says it copies -- confirm
+class truncated_normal_distribution
+{ // template class for truncated normal distribution
+public:
+ typedef truncated_normal_distribution<_Ty> _Myt;
+ typedef _Ty input_type ;
+ typedef _Ty result_type;
+
+ struct param_type
+ { // parameter package
+ typedef _Myt distribution_type;
+
+ param_type(_Ty _Mean0= 0., _Ty _Sigma0= 1., _Ty _A0= 0., _Ty _B0= 0.)
+ { // construct from parameters + _Init(_Mean0, _Sigma0, _A0, _B0); + } + + bool operator==(const param_type& _Right) const + { // test for equality + return _Mean == _Right._Mean && + _Sigma == _Right._Sigma && + _A == _Right._A && + _B == _Right._B ; + } + + bool operator!=(const param_type& _Right) const + { // test for inequality + return !(*this == _Right); + } + + _Ty mean() const + { // return mean value + return _Mean; + } + + _Ty sigma() const + { // return sigma value + return _Sigma; + } + + _Ty a() const + { // return truncation-range lower-bound + return _A; + } + + _Ty b() const + { // return truncation-range upper-bound + return _B; + } + + _Ty stddev() const + { // return sigma value + return _Sigma; + } + + int alg() const + { // return fastest algorithm for the given parameters + return _Alg; + } + + void _Init(_Ty _Mean0, _Ty _Sigma0, _Ty _A0, _Ty _B0) + { // set internal state + _RNG_ASSERT(0. < _Sigma0, "invalid sigma argument for truncated_normal_distribution"); + _RNG_ASSERT(_A0 < _B0 , "invalid truncation-range for truncated_normal_distribution"); + _Mean = _Mean0 ; + _Sigma= _Sigma0; + _A = _A0 ; + _B = _B0 ; + + _NA= (_A - _Mean) / _Sigma; + _NB= (_B - _Mean) / _Sigma; + + // decide on the fastest algorithm for our case + _Alg= 3; + if ((_NA < 0 ) && ( _NB > 0) && (_NB - _NA > sqrt(_2Pi))) _Alg= 0; + else if ((_NA >= 0) && ( _NB > _NA + 2.*sqrt(exp(1.)) / ( _NA + sqrt(Sqr(_NA) + 4.)) * exp((_NA*2. - _NA*sqrt(Sqr(_NA) + 4.))/4.))) _Alg= 1; + else if ((_NB <= 0) && (-_NA > -_NB + 2.*sqrt(exp(1.)) / (-_NB + sqrt(Sqr(_NB) + 4.)) * exp((_NB*2. - -_NB*sqrt(Sqr(_NB) + 4.))/4.))) _Alg= 2; + } + + _Ty _Mean ; + _Ty _Sigma; + _Ty _A ; + _Ty _B ; + + _Ty _NA ; // _A normalized + _Ty _NB ; // _B normalized + int _Alg ; // algorithm to use + }; + + explicit truncated_normal_distribution(_Ty _Mean0 = 0. , + _Ty _Sigma0= 1. 
, + _Ty _A0 = std::numeric_limits< _Ty>::min(), // truncation-range lower-bound + _Ty _B0 = std::numeric_limits< _Ty>::max() ) // truncation-range upper-bound + + : _Par(_Mean0, _Sigma0, _A0, _B0), _Valid(false), _X2(0) + { // construct + } + + explicit truncated_normal_distribution(param_type _Par0) + : _Par(_Par0), _Valid(false), _X2(0) + { // construct from parameter package + } + + _Ty mean() const + { // return mean value + return _Par.mean(); + } + + _Ty sigma() const + { // return sigma value + return _Par.sigma(); + } + + _Ty a() const + { // return truncation-range lower-bound + return _Par.a(); + } + + _Ty b() const + { // return truncation-range upper-bound + return _Par.b(); + } + + _Ty stddev() const + { // return sigma value + return _Par.sigma(); + } + + param_type param() const + { // return parameter package + return _Par; + } + + void param(const param_type& _Par0) + { // set parameter package + _Par= _Par0; + reset(); + } + + result_type (min)() const + { // get smallest possible result + return _Par._A; + } + + result_type (max)() const + { // get largest possible result + return _Par._B; + } + + void reset() + { // clear internal state + _Valid= false; + } + + template + result_type operator()(_Engine& _Eng) + { // return next value + return _Eval(_Eng, _Par); + } + + template + result_type operator()(_Engine& _Eng, const param_type& _Par0) + { // return next value, given parameter package + reset(); + return _Eval(_Eng, _Par0, false); + } + + template + basic_istream<_Elem, _Traits>& _Read(basic_istream<_Elem, _Traits>& _Istr) + { // read state from _Istr + _Ty _Mean0 ; + _Ty _Sigma0; + _Ty _A0 ; + _Ty _B0 ; + _In(_Istr, _Mean0 ); + _In(_Istr, _Sigma0); + _In(_Istr, _A0 ); + _In(_Istr, _B0 ); + _Par._Init(_Mean0, _Sigma0, _A0, _B0); + + _Istr >> _Valid; + _In(_Istr, _X2); + return _Istr; + } + + template + basic_ostream<_Elem, _Traits>& _Write(basic_ostream<_Elem, _Traits>& _Ostr) const + { // write state to _Ostr + _Out(_Ostr, _Par._Mean ); + 
_Out(_Ostr, _Par._Sigma); + _Out(_Ostr, _Par._A ); + _Out(_Ostr, _Par._B ); + + _Ostr << ' ' << _Valid; + _Out(_Ostr, _X2); + return _Ostr; + } + +private: + template + result_type _Eval(_Engine& _Eng, const param_type& _Par0, bool _Keep= true) + { + _Ty r; + + switch (_Par0._Alg) + { + case 0 : + { + normal_distribution<_Ty> nd; + do { r= nd(_Eng); } + while (r<_Par0._NA || r>_Par0._NB); + break; + } + + case 1 : + { + exponential_distribution<_Ty> ed; + _Ty a,u,z; + + do + { + a= (_Par0._NA + sqrt(Sqr(_Par0._NA)+4.))/2.; + z= ed(_Eng, a) + _Par0._NA; + u= _NRAND(_Eng, _Ty); + } + while ((u>exp(-Sqr(z-a)/2.)) || (z>_Par0._NB)); + + r= z; + break; + } + + case 2 : + { + exponential_distribution<_Ty> ed; + _Ty a,u,z; + + do + { + a= (-_Par0._NB + sqrt(Sqr(_Par0._NB)+4.))/2.; + z= ed(_Eng, a) - _Par0._NB; + u= _NRAND(_Eng, _Ty); + } + while ((u>exp(-Sqr(z-a)/2.)) || (z>-_Par0._NA)); + + r= -z; + break; + } + + default: + { + _Ty z,u,rho; + + do + { + uniform_real<_Ty> ud(_Par0._NA, _Par0._NB); + z= ud(_Eng); + u= _NRAND(_Eng, _Ty); + + if (_Par0._NA>0) rho= exp((Sqr(_Par0._NA)-Sqr(z))/2.); + else if (_Par0._NB<0) rho= exp((Sqr(_Par0._NB)-Sqr(z))/2.); + else rho= exp( -Sqr(z) /2.); + } + while (u>rho); + + r= z; + } + } + + return r * _Par0._Sigma + _Par0._Mean; // denormalize result + } + + int _Alg ; // which algorithm to use + param_type _Par ; + bool _Valid; + _Ty _X2 ; +}; + +template +basic_istream<_Elem, _Traits>& operator>>(basic_istream<_Elem, _Traits>& _Istr, truncated_normal_distribution<_Ty>& _Dist) +{ // read state from _Istr + return _Dist._Read(_Istr); +} + +template +basic_ostream<_Elem, _Traits>& operator<<(basic_ostream<_Elem, _Traits>& _Ostr, const truncated_normal_distribution<_Ty>& _Dist) +{ // write state to _Ostr + return _Dist._Write(_Ostr); +} diff --git a/thirdparty/Circular_Code/WrappedNormalDist.h b/thirdparty/Circular_Code/WrappedNormalDist.h new file mode 100644 index 000000000..b141baf66 --- /dev/null +++ 
b/thirdparty/Circular_Code/WrappedNormalDist.h @@ -0,0 +1,245 @@ +// ========================================================================== +// wrapped normal distribution +// Lior Kogan (koganlior1@gmail.com), 2012 +// based on VC 2012 std::normal_distribution (random) as a skeleton +// ========================================================================== + +#pragma once + +#include "CircHelper.h" // Mod + +// ========================================================================== +// TEMPLATE CLASS wrapped_normal_distribution +template +class wrapped_normal_distribution +{ // template class for wrapped normal distribution +public: + typedef wrapped_normal_distribution<_Ty> _Myt; + typedef _Ty input_type ; + typedef _Ty result_type; + + struct param_type + { // parameter package + typedef _Myt distribution_type; + + param_type(_Ty _Mean0= 0., _Ty _Sigma0= 1., _Ty _L0= 0., _Ty _H0= 0.) + { // construct from parameters + _Init(_Mean0, _Sigma0, _L0, _H0); + } + + bool operator==(const param_type& _Right) const + { // test for equality + return _Mean == _Right._Mean && + _Sigma == _Right._Sigma && + _L == _Right._L && + _H == _Right._H ; + + } + + bool operator!=(const param_type& _Right) const + { // test for inequality + return !(*this == _Right); + } + + _Ty mean() const + { // return mean value + return _Mean; + } + + _Ty sigma() const + { // return sigma value + return _Sigma; + } + + _Ty l() const + { // return wrapping-range lower-bound + return _L; + } + + _Ty h() const + { // return wrapping-range upper-bound + return _H; + } + + _Ty stddev() const + { // return sigma value + return _Sigma; + } + + void _Init(_Ty _Mean0, _Ty _Sigma0, _Ty _L0, _Ty _H0) + { // set internal state + _RNG_ASSERT(0. 
< _Sigma0, "invalid sigma argument for wrapped_normal_distribution"); + _RNG_ASSERT(_L0 < _H0 , "invalid wrapping-range for wrapped_normal_distribution"); + _Mean = _Mean0 ; + _Sigma= _Sigma0; + _L = _L0 ; + _H = _H0 ; + } + + _Ty _Mean ; + _Ty _Sigma; + _Ty _L ; + _Ty _H ; + }; + + explicit wrapped_normal_distribution(_Ty _Mean0 = 0., + _Ty _Sigma0= 45., + _Ty _L0 = -180., // wrapping-range lower-bound + _Ty _H0 = 180. ) // wrapping-range upper-bound + : _Par(_Mean0, _Sigma0, _L0, _H0), _Valid(false), _X2(0) + { // construct + } + + explicit wrapped_normal_distribution(param_type _Par0) + : _Par(_Par0), _Valid(false), _X2(0) + { // construct from parameter package + } + + _Ty mean() const + { // return mean value + return _Par.mean(); + } + + _Ty sigma() const + { // return sigma value + return _Par.sigma(); + } + + _Ty l() const + { // return wrapping-range lower-bound + return _Par.l(); + } + + _Ty h() const + { // return wrapping-range upper-bound + return _Par.h(); + } + + _Ty stddev() const + { // return sigma value + return _Par.sigma(); + } + + param_type param() const + { // return parameter package + return _Par; + } + + void param(const param_type& _Par0) + { // set parameter package + _Par= _Par0; + reset(); + } + + result_type (min)() const + { // get smallest possible result + return _Par._L; + } + + result_type (max)() const + { // get largest possible result + return _Par._H; + } + + void reset() + { // clear internal state + _Valid= false; + } + + template + result_type operator()(_Engine& _Eng) + { // return next value + return _Eval(_Eng, _Par); + } + + template + result_type operator()(_Engine& _Eng, const param_type& _Par0) + { // return next value, given parameter package + reset(); + return _Eval(_Eng, _Par0, false); + } + + template + basic_istream<_Elem, _Traits>& _Read(basic_istream<_Elem, _Traits>& _Istr) + { // read state from _Istr + _Ty _Mean0 ; + _Ty _Sigma0; + _Ty _L0 ; + _Ty _H0 ; + _In(_Istr, _Mean0 ); + _In(_Istr, _Sigma0); + 
_In(_Istr, _L0 ); + _In(_Istr, _H0 ); + _Par._Init(_Mean0, _Sigma0, _L0, _H0); + + _Istr >> _Valid; + _In(_Istr, _X2); + return _Istr; + } + + template + basic_ostream<_Elem, _Traits>& _Write(basic_ostream<_Elem, _Traits>& _Ostr) const + { // write state to _Ostr + _Out(_Ostr, _Par._Mean ); + _Out(_Ostr, _Par._Sigma); + _Out(_Ostr, _Par._L ); + _Out(_Ostr, _Par._H ); + + _Ostr << ' ' << _Valid; + _Out(_Ostr, _X2); + return _Ostr; + } + +private: + template result_type _Eval(_Engine& _Eng, const param_type& _Par0, bool _Keep= true) + { // compute next value + // Knuth, vol. 2, p. 122, alg. P + _Ty r; + + if (_Keep && _Valid) + { // return stored value + r = _X2 ; + _Valid= false; + } + else + { // generate two values, store one, return one + double _V1, _V2, _Sx; + for (; ; ) + { // reject bad values + _V1= 2 * _NRAND(_Eng, _Ty) - 1.; + _V2= 2 * _NRAND(_Eng, _Ty) - 1.; + _Sx= _V1 * _V1 + _V2 * _V2; + if (_Sx < 1.) + break; + } + + double _Fx= _CSTD sqrt(-2. * _CSTD log(_Sx) / _Sx); + if (_Keep) + { // save second value for next call + _X2 = _Fx * _V2; + _Valid= true ; + } + + r= _Fx * _V1; + } + + result_type d= r * _Par0._Sigma + _Par0._Mean; // denormalize result + return Mod(d - _Par0._L, _Par0._H - _Par0._L) + _Par0._L; // wrap result + } + + param_type _Par ; + bool _Valid; + _Ty _X2 ; +}; + +template +basic_istream<_Elem, _Traits>& operator>>(basic_istream<_Elem, _Traits>& _Istr, wrapped_normal_distribution<_Ty>& _Dist) +{ // read state from _Istr + return _Dist._Read(_Istr); +} + +template +basic_ostream<_Elem, _Traits>& operator<<(basic_ostream<_Elem, _Traits>& _Ostr, const wrapped_normal_distribution<_Ty>& _Dist) +{ // write state to _Ostr + return _Dist._Write(_Ostr); +} diff --git a/thirdparty/Circular_Code/WrappedTruncNormalDist.h b/thirdparty/Circular_Code/WrappedTruncNormalDist.h new file mode 100644 index 000000000..f11e728ef --- /dev/null +++ b/thirdparty/Circular_Code/WrappedTruncNormalDist.h @@ -0,0 +1,339 @@ +// 
========================================================================== +// wrapped truncated normal distribution +// Lior Kogan (koganlior1@gmail.com), 2012 +// based on VC 2012 std::normal_distribution (random) as a skeleton +// and on C. H. Jackson's R's implementation of the following paper: +// Robert, C. P. Simulation of truncated normal variables. Statistics and Computing (1995) 5, 121-125 +// ========================================================================== + +#pragma once + +#include "CircHelper.h" // _2Pi, Sqr, Mod + +// ========================================================================== +// TEMPLATE CLASS wrapped_truncated_normal_distribution +template +class wrapped_truncated_normal_distribution +{ // template class for wrapped truncated normal distribution +public: + typedef wrapped_truncated_normal_distribution<_Ty> _Myt; + typedef _Ty input_type ; + typedef _Ty result_type; + + struct param_type + { // parameter package + typedef _Myt distribution_type; + + param_type(_Ty _Mean0= 0., _Ty _Sigma0= 1., _Ty _A0= 0., _Ty _B0= 0., _Ty _L0= 0., _Ty _H0= 0.) 
+ { // construct from parameters + _Init(_Mean0, _Sigma0, _A0, _B0, _L0, _H0); + } + + bool operator==(const param_type& _Right) const + { // test for equality + return _Mean == _Right._Mean && + _Sigma == _Right._Sigma && + _A == _Right._A && + _B == _Right._B && + _L == _Right._L && + _H == _Right._H ; + } + + bool operator!=(const param_type& _Right) const + { // test for inequality + return !(*this == _Right); + } + + _Ty mean() const + { // return mean value + return _Mean; + } + + _Ty sigma() const + { // return sigma value + return _Sigma; + } + + _Ty a() const + { // return truncation-range lower-bound + return _A; + } + + _Ty b() const + { // return truncation-range upper-bound + return _B; + } + + _Ty l() const + { // return wrapping-range lower-bound + return _L; + } + + _Ty h() const + { // return wrapping-range upper-bound + return _H; + } + + _Ty stddev() const + { // return sigma value + return _Sigma; + } + + int alg() const + { // return fastest algorithm for the given parameters + return _Alg; + } + + void _Init(_Ty _Mean0, _Ty _Sigma0, _Ty _A0, _Ty _B0, _Ty _L0, _Ty _H0) + { // set internal state + _RNG_ASSERT(0. < _Sigma0, "invalid sigma argument for wrapped_truncated_normal_distribution" ); + _RNG_ASSERT(_A0 < _B0 , "invalid truncation-range for wrapped_truncated_normal_distribution"); + _RNG_ASSERT(_L0 < _H0 , "invalid wrapping-range for wrapped_truncated_normal_distribution" ); + _Mean = _Mean0 ; + _Sigma= _Sigma0; + _A = _A0 ; + _B = _B0 ; + _L = _L0 ; + _H = _H0 ; + + _NA= (_A - _Mean) / _Sigma; + _NB= (_B - _Mean) / _Sigma; + + // decide on the fastest algorithm for our case + _Alg= 3; + if ((_NA < 0 ) && ( _NB > 0) && (_NB - _NA > sqrt(_2Pi))) _Alg= 0; + else if ((_NA >= 0) && ( _NB > _NA + 2.*sqrt(exp(1.)) / ( _NA + sqrt(Sqr(_NA) + 4.)) * exp((_NA*2. - _NA*sqrt(Sqr(_NA) + 4.))/4.))) _Alg= 1; + else if ((_NB <= 0) && (-_NA > -_NB + 2.*sqrt(exp(1.)) / (-_NB + sqrt(Sqr(_NB) + 4.)) * exp((_NB*2. 
- -_NB*sqrt(Sqr(_NB) + 4.))/4.))) _Alg= 2; + } + + _Ty _Mean ; + _Ty _Sigma; + _Ty _A ; + _Ty _B ; + _Ty _L ; + _Ty _H ; + + _Ty _NA ; // _A normalized + _Ty _NB ; // _B normalized + int _Alg ; // algorithm to use + }; + + // normal distribution is first truncated, and then wrapped + explicit wrapped_truncated_normal_distribution(_Ty _Mean0 = 0. , + _Ty _Sigma0= 1. , + _Ty _A0 = std::numeric_limits< _Ty>::min(), // truncation-range lower-bound + _Ty _B0 = std::numeric_limits< _Ty>::max(), // truncation-range upper-bound + _Ty _L0 = -180. , // wrapping -range lower-bound + _Ty _H0 = 180. ) // wrapping -range upper-bound + + : _Par(_Mean0, _Sigma0, _A0, _B0, _L0, _H0), _Valid(false), _X2(0) + { // construct + } + + explicit wrapped_truncated_normal_distribution(param_type _Par0) + : _Par(_Par0), _Valid(false), _X2(0) + { // construct from parameter package + } + + _Ty mean() const + { // return mean value + return _Par.mean(); + } + + _Ty sigma() const + { // return sigma value + return _Par.sigma(); + } + + _Ty a() const + { // return truncation-range lower-bound + return _Par.a(); + } + + _Ty b() const + { // return truncation-range upper-bound + return _Par.b(); + } + + _Ty l() const + { // return wrapping-range lower-bound + return _Par.l(); + } + + _Ty h() const + { // return wrapping-range upper-bound + return _Par.h(); + } + + _Ty stddev() const + { // return sigma value + return _Par.sigma(); + } + + param_type param() const + { // return parameter package + return _Par; + } + + void param(const param_type& _Par0) + { // set parameter package + _Par= _Par0; + reset(); + } + + result_type (min)() const + { // get smallest possible result + return _Par._A; + } + + result_type (max)() const + { // get largest possible result + return _Par._B; + } + + void reset() + { // clear internal state + _Valid= false; + } + + template + result_type operator()(_Engine& _Eng) + { // return next value + return _Eval(_Eng, _Par); + } + + template + result_type 
operator()(_Engine& _Eng, const param_type& _Par0) + { // return next value, given parameter package + reset(); + return _Eval(_Eng, _Par0, false); + } + + template + basic_istream<_Elem, _Traits>& _Read(basic_istream<_Elem, _Traits>& _Istr) + { // read state from _Istr + _Ty _Mean0 ; + _Ty _Sigma0; + _Ty _A0 ; + _Ty _B0 ; + _Ty _L0 ; + _Ty _H0 ; + _In(_Istr, _Mean0 ); + _In(_Istr, _Sigma0); + _In(_Istr, _A0 ); + _In(_Istr, _B0 ); + _In(_Istr, _L0 ); + _In(_Istr, _H0 ); + _Par._Init(_Mean0, _Sigma0, _A0, _B0, _L0, _H0); + + _Istr >> _Valid; + _In(_Istr, _X2); + return _Istr; + } + + template + basic_ostream<_Elem, _Traits>& _Write(basic_ostream<_Elem, _Traits>& _Ostr) const + { // write state to _Ostr + _Out(_Ostr, _Par._Mean ); + _Out(_Ostr, _Par._Sigma); + _Out(_Ostr, _Par._A ); + _Out(_Ostr, _Par._B ); + _Out(_Ostr, _Par._L ); + _Out(_Ostr, _Par._H ); + + _Ostr << ' ' << _Valid; + _Out(_Ostr, _X2); + return _Ostr; + } + +private: + template + result_type _Eval(_Engine& _Eng, const param_type& _Par0, bool _Keep= true) + { + _Ty r; + + switch (_Par0._Alg) + { + case 0 : + { + normal_distribution<_Ty> nd; + do { r= nd(_Eng); } + while (r<_Par0._NA || r>_Par0._NB); + break; + } + + case 1 : + { + exponential_distribution<_Ty> ed; + _Ty a,u,z; + + do + { + a= (_Par0._NA + sqrt(Sqr(_Par0._NA)+4.))/2.; + z= ed(_Eng, a) + _Par0._NA; + u= _NRAND(_Eng, _Ty); + } + while ((u>exp(-Sqr(z-a)/2.)) || (z>_Par0._NB)); + + r= z; + break; + } + + case 2 : + { + exponential_distribution<_Ty> ed; + _Ty a,u,z; + + do + { + a= (-_Par0._NB + sqrt(Sqr(_Par0._NB)+4.))/2.; + z= ed(_Eng, a) - _Par0._NB; + u= _NRAND(_Eng, _Ty); + } + while ((u>exp(-Sqr(z-a)/2.)) || (z>-_Par0._NA)); + + r= -z; + break; + } + + default: + { + _Ty z,u,rho; + + do + { + uniform_real<_Ty> ud(_Par0._NA, _Par0._NB); + z= ud(_Eng); + u= _NRAND(_Eng, _Ty); + + if (_Par0._NA>0) rho= exp((Sqr(_Par0._NA)-Sqr(z))/2.); + else if (_Par0._NB<0) rho= exp((Sqr(_Par0._NB)-Sqr(z))/2.); + else rho= exp( -Sqr(z) /2.); + } + 
while (u>rho); + + r= z; + } + } + + result_type d= r * _Par0._Sigma + _Par0._Mean; // denormalize result + return Mod(d - _Par0._L, _Par0._H - _Par0._L) + _Par0._L; // wrap result + } + + int _Alg ; // which algorithm to use + param_type _Par ; + bool _Valid; + _Ty _X2 ; +}; + +template +basic_istream<_Elem, _Traits>& operator>>(basic_istream<_Elem, _Traits>& _Istr, wrapped_truncated_normal_distribution<_Ty>& _Dist) +{ // read state from _Istr + return _Dist._Read(_Istr); +} + +template +basic_ostream<_Elem, _Traits>& operator<<(basic_ostream<_Elem, _Traits>& _Ostr, const wrapped_truncated_normal_distribution<_Ty>& _Dist) +{ // write state to _Ostr + return _Dist._Write(_Ostr); +} diff --git a/thirdparty/inih/CMakeLists.txt b/thirdparty/inih/CMakeLists.txt index 72dbf5e2f..a4b78575d 100644 --- a/thirdparty/inih/CMakeLists.txt +++ b/thirdparty/inih/CMakeLists.txt @@ -9,3 +9,12 @@ set(INIH_HEADERS ini.h INIReader.h) ADD_LIBRARY(inih ${INIH_SOURCE} ${INIH_HEADERS}) set_target_properties(inih PROPERTIES FOLDER "libs") + +install(TARGETS ${PROJECT_NAME} + EXPORT MTTrackingExports + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib + RUNTIME DESTINATION bin + PUBLIC_HEADER DESTINATION include/${PROJECT_NAME}) + +set_target_properties(${PROJECT_NAME} PROPERTIES FOLDER "libs") \ No newline at end of file diff --git a/thirdparty/pybind11/.appveyor.yml b/thirdparty/pybind11/.appveyor.yml new file mode 100644 index 000000000..8fbb72610 --- /dev/null +++ b/thirdparty/pybind11/.appveyor.yml @@ -0,0 +1,70 @@ +version: 1.0.{build} +image: +- Visual Studio 2017 +- Visual Studio 2015 +test: off +skip_branch_with_pr: true +build: + parallel: true +platform: +- x64 +- x86 +environment: + matrix: + - PYTHON: 36 + CPP: 14 + CONFIG: Debug + - PYTHON: 27 + CPP: 14 + CONFIG: Debug + - CONDA: 36 + CPP: latest + CONFIG: Release +matrix: + exclude: + - image: Visual Studio 2015 + platform: x86 + - image: Visual Studio 2015 + CPP: latest + - image: Visual Studio 2017 + CPP: latest + 
platform: x86 +install: +- ps: | + if ($env:PLATFORM -eq "x64") { $env:CMAKE_ARCH = "x64" } + if ($env:APPVEYOR_JOB_NAME -like "*Visual Studio 2017*") { + $env:CMAKE_GENERATOR = "Visual Studio 15 2017" + $env:CMAKE_INCLUDE_PATH = "C:\Libraries\boost_1_64_0" + $env:CXXFLAGS = "-permissive-" + } else { + $env:CMAKE_GENERATOR = "Visual Studio 14 2015" + } + if ($env:PYTHON) { + if ($env:PLATFORM -eq "x64") { $env:PYTHON = "$env:PYTHON-x64" } + $env:PATH = "C:\Python$env:PYTHON\;C:\Python$env:PYTHON\Scripts\;$env:PATH" + python -W ignore -m pip install --upgrade pip wheel + python -W ignore -m pip install pytest numpy --no-warn-script-location + } elseif ($env:CONDA) { + if ($env:CONDA -eq "27") { $env:CONDA = "" } + if ($env:PLATFORM -eq "x64") { $env:CONDA = "$env:CONDA-x64" } + $env:PATH = "C:\Miniconda$env:CONDA\;C:\Miniconda$env:CONDA\Scripts\;$env:PATH" + $env:PYTHONHOME = "C:\Miniconda$env:CONDA" + conda --version + conda install -y -q pytest numpy scipy + } +- ps: | + Start-FileDownload 'http://bitbucket.org/eigen/eigen/get/3.3.3.zip' + 7z x 3.3.3.zip -y > $null + $env:CMAKE_INCLUDE_PATH = "eigen-eigen-67e894c6cd8f;$env:CMAKE_INCLUDE_PATH" +build_script: +- cmake -G "%CMAKE_GENERATOR%" -A "%CMAKE_ARCH%" + -DPYBIND11_CPP_STANDARD=/std:c++%CPP% + -DPYBIND11_WERROR=ON + -DDOWNLOAD_CATCH=ON + -DCMAKE_SUPPRESS_REGENERATION=1 + . +- set MSBuildLogger="C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" +- cmake --build . --config %CONFIG% --target pytest -- /m /v:m /logger:%MSBuildLogger% +- cmake --build . --config %CONFIG% --target cpptest -- /m /v:m /logger:%MSBuildLogger% +- if "%CPP%"=="latest" (cmake --build . 
--config %CONFIG% --target test_cmake_build -- /m /v:m /logger:%MSBuildLogger%) +on_failure: if exist "tests\test_cmake_build" type tests\test_cmake_build\*.log* diff --git a/thirdparty/pybind11/.gitignore b/thirdparty/pybind11/.gitignore new file mode 100644 index 000000000..979fd4431 --- /dev/null +++ b/thirdparty/pybind11/.gitignore @@ -0,0 +1,38 @@ +CMakeCache.txt +CMakeFiles +Makefile +cmake_install.cmake +.DS_Store +*.so +*.pyd +*.dll +*.sln +*.sdf +*.opensdf +*.vcxproj +*.filters +example.dir +Win32 +x64 +Release +Debug +.vs +CTestTestfile.cmake +Testing +autogen +MANIFEST +/.ninja_* +/*.ninja +/docs/.build +*.py[co] +*.egg-info +*~ +.*.swp +.DS_Store +/dist +/build +/cmake/ +.cache/ +sosize-*.txt +pybind11Config*.cmake +pybind11Targets.cmake diff --git a/thirdparty/pybind11/.gitmodules b/thirdparty/pybind11/.gitmodules new file mode 100644 index 000000000..d063a8e89 --- /dev/null +++ b/thirdparty/pybind11/.gitmodules @@ -0,0 +1,3 @@ +[submodule "tools/clang"] + path = tools/clang + url = ../../wjakob/clang-cindex-python3 diff --git a/thirdparty/pybind11/.readthedocs.yml b/thirdparty/pybind11/.readthedocs.yml new file mode 100644 index 000000000..c9c61617c --- /dev/null +++ b/thirdparty/pybind11/.readthedocs.yml @@ -0,0 +1,3 @@ +python: + version: 3 +requirements_file: docs/requirements.txt diff --git a/thirdparty/pybind11/.travis.yml b/thirdparty/pybind11/.travis.yml new file mode 100644 index 000000000..d81cd8c7b --- /dev/null +++ b/thirdparty/pybind11/.travis.yml @@ -0,0 +1,306 @@ +language: cpp +matrix: + include: + # This config does a few things: + # - Checks C++ and Python code styles (check-style.sh and flake8). + # - Makes sure sphinx can build the docs without any errors or warnings. + # - Tests setup.py sdist and install (all header files should be present). + # - Makes sure that everything still works without optional deps (numpy/scipy/eigen) and + # also tests the automatic discovery functions in CMake (Python version, C++ standard). 
+ - os: linux + dist: xenial # Necessary to run doxygen 1.8.15 + name: Style, docs, and pip + cache: false + before_install: + - pyenv global $(pyenv whence 2to3) # activate all python versions + - PY_CMD=python3 + - $PY_CMD -m pip install --user --upgrade pip wheel setuptools + install: + # breathe 4.14 doesn't work with bit fields. See https://github.com/michaeljones/breathe/issues/462 + - $PY_CMD -m pip install --user --upgrade sphinx sphinx_rtd_theme breathe==4.13.1 flake8 pep8-naming pytest + - curl -fsSL https://sourceforge.net/projects/doxygen/files/rel-1.8.15/doxygen-1.8.15.linux.bin.tar.gz/download | tar xz + - export PATH="$PWD/doxygen-1.8.15/bin:$PATH" + script: + - tools/check-style.sh + - flake8 + - $PY_CMD -m sphinx -W -b html docs docs/.build + - | + # Make sure setup.py distributes and installs all the headers + $PY_CMD setup.py sdist + $PY_CMD -m pip install --user -U ./dist/* + installed=$($PY_CMD -c "import pybind11; print(pybind11.get_include(True) + '/pybind11')") + diff -rq $installed ./include/pybind11 + - | + # Barebones build + cmake -DCMAKE_BUILD_TYPE=Debug -DPYBIND11_WERROR=ON -DDOWNLOAD_CATCH=ON -DPYTHON_EXECUTABLE=$(which $PY_CMD) . + make pytest -j 2 && make cpptest -j 2 + # The following are regular test configurations, including optional dependencies. + # With regard to each other they differ in Python version, C++ standard and compiler. 
+ - os: linux + dist: trusty + name: Python 2.7, c++11, gcc 4.8 + env: PYTHON=2.7 CPP=11 GCC=4.8 + addons: + apt: + packages: + - cmake=2.\* + - cmake-data=2.\* + - os: linux + dist: trusty + name: Python 3.6, c++11, gcc 4.8 + env: PYTHON=3.6 CPP=11 GCC=4.8 + addons: + apt: + sources: + - deadsnakes + packages: + - python3.6-dev + - python3.6-venv + - cmake=2.\* + - cmake-data=2.\* + - os: linux + dist: trusty + env: PYTHON=2.7 CPP=14 GCC=6 CMAKE=1 + name: Python 2.7, c++14, gcc 6, CMake test + addons: + apt: + sources: + - ubuntu-toolchain-r-test + packages: + - g++-6 + - os: linux + dist: trusty + name: Python 3.5, c++14, gcc 6, Debug build + # N.B. `ensurepip` could be installed transitively by `python3.5-venv`, but + # seems to have apt conflicts (at least for Trusty). Use Docker instead. + services: docker + env: DOCKER=debian:stretch PYTHON=3.5 CPP=14 GCC=6 DEBUG=1 + - os: linux + dist: xenial + env: PYTHON=3.6 CPP=17 GCC=7 + name: Python 3.6, c++17, gcc 7 + addons: + apt: + sources: + - deadsnakes + - ubuntu-toolchain-r-test + packages: + - g++-7 + - python3.6-dev + - python3.6-venv + - os: linux + dist: xenial + env: PYTHON=3.6 CPP=17 CLANG=7 + name: Python 3.6, c++17, Clang 7 + addons: + apt: + sources: + - deadsnakes + - llvm-toolchain-xenial-7 + packages: + - python3.6-dev + - python3.6-venv + - clang-7 + - libclang-7-dev + - llvm-7-dev + - lld-7 + - libc++-7-dev + - libc++abi-7-dev # Why is this necessary??? 
+ - os: linux + dist: xenial + env: PYTHON=3.8 CPP=17 GCC=7 + name: Python 3.8, c++17, gcc 7 (w/o numpy/scipy) # TODO: update build name when the numpy/scipy wheels become available + addons: + apt: + sources: + - deadsnakes + - ubuntu-toolchain-r-test + packages: + - g++-7 + - python3.8-dev + - python3.8-venv + # Currently there is no numpy/scipy wheels available for python3.8 + # TODO: remove next before_install, install and script clause when the wheels become available + before_install: + - pyenv global $(pyenv whence 2to3) # activate all python versions + - PY_CMD=python3 + - $PY_CMD -m pip install --user --upgrade pip wheel setuptools + install: + - $PY_CMD -m pip install --user --upgrade pytest + script: + - | + # Barebones build + cmake -DCMAKE_BUILD_TYPE=Debug -DPYBIND11_WERROR=ON -DDOWNLOAD_CATCH=ON -DPYTHON_EXECUTABLE=$(which $PY_CMD) . + make pytest -j 2 && make cpptest -j 2 + - os: osx + name: Python 2.7, c++14, AppleClang 7.3, CMake test + osx_image: xcode7.3 + env: PYTHON=2.7 CPP=14 CLANG CMAKE=1 + - os: osx + name: Python 3.7, c++14, AppleClang 9, Debug build + osx_image: xcode9.4 + env: PYTHON=3.7 CPP=14 CLANG DEBUG=1 + # Test a PyPy 2.7 build + - os: linux + dist: trusty + env: PYPY=5.8 PYTHON=2.7 CPP=11 GCC=4.8 + name: PyPy 5.8, Python 2.7, c++11, gcc 4.8 + addons: + apt: + packages: + - libblas-dev + - liblapack-dev + - gfortran + # Build in 32-bit mode and tests against the CMake-installed version + - os: linux + dist: trusty + services: docker + env: DOCKER=i386/debian:stretch PYTHON=3.5 CPP=14 GCC=6 INSTALL=1 + name: Python 3.5, c++14, gcc 6, 32-bit + script: + - | + # Consolidated 32-bit Docker Build + Install + set -ex + $SCRIPT_RUN_PREFIX sh -c " + set -ex + cmake ${CMAKE_EXTRA_ARGS} -DPYBIND11_INSTALL=1 -DPYBIND11_TEST=0 . 
+ make install + cp -a tests /pybind11-tests + mkdir /build-tests && cd /build-tests + cmake ../pybind11-tests ${CMAKE_EXTRA_ARGS} -DPYBIND11_WERROR=ON + make pytest -j 2" + set +ex +cache: + directories: + - $HOME/.local/bin + - $HOME/.local/lib + - $HOME/.local/include + - $HOME/Library/Python +before_install: +- | + # Configure build variables + set -ex + if [ "$TRAVIS_OS_NAME" = "linux" ]; then + if [ -n "$CLANG" ]; then + export CXX=clang++-$CLANG CC=clang-$CLANG + EXTRA_PACKAGES+=" clang-$CLANG llvm-$CLANG-dev" + else + if [ -z "$GCC" ]; then GCC=4.8 + else EXTRA_PACKAGES+=" g++-$GCC" + fi + export CXX=g++-$GCC CC=gcc-$GCC + fi + elif [ "$TRAVIS_OS_NAME" = "osx" ]; then + export CXX=clang++ CC=clang; + fi + if [ -n "$CPP" ]; then CPP=-std=c++$CPP; fi + if [ "${PYTHON:0:1}" = "3" ]; then PY=3; fi + if [ -n "$DEBUG" ]; then CMAKE_EXTRA_ARGS+=" -DCMAKE_BUILD_TYPE=Debug"; fi + set +ex +- | + # Initialize environment + set -ex + if [ -n "$DOCKER" ]; then + docker pull $DOCKER + + containerid=$(docker run --detach --tty \ + --volume="$PWD":/pybind11 --workdir=/pybind11 \ + --env="CC=$CC" --env="CXX=$CXX" --env="DEBIAN_FRONTEND=$DEBIAN_FRONTEND" \ + --env=GCC_COLORS=\ \ + $DOCKER) + SCRIPT_RUN_PREFIX="docker exec --tty $containerid" + $SCRIPT_RUN_PREFIX sh -c 'for s in 0 15; do sleep $s; apt-get update && apt-get -qy dist-upgrade && break; done' + else + if [ "$PYPY" = "5.8" ]; then + curl -fSL https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.8.0-linux64.tar.bz2 | tar xj + PY_CMD=$(echo `pwd`/pypy2-v5.8.0-linux64/bin/pypy) + CMAKE_EXTRA_ARGS+=" -DPYTHON_EXECUTABLE:FILEPATH=$PY_CMD" + else + PY_CMD=python$PYTHON + if [ "$TRAVIS_OS_NAME" = "osx" ]; then + if [ "$PY" = "3" ]; then + brew update && brew unlink python@2 && brew upgrade python + else + curl -fsSL https://bootstrap.pypa.io/get-pip.py | $PY_CMD - --user + fi + fi + fi + if [ "$PY" = 3 ] || [ -n "$PYPY" ]; then + $PY_CMD -m ensurepip --user + fi + $PY_CMD --version + $PY_CMD -m pip install --user --upgrade 
pip wheel + fi + set +ex +install: +- | + # Install dependencies + set -ex + cmake --version + if [ -n "$DOCKER" ]; then + if [ -n "$DEBUG" ]; then + PY_DEBUG="python$PYTHON-dbg python$PY-scipy-dbg" + CMAKE_EXTRA_ARGS+=" -DPYTHON_EXECUTABLE=/usr/bin/python${PYTHON}dm" + fi + $SCRIPT_RUN_PREFIX sh -c "for s in 0 15; do sleep \$s; \ + apt-get -qy --no-install-recommends install \ + $PY_DEBUG python$PYTHON-dev python$PY-pytest python$PY-scipy \ + libeigen3-dev libboost-dev cmake make ${EXTRA_PACKAGES} && break; done" + else + + if [ "$CLANG" = "7" ]; then + export CXXFLAGS="-stdlib=libc++" + fi + + export NPY_NUM_BUILD_JOBS=2 + echo "Installing pytest, numpy, scipy..." + local PIP_CMD="" + if [ -n $PYPY ]; then + # For expediency, install only versions that are available on the extra index. + travis_wait 30 \ + $PY_CMD -m pip install --user --upgrade --extra-index-url https://imaginary.ca/trusty-pypi \ + pytest numpy==1.15.4 scipy==1.2.0 + else + $PY_CMD -m pip install --user --upgrade pytest numpy scipy + fi + echo "done." + + mkdir eigen + curl -fsSL https://bitbucket.org/eigen/eigen/get/3.3.4.tar.bz2 | \ + tar --extract -j --directory=eigen --strip-components=1 + export CMAKE_INCLUDE_PATH="${CMAKE_INCLUDE_PATH:+$CMAKE_INCLUDE_PATH:}$PWD/eigen" + fi + set +ex +script: +- | + # CMake Configuration + set -ex + $SCRIPT_RUN_PREFIX cmake ${CMAKE_EXTRA_ARGS} \ + -DPYBIND11_PYTHON_VERSION=$PYTHON \ + -DPYBIND11_CPP_STANDARD=$CPP \ + -DPYBIND11_WERROR=${WERROR:-ON} \ + -DDOWNLOAD_CATCH=${DOWNLOAD_CATCH:-ON} \ + . 
+ set +ex +- | + # pytest + set -ex + $SCRIPT_RUN_PREFIX make pytest -j 2 VERBOSE=1 + set +ex +- | + # cpptest + set -ex + $SCRIPT_RUN_PREFIX make cpptest -j 2 + set +ex +- | + # CMake Build Interface + set -ex + if [ -n "$CMAKE" ]; then $SCRIPT_RUN_PREFIX make test_cmake_build; fi + set +ex +after_failure: cat tests/test_cmake_build/*.log* +after_script: +- | + # Cleanup (Docker) + set -ex + if [ -n "$DOCKER" ]; then docker stop "$containerid"; docker rm "$containerid"; fi + set +ex diff --git a/thirdparty/pybind11/CMakeLists.txt b/thirdparty/pybind11/CMakeLists.txt new file mode 100644 index 000000000..97ee24867 --- /dev/null +++ b/thirdparty/pybind11/CMakeLists.txt @@ -0,0 +1,159 @@ +# CMakeLists.txt -- Build system for the pybind11 modules +# +# Copyright (c) 2015 Wenzel Jakob +# +# All rights reserved. Use of this source code is governed by a +# BSD-style license that can be found in the LICENSE file. + +cmake_minimum_required(VERSION 2.8.12) + +if (POLICY CMP0048) + # cmake warns if loaded from a min-3.0-required parent dir, so silence the warning: + cmake_policy(SET CMP0048 NEW) +endif() + +# CMake versions < 3.4.0 do not support try_compile/pthread checks without C as active language. +if(CMAKE_VERSION VERSION_LESS 3.4.0) + project(pybind11) +else() + project(pybind11 CXX) +endif() + +# Check if pybind11 is being used directly or via add_subdirectory +set(PYBIND11_MASTER_PROJECT OFF) +if (CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR) + set(PYBIND11_MASTER_PROJECT ON) +endif() + +option(PYBIND11_INSTALL "Install pybind11 header files?" ${PYBIND11_MASTER_PROJECT}) +option(PYBIND11_TEST "Build pybind11 test suite?" 
${PYBIND11_MASTER_PROJECT}) + +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/tools") + +include(pybind11Tools) + +# Cache variables so pybind11_add_module can be used in parent projects +set(PYBIND11_INCLUDE_DIR "${CMAKE_CURRENT_LIST_DIR}/include" CACHE INTERNAL "") +set(PYTHON_INCLUDE_DIRS ${PYTHON_INCLUDE_DIRS} CACHE INTERNAL "") +set(PYTHON_LIBRARIES ${PYTHON_LIBRARIES} CACHE INTERNAL "") +set(PYTHON_MODULE_PREFIX ${PYTHON_MODULE_PREFIX} CACHE INTERNAL "") +set(PYTHON_MODULE_EXTENSION ${PYTHON_MODULE_EXTENSION} CACHE INTERNAL "") +set(PYTHON_VERSION_MAJOR ${PYTHON_VERSION_MAJOR} CACHE INTERNAL "") +set(PYTHON_VERSION_MINOR ${PYTHON_VERSION_MINOR} CACHE INTERNAL "") + +# NB: when adding a header don't forget to also add it to setup.py +set(PYBIND11_HEADERS + include/pybind11/detail/class.h + include/pybind11/detail/common.h + include/pybind11/detail/descr.h + include/pybind11/detail/init.h + include/pybind11/detail/internals.h + include/pybind11/detail/typeid.h + include/pybind11/attr.h + include/pybind11/buffer_info.h + include/pybind11/cast.h + include/pybind11/chrono.h + include/pybind11/common.h + include/pybind11/complex.h + include/pybind11/options.h + include/pybind11/eigen.h + include/pybind11/embed.h + include/pybind11/eval.h + include/pybind11/functional.h + include/pybind11/numpy.h + include/pybind11/operators.h + include/pybind11/pybind11.h + include/pybind11/pytypes.h + include/pybind11/stl.h + include/pybind11/stl_bind.h +) +string(REPLACE "include/" "${CMAKE_CURRENT_SOURCE_DIR}/include/" + PYBIND11_HEADERS "${PYBIND11_HEADERS}") + +if (PYBIND11_TEST) + add_subdirectory(tests) +endif() + +include(GNUInstallDirs) +include(CMakePackageConfigHelpers) + +# extract project version from source +file(STRINGS "${PYBIND11_INCLUDE_DIR}/pybind11/detail/common.h" pybind11_version_defines + REGEX "#define PYBIND11_VERSION_(MAJOR|MINOR|PATCH) ") +foreach(ver ${pybind11_version_defines}) + if (ver MATCHES "#define PYBIND11_VERSION_(MAJOR|MINOR|PATCH) 
+([^ ]+)$") + set(PYBIND11_VERSION_${CMAKE_MATCH_1} "${CMAKE_MATCH_2}" CACHE INTERNAL "") + endif() +endforeach() +set(${PROJECT_NAME}_VERSION ${PYBIND11_VERSION_MAJOR}.${PYBIND11_VERSION_MINOR}.${PYBIND11_VERSION_PATCH}) +message(STATUS "pybind11 v${${PROJECT_NAME}_VERSION}") + +option (USE_PYTHON_INCLUDE_DIR "Install pybind11 headers in Python include directory instead of default installation prefix" OFF) +if (USE_PYTHON_INCLUDE_DIR) + file(RELATIVE_PATH CMAKE_INSTALL_INCLUDEDIR ${CMAKE_INSTALL_PREFIX} ${PYTHON_INCLUDE_DIRS}) +endif() + +if(NOT (CMAKE_VERSION VERSION_LESS 3.0)) # CMake >= 3.0 + # Build an interface library target: + add_library(pybind11 INTERFACE) + add_library(pybind11::pybind11 ALIAS pybind11) # to match exported target + target_include_directories(pybind11 INTERFACE $ + $ + $) + target_compile_options(pybind11 INTERFACE $) + + add_library(module INTERFACE) + add_library(pybind11::module ALIAS module) + if(NOT MSVC) + target_compile_options(module INTERFACE -fvisibility=hidden) + endif() + target_link_libraries(module INTERFACE pybind11::pybind11) + if(WIN32 OR CYGWIN) + target_link_libraries(module INTERFACE $) + elseif(APPLE) + target_link_libraries(module INTERFACE "-undefined dynamic_lookup") + endif() + + add_library(embed INTERFACE) + add_library(pybind11::embed ALIAS embed) + target_link_libraries(embed INTERFACE pybind11::pybind11 $) +endif() + +if (PYBIND11_INSTALL) + install(DIRECTORY ${PYBIND11_INCLUDE_DIR}/pybind11 DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) + # GNUInstallDirs "DATADIR" wrong here; CMake search path wants "share". 
+ set(PYBIND11_CMAKECONFIG_INSTALL_DIR "share/cmake/${PROJECT_NAME}" CACHE STRING "install path for pybind11Config.cmake") + + configure_package_config_file(tools/${PROJECT_NAME}Config.cmake.in + "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake" + INSTALL_DESTINATION ${PYBIND11_CMAKECONFIG_INSTALL_DIR}) + # Remove CMAKE_SIZEOF_VOID_P from ConfigVersion.cmake since the library does + # not depend on architecture specific settings or libraries. + set(_PYBIND11_CMAKE_SIZEOF_VOID_P ${CMAKE_SIZEOF_VOID_P}) + unset(CMAKE_SIZEOF_VOID_P) + write_basic_package_version_file(${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake + VERSION ${${PROJECT_NAME}_VERSION} + COMPATIBILITY AnyNewerVersion) + set(CMAKE_SIZEOF_VOID_P ${_PYBIND11_CMAKE_SIZEOF_VOID_P}) + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake + ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake + tools/FindPythonLibsNew.cmake + tools/pybind11Tools.cmake + DESTINATION ${PYBIND11_CMAKECONFIG_INSTALL_DIR}) + + if(NOT (CMAKE_VERSION VERSION_LESS 3.0)) + if(NOT PYBIND11_EXPORT_NAME) + set(PYBIND11_EXPORT_NAME "${PROJECT_NAME}Targets") + endif() + + install(TARGETS pybind11 module embed + EXPORT "${PYBIND11_EXPORT_NAME}") + if(PYBIND11_MASTER_PROJECT) + install(EXPORT "${PYBIND11_EXPORT_NAME}" + NAMESPACE "${PROJECT_NAME}::" + DESTINATION ${PYBIND11_CMAKECONFIG_INSTALL_DIR}) + endif() + endif() +endif() + +set_target_properties(${PROJECT_NAME} PROPERTIES FOLDER "libs") \ No newline at end of file diff --git a/thirdparty/pybind11/CONTRIBUTING.md b/thirdparty/pybind11/CONTRIBUTING.md new file mode 100644 index 000000000..01596d94f --- /dev/null +++ b/thirdparty/pybind11/CONTRIBUTING.md @@ -0,0 +1,49 @@ +Thank you for your interest in this project! Please refer to the following +sections on how to contribute code and bug reports. 
+ +### Reporting bugs + +At the moment, this project is run in the spare time of a single person +([Wenzel Jakob](http://rgl.epfl.ch/people/wjakob)) with very limited resources +for issue tracker tickets. Thus, before submitting a question or bug report, +please take a moment of your time and ensure that your issue isn't already +discussed in the project documentation provided at +[http://pybind11.readthedocs.org/en/latest](http://pybind11.readthedocs.org/en/latest). + +Assuming that you have identified a previously unknown problem or an important +question, it's essential that you submit a self-contained and minimal piece of +code that reproduces the problem. In other words: no external dependencies, +isolate the function(s) that cause breakage, submit matched and complete C++ +and Python snippets that can be easily compiled and run on my end. + +## Pull requests +Contributions are submitted, reviewed, and accepted using Github pull requests. +Please refer to [this +article](https://help.github.com/articles/using-pull-requests) for details and +adhere to the following rules to make the process as smooth as possible: + +* Make a new branch for every feature you're working on. +* Make small and clean pull requests that are easy to review but make sure they + do add value by themselves. +* Add tests for any new functionality and run the test suite (``make pytest``) + to ensure that no existing features break. +* Please run ``flake8`` and ``tools/check-style.sh`` to check your code matches + the project style. (Note that ``check-style.sh`` requires ``gawk``.) +* This project has a strong focus on providing general solutions using a + minimal amount of code, thus small pull requests are greatly preferred. + +### Licensing of contributions + +pybind11 is provided under a BSD-style license that can be found in the +``LICENSE`` file. By using, distributing, or contributing to this project, you +agree to the terms and conditions of this license. 
+ +You are under no obligation whatsoever to provide any bug fixes, patches, or +upgrades to the features, functionality or performance of the source code +("Enhancements") to anyone; however, if you choose to make your Enhancements +available either publicly, or directly to the author of this software, without +imposing a separate written license agreement for such Enhancements, then you +hereby grant the following license: a non-exclusive, royalty-free perpetual +license to install, use, modify, prepare derivative works, incorporate into +other computer software, distribute, and sublicense such enhancements or +derivative works thereof, in binary and source code form. diff --git a/thirdparty/pybind11/ISSUE_TEMPLATE.md b/thirdparty/pybind11/ISSUE_TEMPLATE.md new file mode 100644 index 000000000..75df39981 --- /dev/null +++ b/thirdparty/pybind11/ISSUE_TEMPLATE.md @@ -0,0 +1,17 @@ +Make sure you've completed the following steps before submitting your issue -- thank you! + +1. Check if your question has already been answered in the [FAQ](http://pybind11.readthedocs.io/en/latest/faq.html) section. +2. Make sure you've read the [documentation](http://pybind11.readthedocs.io/en/latest/). Your issue may be addressed there. +3. If those resources didn't help and you only have a short question (not a bug report), consider asking in the [Gitter chat room](https://gitter.im/pybind/Lobby). +4. If you have a genuine bug report or a more complex question which is not answered in the previous items (or not suitable for chat), please fill in the details below. +5. Include a self-contained and minimal piece of code that reproduces the problem. If that's not possible, try to make the description as clear as possible. + +*After reading, remove this checklist and the template text in parentheses below.* + +## Issue description + +(Provide a short description, state the expected behavior and what actually happens.) 
+ +## Reproducible example code + +(The code should be minimal, have no external dependencies, isolate the function(s) that cause breakage. Submit matched and complete C++ and Python snippets that can be easily compiled and run to diagnose the issue.) diff --git a/thirdparty/pybind11/LICENSE b/thirdparty/pybind11/LICENSE new file mode 100644 index 000000000..6f15578cc --- /dev/null +++ b/thirdparty/pybind11/LICENSE @@ -0,0 +1,29 @@ +Copyright (c) 2016 Wenzel Jakob , All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +Please also refer to the file CONTRIBUTING.md, which clarifies licensing of +external contributions to this project including patches, pull requests, etc. diff --git a/thirdparty/pybind11/MANIFEST.in b/thirdparty/pybind11/MANIFEST.in new file mode 100644 index 000000000..6e57baeee --- /dev/null +++ b/thirdparty/pybind11/MANIFEST.in @@ -0,0 +1,2 @@ +recursive-include include/pybind11 *.h +include LICENSE README.md CONTRIBUTING.md diff --git a/thirdparty/pybind11/README.md b/thirdparty/pybind11/README.md new file mode 100644 index 000000000..35d2d76ff --- /dev/null +++ b/thirdparty/pybind11/README.md @@ -0,0 +1,129 @@ +![pybind11 logo](https://github.com/pybind/pybind11/raw/master/docs/pybind11-logo.png) + +# pybind11 — Seamless operability between C++11 and Python + +[![Documentation Status](https://readthedocs.org/projects/pybind11/badge/?version=master)](http://pybind11.readthedocs.org/en/master/?badge=master) +[![Documentation Status](https://readthedocs.org/projects/pybind11/badge/?version=stable)](http://pybind11.readthedocs.org/en/stable/?badge=stable) +[![Gitter chat](https://img.shields.io/gitter/room/gitterHQ/gitter.svg)](https://gitter.im/pybind/Lobby) +[![Build Status](https://travis-ci.org/pybind/pybind11.svg?branch=master)](https://travis-ci.org/pybind/pybind11) +[![Build status](https://ci.appveyor.com/api/projects/status/riaj54pn4h08xy40?svg=true)](https://ci.appveyor.com/project/wjakob/pybind11) + +**pybind11** is a lightweight header-only library that exposes C++ types in Python +and vice versa, mainly to create Python bindings of existing C++ code. Its +goals and syntax are similar to the excellent +[Boost.Python](http://www.boost.org/doc/libs/1_58_0/libs/python/doc/) library +by David Abrahams: to minimize boilerplate code in traditional extension +modules by inferring type information using compile-time introspection. + +The main issue with Boost.Python—and the reason for creating such a similar +project—is Boost. 
Boost is an enormously large and complex suite of utility +libraries that works with almost every C++ compiler in existence. This +compatibility has its cost: arcane template tricks and workarounds are +necessary to support the oldest and buggiest of compiler specimens. Now that +C++11-compatible compilers are widely available, this heavy machinery has +become an excessively large and unnecessary dependency. + +Think of this library as a tiny self-contained version of Boost.Python with +everything stripped away that isn't relevant for binding generation. Without +comments, the core header files only require ~4K lines of code and depend on +Python (2.7 or 3.x, or PyPy2.7 >= 5.7) and the C++ standard library. This +compact implementation was possible thanks to some of the new C++11 language +features (specifically: tuples, lambda functions and variadic templates). Since +its creation, this library has grown beyond Boost.Python in many ways, leading +to dramatically simpler binding code in many common situations. + +Tutorial and reference documentation is provided at +[http://pybind11.readthedocs.org/en/master](http://pybind11.readthedocs.org/en/master). +A PDF version of the manual is available +[here](https://media.readthedocs.org/pdf/pybind11/master/pybind11.pdf). 
+ +## Core features +pybind11 can map the following core C++ features to Python + +- Functions accepting and returning custom data structures per value, reference, or pointer +- Instance methods and static methods +- Overloaded functions +- Instance attributes and static attributes +- Arbitrary exception types +- Enumerations +- Callbacks +- Iterators and ranges +- Custom operators +- Single and multiple inheritance +- STL data structures +- Smart pointers with reference counting like ``std::shared_ptr`` +- Internal references with correct reference counting +- C++ classes with virtual (and pure virtual) methods can be extended in Python + +## Goodies +In addition to the core functionality, pybind11 provides some extra goodies: + +- Python 2.7, 3.x, and PyPy (PyPy2.7 >= 5.7) are supported with an + implementation-agnostic interface. + +- It is possible to bind C++11 lambda functions with captured variables. The + lambda capture data is stored inside the resulting Python function object. + +- pybind11 uses C++11 move constructors and move assignment operators whenever + possible to efficiently transfer custom data types. + +- It's easy to expose the internal storage of custom data types through + Pythons' buffer protocols. This is handy e.g. for fast conversion between + C++ matrix classes like Eigen and NumPy without expensive copy operations. + +- pybind11 can automatically vectorize functions so that they are transparently + applied to all entries of one or more NumPy array arguments. + +- Python's slice-based access and assignment operations can be supported with + just a few lines of code. + +- Everything is contained in just a few header files; there is no need to link + against any additional libraries. + +- Binaries are generally smaller by a factor of at least 2 compared to + equivalent bindings generated by Boost.Python. 
A recent pybind11 conversion + of PyRosetta, an enormous Boost.Python binding project, + [reported](http://graylab.jhu.edu/RosettaCon2016/PyRosetta-4.pdf) a binary + size reduction of **5.4x** and compile time reduction by **5.8x**. + +- Function signatures are precomputed at compile time (using ``constexpr``), + leading to smaller binaries. + +- With little extra effort, C++ types can be pickled and unpickled similar to + regular Python objects. + +## Supported compilers + +1. Clang/LLVM 3.3 or newer (for Apple Xcode's clang, this is 5.0.0 or newer) +2. GCC 4.8 or newer +3. Microsoft Visual Studio 2015 Update 3 or newer +4. Intel C++ compiler 17 or newer (16 with pybind11 v2.0 and 15 with pybind11 v2.0 and a [workaround](https://github.com/pybind/pybind11/issues/276)) +5. Cygwin/GCC (tested on 2.5.1) + +## About + +This project was created by [Wenzel Jakob](http://rgl.epfl.ch/people/wjakob). +Significant features and/or improvements to the code were contributed by +Jonas Adler, +Lori A. Burns, +Sylvain Corlay, +Trent Houliston, +Axel Huebl, +@hulucc, +Sergey Lyskov +Johan Mabille, +Tomasz Miąsko, +Dean Moldovan, +Ben Pritchard, +Jason Rhinelander, +Boris Schäling, +Pim Schellart, +Henry Schreiner, +Ivan Smirnov, and +Patrick Stewart. + +### License + +pybind11 is provided under a BSD-style license that can be found in the +``LICENSE`` file. By using, distributing, or contributing to this project, +you agree to the terms and conditions of this license. diff --git a/thirdparty/pybind11/include/pybind11/attr.h b/thirdparty/pybind11/include/pybind11/attr.h new file mode 100644 index 000000000..6962d6fc5 --- /dev/null +++ b/thirdparty/pybind11/include/pybind11/attr.h @@ -0,0 +1,493 @@ +/* + pybind11/attr.h: Infrastructure for processing custom + type and function attributes + + Copyright (c) 2016 Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. 
+*/ + +#pragma once + +#include "cast.h" + +NAMESPACE_BEGIN(PYBIND11_NAMESPACE) + +/// \addtogroup annotations +/// @{ + +/// Annotation for methods +struct is_method { handle class_; is_method(const handle &c) : class_(c) { } }; + +/// Annotation for operators +struct is_operator { }; + +/// Annotation for parent scope +struct scope { handle value; scope(const handle &s) : value(s) { } }; + +/// Annotation for documentation +struct doc { const char *value; doc(const char *value) : value(value) { } }; + +/// Annotation for function names +struct name { const char *value; name(const char *value) : value(value) { } }; + +/// Annotation indicating that a function is an overload associated with a given "sibling" +struct sibling { handle value; sibling(const handle &value) : value(value.ptr()) { } }; + +/// Annotation indicating that a class derives from another given type +template struct base { + PYBIND11_DEPRECATED("base() was deprecated in favor of specifying 'T' as a template argument to class_") + base() { } +}; + +/// Keep patient alive while nurse lives +template struct keep_alive { }; + +/// Annotation indicating that a class is involved in a multiple inheritance relationship +struct multiple_inheritance { }; + +/// Annotation which enables dynamic attributes, i.e. adds `__dict__` to a class +struct dynamic_attr { }; + +/// Annotation which enables the buffer protocol for a type +struct buffer_protocol { }; + +/// Annotation which requests that a special metaclass is created for a type +struct metaclass { + handle value; + + PYBIND11_DEPRECATED("py::metaclass() is no longer required. 
It's turned on by default now.") + metaclass() {} + + /// Override pybind11's default metaclass + explicit metaclass(handle value) : value(value) { } +}; + +/// Annotation that marks a class as local to the module: +struct module_local { const bool value; constexpr module_local(bool v = true) : value(v) { } }; + +/// Annotation to mark enums as an arithmetic type +struct arithmetic { }; + +/** \rst + A call policy which places one or more guard variables (``Ts...``) around the function call. + + For example, this definition: + + .. code-block:: cpp + + m.def("foo", foo, py::call_guard()); + + is equivalent to the following pseudocode: + + .. code-block:: cpp + + m.def("foo", [](args...) { + T scope_guard; + return foo(args...); // forwarded arguments + }); + \endrst */ +template struct call_guard; + +template <> struct call_guard<> { using type = detail::void_type; }; + +template +struct call_guard { + static_assert(std::is_default_constructible::value, + "The guard type must be default constructible"); + + using type = T; +}; + +template +struct call_guard { + struct type { + T guard{}; // Compose multiple guard types with left-to-right default-constructor order + typename call_guard::type next{}; + }; +}; + +/// @} annotations + +NAMESPACE_BEGIN(detail) +/* Forward declarations */ +enum op_id : int; +enum op_type : int; +struct undefined_t; +template struct op_; +inline void keep_alive_impl(size_t Nurse, size_t Patient, function_call &call, handle ret); + +/// Internal data structure which holds metadata about a keyword argument +struct argument_record { + const char *name; ///< Argument name + const char *descr; ///< Human-readable version of the argument value + handle value; ///< Associated Python object + bool convert : 1; ///< True if the argument is allowed to convert when loading + bool none : 1; ///< True if None is allowed when loading + + argument_record(const char *name, const char *descr, handle value, bool convert, bool none) + : name(name), 
descr(descr), value(value), convert(convert), none(none) { } +}; + +/// Internal data structure which holds metadata about a bound function (signature, overloads, etc.) +struct function_record { + function_record() + : is_constructor(false), is_new_style_constructor(false), is_stateless(false), + is_operator(false), has_args(false), has_kwargs(false), is_method(false) { } + + /// Function name + char *name = nullptr; /* why no C++ strings? They generate heavier code.. */ + + // User-specified documentation string + char *doc = nullptr; + + /// Human-readable version of the function signature + char *signature = nullptr; + + /// List of registered keyword arguments + std::vector args; + + /// Pointer to lambda function which converts arguments and performs the actual call + handle (*impl) (function_call &) = nullptr; + + /// Storage for the wrapped function pointer and captured data, if any + void *data[3] = { }; + + /// Pointer to custom destructor for 'data' (if needed) + void (*free_data) (function_record *ptr) = nullptr; + + /// Return value policy associated with this function + return_value_policy policy = return_value_policy::automatic; + + /// True if name == '__init__' + bool is_constructor : 1; + + /// True if this is a new-style `__init__` defined in `detail/init.h` + bool is_new_style_constructor : 1; + + /// True if this is a stateless function pointer + bool is_stateless : 1; + + /// True if this is an operator (__add__), etc. 
+ bool is_operator : 1; + + /// True if the function has a '*args' argument + bool has_args : 1; + + /// True if the function has a '**kwargs' argument + bool has_kwargs : 1; + + /// True if this is a method + bool is_method : 1; + + /// Number of arguments (including py::args and/or py::kwargs, if present) + std::uint16_t nargs; + + /// Python method object + PyMethodDef *def = nullptr; + + /// Python handle to the parent scope (a class or a module) + handle scope; + + /// Python handle to the sibling function representing an overload chain + handle sibling; + + /// Pointer to next overload + function_record *next = nullptr; +}; + +/// Special data structure which (temporarily) holds metadata about a bound class +struct type_record { + PYBIND11_NOINLINE type_record() + : multiple_inheritance(false), dynamic_attr(false), buffer_protocol(false), + default_holder(true), module_local(false) { } + + /// Handle to the parent scope + handle scope; + + /// Name of the class + const char *name = nullptr; + + // Pointer to RTTI type_info data structure + const std::type_info *type = nullptr; + + /// How large is the underlying C++ type? + size_t type_size = 0; + + /// What is the alignment of the underlying C++ type? + size_t type_align = 0; + + /// How large is the type's holder? + size_t holder_size = 0; + + /// The global operator new can be overridden with a class-specific variant + void *(*operator_new)(size_t) = nullptr; + + /// Function pointer to class_<..>::init_instance + void (*init_instance)(instance *, const void *) = nullptr; + + /// Function pointer to class_<..>::dealloc + void (*dealloc)(detail::value_and_holder &) = nullptr; + + /// List of base classes of the newly created type + list bases; + + /// Optional docstring + const char *doc = nullptr; + + /// Custom metaclass (optional) + handle metaclass; + + /// Multiple inheritance marker + bool multiple_inheritance : 1; + + /// Does the class manage a __dict__? 
+ bool dynamic_attr : 1; + + /// Does the class implement the buffer protocol? + bool buffer_protocol : 1; + + /// Is the default (unique_ptr) holder type used? + bool default_holder : 1; + + /// Is the class definition local to the module shared object? + bool module_local : 1; + + PYBIND11_NOINLINE void add_base(const std::type_info &base, void *(*caster)(void *)) { + auto base_info = detail::get_type_info(base, false); + if (!base_info) { + std::string tname(base.name()); + detail::clean_type_id(tname); + pybind11_fail("generic_type: type \"" + std::string(name) + + "\" referenced unknown base type \"" + tname + "\""); + } + + if (default_holder != base_info->default_holder) { + std::string tname(base.name()); + detail::clean_type_id(tname); + pybind11_fail("generic_type: type \"" + std::string(name) + "\" " + + (default_holder ? "does not have" : "has") + + " a non-default holder type while its base \"" + tname + "\" " + + (base_info->default_holder ? "does not" : "does")); + } + + bases.append((PyObject *) base_info->type); + + if (base_info->type->tp_dictoffset != 0) + dynamic_attr = true; + + if (caster) + base_info->implicit_casts.emplace_back(type, caster); + } +}; + +inline function_call::function_call(const function_record &f, handle p) : + func(f), parent(p) { + args.reserve(f.nargs); + args_convert.reserve(f.nargs); +} + +/// Tag for a new-style `__init__` defined in `detail/init.h` +struct is_new_style_constructor { }; + +/** + * Partial template specializations to process custom attributes provided to + * cpp_function_ and class_. These are either used to initialize the respective + * fields in the type_record and function_record data structures or executed at + * runtime to deal with custom call policies (e.g. keep_alive). 
+ */ +template struct process_attribute; + +template struct process_attribute_default { + /// Default implementation: do nothing + static void init(const T &, function_record *) { } + static void init(const T &, type_record *) { } + static void precall(function_call &) { } + static void postcall(function_call &, handle) { } +}; + +/// Process an attribute specifying the function's name +template <> struct process_attribute : process_attribute_default { + static void init(const name &n, function_record *r) { r->name = const_cast(n.value); } +}; + +/// Process an attribute specifying the function's docstring +template <> struct process_attribute : process_attribute_default { + static void init(const doc &n, function_record *r) { r->doc = const_cast(n.value); } +}; + +/// Process an attribute specifying the function's docstring (provided as a C-style string) +template <> struct process_attribute : process_attribute_default { + static void init(const char *d, function_record *r) { r->doc = const_cast(d); } + static void init(const char *d, type_record *r) { r->doc = const_cast(d); } +}; +template <> struct process_attribute : process_attribute { }; + +/// Process an attribute indicating the function's return value policy +template <> struct process_attribute : process_attribute_default { + static void init(const return_value_policy &p, function_record *r) { r->policy = p; } +}; + +/// Process an attribute which indicates that this is an overloaded function associated with a given sibling +template <> struct process_attribute : process_attribute_default { + static void init(const sibling &s, function_record *r) { r->sibling = s.value; } +}; + +/// Process an attribute which indicates that this function is a method +template <> struct process_attribute : process_attribute_default { + static void init(const is_method &s, function_record *r) { r->is_method = true; r->scope = s.class_; } +}; + +/// Process an attribute which indicates the parent scope of a method +template 
<> struct process_attribute : process_attribute_default { + static void init(const scope &s, function_record *r) { r->scope = s.value; } +}; + +/// Process an attribute which indicates that this function is an operator +template <> struct process_attribute : process_attribute_default { + static void init(const is_operator &, function_record *r) { r->is_operator = true; } +}; + +template <> struct process_attribute : process_attribute_default { + static void init(const is_new_style_constructor &, function_record *r) { r->is_new_style_constructor = true; } +}; + +/// Process a keyword argument attribute (*without* a default value) +template <> struct process_attribute : process_attribute_default { + static void init(const arg &a, function_record *r) { + if (r->is_method && r->args.empty()) + r->args.emplace_back("self", nullptr, handle(), true /*convert*/, false /*none not allowed*/); + r->args.emplace_back(a.name, nullptr, handle(), !a.flag_noconvert, a.flag_none); + } +}; + +/// Process a keyword argument attribute (*with* a default value) +template <> struct process_attribute : process_attribute_default { + static void init(const arg_v &a, function_record *r) { + if (r->is_method && r->args.empty()) + r->args.emplace_back("self", nullptr /*descr*/, handle() /*parent*/, true /*convert*/, false /*none not allowed*/); + + if (!a.value) { +#if !defined(NDEBUG) + std::string descr("'"); + if (a.name) descr += std::string(a.name) + ": "; + descr += a.type + "'"; + if (r->is_method) { + if (r->name) + descr += " in method '" + (std::string) str(r->scope) + "." 
+ (std::string) r->name + "'"; + else + descr += " in method of '" + (std::string) str(r->scope) + "'"; + } else if (r->name) { + descr += " in function '" + (std::string) r->name + "'"; + } + pybind11_fail("arg(): could not convert default argument " + + descr + " into a Python object (type not registered yet?)"); +#else + pybind11_fail("arg(): could not convert default argument " + "into a Python object (type not registered yet?). " + "Compile in debug mode for more information."); +#endif + } + r->args.emplace_back(a.name, a.descr, a.value.inc_ref(), !a.flag_noconvert, a.flag_none); + } +}; + +/// Process a parent class attribute. Single inheritance only (class_ itself already guarantees that) +template +struct process_attribute::value>> : process_attribute_default { + static void init(const handle &h, type_record *r) { r->bases.append(h); } +}; + +/// Process a parent class attribute (deprecated, does not support multiple inheritance) +template +struct process_attribute> : process_attribute_default> { + static void init(const base &, type_record *r) { r->add_base(typeid(T), nullptr); } +}; + +/// Process a multiple inheritance attribute +template <> +struct process_attribute : process_attribute_default { + static void init(const multiple_inheritance &, type_record *r) { r->multiple_inheritance = true; } +}; + +template <> +struct process_attribute : process_attribute_default { + static void init(const dynamic_attr &, type_record *r) { r->dynamic_attr = true; } +}; + +template <> +struct process_attribute : process_attribute_default { + static void init(const buffer_protocol &, type_record *r) { r->buffer_protocol = true; } +}; + +template <> +struct process_attribute : process_attribute_default { + static void init(const metaclass &m, type_record *r) { r->metaclass = m.value; } +}; + +template <> +struct process_attribute : process_attribute_default { + static void init(const module_local &l, type_record *r) { r->module_local = l.value; } +}; + +/// Process an 
'arithmetic' attribute for enums (does nothing here) +template <> +struct process_attribute : process_attribute_default {}; + +template +struct process_attribute> : process_attribute_default> { }; + +/** + * Process a keep_alive call policy -- invokes keep_alive_impl during the + * pre-call handler if both Nurse, Patient != 0 and use the post-call handler + * otherwise + */ +template struct process_attribute> : public process_attribute_default> { + template = 0> + static void precall(function_call &call) { keep_alive_impl(Nurse, Patient, call, handle()); } + template = 0> + static void postcall(function_call &, handle) { } + template = 0> + static void precall(function_call &) { } + template = 0> + static void postcall(function_call &call, handle ret) { keep_alive_impl(Nurse, Patient, call, ret); } +}; + +/// Recursively iterate over variadic template arguments +template struct process_attributes { + static void init(const Args&... args, function_record *r) { + int unused[] = { 0, (process_attribute::type>::init(args, r), 0) ... }; + ignore_unused(unused); + } + static void init(const Args&... args, type_record *r) { + int unused[] = { 0, (process_attribute::type>::init(args, r), 0) ... }; + ignore_unused(unused); + } + static void precall(function_call &call) { + int unused[] = { 0, (process_attribute::type>::precall(call), 0) ... }; + ignore_unused(unused); + } + static void postcall(function_call &call, handle fn_ret) { + int unused[] = { 0, (process_attribute::type>::postcall(call, fn_ret), 0) ... 
}; + ignore_unused(unused); + } +}; + +template +using is_call_guard = is_instantiation; + +/// Extract the ``type`` from the first `call_guard` in `Extras...` (or `void_type` if none found) +template +using extract_guard_t = typename exactly_one_t, Extra...>::type; + +/// Check the number of named arguments at compile time +template ::value...), + size_t self = constexpr_sum(std::is_same::value...)> +constexpr bool expected_num_args(size_t nargs, bool has_args, bool has_kwargs) { + return named == 0 || (self + named + has_args + has_kwargs) == nargs; +} + +NAMESPACE_END(detail) +NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/thirdparty/pybind11/include/pybind11/buffer_info.h b/thirdparty/pybind11/include/pybind11/buffer_info.h new file mode 100644 index 000000000..1f4115a1f --- /dev/null +++ b/thirdparty/pybind11/include/pybind11/buffer_info.h @@ -0,0 +1,114 @@ +/* + pybind11/buffer_info.h: Python buffer object interface + + Copyright (c) 2016 Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. 
+*/ + +#pragma once + +#include "detail/common.h" + +NAMESPACE_BEGIN(PYBIND11_NAMESPACE) + +/// Information record describing a Python buffer object +struct buffer_info { + void *ptr = nullptr; // Pointer to the underlying storage + ssize_t itemsize = 0; // Size of individual items in bytes + ssize_t size = 0; // Total number of entries + std::string format; // For homogeneous buffers, this should be set to format_descriptor::format() + ssize_t ndim = 0; // Number of dimensions + std::vector shape; // Shape of the tensor (1 entry per dimension) + std::vector strides; // Number of bytes between adjacent entries (for each per dimension) + bool readonly = false; // flag to indicate if the underlying storage may be written to + + buffer_info() { } + + buffer_info(void *ptr, ssize_t itemsize, const std::string &format, ssize_t ndim, + detail::any_container shape_in, detail::any_container strides_in, bool readonly=false) + : ptr(ptr), itemsize(itemsize), size(1), format(format), ndim(ndim), + shape(std::move(shape_in)), strides(std::move(strides_in)), readonly(readonly) { + if (ndim != (ssize_t) shape.size() || ndim != (ssize_t) strides.size()) + pybind11_fail("buffer_info: ndim doesn't match shape and/or strides length"); + for (size_t i = 0; i < (size_t) ndim; ++i) + size *= shape[i]; + } + + template + buffer_info(T *ptr, detail::any_container shape_in, detail::any_container strides_in, bool readonly=false) + : buffer_info(private_ctr_tag(), ptr, sizeof(T), format_descriptor::format(), static_cast(shape_in->size()), std::move(shape_in), std::move(strides_in), readonly) { } + + buffer_info(void *ptr, ssize_t itemsize, const std::string &format, ssize_t size, bool readonly=false) + : buffer_info(ptr, itemsize, format, 1, {size}, {itemsize}, readonly) { } + + template + buffer_info(T *ptr, ssize_t size, bool readonly=false) + : buffer_info(ptr, sizeof(T), format_descriptor::format(), size, readonly) { } + + template + buffer_info(const T *ptr, ssize_t size, bool 
readonly=true) + : buffer_info(const_cast(ptr), sizeof(T), format_descriptor::format(), size, readonly) { } + + explicit buffer_info(Py_buffer *view, bool ownview = true) + : buffer_info(view->buf, view->itemsize, view->format, view->ndim, + {view->shape, view->shape + view->ndim}, {view->strides, view->strides + view->ndim}, view->readonly) { + this->view = view; + this->ownview = ownview; + } + + buffer_info(const buffer_info &) = delete; + buffer_info& operator=(const buffer_info &) = delete; + + buffer_info(buffer_info &&other) { + (*this) = std::move(other); + } + + buffer_info& operator=(buffer_info &&rhs) { + ptr = rhs.ptr; + itemsize = rhs.itemsize; + size = rhs.size; + format = std::move(rhs.format); + ndim = rhs.ndim; + shape = std::move(rhs.shape); + strides = std::move(rhs.strides); + std::swap(view, rhs.view); + std::swap(ownview, rhs.ownview); + readonly = rhs.readonly; + return *this; + } + + ~buffer_info() { + if (view && ownview) { PyBuffer_Release(view); delete view; } + } + +private: + struct private_ctr_tag { }; + + buffer_info(private_ctr_tag, void *ptr, ssize_t itemsize, const std::string &format, ssize_t ndim, + detail::any_container &&shape_in, detail::any_container &&strides_in, bool readonly) + : buffer_info(ptr, itemsize, format, ndim, std::move(shape_in), std::move(strides_in), readonly) { } + + Py_buffer *view = nullptr; + bool ownview = false; +}; + +NAMESPACE_BEGIN(detail) + +template struct compare_buffer_info { + static bool compare(const buffer_info& b) { + return b.format == format_descriptor::format() && b.itemsize == (ssize_t) sizeof(T); + } +}; + +template struct compare_buffer_info::value>> { + static bool compare(const buffer_info& b) { + return (size_t) b.itemsize == sizeof(T) && (b.format == format_descriptor::value || + ((sizeof(T) == sizeof(long)) && b.format == (std::is_unsigned::value ? "L" : "l")) || + ((sizeof(T) == sizeof(size_t)) && b.format == (std::is_unsigned::value ? 
"N" : "n"))); + } +}; + +NAMESPACE_END(detail) +NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/thirdparty/pybind11/include/pybind11/cast.h b/thirdparty/pybind11/include/pybind11/cast.h new file mode 100644 index 000000000..a0b4d1ba9 --- /dev/null +++ b/thirdparty/pybind11/include/pybind11/cast.h @@ -0,0 +1,2179 @@ +/* + pybind11/cast.h: Partial template specializations to cast between + C++ and Python types + + Copyright (c) 2016 Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. +*/ + +#pragma once + +#include "pytypes.h" +#include "detail/typeid.h" +#include "detail/descr.h" +#include "detail/internals.h" +#include +#include +#include +#include + +#if defined(PYBIND11_CPP17) +# if defined(__has_include) +# if __has_include() +# define PYBIND11_HAS_STRING_VIEW +# endif +# elif defined(_MSC_VER) +# define PYBIND11_HAS_STRING_VIEW +# endif +#endif +#ifdef PYBIND11_HAS_STRING_VIEW +#include +#endif + +#if defined(__cpp_lib_char8_t) && __cpp_lib_char8_t >= 201811L +# define PYBIND11_HAS_U8STRING +#endif + +NAMESPACE_BEGIN(PYBIND11_NAMESPACE) +NAMESPACE_BEGIN(detail) + +/// A life support system for temporary objects created by `type_caster::load()`. +/// Adding a patient will keep it alive up until the enclosing function returns. +class loader_life_support { +public: + /// A new patient frame is created when a function is entered + loader_life_support() { + get_internals().loader_patient_stack.push_back(nullptr); + } + + /// ... and destroyed after it returns + ~loader_life_support() { + auto &stack = get_internals().loader_patient_stack; + if (stack.empty()) + pybind11_fail("loader_life_support: internal error"); + + auto ptr = stack.back(); + stack.pop_back(); + Py_CLEAR(ptr); + + // A heuristic to reduce the stack's capacity (e.g. 
after long recursive calls) + if (stack.capacity() > 16 && stack.size() != 0 && stack.capacity() / stack.size() > 2) + stack.shrink_to_fit(); + } + + /// This can only be used inside a pybind11-bound function, either by `argument_loader` + /// at argument preparation time or by `py::cast()` at execution time. + PYBIND11_NOINLINE static void add_patient(handle h) { + auto &stack = get_internals().loader_patient_stack; + if (stack.empty()) + throw cast_error("When called outside a bound function, py::cast() cannot " + "do Python -> C++ conversions which require the creation " + "of temporary values"); + + auto &list_ptr = stack.back(); + if (list_ptr == nullptr) { + list_ptr = PyList_New(1); + if (!list_ptr) + pybind11_fail("loader_life_support: error allocating list"); + PyList_SET_ITEM(list_ptr, 0, h.inc_ref().ptr()); + } else { + auto result = PyList_Append(list_ptr, h.ptr()); + if (result == -1) + pybind11_fail("loader_life_support: error adding patient"); + } + } +}; + +// Gets the cache entry for the given type, creating it if necessary. The return value is the pair +// returned by emplace, i.e. an iterator for the entry and a bool set to `true` if the entry was +// just created. +inline std::pair all_type_info_get_cache(PyTypeObject *type); + +// Populates a just-created cache entry. 
+PYBIND11_NOINLINE inline void all_type_info_populate(PyTypeObject *t, std::vector &bases) { + std::vector check; + for (handle parent : reinterpret_borrow(t->tp_bases)) + check.push_back((PyTypeObject *) parent.ptr()); + + auto const &type_dict = get_internals().registered_types_py; + for (size_t i = 0; i < check.size(); i++) { + auto type = check[i]; + // Ignore Python2 old-style class super type: + if (!PyType_Check((PyObject *) type)) continue; + + // Check `type` in the current set of registered python types: + auto it = type_dict.find(type); + if (it != type_dict.end()) { + // We found a cache entry for it, so it's either pybind-registered or has pre-computed + // pybind bases, but we have to make sure we haven't already seen the type(s) before: we + // want to follow Python/virtual C++ rules that there should only be one instance of a + // common base. + for (auto *tinfo : it->second) { + // NB: Could use a second set here, rather than doing a linear search, but since + // having a large number of immediate pybind11-registered types seems fairly + // unlikely, that probably isn't worthwhile. + bool found = false; + for (auto *known : bases) { + if (known == tinfo) { found = true; break; } + } + if (!found) bases.push_back(tinfo); + } + } + else if (type->tp_bases) { + // It's some python type, so keep follow its bases classes to look for one or more + // registered types + if (i + 1 == check.size()) { + // When we're at the end, we can pop off the current element to avoid growing + // `check` when adding just one base (which is typical--i.e. when there is no + // multiple inheritance) + check.pop_back(); + i--; + } + for (handle parent : reinterpret_borrow(type->tp_bases)) + check.push_back((PyTypeObject *) parent.ptr()); + } + } +} + +/** + * Extracts vector of type_info pointers of pybind-registered roots of the given Python type. 
Will + * be just 1 pybind type for the Python type of a pybind-registered class, or for any Python-side + * derived class that uses single inheritance. Will contain as many types as required for a Python + * class that uses multiple inheritance to inherit (directly or indirectly) from multiple + * pybind-registered classes. Will be empty if neither the type nor any base classes are + * pybind-registered. + * + * The value is cached for the lifetime of the Python type. + */ +inline const std::vector &all_type_info(PyTypeObject *type) { + auto ins = all_type_info_get_cache(type); + if (ins.second) + // New cache entry: populate it + all_type_info_populate(type, ins.first->second); + + return ins.first->second; +} + +/** + * Gets a single pybind11 type info for a python type. Returns nullptr if neither the type nor any + * ancestors are pybind11-registered. Throws an exception if there are multiple bases--use + * `all_type_info` instead if you want to support multiple bases. + */ +PYBIND11_NOINLINE inline detail::type_info* get_type_info(PyTypeObject *type) { + auto &bases = all_type_info(type); + if (bases.size() == 0) + return nullptr; + if (bases.size() > 1) + pybind11_fail("pybind11::detail::get_type_info: type has multiple pybind11-registered bases"); + return bases.front(); +} + +inline detail::type_info *get_local_type_info(const std::type_index &tp) { + auto &locals = registered_local_types_cpp(); + auto it = locals.find(tp); + if (it != locals.end()) + return it->second; + return nullptr; +} + +inline detail::type_info *get_global_type_info(const std::type_index &tp) { + auto &types = get_internals().registered_types_cpp; + auto it = types.find(tp); + if (it != types.end()) + return it->second; + return nullptr; +} + +/// Return the type info for a given C++ type; on lookup failure can either throw or return nullptr. 
+PYBIND11_NOINLINE inline detail::type_info *get_type_info(const std::type_index &tp, + bool throw_if_missing = false) { + if (auto ltype = get_local_type_info(tp)) + return ltype; + if (auto gtype = get_global_type_info(tp)) + return gtype; + + if (throw_if_missing) { + std::string tname = tp.name(); + detail::clean_type_id(tname); + pybind11_fail("pybind11::detail::get_type_info: unable to find type info for \"" + tname + "\""); + } + return nullptr; +} + +PYBIND11_NOINLINE inline handle get_type_handle(const std::type_info &tp, bool throw_if_missing) { + detail::type_info *type_info = get_type_info(tp, throw_if_missing); + return handle(type_info ? ((PyObject *) type_info->type) : nullptr); +} + +struct value_and_holder { + instance *inst = nullptr; + size_t index = 0u; + const detail::type_info *type = nullptr; + void **vh = nullptr; + + // Main constructor for a found value/holder: + value_and_holder(instance *i, const detail::type_info *type, size_t vpos, size_t index) : + inst{i}, index{index}, type{type}, + vh{inst->simple_layout ? inst->simple_value_holder : &inst->nonsimple.values_and_holders[vpos]} + {} + + // Default constructor (used to signal a value-and-holder not found by get_value_and_holder()) + value_and_holder() {} + + // Used for past-the-end iterator + value_and_holder(size_t index) : index{index} {} + + template V *&value_ptr() const { + return reinterpret_cast(vh[0]); + } + // True if this `value_and_holder` has a non-null value pointer + explicit operator bool() const { return value_ptr(); } + + template H &holder() const { + return reinterpret_cast(vh[1]); + } + bool holder_constructed() const { + return inst->simple_layout + ? 
inst->simple_holder_constructed + : inst->nonsimple.status[index] & instance::status_holder_constructed; + } + void set_holder_constructed(bool v = true) { + if (inst->simple_layout) + inst->simple_holder_constructed = v; + else if (v) + inst->nonsimple.status[index] |= instance::status_holder_constructed; + else + inst->nonsimple.status[index] &= (uint8_t) ~instance::status_holder_constructed; + } + bool instance_registered() const { + return inst->simple_layout + ? inst->simple_instance_registered + : inst->nonsimple.status[index] & instance::status_instance_registered; + } + void set_instance_registered(bool v = true) { + if (inst->simple_layout) + inst->simple_instance_registered = v; + else if (v) + inst->nonsimple.status[index] |= instance::status_instance_registered; + else + inst->nonsimple.status[index] &= (uint8_t) ~instance::status_instance_registered; + } +}; + +// Container for accessing and iterating over an instance's values/holders +struct values_and_holders { +private: + instance *inst; + using type_vec = std::vector; + const type_vec &tinfo; + +public: + values_and_holders(instance *inst) : inst{inst}, tinfo(all_type_info(Py_TYPE(inst))) {} + + struct iterator { + private: + instance *inst = nullptr; + const type_vec *types = nullptr; + value_and_holder curr; + friend struct values_and_holders; + iterator(instance *inst, const type_vec *tinfo) + : inst{inst}, types{tinfo}, + curr(inst /* instance */, + types->empty() ? 
nullptr : (*types)[0] /* type info */, + 0, /* vpos: (non-simple types only): the first vptr comes first */ + 0 /* index */) + {} + // Past-the-end iterator: + iterator(size_t end) : curr(end) {} + public: + bool operator==(const iterator &other) { return curr.index == other.curr.index; } + bool operator!=(const iterator &other) { return curr.index != other.curr.index; } + iterator &operator++() { + if (!inst->simple_layout) + curr.vh += 1 + (*types)[curr.index]->holder_size_in_ptrs; + ++curr.index; + curr.type = curr.index < types->size() ? (*types)[curr.index] : nullptr; + return *this; + } + value_and_holder &operator*() { return curr; } + value_and_holder *operator->() { return &curr; } + }; + + iterator begin() { return iterator(inst, &tinfo); } + iterator end() { return iterator(tinfo.size()); } + + iterator find(const type_info *find_type) { + auto it = begin(), endit = end(); + while (it != endit && it->type != find_type) ++it; + return it; + } + + size_t size() { return tinfo.size(); } +}; + +/** + * Extracts C++ value and holder pointer references from an instance (which may contain multiple + * values/holders for python-side multiple inheritance) that match the given type. Throws an error + * if the given type (or ValueType, if omitted) is not a pybind11 base of the given instance. If + * `find_type` is omitted (or explicitly specified as nullptr) the first value/holder are returned, + * regardless of type (and the resulting .type will be nullptr). + * + * The returned object should be short-lived: in particular, it must not outlive the called-upon + * instance. 
+ */ +PYBIND11_NOINLINE inline value_and_holder instance::get_value_and_holder(const type_info *find_type /*= nullptr default in common.h*/, bool throw_if_missing /*= true in common.h*/) { + // Optimize common case: + if (!find_type || Py_TYPE(this) == find_type->type) + return value_and_holder(this, find_type, 0, 0); + + detail::values_and_holders vhs(this); + auto it = vhs.find(find_type); + if (it != vhs.end()) + return *it; + + if (!throw_if_missing) + return value_and_holder(); + +#if defined(NDEBUG) + pybind11_fail("pybind11::detail::instance::get_value_and_holder: " + "type is not a pybind11 base of the given instance " + "(compile in debug mode for type details)"); +#else + pybind11_fail("pybind11::detail::instance::get_value_and_holder: `" + + std::string(find_type->type->tp_name) + "' is not a pybind11 base of the given `" + + std::string(Py_TYPE(this)->tp_name) + "' instance"); +#endif +} + +PYBIND11_NOINLINE inline void instance::allocate_layout() { + auto &tinfo = all_type_info(Py_TYPE(this)); + + const size_t n_types = tinfo.size(); + + if (n_types == 0) + pybind11_fail("instance allocation failed: new instance has no pybind11-registered base types"); + + simple_layout = + n_types == 1 && tinfo.front()->holder_size_in_ptrs <= instance_simple_holder_in_ptrs(); + + // Simple path: no python-side multiple inheritance, and a small-enough holder + if (simple_layout) { + simple_value_holder[0] = nullptr; + simple_holder_constructed = false; + simple_instance_registered = false; + } + else { // multiple base types or a too-large holder + // Allocate space to hold: [v1*][h1][v2*][h2]...[bb...] where [vN*] is a value pointer, + // [hN] is the (uninitialized) holder instance for value N, and [bb...] is a set of bool + // values that tracks whether each associated holder has been initialized. Each [block] is + // padded, if necessary, to an integer multiple of sizeof(void *). 
+ size_t space = 0; + for (auto t : tinfo) { + space += 1; // value pointer + space += t->holder_size_in_ptrs; // holder instance + } + size_t flags_at = space; + space += size_in_ptrs(n_types); // status bytes (holder_constructed and instance_registered) + + // Allocate space for flags, values, and holders, and initialize it to 0 (flags and values, + // in particular, need to be 0). Use Python's memory allocation functions: in Python 3.6 + // they default to using pymalloc, which is designed to be efficient for small allocations + // like the one we're doing here; in earlier versions (and for larger allocations) they are + // just wrappers around malloc. +#if PY_VERSION_HEX >= 0x03050000 + nonsimple.values_and_holders = (void **) PyMem_Calloc(space, sizeof(void *)); + if (!nonsimple.values_and_holders) throw std::bad_alloc(); +#else + nonsimple.values_and_holders = (void **) PyMem_New(void *, space); + if (!nonsimple.values_and_holders) throw std::bad_alloc(); + std::memset(nonsimple.values_and_holders, 0, space * sizeof(void *)); +#endif + nonsimple.status = reinterpret_cast(&nonsimple.values_and_holders[flags_at]); + } + owned = true; +} + +PYBIND11_NOINLINE inline void instance::deallocate_layout() { + if (!simple_layout) + PyMem_Free(nonsimple.values_and_holders); +} + +PYBIND11_NOINLINE inline bool isinstance_generic(handle obj, const std::type_info &tp) { + handle type = detail::get_type_handle(tp, false); + if (!type) + return false; + return isinstance(obj, type); +} + +PYBIND11_NOINLINE inline std::string error_string() { + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_RuntimeError, "Unknown internal error occurred"); + return "Unknown internal error occurred"; + } + + error_scope scope; // Preserve error state + + std::string errorString; + if (scope.type) { + errorString += handle(scope.type).attr("__name__").cast(); + errorString += ": "; + } + if (scope.value) + errorString += (std::string) str(scope.value); + + 
PyErr_NormalizeException(&scope.type, &scope.value, &scope.trace); + +#if PY_MAJOR_VERSION >= 3 + if (scope.trace != nullptr) + PyException_SetTraceback(scope.value, scope.trace); +#endif + +#if !defined(PYPY_VERSION) + if (scope.trace) { + PyTracebackObject *trace = (PyTracebackObject *) scope.trace; + + /* Get the deepest trace possible */ + while (trace->tb_next) + trace = trace->tb_next; + + PyFrameObject *frame = trace->tb_frame; + errorString += "\n\nAt:\n"; + while (frame) { + int lineno = PyFrame_GetLineNumber(frame); + errorString += + " " + handle(frame->f_code->co_filename).cast() + + "(" + std::to_string(lineno) + "): " + + handle(frame->f_code->co_name).cast() + "\n"; + frame = frame->f_back; + } + } +#endif + + return errorString; +} + +PYBIND11_NOINLINE inline handle get_object_handle(const void *ptr, const detail::type_info *type ) { + auto &instances = get_internals().registered_instances; + auto range = instances.equal_range(ptr); + for (auto it = range.first; it != range.second; ++it) { + for (auto vh : values_and_holders(it->second)) { + if (vh.type == type) + return handle((PyObject *) it->second); + } + } + return handle(); +} + +inline PyThreadState *get_thread_state_unchecked() { +#if defined(PYPY_VERSION) + return PyThreadState_GET(); +#elif PY_VERSION_HEX < 0x03000000 + return _PyThreadState_Current; +#elif PY_VERSION_HEX < 0x03050000 + return (PyThreadState*) _Py_atomic_load_relaxed(&_PyThreadState_Current); +#elif PY_VERSION_HEX < 0x03050200 + return (PyThreadState*) _PyThreadState_Current.value; +#else + return _PyThreadState_UncheckedGet(); +#endif +} + +// Forward declarations +inline void keep_alive_impl(handle nurse, handle patient); +inline PyObject *make_new_instance(PyTypeObject *type); + +class type_caster_generic { +public: + PYBIND11_NOINLINE type_caster_generic(const std::type_info &type_info) + : typeinfo(get_type_info(type_info)), cpptype(&type_info) { } + + type_caster_generic(const type_info *typeinfo) + : 
typeinfo(typeinfo), cpptype(typeinfo ? typeinfo->cpptype : nullptr) { } + + bool load(handle src, bool convert) { + return load_impl(src, convert); + } + + PYBIND11_NOINLINE static handle cast(const void *_src, return_value_policy policy, handle parent, + const detail::type_info *tinfo, + void *(*copy_constructor)(const void *), + void *(*move_constructor)(const void *), + const void *existing_holder = nullptr) { + if (!tinfo) // no type info: error will be set already + return handle(); + + void *src = const_cast(_src); + if (src == nullptr) + return none().release(); + + auto it_instances = get_internals().registered_instances.equal_range(src); + for (auto it_i = it_instances.first; it_i != it_instances.second; ++it_i) { + for (auto instance_type : detail::all_type_info(Py_TYPE(it_i->second))) { + if (instance_type && same_type(*instance_type->cpptype, *tinfo->cpptype)) + return handle((PyObject *) it_i->second).inc_ref(); + } + } + + auto inst = reinterpret_steal(make_new_instance(tinfo->type)); + auto wrapper = reinterpret_cast(inst.ptr()); + wrapper->owned = false; + void *&valueptr = values_and_holders(wrapper).begin()->value_ptr(); + + switch (policy) { + case return_value_policy::automatic: + case return_value_policy::take_ownership: + valueptr = src; + wrapper->owned = true; + break; + + case return_value_policy::automatic_reference: + case return_value_policy::reference: + valueptr = src; + wrapper->owned = false; + break; + + case return_value_policy::copy: + if (copy_constructor) + valueptr = copy_constructor(src); + else { +#if defined(NDEBUG) + throw cast_error("return_value_policy = copy, but type is " + "non-copyable! 
(compile in debug mode for details)"); +#else + std::string type_name(tinfo->cpptype->name()); + detail::clean_type_id(type_name); + throw cast_error("return_value_policy = copy, but type " + + type_name + " is non-copyable!"); +#endif + } + wrapper->owned = true; + break; + + case return_value_policy::move: + if (move_constructor) + valueptr = move_constructor(src); + else if (copy_constructor) + valueptr = copy_constructor(src); + else { +#if defined(NDEBUG) + throw cast_error("return_value_policy = move, but type is neither " + "movable nor copyable! " + "(compile in debug mode for details)"); +#else + std::string type_name(tinfo->cpptype->name()); + detail::clean_type_id(type_name); + throw cast_error("return_value_policy = move, but type " + + type_name + " is neither movable nor copyable!"); +#endif + } + wrapper->owned = true; + break; + + case return_value_policy::reference_internal: + valueptr = src; + wrapper->owned = false; + keep_alive_impl(inst, parent); + break; + + default: + throw cast_error("unhandled return_value_policy: should not happen!"); + } + + tinfo->init_instance(wrapper, existing_holder); + + return inst.release(); + } + + // Base methods for generic caster; there are overridden in copyable_holder_caster + void load_value(value_and_holder &&v_h) { + auto *&vptr = v_h.value_ptr(); + // Lazy allocation for unallocated values: + if (vptr == nullptr) { + auto *type = v_h.type ? 
v_h.type : typeinfo; + if (type->operator_new) { + vptr = type->operator_new(type->type_size); + } else { + #if defined(__cpp_aligned_new) && (!defined(_MSC_VER) || _MSC_VER >= 1912) + if (type->type_align > __STDCPP_DEFAULT_NEW_ALIGNMENT__) + vptr = ::operator new(type->type_size, + std::align_val_t(type->type_align)); + else + #endif + vptr = ::operator new(type->type_size); + } + } + value = vptr; + } + bool try_implicit_casts(handle src, bool convert) { + for (auto &cast : typeinfo->implicit_casts) { + type_caster_generic sub_caster(*cast.first); + if (sub_caster.load(src, convert)) { + value = cast.second(sub_caster.value); + return true; + } + } + return false; + } + bool try_direct_conversions(handle src) { + for (auto &converter : *typeinfo->direct_conversions) { + if (converter(src.ptr(), value)) + return true; + } + return false; + } + void check_holder_compat() {} + + PYBIND11_NOINLINE static void *local_load(PyObject *src, const type_info *ti) { + auto caster = type_caster_generic(ti); + if (caster.load(src, false)) + return caster.value; + return nullptr; + } + + /// Try to load with foreign typeinfo, if available. Used when there is no + /// native typeinfo, or when the native one wasn't able to produce a value. 
+ PYBIND11_NOINLINE bool try_load_foreign_module_local(handle src) { + constexpr auto *local_key = PYBIND11_MODULE_LOCAL_ID; + const auto pytype = src.get_type(); + if (!hasattr(pytype, local_key)) + return false; + + type_info *foreign_typeinfo = reinterpret_borrow(getattr(pytype, local_key)); + // Only consider this foreign loader if actually foreign and is a loader of the correct cpp type + if (foreign_typeinfo->module_local_load == &local_load + || (cpptype && !same_type(*cpptype, *foreign_typeinfo->cpptype))) + return false; + + if (auto result = foreign_typeinfo->module_local_load(src.ptr(), foreign_typeinfo)) { + value = result; + return true; + } + return false; + } + + // Implementation of `load`; this takes the type of `this` so that it can dispatch the relevant + // bits of code between here and copyable_holder_caster where the two classes need different + // logic (without having to resort to virtual inheritance). + template + PYBIND11_NOINLINE bool load_impl(handle src, bool convert) { + if (!src) return false; + if (!typeinfo) return try_load_foreign_module_local(src); + if (src.is_none()) { + // Defer accepting None to other overloads (if we aren't in convert mode): + if (!convert) return false; + value = nullptr; + return true; + } + + auto &this_ = static_cast(*this); + this_.check_holder_compat(); + + PyTypeObject *srctype = Py_TYPE(src.ptr()); + + // Case 1: If src is an exact type match for the target type then we can reinterpret_cast + // the instance's value pointer to the target type: + if (srctype == typeinfo->type) { + this_.load_value(reinterpret_cast(src.ptr())->get_value_and_holder()); + return true; + } + // Case 2: We have a derived class + else if (PyType_IsSubtype(srctype, typeinfo->type)) { + auto &bases = all_type_info(srctype); + bool no_cpp_mi = typeinfo->simple_type; + + // Case 2a: the python type is a Python-inherited derived class that inherits from just + // one simple (no MI) pybind11 class, or is an exact match, so the C++ 
instance is of + // the right type and we can use reinterpret_cast. + // (This is essentially the same as case 2b, but because not using multiple inheritance + // is extremely common, we handle it specially to avoid the loop iterator and type + // pointer lookup overhead) + if (bases.size() == 1 && (no_cpp_mi || bases.front()->type == typeinfo->type)) { + this_.load_value(reinterpret_cast(src.ptr())->get_value_and_holder()); + return true; + } + // Case 2b: the python type inherits from multiple C++ bases. Check the bases to see if + // we can find an exact match (or, for a simple C++ type, an inherited match); if so, we + // can safely reinterpret_cast to the relevant pointer. + else if (bases.size() > 1) { + for (auto base : bases) { + if (no_cpp_mi ? PyType_IsSubtype(base->type, typeinfo->type) : base->type == typeinfo->type) { + this_.load_value(reinterpret_cast(src.ptr())->get_value_and_holder(base)); + return true; + } + } + } + + // Case 2c: C++ multiple inheritance is involved and we couldn't find an exact type match + // in the registered bases, above, so try implicit casting (needed for proper C++ casting + // when MI is involved). + if (this_.try_implicit_casts(src, convert)) + return true; + } + + // Perform an implicit conversion + if (convert) { + for (auto &converter : typeinfo->implicit_conversions) { + auto temp = reinterpret_steal(converter(src.ptr(), typeinfo->type)); + if (load_impl(temp, false)) { + loader_life_support::add_patient(temp); + return true; + } + } + if (this_.try_direct_conversions(src)) + return true; + } + + // Failed to match local typeinfo. Try again with global. 
+ if (typeinfo->module_local) { + if (auto gtype = get_global_type_info(*typeinfo->cpptype)) { + typeinfo = gtype; + return load(src, false); + } + } + + // Global typeinfo has precedence over foreign module_local + return try_load_foreign_module_local(src); + } + + + // Called to do type lookup and wrap the pointer and type in a pair when a dynamic_cast + // isn't needed or can't be used. If the type is unknown, sets the error and returns a pair + // with .second = nullptr. (p.first = nullptr is not an error: it becomes None). + PYBIND11_NOINLINE static std::pair src_and_type( + const void *src, const std::type_info &cast_type, const std::type_info *rtti_type = nullptr) { + if (auto *tpi = get_type_info(cast_type)) + return {src, const_cast(tpi)}; + + // Not found, set error: + std::string tname = rtti_type ? rtti_type->name() : cast_type.name(); + detail::clean_type_id(tname); + std::string msg = "Unregistered type : " + tname; + PyErr_SetString(PyExc_TypeError, msg.c_str()); + return {nullptr, nullptr}; + } + + const type_info *typeinfo = nullptr; + const std::type_info *cpptype = nullptr; + void *value = nullptr; +}; + +/** + * Determine suitable casting operator for pointer-or-lvalue-casting type casters. The type caster + * needs to provide `operator T*()` and `operator T&()` operators. + * + * If the type supports moving the value away via an `operator T&&() &&` method, it should use + * `movable_cast_op_type` instead. + */ +template +using cast_op_type = + conditional_t>::value, + typename std::add_pointer>::type, + typename std::add_lvalue_reference>::type>; + +/** + * Determine suitable casting operator for a type caster with a movable value. Such a type caster + * needs to provide `operator T*()`, `operator T&()`, and `operator T&&() &&`. The latter will be + * called in appropriate contexts where the value can be moved rather than copied. + * + * These operator are automatically provided when using the PYBIND11_TYPE_CASTER macro. 
+ */ +template +using movable_cast_op_type = + conditional_t::type>::value, + typename std::add_pointer>::type, + conditional_t::value, + typename std::add_rvalue_reference>::type, + typename std::add_lvalue_reference>::type>>; + +// std::is_copy_constructible isn't quite enough: it lets std::vector (and similar) through when +// T is non-copyable, but code containing such a copy constructor fails to actually compile. +template struct is_copy_constructible : std::is_copy_constructible {}; + +// Specialization for types that appear to be copy constructible but also look like stl containers +// (we specifically check for: has `value_type` and `reference` with `reference = value_type&`): if +// so, copy constructability depends on whether the value_type is copy constructible. +template struct is_copy_constructible, + std::is_same, + // Avoid infinite recursion + negation> + >::value>> : is_copy_constructible {}; + +// Likewise for std::pair +// (after C++17 it is mandatory that the copy constructor not exist when the two types aren't themselves +// copy constructible, but this can not be relied upon when T1 or T2 are themselves containers). +template struct is_copy_constructible> + : all_of, is_copy_constructible> {}; + +// The same problems arise with std::is_copy_assignable, so we use the same workaround. +template struct is_copy_assignable : std::is_copy_assignable {}; +template struct is_copy_assignable, + std::is_same + >::value>> : is_copy_assignable {}; +template struct is_copy_assignable> + : all_of, is_copy_assignable> {}; + +NAMESPACE_END(detail) + +// polymorphic_type_hook::get(src, tinfo) determines whether the object pointed +// to by `src` actually is an instance of some class derived from `itype`. 
+// If so, it sets `tinfo` to point to the std::type_info representing that derived +// type, and returns a pointer to the start of the most-derived object of that type +// (in which `src` is a subobject; this will be the same address as `src` in most +// single inheritance cases). If not, or if `src` is nullptr, it simply returns `src` +// and leaves `tinfo` at its default value of nullptr. +// +// The default polymorphic_type_hook just returns src. A specialization for polymorphic +// types determines the runtime type of the passed object and adjusts the this-pointer +// appropriately via dynamic_cast. This is what enables a C++ Animal* to appear +// to Python as a Dog (if Dog inherits from Animal, Animal is polymorphic, Dog is +// registered with pybind11, and this Animal is in fact a Dog). +// +// You may specialize polymorphic_type_hook yourself for types that want to appear +// polymorphic to Python but do not use C++ RTTI. (This is a not uncommon pattern +// in performance-sensitive applications, used most notably in LLVM.) +template +struct polymorphic_type_hook +{ + static const void *get(const itype *src, const std::type_info*&) { return src; } +}; +template +struct polymorphic_type_hook::value>> +{ + static const void *get(const itype *src, const std::type_info*& type) { + type = src ? 
&typeid(*src) : nullptr; + return dynamic_cast(src); + } +}; + +NAMESPACE_BEGIN(detail) + +/// Generic type caster for objects stored on the heap +template class type_caster_base : public type_caster_generic { + using itype = intrinsic_t; + +public: + static constexpr auto name = _(); + + type_caster_base() : type_caster_base(typeid(type)) { } + explicit type_caster_base(const std::type_info &info) : type_caster_generic(info) { } + + static handle cast(const itype &src, return_value_policy policy, handle parent) { + if (policy == return_value_policy::automatic || policy == return_value_policy::automatic_reference) + policy = return_value_policy::copy; + return cast(&src, policy, parent); + } + + static handle cast(itype &&src, return_value_policy, handle parent) { + return cast(&src, return_value_policy::move, parent); + } + + // Returns a (pointer, type_info) pair taking care of necessary type lookup for a + // polymorphic type (using RTTI by default, but can be overridden by specializing + // polymorphic_type_hook). If the instance isn't derived, returns the base version. + static std::pair src_and_type(const itype *src) { + auto &cast_type = typeid(itype); + const std::type_info *instance_type = nullptr; + const void *vsrc = polymorphic_type_hook::get(src, instance_type); + if (instance_type && !same_type(cast_type, *instance_type)) { + // This is a base pointer to a derived type. If the derived type is registered + // with pybind11, we want to make the full derived object available. + // In the typical case where itype is polymorphic, we get the correct + // derived pointer (which may be != base pointer) by a dynamic_cast to + // most derived type. If itype is not polymorphic, we won't get here + // except via a user-provided specialization of polymorphic_type_hook, + // and the user has promised that no this-pointer adjustment is + // required in that case, so it's OK to use static_cast. 
+ if (const auto *tpi = get_type_info(*instance_type)) + return {vsrc, tpi}; + } + // Otherwise we have either a nullptr, an `itype` pointer, or an unknown derived pointer, so + // don't do a cast + return type_caster_generic::src_and_type(src, cast_type, instance_type); + } + + static handle cast(const itype *src, return_value_policy policy, handle parent) { + auto st = src_and_type(src); + return type_caster_generic::cast( + st.first, policy, parent, st.second, + make_copy_constructor(src), make_move_constructor(src)); + } + + static handle cast_holder(const itype *src, const void *holder) { + auto st = src_and_type(src); + return type_caster_generic::cast( + st.first, return_value_policy::take_ownership, {}, st.second, + nullptr, nullptr, holder); + } + + template using cast_op_type = detail::cast_op_type; + + operator itype*() { return (type *) value; } + operator itype&() { if (!value) throw reference_cast_error(); return *((itype *) value); } + +protected: + using Constructor = void *(*)(const void *); + + /* Only enabled when the types are {copy,move}-constructible *and* when the type + does not have a private operator new implementation. */ + template ::value>> + static auto make_copy_constructor(const T *x) -> decltype(new T(*x), Constructor{}) { + return [](const void *arg) -> void * { + return new T(*reinterpret_cast(arg)); + }; + } + + template ::value>> + static auto make_move_constructor(const T *x) -> decltype(new T(std::move(*const_cast(x))), Constructor{}) { + return [](const void *arg) -> void * { + return new T(std::move(*const_cast(reinterpret_cast(arg)))); + }; + } + + static Constructor make_copy_constructor(...) { return nullptr; } + static Constructor make_move_constructor(...) 
{ return nullptr; } +}; + +template class type_caster : public type_caster_base { }; +template using make_caster = type_caster>; + +// Shortcut for calling a caster's `cast_op_type` cast operator for casting a type_caster to a T +template typename make_caster::template cast_op_type cast_op(make_caster &caster) { + return caster.operator typename make_caster::template cast_op_type(); +} +template typename make_caster::template cast_op_type::type> +cast_op(make_caster &&caster) { + return std::move(caster).operator + typename make_caster::template cast_op_type::type>(); +} + +template class type_caster> { +private: + using caster_t = make_caster; + caster_t subcaster; + using subcaster_cast_op_type = typename caster_t::template cast_op_type; + static_assert(std::is_same::type &, subcaster_cast_op_type>::value, + "std::reference_wrapper caster requires T to have a caster with an `T &` operator"); +public: + bool load(handle src, bool convert) { return subcaster.load(src, convert); } + static constexpr auto name = caster_t::name; + static handle cast(const std::reference_wrapper &src, return_value_policy policy, handle parent) { + // It is definitely wrong to take ownership of this pointer, so mask that rvp + if (policy == return_value_policy::take_ownership || policy == return_value_policy::automatic) + policy = return_value_policy::automatic_reference; + return caster_t::cast(&src.get(), policy, parent); + } + template using cast_op_type = std::reference_wrapper; + operator std::reference_wrapper() { return subcaster.operator subcaster_cast_op_type&(); } +}; + +#define PYBIND11_TYPE_CASTER(type, py_name) \ + protected: \ + type value; \ + public: \ + static constexpr auto name = py_name; \ + template >::value, int> = 0> \ + static handle cast(T_ *src, return_value_policy policy, handle parent) { \ + if (!src) return none().release(); \ + if (policy == return_value_policy::take_ownership) { \ + auto h = cast(std::move(*src), policy, parent); delete src; return h; \ + 
} else { \ + return cast(*src, policy, parent); \ + } \ + } \ + operator type*() { return &value; } \ + operator type&() { return value; } \ + operator type&&() && { return std::move(value); } \ + template using cast_op_type = pybind11::detail::movable_cast_op_type + + +template using is_std_char_type = any_of< + std::is_same, /* std::string */ +#if defined(PYBIND11_HAS_U8STRING) + std::is_same, /* std::u8string */ +#endif + std::is_same, /* std::u16string */ + std::is_same, /* std::u32string */ + std::is_same /* std::wstring */ +>; + +template +struct type_caster::value && !is_std_char_type::value>> { + using _py_type_0 = conditional_t; + using _py_type_1 = conditional_t::value, _py_type_0, typename std::make_unsigned<_py_type_0>::type>; + using py_type = conditional_t::value, double, _py_type_1>; +public: + + bool load(handle src, bool convert) { + py_type py_value; + + if (!src) + return false; + + if (std::is_floating_point::value) { + if (convert || PyFloat_Check(src.ptr())) + py_value = (py_type) PyFloat_AsDouble(src.ptr()); + else + return false; + } else if (PyFloat_Check(src.ptr())) { + return false; + } else if (std::is_unsigned::value) { + py_value = as_unsigned(src.ptr()); + } else { // signed integer: + py_value = sizeof(T) <= sizeof(long) + ? (py_type) PyLong_AsLong(src.ptr()) + : (py_type) PYBIND11_LONG_AS_LONGLONG(src.ptr()); + } + + bool py_err = py_value == (py_type) -1 && PyErr_Occurred(); + + // Protect std::numeric_limits::min/max with parentheses + if (py_err || (std::is_integral::value && sizeof(py_type) != sizeof(T) && + (py_value < (py_type) (std::numeric_limits::min)() || + py_value > (py_type) (std::numeric_limits::max)()))) { + bool type_error = py_err && PyErr_ExceptionMatches( +#if PY_VERSION_HEX < 0x03000000 && !defined(PYPY_VERSION) + PyExc_SystemError +#else + PyExc_TypeError +#endif + ); + PyErr_Clear(); + if (type_error && convert && PyNumber_Check(src.ptr())) { + auto tmp = reinterpret_steal(std::is_floating_point::value + ? 
PyNumber_Float(src.ptr()) + : PyNumber_Long(src.ptr())); + PyErr_Clear(); + return load(tmp, false); + } + return false; + } + + value = (T) py_value; + return true; + } + + template + static typename std::enable_if::value, handle>::type + cast(U src, return_value_policy /* policy */, handle /* parent */) { + return PyFloat_FromDouble((double) src); + } + + template + static typename std::enable_if::value && std::is_signed::value && (sizeof(U) <= sizeof(long)), handle>::type + cast(U src, return_value_policy /* policy */, handle /* parent */) { + return PYBIND11_LONG_FROM_SIGNED((long) src); + } + + template + static typename std::enable_if::value && std::is_unsigned::value && (sizeof(U) <= sizeof(unsigned long)), handle>::type + cast(U src, return_value_policy /* policy */, handle /* parent */) { + return PYBIND11_LONG_FROM_UNSIGNED((unsigned long) src); + } + + template + static typename std::enable_if::value && std::is_signed::value && (sizeof(U) > sizeof(long)), handle>::type + cast(U src, return_value_policy /* policy */, handle /* parent */) { + return PyLong_FromLongLong((long long) src); + } + + template + static typename std::enable_if::value && std::is_unsigned::value && (sizeof(U) > sizeof(unsigned long)), handle>::type + cast(U src, return_value_policy /* policy */, handle /* parent */) { + return PyLong_FromUnsignedLongLong((unsigned long long) src); + } + + PYBIND11_TYPE_CASTER(T, _::value>("int", "float")); +}; + +template struct void_caster { +public: + bool load(handle src, bool) { + if (src && src.is_none()) + return true; + return false; + } + static handle cast(T, return_value_policy /* policy */, handle /* parent */) { + return none().inc_ref(); + } + PYBIND11_TYPE_CASTER(T, _("None")); +}; + +template <> class type_caster : public void_caster {}; + +template <> class type_caster : public type_caster { +public: + using type_caster::cast; + + bool load(handle h, bool) { + if (!h) { + return false; + } else if (h.is_none()) { + value = nullptr; + 
return true; + } + + /* Check if this is a capsule */ + if (isinstance(h)) { + value = reinterpret_borrow(h); + return true; + } + + /* Check if this is a C++ type */ + auto &bases = all_type_info((PyTypeObject *) h.get_type().ptr()); + if (bases.size() == 1) { // Only allowing loading from a single-value type + value = values_and_holders(reinterpret_cast(h.ptr())).begin()->value_ptr(); + return true; + } + + /* Fail */ + return false; + } + + static handle cast(const void *ptr, return_value_policy /* policy */, handle /* parent */) { + if (ptr) + return capsule(ptr).release(); + else + return none().inc_ref(); + } + + template using cast_op_type = void*&; + operator void *&() { return value; } + static constexpr auto name = _("capsule"); +private: + void *value = nullptr; +}; + +template <> class type_caster : public void_caster { }; + +template <> class type_caster { +public: + bool load(handle src, bool convert) { + if (!src) return false; + else if (src.ptr() == Py_True) { value = true; return true; } + else if (src.ptr() == Py_False) { value = false; return true; } + else if (convert || !strcmp("numpy.bool_", Py_TYPE(src.ptr())->tp_name)) { + // (allow non-implicit conversion for numpy booleans) + + Py_ssize_t res = -1; + if (src.is_none()) { + res = 0; // None is implicitly converted to False + } + #if defined(PYPY_VERSION) + // On PyPy, check that "__bool__" (or "__nonzero__" on Python 2.7) attr exists + else if (hasattr(src, PYBIND11_BOOL_ATTR)) { + res = PyObject_IsTrue(src.ptr()); + } + #else + // Alternate approach for CPython: this does the same as the above, but optimized + // using the CPython API so as to avoid an unneeded attribute lookup. 
+ else if (auto tp_as_number = src.ptr()->ob_type->tp_as_number) { + if (PYBIND11_NB_BOOL(tp_as_number)) { + res = (*PYBIND11_NB_BOOL(tp_as_number))(src.ptr()); + } + } + #endif + if (res == 0 || res == 1) { + value = (bool) res; + return true; + } else { + PyErr_Clear(); + } + } + return false; + } + static handle cast(bool src, return_value_policy /* policy */, handle /* parent */) { + return handle(src ? Py_True : Py_False).inc_ref(); + } + PYBIND11_TYPE_CASTER(bool, _("bool")); +}; + +// Helper class for UTF-{8,16,32} C++ stl strings: +template struct string_caster { + using CharT = typename StringType::value_type; + + // Simplify life by being able to assume standard char sizes (the standard only guarantees + // minimums, but Python requires exact sizes) + static_assert(!std::is_same::value || sizeof(CharT) == 1, "Unsupported char size != 1"); +#if defined(PYBIND11_HAS_U8STRING) + static_assert(!std::is_same::value || sizeof(CharT) == 1, "Unsupported char8_t size != 1"); +#endif + static_assert(!std::is_same::value || sizeof(CharT) == 2, "Unsupported char16_t size != 2"); + static_assert(!std::is_same::value || sizeof(CharT) == 4, "Unsupported char32_t size != 4"); + // wchar_t can be either 16 bits (Windows) or 32 (everywhere else) + static_assert(!std::is_same::value || sizeof(CharT) == 2 || sizeof(CharT) == 4, + "Unsupported wchar_t size != 2/4"); + static constexpr size_t UTF_N = 8 * sizeof(CharT); + + bool load(handle src, bool) { +#if PY_MAJOR_VERSION < 3 + object temp; +#endif + handle load_src = src; + if (!src) { + return false; + } else if (!PyUnicode_Check(load_src.ptr())) { +#if PY_MAJOR_VERSION >= 3 + return load_bytes(load_src); +#else + if (std::is_same::value) { + return load_bytes(load_src); + } + + // The below is a guaranteed failure in Python 3 when PyUnicode_Check returns false + if (!PYBIND11_BYTES_CHECK(load_src.ptr())) + return false; + + temp = reinterpret_steal(PyUnicode_FromObject(load_src.ptr())); + if (!temp) { PyErr_Clear(); 
return false; } + load_src = temp; +#endif + } + + object utfNbytes = reinterpret_steal(PyUnicode_AsEncodedString( + load_src.ptr(), UTF_N == 8 ? "utf-8" : UTF_N == 16 ? "utf-16" : "utf-32", nullptr)); + if (!utfNbytes) { PyErr_Clear(); return false; } + + const CharT *buffer = reinterpret_cast(PYBIND11_BYTES_AS_STRING(utfNbytes.ptr())); + size_t length = (size_t) PYBIND11_BYTES_SIZE(utfNbytes.ptr()) / sizeof(CharT); + if (UTF_N > 8) { buffer++; length--; } // Skip BOM for UTF-16/32 + value = StringType(buffer, length); + + // If we're loading a string_view we need to keep the encoded Python object alive: + if (IsView) + loader_life_support::add_patient(utfNbytes); + + return true; + } + + static handle cast(const StringType &src, return_value_policy /* policy */, handle /* parent */) { + const char *buffer = reinterpret_cast(src.data()); + ssize_t nbytes = ssize_t(src.size() * sizeof(CharT)); + handle s = decode_utfN(buffer, nbytes); + if (!s) throw error_already_set(); + return s; + } + + PYBIND11_TYPE_CASTER(StringType, _(PYBIND11_STRING_NAME)); + +private: + static handle decode_utfN(const char *buffer, ssize_t nbytes) { +#if !defined(PYPY_VERSION) + return + UTF_N == 8 ? PyUnicode_DecodeUTF8(buffer, nbytes, nullptr) : + UTF_N == 16 ? PyUnicode_DecodeUTF16(buffer, nbytes, nullptr, nullptr) : + PyUnicode_DecodeUTF32(buffer, nbytes, nullptr, nullptr); +#else + // PyPy seems to have multiple problems related to PyUnicode_UTF*: the UTF8 version + // sometimes segfaults for unknown reasons, while the UTF16 and 32 versions require a + // non-const char * arguments, which is also a nuisance, so bypass the whole thing by just + // passing the encoding as a string value, which works properly: + return PyUnicode_Decode(buffer, nbytes, UTF_N == 8 ? "utf-8" : UTF_N == 16 ? "utf-16" : "utf-32", nullptr); +#endif + } + + // When loading into a std::string or char*, accept a bytes object as-is (i.e. + // without any encoding/decoding attempt). 
For other C++ char sizes this is a no-op. + // which supports loading a unicode from a str, doesn't take this path. + template + bool load_bytes(enable_if_t::value, handle> src) { + if (PYBIND11_BYTES_CHECK(src.ptr())) { + // We were passed a Python 3 raw bytes; accept it into a std::string or char* + // without any encoding attempt. + const char *bytes = PYBIND11_BYTES_AS_STRING(src.ptr()); + if (bytes) { + value = StringType(bytes, (size_t) PYBIND11_BYTES_SIZE(src.ptr())); + return true; + } + } + + return false; + } + + template + bool load_bytes(enable_if_t::value, handle>) { return false; } +}; + +template +struct type_caster, enable_if_t::value>> + : string_caster> {}; + +#ifdef PYBIND11_HAS_STRING_VIEW +template +struct type_caster, enable_if_t::value>> + : string_caster, true> {}; +#endif + +// Type caster for C-style strings. We basically use a std::string type caster, but also add the +// ability to use None as a nullptr char* (which the string caster doesn't allow). +template struct type_caster::value>> { + using StringType = std::basic_string; + using StringCaster = type_caster; + StringCaster str_caster; + bool none = false; + CharT one_char = 0; +public: + bool load(handle src, bool convert) { + if (!src) return false; + if (src.is_none()) { + // Defer accepting None to other overloads (if we aren't in convert mode): + if (!convert) return false; + none = true; + return true; + } + return str_caster.load(src, convert); + } + + static handle cast(const CharT *src, return_value_policy policy, handle parent) { + if (src == nullptr) return pybind11::none().inc_ref(); + return StringCaster::cast(StringType(src), policy, parent); + } + + static handle cast(CharT src, return_value_policy policy, handle parent) { + if (std::is_same::value) { + handle s = PyUnicode_DecodeLatin1((const char *) &src, 1, nullptr); + if (!s) throw error_already_set(); + return s; + } + return StringCaster::cast(StringType(1, src), policy, parent); + } + + operator CharT*() { 
return none ? nullptr : const_cast(static_cast(str_caster).c_str()); } + operator CharT&() { + if (none) + throw value_error("Cannot convert None to a character"); + + auto &value = static_cast(str_caster); + size_t str_len = value.size(); + if (str_len == 0) + throw value_error("Cannot convert empty string to a character"); + + // If we're in UTF-8 mode, we have two possible failures: one for a unicode character that + // is too high, and one for multiple unicode characters (caught later), so we need to figure + // out how long the first encoded character is in bytes to distinguish between these two + // errors. We also allow want to allow unicode characters U+0080 through U+00FF, as those + // can fit into a single char value. + if (StringCaster::UTF_N == 8 && str_len > 1 && str_len <= 4) { + unsigned char v0 = static_cast(value[0]); + size_t char0_bytes = !(v0 & 0x80) ? 1 : // low bits only: 0-127 + (v0 & 0xE0) == 0xC0 ? 2 : // 0b110xxxxx - start of 2-byte sequence + (v0 & 0xF0) == 0xE0 ? 3 : // 0b1110xxxx - start of 3-byte sequence + 4; // 0b11110xxx - start of 4-byte sequence + + if (char0_bytes == str_len) { + // If we have a 128-255 value, we can decode it into a single char: + if (char0_bytes == 2 && (v0 & 0xFC) == 0xC0) { // 0x110000xx 0x10xxxxxx + one_char = static_cast(((v0 & 3) << 6) + (static_cast(value[1]) & 0x3F)); + return one_char; + } + // Otherwise we have a single character, but it's > U+00FF + throw value_error("Character code point not in range(0x100)"); + } + } + + // UTF-16 is much easier: we can only have a surrogate pair for values above U+FFFF, thus a + // surrogate pair with total length 2 instantly indicates a range error (but not a "your + // string was too long" error). 
+ else if (StringCaster::UTF_N == 16 && str_len == 2) { + one_char = static_cast(value[0]); + if (one_char >= 0xD800 && one_char < 0xE000) + throw value_error("Character code point not in range(0x10000)"); + } + + if (str_len != 1) + throw value_error("Expected a character, but multi-character string found"); + + one_char = value[0]; + return one_char; + } + + static constexpr auto name = _(PYBIND11_STRING_NAME); + template using cast_op_type = pybind11::detail::cast_op_type<_T>; +}; + +// Base implementation for std::tuple and std::pair +template class Tuple, typename... Ts> class tuple_caster { + using type = Tuple; + static constexpr auto size = sizeof...(Ts); + using indices = make_index_sequence; +public: + + bool load(handle src, bool convert) { + if (!isinstance(src)) + return false; + const auto seq = reinterpret_borrow(src); + if (seq.size() != size) + return false; + return load_impl(seq, convert, indices{}); + } + + template + static handle cast(T &&src, return_value_policy policy, handle parent) { + return cast_impl(std::forward(src), policy, parent, indices{}); + } + + static constexpr auto name = _("Tuple[") + concat(make_caster::name...) + _("]"); + + template using cast_op_type = type; + + operator type() & { return implicit_cast(indices{}); } + operator type() && { return std::move(*this).implicit_cast(indices{}); } + +protected: + template + type implicit_cast(index_sequence) & { return type(cast_op(std::get(subcasters))...); } + template + type implicit_cast(index_sequence) && { return type(cast_op(std::move(std::get(subcasters)))...); } + + static constexpr bool load_impl(const sequence &, bool, index_sequence<>) { return true; } + + template + bool load_impl(const sequence &seq, bool convert, index_sequence) { +#ifdef __cpp_fold_expressions + if ((... 
|| !std::get(subcasters).load(seq[Is], convert))) + return false; +#else + for (bool r : {std::get(subcasters).load(seq[Is], convert)...}) + if (!r) + return false; +#endif + return true; + } + + /* Implementation: Convert a C++ tuple into a Python tuple */ + template + static handle cast_impl(T &&src, return_value_policy policy, handle parent, index_sequence) { + std::array entries{{ + reinterpret_steal(make_caster::cast(std::get(std::forward(src)), policy, parent))... + }}; + for (const auto &entry: entries) + if (!entry) + return handle(); + tuple result(size); + int counter = 0; + for (auto & entry: entries) + PyTuple_SET_ITEM(result.ptr(), counter++, entry.release().ptr()); + return result.release(); + } + + Tuple...> subcasters; +}; + +template class type_caster> + : public tuple_caster {}; + +template class type_caster> + : public tuple_caster {}; + +/// Helper class which abstracts away certain actions. Users can provide specializations for +/// custom holders, but it's only necessary if the type has a non-standard interface. +template +struct holder_helper { + static auto get(const T &p) -> decltype(p.get()) { return p.get(); } +}; + +/// Type caster for holder types like std::shared_ptr, etc. 
+template +struct copyable_holder_caster : public type_caster_base { +public: + using base = type_caster_base; + static_assert(std::is_base_of>::value, + "Holder classes are only supported for custom types"); + using base::base; + using base::cast; + using base::typeinfo; + using base::value; + + bool load(handle src, bool convert) { + return base::template load_impl>(src, convert); + } + + explicit operator type*() { return this->value; } + explicit operator type&() { return *(this->value); } + explicit operator holder_type*() { return std::addressof(holder); } + + // Workaround for Intel compiler bug + // see pybind11 issue 94 + #if defined(__ICC) || defined(__INTEL_COMPILER) + operator holder_type&() { return holder; } + #else + explicit operator holder_type&() { return holder; } + #endif + + static handle cast(const holder_type &src, return_value_policy, handle) { + const auto *ptr = holder_helper::get(src); + return type_caster_base::cast_holder(ptr, &src); + } + +protected: + friend class type_caster_generic; + void check_holder_compat() { + if (typeinfo->default_holder) + throw cast_error("Unable to load a custom holder type from a default-holder instance"); + } + + bool load_value(value_and_holder &&v_h) { + if (v_h.holder_constructed()) { + value = v_h.value_ptr(); + holder = v_h.template holder(); + return true; + } else { + throw cast_error("Unable to cast from non-held to held instance (T& to Holder) " +#if defined(NDEBUG) + "(compile in debug mode for type information)"); +#else + "of type '" + type_id() + "''"); +#endif + } + } + + template ::value, int> = 0> + bool try_implicit_casts(handle, bool) { return false; } + + template ::value, int> = 0> + bool try_implicit_casts(handle src, bool convert) { + for (auto &cast : typeinfo->implicit_casts) { + copyable_holder_caster sub_caster(*cast.first); + if (sub_caster.load(src, convert)) { + value = cast.second(sub_caster.value); + holder = holder_type(sub_caster.holder, (type *) value); + return true; + } 
+ } + return false; + } + + static bool try_direct_conversions(handle) { return false; } + + + holder_type holder; +}; + +/// Specialize for the common std::shared_ptr, so users don't need to +template +class type_caster> : public copyable_holder_caster> { }; + +template +struct move_only_holder_caster { + static_assert(std::is_base_of, type_caster>::value, + "Holder classes are only supported for custom types"); + + static handle cast(holder_type &&src, return_value_policy, handle) { + auto *ptr = holder_helper::get(src); + return type_caster_base::cast_holder(ptr, std::addressof(src)); + } + static constexpr auto name = type_caster_base::name; +}; + +template +class type_caster> + : public move_only_holder_caster> { }; + +template +using type_caster_holder = conditional_t::value, + copyable_holder_caster, + move_only_holder_caster>; + +template struct always_construct_holder { static constexpr bool value = Value; }; + +/// Create a specialization for custom holder types (silently ignores std::shared_ptr) +#define PYBIND11_DECLARE_HOLDER_TYPE(type, holder_type, ...) 
\ + namespace pybind11 { namespace detail { \ + template \ + struct always_construct_holder : always_construct_holder { }; \ + template \ + class type_caster::value>> \ + : public type_caster_holder { }; \ + }} + +// PYBIND11_DECLARE_HOLDER_TYPE holder types: +template struct is_holder_type : + std::is_base_of, detail::type_caster> {}; +// Specialization for always-supported unique_ptr holders: +template struct is_holder_type> : + std::true_type {}; + +template struct handle_type_name { static constexpr auto name = _(); }; +template <> struct handle_type_name { static constexpr auto name = _(PYBIND11_BYTES_NAME); }; +template <> struct handle_type_name { static constexpr auto name = _("*args"); }; +template <> struct handle_type_name { static constexpr auto name = _("**kwargs"); }; + +template +struct pyobject_caster { + template ::value, int> = 0> + bool load(handle src, bool /* convert */) { value = src; return static_cast(value); } + + template ::value, int> = 0> + bool load(handle src, bool /* convert */) { + if (!isinstance(src)) + return false; + value = reinterpret_borrow(src); + return true; + } + + static handle cast(const handle &src, return_value_policy /* policy */, handle /* parent */) { + return src.inc_ref(); + } + PYBIND11_TYPE_CASTER(type, handle_type_name::name); +}; + +template +class type_caster::value>> : public pyobject_caster { }; + +// Our conditions for enabling moving are quite restrictive: +// At compile time: +// - T needs to be a non-const, non-pointer, non-reference type +// - type_caster::operator T&() must exist +// - the type must be move constructible (obviously) +// At run-time: +// - if the type is non-copy-constructible, the object must be the sole owner of the type (i.e. it +// must have ref_count() == 1)h +// If any of the above are not satisfied, we fall back to copying. 
+template using move_is_plain_type = satisfies_none_of; +template struct move_always : std::false_type {}; +template struct move_always, + negation>, + std::is_move_constructible, + std::is_same>().operator T&()), T&> +>::value>> : std::true_type {}; +template struct move_if_unreferenced : std::false_type {}; +template struct move_if_unreferenced, + negation>, + std::is_move_constructible, + std::is_same>().operator T&()), T&> +>::value>> : std::true_type {}; +template using move_never = none_of, move_if_unreferenced>; + +// Detect whether returning a `type` from a cast on type's type_caster is going to result in a +// reference or pointer to a local variable of the type_caster. Basically, only +// non-reference/pointer `type`s and reference/pointers from a type_caster_generic are safe; +// everything else returns a reference/pointer to a local variable. +template using cast_is_temporary_value_reference = bool_constant< + (std::is_reference::value || std::is_pointer::value) && + !std::is_base_of>::value && + !std::is_same, void>::value +>; + +// When a value returned from a C++ function is being cast back to Python, we almost always want to +// force `policy = move`, regardless of the return value policy the function/method was declared +// with. +template struct return_value_policy_override { + static return_value_policy policy(return_value_policy p) { return p; } +}; + +template struct return_value_policy_override>::value, void>> { + static return_value_policy policy(return_value_policy p) { + return !std::is_lvalue_reference::value && + !std::is_pointer::value + ? 
return_value_policy::move : p; + } +}; + +// Basic python -> C++ casting; throws if casting fails +template type_caster &load_type(type_caster &conv, const handle &handle) { + if (!conv.load(handle, true)) { +#if defined(NDEBUG) + throw cast_error("Unable to cast Python instance to C++ type (compile in debug mode for details)"); +#else + throw cast_error("Unable to cast Python instance of type " + + (std::string) str(handle.get_type()) + " to C++ type '" + type_id() + "'"); +#endif + } + return conv; +} +// Wrapper around the above that also constructs and returns a type_caster +template make_caster load_type(const handle &handle) { + make_caster conv; + load_type(conv, handle); + return conv; +} + +NAMESPACE_END(detail) + +// pytype -> C++ type +template ::value, int> = 0> +T cast(const handle &handle) { + using namespace detail; + static_assert(!cast_is_temporary_value_reference::value, + "Unable to cast type to reference: value is local to type caster"); + return cast_op(load_type(handle)); +} + +// pytype -> pytype (calls converting constructor) +template ::value, int> = 0> +T cast(const handle &handle) { return T(reinterpret_borrow(handle)); } + +// C++ type -> py::object +template ::value, int> = 0> +object cast(const T &value, return_value_policy policy = return_value_policy::automatic_reference, + handle parent = handle()) { + if (policy == return_value_policy::automatic) + policy = std::is_pointer::value ? return_value_policy::take_ownership : return_value_policy::copy; + else if (policy == return_value_policy::automatic_reference) + policy = std::is_pointer::value ? 
return_value_policy::reference : return_value_policy::copy; + return reinterpret_steal(detail::make_caster::cast(value, policy, parent)); +} + +template T handle::cast() const { return pybind11::cast(*this); } +template <> inline void handle::cast() const { return; } + +template +detail::enable_if_t::value, T> move(object &&obj) { + if (obj.ref_count() > 1) +#if defined(NDEBUG) + throw cast_error("Unable to cast Python instance to C++ rvalue: instance has multiple references" + " (compile in debug mode for details)"); +#else + throw cast_error("Unable to move from Python " + (std::string) str(obj.get_type()) + + " instance to C++ " + type_id() + " instance: instance has multiple references"); +#endif + + // Move into a temporary and return that, because the reference may be a local value of `conv` + T ret = std::move(detail::load_type(obj).operator T&()); + return ret; +} + +// Calling cast() on an rvalue calls pybind::cast with the object rvalue, which does: +// - If we have to move (because T has no copy constructor), do it. This will fail if the moved +// object has multiple references, but trying to copy will fail to compile. +// - If both movable and copyable, check ref count: if 1, move; otherwise copy +// - Otherwise (not movable), copy. 
+template detail::enable_if_t::value, T> cast(object &&object) { + return move(std::move(object)); +} +template detail::enable_if_t::value, T> cast(object &&object) { + if (object.ref_count() > 1) + return cast(object); + else + return move(std::move(object)); +} +template detail::enable_if_t::value, T> cast(object &&object) { + return cast(object); +} + +template T object::cast() const & { return pybind11::cast(*this); } +template T object::cast() && { return pybind11::cast(std::move(*this)); } +template <> inline void object::cast() const & { return; } +template <> inline void object::cast() && { return; } + +NAMESPACE_BEGIN(detail) + +// Declared in pytypes.h: +template ::value, int>> +object object_or_cast(T &&o) { return pybind11::cast(std::forward(o)); } + +struct overload_unused {}; // Placeholder type for the unneeded (and dead code) static variable in the OVERLOAD_INT macro +template using overload_caster_t = conditional_t< + cast_is_temporary_value_reference::value, make_caster, overload_unused>; + +// Trampoline use: for reference/pointer types to value-converted values, we do a value cast, then +// store the result in the given variable. For other types, this is a no-op. +template enable_if_t::value, T> cast_ref(object &&o, make_caster &caster) { + return cast_op(load_type(caster, o)); +} +template enable_if_t::value, T> cast_ref(object &&, overload_unused &) { + pybind11_fail("Internal error: cast_ref fallback invoked"); } + +// Trampoline use: Having a pybind11::cast with an invalid reference type is going to static_assert, even +// though if it's in dead code, so we provide a "trampoline" to pybind11::cast that only does anything in +// cases where pybind11::cast is valid. 
+template enable_if_t::value, T> cast_safe(object &&o) { + return pybind11::cast(std::move(o)); } +template enable_if_t::value, T> cast_safe(object &&) { + pybind11_fail("Internal error: cast_safe fallback invoked"); } +template <> inline void cast_safe(object &&) {} + +NAMESPACE_END(detail) + +template +tuple make_tuple() { return tuple(0); } + +template tuple make_tuple(Args&&... args_) { + constexpr size_t size = sizeof...(Args); + std::array args { + { reinterpret_steal(detail::make_caster::cast( + std::forward(args_), policy, nullptr))... } + }; + for (size_t i = 0; i < args.size(); i++) { + if (!args[i]) { +#if defined(NDEBUG) + throw cast_error("make_tuple(): unable to convert arguments to Python object (compile in debug mode for details)"); +#else + std::array argtypes { {type_id()...} }; + throw cast_error("make_tuple(): unable to convert argument of type '" + + argtypes[i] + "' to Python object"); +#endif + } + } + tuple result(size); + int counter = 0; + for (auto &arg_value : args) + PyTuple_SET_ITEM(result.ptr(), counter++, arg_value.release().ptr()); + return result; +} + +/// \ingroup annotations +/// Annotation for arguments +struct arg { + /// Constructs an argument with the name of the argument; if null or omitted, this is a positional argument. + constexpr explicit arg(const char *name = nullptr) : name(name), flag_noconvert(false), flag_none(true) { } + /// Assign a value to this argument + template arg_v operator=(T &&value) const; + /// Indicate that the type should not be converted in the type caster + arg &noconvert(bool flag = true) { flag_noconvert = flag; return *this; } + /// Indicates that the argument should/shouldn't allow None (e.g. for nullable pointer args) + arg &none(bool flag = true) { flag_none = flag; return *this; } + + const char *name; ///< If non-null, this is a named kwargs argument + bool flag_noconvert : 1; ///< If set, do not allow conversion (requires a supporting type caster!) 
+ bool flag_none : 1; ///< If set (the default), allow None to be passed to this argument +}; + +/// \ingroup annotations +/// Annotation for arguments with values +struct arg_v : arg { +private: + template + arg_v(arg &&base, T &&x, const char *descr = nullptr) + : arg(base), + value(reinterpret_steal( + detail::make_caster::cast(x, return_value_policy::automatic, {}) + )), + descr(descr) +#if !defined(NDEBUG) + , type(type_id()) +#endif + { } + +public: + /// Direct construction with name, default, and description + template + arg_v(const char *name, T &&x, const char *descr = nullptr) + : arg_v(arg(name), std::forward(x), descr) { } + + /// Called internally when invoking `py::arg("a") = value` + template + arg_v(const arg &base, T &&x, const char *descr = nullptr) + : arg_v(arg(base), std::forward(x), descr) { } + + /// Same as `arg::noconvert()`, but returns *this as arg_v&, not arg& + arg_v &noconvert(bool flag = true) { arg::noconvert(flag); return *this; } + + /// Same as `arg::nonone()`, but returns *this as arg_v&, not arg& + arg_v &none(bool flag = true) { arg::none(flag); return *this; } + + /// The default value + object value; + /// The (optional) description of the default value + const char *descr; +#if !defined(NDEBUG) + /// The C++ type name of the default value (only available when compiled in debug mode) + std::string type; +#endif +}; + +template +arg_v arg::operator=(T &&value) const { return {std::move(*this), std::forward(value)}; } + +/// Alias for backward compatibility -- to be removed in version 2.0 +template using arg_t = arg_v; + +inline namespace literals { +/** \rst + String literal version of `arg` + \endrst */ +constexpr arg operator"" _a(const char *name, size_t) { return arg(name); } +} + +NAMESPACE_BEGIN(detail) + +// forward declaration (definition in attr.h) +struct function_record; + +/// Internal data associated with a single function call +struct function_call { + function_call(const function_record &f, handle p); // 
Implementation in attr.h + + /// The function data: + const function_record &func; + + /// Arguments passed to the function: + std::vector args; + + /// The `convert` value the arguments should be loaded with + std::vector args_convert; + + /// Extra references for the optional `py::args` and/or `py::kwargs` arguments (which, if + /// present, are also in `args` but without a reference). + object args_ref, kwargs_ref; + + /// The parent, if any + handle parent; + + /// If this is a call to an initializer, this argument contains `self` + handle init_self; +}; + + +/// Helper class which loads arguments for C++ functions called from Python +template +class argument_loader { + using indices = make_index_sequence; + + template using argument_is_args = std::is_same, args>; + template using argument_is_kwargs = std::is_same, kwargs>; + // Get args/kwargs argument positions relative to the end of the argument list: + static constexpr auto args_pos = constexpr_first() - (int) sizeof...(Args), + kwargs_pos = constexpr_first() - (int) sizeof...(Args); + + static constexpr bool args_kwargs_are_last = kwargs_pos >= - 1 && args_pos >= kwargs_pos - 1; + + static_assert(args_kwargs_are_last, "py::args/py::kwargs are only permitted as the last argument(s) of a function"); + +public: + static constexpr bool has_kwargs = kwargs_pos < 0; + static constexpr bool has_args = args_pos < 0; + + static constexpr auto arg_names = concat(type_descr(make_caster::name)...); + + bool load_args(function_call &call) { + return load_impl_sequence(call, indices{}); + } + + template + enable_if_t::value, Return> call(Func &&f) && { + return std::move(*this).template call_impl(std::forward(f), indices{}, Guard{}); + } + + template + enable_if_t::value, void_type> call(Func &&f) && { + std::move(*this).template call_impl(std::forward(f), indices{}, Guard{}); + return void_type(); + } + +private: + + static bool load_impl_sequence(function_call &, index_sequence<>) { return true; } + + template + bool 
load_impl_sequence(function_call &call, index_sequence) { +#ifdef __cpp_fold_expressions + if ((... || !std::get(argcasters).load(call.args[Is], call.args_convert[Is]))) + return false; +#else + for (bool r : {std::get(argcasters).load(call.args[Is], call.args_convert[Is])...}) + if (!r) + return false; +#endif + return true; + } + + template + Return call_impl(Func &&f, index_sequence, Guard &&) && { + return std::forward(f)(cast_op(std::move(std::get(argcasters)))...); + } + + std::tuple...> argcasters; +}; + +/// Helper class which collects only positional arguments for a Python function call. +/// A fancier version below can collect any argument, but this one is optimal for simple calls. +template +class simple_collector { +public: + template + explicit simple_collector(Ts &&...values) + : m_args(pybind11::make_tuple(std::forward(values)...)) { } + + const tuple &args() const & { return m_args; } + dict kwargs() const { return {}; } + + tuple args() && { return std::move(m_args); } + + /// Call a Python function and pass the collected arguments + object call(PyObject *ptr) const { + PyObject *result = PyObject_CallObject(ptr, m_args.ptr()); + if (!result) + throw error_already_set(); + return reinterpret_steal(result); + } + +private: + tuple m_args; +}; + +/// Helper class which collects positional, keyword, * and ** arguments for a Python function call +template +class unpacking_collector { +public: + template + explicit unpacking_collector(Ts &&...values) { + // Tuples aren't (easily) resizable so a list is needed for collection, + // but the actual function call strictly requires a tuple. + auto args_list = list(); + int _[] = { 0, (process(args_list, std::forward(values)), 0)... 
}; + ignore_unused(_); + + m_args = std::move(args_list); + } + + const tuple &args() const & { return m_args; } + const dict &kwargs() const & { return m_kwargs; } + + tuple args() && { return std::move(m_args); } + dict kwargs() && { return std::move(m_kwargs); } + + /// Call a Python function and pass the collected arguments + object call(PyObject *ptr) const { + PyObject *result = PyObject_Call(ptr, m_args.ptr(), m_kwargs.ptr()); + if (!result) + throw error_already_set(); + return reinterpret_steal(result); + } + +private: + template + void process(list &args_list, T &&x) { + auto o = reinterpret_steal(detail::make_caster::cast(std::forward(x), policy, {})); + if (!o) { +#if defined(NDEBUG) + argument_cast_error(); +#else + argument_cast_error(std::to_string(args_list.size()), type_id()); +#endif + } + args_list.append(o); + } + + void process(list &args_list, detail::args_proxy ap) { + for (const auto &a : ap) + args_list.append(a); + } + + void process(list &/*args_list*/, arg_v a) { + if (!a.name) +#if defined(NDEBUG) + nameless_argument_error(); +#else + nameless_argument_error(a.type); +#endif + + if (m_kwargs.contains(a.name)) { +#if defined(NDEBUG) + multiple_values_error(); +#else + multiple_values_error(a.name); +#endif + } + if (!a.value) { +#if defined(NDEBUG) + argument_cast_error(); +#else + argument_cast_error(a.name, a.type); +#endif + } + m_kwargs[a.name] = a.value; + } + + void process(list &/*args_list*/, detail::kwargs_proxy kp) { + if (!kp) + return; + for (const auto &k : reinterpret_borrow(kp)) { + if (m_kwargs.contains(k.first)) { +#if defined(NDEBUG) + multiple_values_error(); +#else + multiple_values_error(str(k.first)); +#endif + } + m_kwargs[k.first] = k.second; + } + } + + [[noreturn]] static void nameless_argument_error() { + throw type_error("Got kwargs without a name; only named arguments " + "may be passed via py::arg() to a python function call. 
" + "(compile in debug mode for details)"); + } + [[noreturn]] static void nameless_argument_error(std::string type) { + throw type_error("Got kwargs without a name of type '" + type + "'; only named " + "arguments may be passed via py::arg() to a python function call. "); + } + [[noreturn]] static void multiple_values_error() { + throw type_error("Got multiple values for keyword argument " + "(compile in debug mode for details)"); + } + + [[noreturn]] static void multiple_values_error(std::string name) { + throw type_error("Got multiple values for keyword argument '" + name + "'"); + } + + [[noreturn]] static void argument_cast_error() { + throw cast_error("Unable to convert call argument to Python object " + "(compile in debug mode for details)"); + } + + [[noreturn]] static void argument_cast_error(std::string name, std::string type) { + throw cast_error("Unable to convert call argument '" + name + + "' of type '" + type + "' to Python object"); + } + +private: + tuple m_args; + dict m_kwargs; +}; + +/// Collect only positional arguments for a Python function call +template ...>::value>> +simple_collector collect_arguments(Args &&...args) { + return simple_collector(std::forward(args)...); +} + +/// Collect all arguments, including keywords and unpacking (only instantiated when needed) +template ...>::value>> +unpacking_collector collect_arguments(Args &&...args) { + // Following argument order rules for generalized unpacking according to PEP 448 + static_assert( + constexpr_last() < constexpr_first() + && constexpr_last() < constexpr_first(), + "Invalid function call: positional args must precede keywords and ** unpacking; " + "* unpacking must precede ** unpacking" + ); + return unpacking_collector(std::forward(args)...); +} + +template +template +object object_api::operator()(Args &&...args) const { + return detail::collect_arguments(std::forward(args)...).call(derived().ptr()); +} + +template +template +object object_api::call(Args &&...args) const { + 
return operator()(std::forward(args)...); +} + +NAMESPACE_END(detail) + +#define PYBIND11_MAKE_OPAQUE(...) \ + namespace pybind11 { namespace detail { \ + template<> class type_caster<__VA_ARGS__> : public type_caster_base<__VA_ARGS__> { }; \ + }} + +/// Lets you pass a type containing a `,` through a macro parameter without needing a separate +/// typedef, e.g.: `PYBIND11_OVERLOAD(PYBIND11_TYPE(ReturnType), PYBIND11_TYPE(Parent), f, arg)` +#define PYBIND11_TYPE(...) __VA_ARGS__ + +NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/thirdparty/pybind11/include/pybind11/chrono.h b/thirdparty/pybind11/include/pybind11/chrono.h new file mode 100644 index 000000000..ea777e696 --- /dev/null +++ b/thirdparty/pybind11/include/pybind11/chrono.h @@ -0,0 +1,184 @@ +/* + pybind11/chrono.h: Transparent conversion between std::chrono and python's datetime + + Copyright (c) 2016 Trent Houliston and + Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. 
+*/ + +#pragma once + +#include "pybind11.h" +#include +#include +#include +#include + +// Backport the PyDateTime_DELTA functions from Python3.3 if required +#ifndef PyDateTime_DELTA_GET_DAYS +#define PyDateTime_DELTA_GET_DAYS(o) (((PyDateTime_Delta*)o)->days) +#endif +#ifndef PyDateTime_DELTA_GET_SECONDS +#define PyDateTime_DELTA_GET_SECONDS(o) (((PyDateTime_Delta*)o)->seconds) +#endif +#ifndef PyDateTime_DELTA_GET_MICROSECONDS +#define PyDateTime_DELTA_GET_MICROSECONDS(o) (((PyDateTime_Delta*)o)->microseconds) +#endif + +NAMESPACE_BEGIN(PYBIND11_NAMESPACE) +NAMESPACE_BEGIN(detail) + +template class duration_caster { +public: + typedef typename type::rep rep; + typedef typename type::period period; + + typedef std::chrono::duration> days; + + bool load(handle src, bool) { + using namespace std::chrono; + + // Lazy initialise the PyDateTime import + if (!PyDateTimeAPI) { PyDateTime_IMPORT; } + + if (!src) return false; + // If invoked with datetime.delta object + if (PyDelta_Check(src.ptr())) { + value = type(duration_cast>( + days(PyDateTime_DELTA_GET_DAYS(src.ptr())) + + seconds(PyDateTime_DELTA_GET_SECONDS(src.ptr())) + + microseconds(PyDateTime_DELTA_GET_MICROSECONDS(src.ptr())))); + return true; + } + // If invoked with a float we assume it is seconds and convert + else if (PyFloat_Check(src.ptr())) { + value = type(duration_cast>(duration(PyFloat_AsDouble(src.ptr())))); + return true; + } + else return false; + } + + // If this is a duration just return it back + static const std::chrono::duration& get_duration(const std::chrono::duration &src) { + return src; + } + + // If this is a time_point get the time_since_epoch + template static std::chrono::duration get_duration(const std::chrono::time_point> &src) { + return src.time_since_epoch(); + } + + static handle cast(const type &src, return_value_policy /* policy */, handle /* parent */) { + using namespace std::chrono; + + // Use overloaded function to get our duration from our source + // Works out if it 
is a duration or time_point and get the duration + auto d = get_duration(src); + + // Lazy initialise the PyDateTime import + if (!PyDateTimeAPI) { PyDateTime_IMPORT; } + + // Declare these special duration types so the conversions happen with the correct primitive types (int) + using dd_t = duration>; + using ss_t = duration>; + using us_t = duration; + + auto dd = duration_cast(d); + auto subd = d - dd; + auto ss = duration_cast(subd); + auto us = duration_cast(subd - ss); + return PyDelta_FromDSU(dd.count(), ss.count(), us.count()); + } + + PYBIND11_TYPE_CASTER(type, _("datetime.timedelta")); +}; + +// This is for casting times on the system clock into datetime.datetime instances +template class type_caster> { +public: + typedef std::chrono::time_point type; + bool load(handle src, bool) { + using namespace std::chrono; + + // Lazy initialise the PyDateTime import + if (!PyDateTimeAPI) { PyDateTime_IMPORT; } + + if (!src) return false; + + std::tm cal; + microseconds msecs; + + if (PyDateTime_Check(src.ptr())) { + cal.tm_sec = PyDateTime_DATE_GET_SECOND(src.ptr()); + cal.tm_min = PyDateTime_DATE_GET_MINUTE(src.ptr()); + cal.tm_hour = PyDateTime_DATE_GET_HOUR(src.ptr()); + cal.tm_mday = PyDateTime_GET_DAY(src.ptr()); + cal.tm_mon = PyDateTime_GET_MONTH(src.ptr()) - 1; + cal.tm_year = PyDateTime_GET_YEAR(src.ptr()) - 1900; + cal.tm_isdst = -1; + msecs = microseconds(PyDateTime_DATE_GET_MICROSECOND(src.ptr())); + } else if (PyDate_Check(src.ptr())) { + cal.tm_sec = 0; + cal.tm_min = 0; + cal.tm_hour = 0; + cal.tm_mday = PyDateTime_GET_DAY(src.ptr()); + cal.tm_mon = PyDateTime_GET_MONTH(src.ptr()) - 1; + cal.tm_year = PyDateTime_GET_YEAR(src.ptr()) - 1900; + cal.tm_isdst = -1; + msecs = microseconds(0); + } else if (PyTime_Check(src.ptr())) { + cal.tm_sec = PyDateTime_TIME_GET_SECOND(src.ptr()); + cal.tm_min = PyDateTime_TIME_GET_MINUTE(src.ptr()); + cal.tm_hour = PyDateTime_TIME_GET_HOUR(src.ptr()); + cal.tm_mday = 1; // This date (day, month, year) = (1, 0, 70) + 
cal.tm_mon = 0; // represents 1-Jan-1970, which is the first + cal.tm_year = 70; // earliest available date for Python's datetime + cal.tm_isdst = -1; + msecs = microseconds(PyDateTime_TIME_GET_MICROSECOND(src.ptr())); + } + else return false; + + value = system_clock::from_time_t(std::mktime(&cal)) + msecs; + return true; + } + + static handle cast(const std::chrono::time_point &src, return_value_policy /* policy */, handle /* parent */) { + using namespace std::chrono; + + // Lazy initialise the PyDateTime import + if (!PyDateTimeAPI) { PyDateTime_IMPORT; } + + std::time_t tt = system_clock::to_time_t(time_point_cast(src)); + // this function uses static memory so it's best to copy it out asap just in case + // otherwise other code that is using localtime may break this (not just python code) + std::tm localtime = *std::localtime(&tt); + + // Declare these special duration types so the conversions happen with the correct primitive types (int) + using us_t = duration; + + return PyDateTime_FromDateAndTime(localtime.tm_year + 1900, + localtime.tm_mon + 1, + localtime.tm_mday, + localtime.tm_hour, + localtime.tm_min, + localtime.tm_sec, + (duration_cast(src.time_since_epoch() % seconds(1))).count()); + } + PYBIND11_TYPE_CASTER(type, _("datetime.datetime")); +}; + +// Other clocks that are not the system clock are not measured as datetime.datetime objects +// since they are not measured on calendar time. 
So instead we just make them timedeltas +// Or if they have passed us a time as a float we convert that +template class type_caster> +: public duration_caster> { +}; + +template class type_caster> +: public duration_caster> { +}; + +NAMESPACE_END(detail) +NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/thirdparty/pybind11/include/pybind11/common.h b/thirdparty/pybind11/include/pybind11/common.h new file mode 100644 index 000000000..6c8a4f1e8 --- /dev/null +++ b/thirdparty/pybind11/include/pybind11/common.h @@ -0,0 +1,2 @@ +#include "detail/common.h" +#warning "Including 'common.h' is deprecated. It will be removed in v3.0. Use 'pybind11.h'." diff --git a/thirdparty/pybind11/include/pybind11/complex.h b/thirdparty/pybind11/include/pybind11/complex.h new file mode 100644 index 000000000..3f8963857 --- /dev/null +++ b/thirdparty/pybind11/include/pybind11/complex.h @@ -0,0 +1,65 @@ +/* + pybind11/complex.h: Complex number support + + Copyright (c) 2016 Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. 
+*/ + +#pragma once + +#include "pybind11.h" +#include + +/// glibc defines I as a macro which breaks things, e.g., boost template names +#ifdef I +# undef I +#endif + +NAMESPACE_BEGIN(PYBIND11_NAMESPACE) + +template struct format_descriptor, detail::enable_if_t::value>> { + static constexpr const char c = format_descriptor::c; + static constexpr const char value[3] = { 'Z', c, '\0' }; + static std::string format() { return std::string(value); } +}; + +#ifndef PYBIND11_CPP17 + +template constexpr const char format_descriptor< + std::complex, detail::enable_if_t::value>>::value[3]; + +#endif + +NAMESPACE_BEGIN(detail) + +template struct is_fmt_numeric, detail::enable_if_t::value>> { + static constexpr bool value = true; + static constexpr int index = is_fmt_numeric::index + 3; +}; + +template class type_caster> { +public: + bool load(handle src, bool convert) { + if (!src) + return false; + if (!convert && !PyComplex_Check(src.ptr())) + return false; + Py_complex result = PyComplex_AsCComplex(src.ptr()); + if (result.real == -1.0 && PyErr_Occurred()) { + PyErr_Clear(); + return false; + } + value = std::complex((T) result.real, (T) result.imag); + return true; + } + + static handle cast(const std::complex &src, return_value_policy /* policy */, handle /* parent */) { + return PyComplex_FromDoubles((double) src.real(), (double) src.imag()); + } + + PYBIND11_TYPE_CASTER(std::complex, _("complex")); +}; +NAMESPACE_END(detail) +NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/thirdparty/pybind11/include/pybind11/detail/class.h b/thirdparty/pybind11/include/pybind11/detail/class.h new file mode 100644 index 000000000..edfa7de68 --- /dev/null +++ b/thirdparty/pybind11/include/pybind11/detail/class.h @@ -0,0 +1,639 @@ +/* + pybind11/detail/class.h: Python C API implementation details for py::class_ + + Copyright (c) 2017 Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. 
+*/ + +#pragma once + +#include "../attr.h" +#include "../options.h" + +NAMESPACE_BEGIN(PYBIND11_NAMESPACE) +NAMESPACE_BEGIN(detail) + +#if PY_VERSION_HEX >= 0x03030000 +# define PYBIND11_BUILTIN_QUALNAME +# define PYBIND11_SET_OLDPY_QUALNAME(obj, nameobj) +#else +// In pre-3.3 Python, we still set __qualname__ so that we can produce reliable function type +// signatures; in 3.3+ this macro expands to nothing: +# define PYBIND11_SET_OLDPY_QUALNAME(obj, nameobj) setattr((PyObject *) obj, "__qualname__", nameobj) +#endif + +inline PyTypeObject *type_incref(PyTypeObject *type) { + Py_INCREF(type); + return type; +} + +#if !defined(PYPY_VERSION) + +/// `pybind11_static_property.__get__()`: Always pass the class instead of the instance. +extern "C" inline PyObject *pybind11_static_get(PyObject *self, PyObject * /*ob*/, PyObject *cls) { + return PyProperty_Type.tp_descr_get(self, cls, cls); +} + +/// `pybind11_static_property.__set__()`: Just like the above `__get__()`. +extern "C" inline int pybind11_static_set(PyObject *self, PyObject *obj, PyObject *value) { + PyObject *cls = PyType_Check(obj) ? obj : (PyObject *) Py_TYPE(obj); + return PyProperty_Type.tp_descr_set(self, cls, value); +} + +/** A `static_property` is the same as a `property` but the `__get__()` and `__set__()` + methods are modified to always use the object type instead of a concrete instance. + Return value: New reference. 
*/ +inline PyTypeObject *make_static_property_type() { + constexpr auto *name = "pybind11_static_property"; + auto name_obj = reinterpret_steal(PYBIND11_FROM_STRING(name)); + + /* Danger zone: from now (and until PyType_Ready), make sure to + issue no Python C API calls which could potentially invoke the + garbage collector (the GC will call type_traverse(), which will in + turn find the newly constructed type in an invalid state) */ + auto heap_type = (PyHeapTypeObject *) PyType_Type.tp_alloc(&PyType_Type, 0); + if (!heap_type) + pybind11_fail("make_static_property_type(): error allocating type!"); + + heap_type->ht_name = name_obj.inc_ref().ptr(); +#ifdef PYBIND11_BUILTIN_QUALNAME + heap_type->ht_qualname = name_obj.inc_ref().ptr(); +#endif + + auto type = &heap_type->ht_type; + type->tp_name = name; + type->tp_base = type_incref(&PyProperty_Type); + type->tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HEAPTYPE; + type->tp_descr_get = pybind11_static_get; + type->tp_descr_set = pybind11_static_set; + + if (PyType_Ready(type) < 0) + pybind11_fail("make_static_property_type(): failure in PyType_Ready()!"); + + setattr((PyObject *) type, "__module__", str("pybind11_builtins")); + PYBIND11_SET_OLDPY_QUALNAME(type, name_obj); + + return type; +} + +#else // PYPY + +/** PyPy has some issues with the above C API, so we evaluate Python code instead. + This function will only be called once so performance isn't really a concern. + Return value: New reference. 
*/ +inline PyTypeObject *make_static_property_type() { + auto d = dict(); + PyObject *result = PyRun_String(R"(\ + class pybind11_static_property(property): + def __get__(self, obj, cls): + return property.__get__(self, cls, cls) + + def __set__(self, obj, value): + cls = obj if isinstance(obj, type) else type(obj) + property.__set__(self, cls, value) + )", Py_file_input, d.ptr(), d.ptr() + ); + if (result == nullptr) + throw error_already_set(); + Py_DECREF(result); + return (PyTypeObject *) d["pybind11_static_property"].cast().release().ptr(); +} + +#endif // PYPY + +/** Types with static properties need to handle `Type.static_prop = x` in a specific way. + By default, Python replaces the `static_property` itself, but for wrapped C++ types + we need to call `static_property.__set__()` in order to propagate the new value to + the underlying C++ data structure. */ +extern "C" inline int pybind11_meta_setattro(PyObject* obj, PyObject* name, PyObject* value) { + // Use `_PyType_Lookup()` instead of `PyObject_GetAttr()` in order to get the raw + // descriptor (`property`) instead of calling `tp_descr_get` (`property.__get__()`). + PyObject *descr = _PyType_Lookup((PyTypeObject *) obj, name); + + // The following assignment combinations are possible: + // 1. `Type.static_prop = value` --> descr_set: `Type.static_prop.__set__(value)` + // 2. `Type.static_prop = other_static_prop` --> setattro: replace existing `static_prop` + // 3. `Type.regular_attribute = value` --> setattro: regular attribute assignment + const auto static_prop = (PyObject *) get_internals().static_property_type; + const auto call_descr_set = descr && PyObject_IsInstance(descr, static_prop) + && !PyObject_IsInstance(value, static_prop); + if (call_descr_set) { + // Call `static_property.__set__()` instead of replacing the `static_property`. 
+#if !defined(PYPY_VERSION) + return Py_TYPE(descr)->tp_descr_set(descr, obj, value); +#else + if (PyObject *result = PyObject_CallMethod(descr, "__set__", "OO", obj, value)) { + Py_DECREF(result); + return 0; + } else { + return -1; + } +#endif + } else { + // Replace existing attribute. + return PyType_Type.tp_setattro(obj, name, value); + } +} + +#if PY_MAJOR_VERSION >= 3 +/** + * Python 3's PyInstanceMethod_Type hides itself via its tp_descr_get, which prevents aliasing + * methods via cls.attr("m2") = cls.attr("m1"): instead the tp_descr_get returns a plain function, + * when called on a class, or a PyMethod, when called on an instance. Override that behaviour here + * to do a special case bypass for PyInstanceMethod_Types. + */ +extern "C" inline PyObject *pybind11_meta_getattro(PyObject *obj, PyObject *name) { + PyObject *descr = _PyType_Lookup((PyTypeObject *) obj, name); + if (descr && PyInstanceMethod_Check(descr)) { + Py_INCREF(descr); + return descr; + } + else { + return PyType_Type.tp_getattro(obj, name); + } +} +#endif + +/** This metaclass is assigned by default to all pybind11 types and is required in order + for static properties to function correctly. Users may override this using `py::metaclass`. + Return value: New reference. 
*/ +inline PyTypeObject* make_default_metaclass() { + constexpr auto *name = "pybind11_type"; + auto name_obj = reinterpret_steal(PYBIND11_FROM_STRING(name)); + + /* Danger zone: from now (and until PyType_Ready), make sure to + issue no Python C API calls which could potentially invoke the + garbage collector (the GC will call type_traverse(), which will in + turn find the newly constructed type in an invalid state) */ + auto heap_type = (PyHeapTypeObject *) PyType_Type.tp_alloc(&PyType_Type, 0); + if (!heap_type) + pybind11_fail("make_default_metaclass(): error allocating metaclass!"); + + heap_type->ht_name = name_obj.inc_ref().ptr(); +#ifdef PYBIND11_BUILTIN_QUALNAME + heap_type->ht_qualname = name_obj.inc_ref().ptr(); +#endif + + auto type = &heap_type->ht_type; + type->tp_name = name; + type->tp_base = type_incref(&PyType_Type); + type->tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HEAPTYPE; + + type->tp_setattro = pybind11_meta_setattro; +#if PY_MAJOR_VERSION >= 3 + type->tp_getattro = pybind11_meta_getattro; +#endif + + if (PyType_Ready(type) < 0) + pybind11_fail("make_default_metaclass(): failure in PyType_Ready()!"); + + setattr((PyObject *) type, "__module__", str("pybind11_builtins")); + PYBIND11_SET_OLDPY_QUALNAME(type, name_obj); + + return type; +} + +/// For multiple inheritance types we need to recursively register/deregister base pointers for any +/// base classes with pointers that are difference from the instance value pointer so that we can +/// correctly recognize an offset base class pointer. This calls a function with any offset base ptrs. 
+inline void traverse_offset_bases(void *valueptr, const detail::type_info *tinfo, instance *self, + bool (*f)(void * /*parentptr*/, instance * /*self*/)) { + for (handle h : reinterpret_borrow(tinfo->type->tp_bases)) { + if (auto parent_tinfo = get_type_info((PyTypeObject *) h.ptr())) { + for (auto &c : parent_tinfo->implicit_casts) { + if (c.first == tinfo->cpptype) { + auto *parentptr = c.second(valueptr); + if (parentptr != valueptr) + f(parentptr, self); + traverse_offset_bases(parentptr, parent_tinfo, self, f); + break; + } + } + } + } +} + +inline bool register_instance_impl(void *ptr, instance *self) { + get_internals().registered_instances.emplace(ptr, self); + return true; // unused, but gives the same signature as the deregister func +} +inline bool deregister_instance_impl(void *ptr, instance *self) { + auto ®istered_instances = get_internals().registered_instances; + auto range = registered_instances.equal_range(ptr); + for (auto it = range.first; it != range.second; ++it) { + if (Py_TYPE(self) == Py_TYPE(it->second)) { + registered_instances.erase(it); + return true; + } + } + return false; +} + +inline void register_instance(instance *self, void *valptr, const type_info *tinfo) { + register_instance_impl(valptr, self); + if (!tinfo->simple_ancestors) + traverse_offset_bases(valptr, tinfo, self, register_instance_impl); +} + +inline bool deregister_instance(instance *self, void *valptr, const type_info *tinfo) { + bool ret = deregister_instance_impl(valptr, self); + if (!tinfo->simple_ancestors) + traverse_offset_bases(valptr, tinfo, self, deregister_instance_impl); + return ret; +} + +/// Instance creation function for all pybind11 types. It allocates the internal instance layout for +/// holding C++ objects and holders. Allocation is done lazily (the first time the instance is cast +/// to a reference or pointer), and initialization is done by an `__init__` function. 
+inline PyObject *make_new_instance(PyTypeObject *type) { +#if defined(PYPY_VERSION) + // PyPy gets tp_basicsize wrong (issue 2482) under multiple inheritance when the first inherited + // object is a a plain Python type (i.e. not derived from an extension type). Fix it. + ssize_t instance_size = static_cast(sizeof(instance)); + if (type->tp_basicsize < instance_size) { + type->tp_basicsize = instance_size; + } +#endif + PyObject *self = type->tp_alloc(type, 0); + auto inst = reinterpret_cast(self); + // Allocate the value/holder internals: + inst->allocate_layout(); + + inst->owned = true; + + return self; +} + +/// Instance creation function for all pybind11 types. It only allocates space for the +/// C++ object, but doesn't call the constructor -- an `__init__` function must do that. +extern "C" inline PyObject *pybind11_object_new(PyTypeObject *type, PyObject *, PyObject *) { + return make_new_instance(type); +} + +/// An `__init__` function constructs the C++ object. Users should provide at least one +/// of these using `py::init` or directly with `.def(__init__, ...)`. Otherwise, the +/// following default function will be used which simply throws an exception. 
+extern "C" inline int pybind11_object_init(PyObject *self, PyObject *, PyObject *) { + PyTypeObject *type = Py_TYPE(self); + std::string msg; +#if defined(PYPY_VERSION) + msg += handle((PyObject *) type).attr("__module__").cast() + "."; +#endif + msg += type->tp_name; + msg += ": No constructor defined!"; + PyErr_SetString(PyExc_TypeError, msg.c_str()); + return -1; +} + +inline void add_patient(PyObject *nurse, PyObject *patient) { + auto &internals = get_internals(); + auto instance = reinterpret_cast(nurse); + instance->has_patients = true; + Py_INCREF(patient); + internals.patients[nurse].push_back(patient); +} + +inline void clear_patients(PyObject *self) { + auto instance = reinterpret_cast(self); + auto &internals = get_internals(); + auto pos = internals.patients.find(self); + assert(pos != internals.patients.end()); + // Clearing the patients can cause more Python code to run, which + // can invalidate the iterator. Extract the vector of patients + // from the unordered_map first. + auto patients = std::move(pos->second); + internals.patients.erase(pos); + instance->has_patients = false; + for (PyObject *&patient : patients) + Py_CLEAR(patient); +} + +/// Clears all internal data from the instance and removes it from registered instances in +/// preparation for deallocation. +inline void clear_instance(PyObject *self) { + auto instance = reinterpret_cast(self); + + // Deallocate any values/holders, if present: + for (auto &v_h : values_and_holders(instance)) { + if (v_h) { + + // We have to deregister before we call dealloc because, for virtual MI types, we still + // need to be able to get the parent pointers. 
+ if (v_h.instance_registered() && !deregister_instance(instance, v_h.value_ptr(), v_h.type)) + pybind11_fail("pybind11_object_dealloc(): Tried to deallocate unregistered instance!"); + + if (instance->owned || v_h.holder_constructed()) + v_h.type->dealloc(v_h); + } + } + // Deallocate the value/holder layout internals: + instance->deallocate_layout(); + + if (instance->weakrefs) + PyObject_ClearWeakRefs(self); + + PyObject **dict_ptr = _PyObject_GetDictPtr(self); + if (dict_ptr) + Py_CLEAR(*dict_ptr); + + if (instance->has_patients) + clear_patients(self); +} + +/// Instance destructor function for all pybind11 types. It calls `type_info.dealloc` +/// to destroy the C++ object itself, while the rest is Python bookkeeping. +extern "C" inline void pybind11_object_dealloc(PyObject *self) { + clear_instance(self); + + auto type = Py_TYPE(self); + type->tp_free(self); + +#if PY_VERSION_HEX < 0x03080000 + // `type->tp_dealloc != pybind11_object_dealloc` means that we're being called + // as part of a derived type's dealloc, in which case we're not allowed to decref + // the type here. For cross-module compatibility, we shouldn't compare directly + // with `pybind11_object_dealloc`, but with the common one stashed in internals. + auto pybind11_object_type = (PyTypeObject *) get_internals().instance_base; + if (type->tp_dealloc == pybind11_object_type->tp_dealloc) + Py_DECREF(type); +#else + // This was not needed before Python 3.8 (Python issue 35810) + // https://github.com/pybind/pybind11/issues/1946 + Py_DECREF(type); +#endif +} + +/** Create the type which can be used as a common base for all classes. This is + needed in order to satisfy Python's requirements for multiple inheritance. + Return value: New reference. 
*/ +inline PyObject *make_object_base_type(PyTypeObject *metaclass) { + constexpr auto *name = "pybind11_object"; + auto name_obj = reinterpret_steal(PYBIND11_FROM_STRING(name)); + + /* Danger zone: from now (and until PyType_Ready), make sure to + issue no Python C API calls which could potentially invoke the + garbage collector (the GC will call type_traverse(), which will in + turn find the newly constructed type in an invalid state) */ + auto heap_type = (PyHeapTypeObject *) metaclass->tp_alloc(metaclass, 0); + if (!heap_type) + pybind11_fail("make_object_base_type(): error allocating type!"); + + heap_type->ht_name = name_obj.inc_ref().ptr(); +#ifdef PYBIND11_BUILTIN_QUALNAME + heap_type->ht_qualname = name_obj.inc_ref().ptr(); +#endif + + auto type = &heap_type->ht_type; + type->tp_name = name; + type->tp_base = type_incref(&PyBaseObject_Type); + type->tp_basicsize = static_cast(sizeof(instance)); + type->tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HEAPTYPE; + + type->tp_new = pybind11_object_new; + type->tp_init = pybind11_object_init; + type->tp_dealloc = pybind11_object_dealloc; + + /* Support weak references (needed for the keep_alive feature) */ + type->tp_weaklistoffset = offsetof(instance, weakrefs); + + if (PyType_Ready(type) < 0) + pybind11_fail("PyType_Ready failed in make_object_base_type():" + error_string()); + + setattr((PyObject *) type, "__module__", str("pybind11_builtins")); + PYBIND11_SET_OLDPY_QUALNAME(type, name_obj); + + assert(!PyType_HasFeature(type, Py_TPFLAGS_HAVE_GC)); + return (PyObject *) heap_type; +} + +/// dynamic_attr: Support for `d = instance.__dict__`. +extern "C" inline PyObject *pybind11_get_dict(PyObject *self, void *) { + PyObject *&dict = *_PyObject_GetDictPtr(self); + if (!dict) + dict = PyDict_New(); + Py_XINCREF(dict); + return dict; +} + +/// dynamic_attr: Support for `instance.__dict__ = dict()`. 
+extern "C" inline int pybind11_set_dict(PyObject *self, PyObject *new_dict, void *) { + if (!PyDict_Check(new_dict)) { + PyErr_Format(PyExc_TypeError, "__dict__ must be set to a dictionary, not a '%.200s'", + Py_TYPE(new_dict)->tp_name); + return -1; + } + PyObject *&dict = *_PyObject_GetDictPtr(self); + Py_INCREF(new_dict); + Py_CLEAR(dict); + dict = new_dict; + return 0; +} + +/// dynamic_attr: Allow the garbage collector to traverse the internal instance `__dict__`. +extern "C" inline int pybind11_traverse(PyObject *self, visitproc visit, void *arg) { + PyObject *&dict = *_PyObject_GetDictPtr(self); + Py_VISIT(dict); + return 0; +} + +/// dynamic_attr: Allow the GC to clear the dictionary. +extern "C" inline int pybind11_clear(PyObject *self) { + PyObject *&dict = *_PyObject_GetDictPtr(self); + Py_CLEAR(dict); + return 0; +} + +/// Give instances of this type a `__dict__` and opt into garbage collection. +inline void enable_dynamic_attributes(PyHeapTypeObject *heap_type) { + auto type = &heap_type->ht_type; +#if defined(PYPY_VERSION) + pybind11_fail(std::string(type->tp_name) + ": dynamic attributes are " + "currently not supported in " + "conjunction with PyPy!"); +#endif + type->tp_flags |= Py_TPFLAGS_HAVE_GC; + type->tp_dictoffset = type->tp_basicsize; // place dict at the end + type->tp_basicsize += (ssize_t)sizeof(PyObject *); // and allocate enough space for it + type->tp_traverse = pybind11_traverse; + type->tp_clear = pybind11_clear; + + static PyGetSetDef getset[] = { + {const_cast("__dict__"), pybind11_get_dict, pybind11_set_dict, nullptr, nullptr}, + {nullptr, nullptr, nullptr, nullptr, nullptr} + }; + type->tp_getset = getset; +} + +/// buffer_protocol: Fill in the view as specified by flags. +extern "C" inline int pybind11_getbuffer(PyObject *obj, Py_buffer *view, int flags) { + // Look for a `get_buffer` implementation in this type's info or any bases (following MRO). 
+ type_info *tinfo = nullptr; + for (auto type : reinterpret_borrow(Py_TYPE(obj)->tp_mro)) { + tinfo = get_type_info((PyTypeObject *) type.ptr()); + if (tinfo && tinfo->get_buffer) + break; + } + if (view == nullptr || !tinfo || !tinfo->get_buffer) { + if (view) + view->obj = nullptr; + PyErr_SetString(PyExc_BufferError, "pybind11_getbuffer(): Internal error"); + return -1; + } + std::memset(view, 0, sizeof(Py_buffer)); + buffer_info *info = tinfo->get_buffer(obj, tinfo->get_buffer_data); + view->obj = obj; + view->ndim = 1; + view->internal = info; + view->buf = info->ptr; + view->itemsize = info->itemsize; + view->len = view->itemsize; + for (auto s : info->shape) + view->len *= s; + view->readonly = info->readonly; + if ((flags & PyBUF_WRITABLE) == PyBUF_WRITABLE && info->readonly) { + if (view) + view->obj = nullptr; + PyErr_SetString(PyExc_BufferError, "Writable buffer requested for readonly storage"); + return -1; + } + if ((flags & PyBUF_FORMAT) == PyBUF_FORMAT) + view->format = const_cast(info->format.c_str()); + if ((flags & PyBUF_STRIDES) == PyBUF_STRIDES) { + view->ndim = (int) info->ndim; + view->strides = &info->strides[0]; + view->shape = &info->shape[0]; + } + Py_INCREF(view->obj); + return 0; +} + +/// buffer_protocol: Release the resources of the buffer. +extern "C" inline void pybind11_releasebuffer(PyObject *, Py_buffer *view) { + delete (buffer_info *) view->internal; +} + +/// Give this type a buffer interface. +inline void enable_buffer_protocol(PyHeapTypeObject *heap_type) { + heap_type->ht_type.tp_as_buffer = &heap_type->as_buffer; +#if PY_MAJOR_VERSION < 3 + heap_type->ht_type.tp_flags |= Py_TPFLAGS_HAVE_NEWBUFFER; +#endif + + heap_type->as_buffer.bf_getbuffer = pybind11_getbuffer; + heap_type->as_buffer.bf_releasebuffer = pybind11_releasebuffer; +} + +/** Create a brand new Python type according to the `type_record` specification. + Return value: New reference. 
*/ +inline PyObject* make_new_python_type(const type_record &rec) { + auto name = reinterpret_steal(PYBIND11_FROM_STRING(rec.name)); + + auto qualname = name; + if (rec.scope && !PyModule_Check(rec.scope.ptr()) && hasattr(rec.scope, "__qualname__")) { +#if PY_MAJOR_VERSION >= 3 + qualname = reinterpret_steal( + PyUnicode_FromFormat("%U.%U", rec.scope.attr("__qualname__").ptr(), name.ptr())); +#else + qualname = str(rec.scope.attr("__qualname__").cast() + "." + rec.name); +#endif + } + + object module; + if (rec.scope) { + if (hasattr(rec.scope, "__module__")) + module = rec.scope.attr("__module__"); + else if (hasattr(rec.scope, "__name__")) + module = rec.scope.attr("__name__"); + } + + auto full_name = c_str( +#if !defined(PYPY_VERSION) + module ? str(module).cast() + "." + rec.name : +#endif + rec.name); + + char *tp_doc = nullptr; + if (rec.doc && options::show_user_defined_docstrings()) { + /* Allocate memory for docstring (using PyObject_MALLOC, since + Python will free this later on) */ + size_t size = strlen(rec.doc) + 1; + tp_doc = (char *) PyObject_MALLOC(size); + memcpy((void *) tp_doc, rec.doc, size); + } + + auto &internals = get_internals(); + auto bases = tuple(rec.bases); + auto base = (bases.size() == 0) ? internals.instance_base + : bases[0].ptr(); + + /* Danger zone: from now (and until PyType_Ready), make sure to + issue no Python C API calls which could potentially invoke the + garbage collector (the GC will call type_traverse(), which will in + turn find the newly constructed type in an invalid state) */ + auto metaclass = rec.metaclass.ptr() ? 
(PyTypeObject *) rec.metaclass.ptr() + : internals.default_metaclass; + + auto heap_type = (PyHeapTypeObject *) metaclass->tp_alloc(metaclass, 0); + if (!heap_type) + pybind11_fail(std::string(rec.name) + ": Unable to create type object!"); + + heap_type->ht_name = name.release().ptr(); +#ifdef PYBIND11_BUILTIN_QUALNAME + heap_type->ht_qualname = qualname.inc_ref().ptr(); +#endif + + auto type = &heap_type->ht_type; + type->tp_name = full_name; + type->tp_doc = tp_doc; + type->tp_base = type_incref((PyTypeObject *)base); + type->tp_basicsize = static_cast(sizeof(instance)); + if (bases.size() > 0) + type->tp_bases = bases.release().ptr(); + + /* Don't inherit base __init__ */ + type->tp_init = pybind11_object_init; + + /* Supported protocols */ + type->tp_as_number = &heap_type->as_number; + type->tp_as_sequence = &heap_type->as_sequence; + type->tp_as_mapping = &heap_type->as_mapping; +#if PY_VERSION_HEX >= 0x03050000 + type->tp_as_async = &heap_type->as_async; +#endif + + /* Flags */ + type->tp_flags |= Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HEAPTYPE; +#if PY_MAJOR_VERSION < 3 + type->tp_flags |= Py_TPFLAGS_CHECKTYPES; +#endif + + if (rec.dynamic_attr) + enable_dynamic_attributes(heap_type); + + if (rec.buffer_protocol) + enable_buffer_protocol(heap_type); + + if (PyType_Ready(type) < 0) + pybind11_fail(std::string(rec.name) + ": PyType_Ready failed (" + error_string() + ")!"); + + assert(rec.dynamic_attr ? 
PyType_HasFeature(type, Py_TPFLAGS_HAVE_GC) + : !PyType_HasFeature(type, Py_TPFLAGS_HAVE_GC)); + + /* Register type with the parent scope */ + if (rec.scope) + setattr(rec.scope, rec.name, (PyObject *) type); + else + Py_INCREF(type); // Keep it alive forever (reference leak) + + if (module) // Needed by pydoc + setattr((PyObject *) type, "__module__", module); + + PYBIND11_SET_OLDPY_QUALNAME(type, qualname); + + return (PyObject *) type; +} + +NAMESPACE_END(detail) +NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/thirdparty/pybind11/include/pybind11/detail/common.h b/thirdparty/pybind11/include/pybind11/detail/common.h new file mode 100644 index 000000000..e53f502d6 --- /dev/null +++ b/thirdparty/pybind11/include/pybind11/detail/common.h @@ -0,0 +1,820 @@ +/* + pybind11/detail/common.h -- Basic macros + + Copyright (c) 2016 Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. +*/ + +#pragma once + +#if !defined(NAMESPACE_BEGIN) +# define NAMESPACE_BEGIN(name) namespace name { +#endif +#if !defined(NAMESPACE_END) +# define NAMESPACE_END(name) } +#endif + +// Robust support for some features and loading modules compiled against different pybind versions +// requires forcing hidden visibility on pybind code, so we enforce this by setting the attribute on +// the main `pybind11` namespace. 
+#if !defined(PYBIND11_NAMESPACE) +# ifdef __GNUG__ +# define PYBIND11_NAMESPACE pybind11 __attribute__((visibility("hidden"))) +# else +# define PYBIND11_NAMESPACE pybind11 +# endif +#endif + +#if !(defined(_MSC_VER) && __cplusplus == 199711L) && !defined(__INTEL_COMPILER) +# if __cplusplus >= 201402L +# define PYBIND11_CPP14 +# if __cplusplus >= 201703L +# define PYBIND11_CPP17 +# endif +# endif +#elif defined(_MSC_VER) && __cplusplus == 199711L +// MSVC sets _MSVC_LANG rather than __cplusplus (supposedly until the standard is fully implemented) +// Unless you use the /Zc:__cplusplus flag on Visual Studio 2017 15.7 Preview 3 or newer +# if _MSVC_LANG >= 201402L +# define PYBIND11_CPP14 +# if _MSVC_LANG > 201402L && _MSC_VER >= 1910 +# define PYBIND11_CPP17 +# endif +# endif +#endif + +// Compiler version assertions +#if defined(__INTEL_COMPILER) +# if __INTEL_COMPILER < 1700 +# error pybind11 requires Intel C++ compiler v17 or newer +# endif +#elif defined(__clang__) && !defined(__apple_build_version__) +# if __clang_major__ < 3 || (__clang_major__ == 3 && __clang_minor__ < 3) +# error pybind11 requires clang 3.3 or newer +# endif +#elif defined(__clang__) +// Apple changes clang version macros to its Xcode version; the first Xcode release based on +// (upstream) clang 3.3 was Xcode 5: +# if __clang_major__ < 5 +# error pybind11 requires Xcode/clang 5.0 or newer +# endif +#elif defined(__GNUG__) +# if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 8) +# error pybind11 requires gcc 4.8 or newer +# endif +#elif defined(_MSC_VER) +// Pybind hits various compiler bugs in 2015u2 and earlier, and also makes use of some stl features +// (e.g. 
std::negation) added in 2015u3: +# if _MSC_FULL_VER < 190024210 +# error pybind11 requires MSVC 2015 update 3 or newer +# endif +#endif + +#if !defined(PYBIND11_EXPORT) +# if defined(WIN32) || defined(_WIN32) +# define PYBIND11_EXPORT __declspec(dllexport) +# else +# define PYBIND11_EXPORT __attribute__ ((visibility("default"))) +# endif +#endif + +#if defined(_MSC_VER) +# define PYBIND11_NOINLINE __declspec(noinline) +#else +# define PYBIND11_NOINLINE __attribute__ ((noinline)) +#endif + +#if defined(PYBIND11_CPP14) +# define PYBIND11_DEPRECATED(reason) [[deprecated(reason)]] +#else +# define PYBIND11_DEPRECATED(reason) __attribute__((deprecated(reason))) +#endif + +#define PYBIND11_VERSION_MAJOR 2 +#define PYBIND11_VERSION_MINOR 5 +#define PYBIND11_VERSION_PATCH 0 + +/// Include Python header, disable linking to pythonX_d.lib on Windows in debug mode +#if defined(_MSC_VER) +# if (PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION < 4) +# define HAVE_ROUND 1 +# endif +# pragma warning(push) +# pragma warning(disable: 4510 4610 4512 4005) +# if defined(_DEBUG) && !defined(Py_DEBUG) +# define PYBIND11_DEBUG_MARKER +# undef _DEBUG +# endif +#endif + +#include +#include +#include + +/* Python #defines overrides on all sorts of core functions, which + tends to weak havok in C++ codebases that expect these to work + like regular functions (potentially with several overloads) */ +#if defined(isalnum) +# undef isalnum +# undef isalpha +# undef islower +# undef isspace +# undef isupper +# undef tolower +# undef toupper +#endif + +#if defined(copysign) +# undef copysign +#endif + +#if defined(_MSC_VER) +# if defined(PYBIND11_DEBUG_MARKER) +# define _DEBUG +# undef PYBIND11_DEBUG_MARKER +# endif +# pragma warning(pop) +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if PY_MAJOR_VERSION >= 3 /// Compatibility macros for various Python versions +#define PYBIND11_INSTANCE_METHOD_NEW(ptr, class_) 
PyInstanceMethod_New(ptr) +#define PYBIND11_INSTANCE_METHOD_CHECK PyInstanceMethod_Check +#define PYBIND11_INSTANCE_METHOD_GET_FUNCTION PyInstanceMethod_GET_FUNCTION +#define PYBIND11_BYTES_CHECK PyBytes_Check +#define PYBIND11_BYTES_FROM_STRING PyBytes_FromString +#define PYBIND11_BYTES_FROM_STRING_AND_SIZE PyBytes_FromStringAndSize +#define PYBIND11_BYTES_AS_STRING_AND_SIZE PyBytes_AsStringAndSize +#define PYBIND11_BYTES_AS_STRING PyBytes_AsString +#define PYBIND11_BYTES_SIZE PyBytes_Size +#define PYBIND11_LONG_CHECK(o) PyLong_Check(o) +#define PYBIND11_LONG_AS_LONGLONG(o) PyLong_AsLongLong(o) +#define PYBIND11_LONG_FROM_SIGNED(o) PyLong_FromSsize_t((ssize_t) o) +#define PYBIND11_LONG_FROM_UNSIGNED(o) PyLong_FromSize_t((size_t) o) +#define PYBIND11_BYTES_NAME "bytes" +#define PYBIND11_STRING_NAME "str" +#define PYBIND11_SLICE_OBJECT PyObject +#define PYBIND11_FROM_STRING PyUnicode_FromString +#define PYBIND11_STR_TYPE ::pybind11::str +#define PYBIND11_BOOL_ATTR "__bool__" +#define PYBIND11_NB_BOOL(ptr) ((ptr)->nb_bool) +// Providing a separate declaration to make Clang's -Wmissing-prototypes happy +#define PYBIND11_PLUGIN_IMPL(name) \ + extern "C" PYBIND11_EXPORT PyObject *PyInit_##name(); \ + extern "C" PYBIND11_EXPORT PyObject *PyInit_##name() + +#else +#define PYBIND11_INSTANCE_METHOD_NEW(ptr, class_) PyMethod_New(ptr, nullptr, class_) +#define PYBIND11_INSTANCE_METHOD_CHECK PyMethod_Check +#define PYBIND11_INSTANCE_METHOD_GET_FUNCTION PyMethod_GET_FUNCTION +#define PYBIND11_BYTES_CHECK PyString_Check +#define PYBIND11_BYTES_FROM_STRING PyString_FromString +#define PYBIND11_BYTES_FROM_STRING_AND_SIZE PyString_FromStringAndSize +#define PYBIND11_BYTES_AS_STRING_AND_SIZE PyString_AsStringAndSize +#define PYBIND11_BYTES_AS_STRING PyString_AsString +#define PYBIND11_BYTES_SIZE PyString_Size +#define PYBIND11_LONG_CHECK(o) (PyInt_Check(o) || PyLong_Check(o)) +#define PYBIND11_LONG_AS_LONGLONG(o) (PyInt_Check(o) ? 
(long long) PyLong_AsLong(o) : PyLong_AsLongLong(o)) +#define PYBIND11_LONG_FROM_SIGNED(o) PyInt_FromSsize_t((ssize_t) o) // Returns long if needed. +#define PYBIND11_LONG_FROM_UNSIGNED(o) PyInt_FromSize_t((size_t) o) // Returns long if needed. +#define PYBIND11_BYTES_NAME "str" +#define PYBIND11_STRING_NAME "unicode" +#define PYBIND11_SLICE_OBJECT PySliceObject +#define PYBIND11_FROM_STRING PyString_FromString +#define PYBIND11_STR_TYPE ::pybind11::bytes +#define PYBIND11_BOOL_ATTR "__nonzero__" +#define PYBIND11_NB_BOOL(ptr) ((ptr)->nb_nonzero) +// Providing a separate PyInit decl to make Clang's -Wmissing-prototypes happy +#define PYBIND11_PLUGIN_IMPL(name) \ + static PyObject *pybind11_init_wrapper(); \ + extern "C" PYBIND11_EXPORT void init##name(); \ + extern "C" PYBIND11_EXPORT void init##name() { \ + (void)pybind11_init_wrapper(); \ + } \ + PyObject *pybind11_init_wrapper() +#endif + +#if PY_VERSION_HEX >= 0x03050000 && PY_VERSION_HEX < 0x03050200 +extern "C" { + struct _Py_atomic_address { void *value; }; + PyAPI_DATA(_Py_atomic_address) _PyThreadState_Current; +} +#endif + +#define PYBIND11_TRY_NEXT_OVERLOAD ((PyObject *) 1) // special failure return code +#define PYBIND11_STRINGIFY(x) #x +#define PYBIND11_TOSTRING(x) PYBIND11_STRINGIFY(x) +#define PYBIND11_CONCAT(first, second) first##second +#define PYBIND11_ENSURE_INTERNALS_READY \ + pybind11::detail::get_internals(); + +#define PYBIND11_CHECK_PYTHON_VERSION \ + { \ + const char *compiled_ver = PYBIND11_TOSTRING(PY_MAJOR_VERSION) \ + "." 
PYBIND11_TOSTRING(PY_MINOR_VERSION); \ + const char *runtime_ver = Py_GetVersion(); \ + size_t len = std::strlen(compiled_ver); \ + if (std::strncmp(runtime_ver, compiled_ver, len) != 0 \ + || (runtime_ver[len] >= '0' && runtime_ver[len] <= '9')) { \ + PyErr_Format(PyExc_ImportError, \ + "Python version mismatch: module was compiled for Python %s, " \ + "but the interpreter version is incompatible: %s.", \ + compiled_ver, runtime_ver); \ + return nullptr; \ + } \ + } + +#define PYBIND11_CATCH_INIT_EXCEPTIONS \ + catch (pybind11::error_already_set &e) { \ + PyErr_SetString(PyExc_ImportError, e.what()); \ + return nullptr; \ + } catch (const std::exception &e) { \ + PyErr_SetString(PyExc_ImportError, e.what()); \ + return nullptr; \ + } \ + +/** \rst + ***Deprecated in favor of PYBIND11_MODULE*** + + This macro creates the entry point that will be invoked when the Python interpreter + imports a plugin library. Please create a `module` in the function body and return + the pointer to its underlying Python object at the end. + + .. code-block:: cpp + + PYBIND11_PLUGIN(example) { + pybind11::module m("example", "pybind11 example plugin"); + /// Set up bindings here + return m.ptr(); + } +\endrst */ +#define PYBIND11_PLUGIN(name) \ + PYBIND11_DEPRECATED("PYBIND11_PLUGIN is deprecated, use PYBIND11_MODULE") \ + static PyObject *pybind11_init(); \ + PYBIND11_PLUGIN_IMPL(name) { \ + PYBIND11_CHECK_PYTHON_VERSION \ + PYBIND11_ENSURE_INTERNALS_READY \ + try { \ + return pybind11_init(); \ + } PYBIND11_CATCH_INIT_EXCEPTIONS \ + } \ + PyObject *pybind11_init() + +/** \rst + This macro creates the entry point that will be invoked when the Python interpreter + imports an extension module. The module name is given as the fist argument and it + should not be in quotes. The second macro argument defines a variable of type + `py::module` which can be used to initialize the module. + + .. 
code-block:: cpp + + PYBIND11_MODULE(example, m) { + m.doc() = "pybind11 example module"; + + // Add bindings here + m.def("foo", []() { + return "Hello, World!"; + }); + } +\endrst */ +#define PYBIND11_MODULE(name, variable) \ + static void PYBIND11_CONCAT(pybind11_init_, name)(pybind11::module &); \ + PYBIND11_PLUGIN_IMPL(name) { \ + PYBIND11_CHECK_PYTHON_VERSION \ + PYBIND11_ENSURE_INTERNALS_READY \ + auto m = pybind11::module(PYBIND11_TOSTRING(name)); \ + try { \ + PYBIND11_CONCAT(pybind11_init_, name)(m); \ + return m.ptr(); \ + } PYBIND11_CATCH_INIT_EXCEPTIONS \ + } \ + void PYBIND11_CONCAT(pybind11_init_, name)(pybind11::module &variable) + + +NAMESPACE_BEGIN(PYBIND11_NAMESPACE) + +using ssize_t = Py_ssize_t; +using size_t = std::size_t; + +/// Approach used to cast a previously unknown C++ instance into a Python object +enum class return_value_policy : uint8_t { + /** This is the default return value policy, which falls back to the policy + return_value_policy::take_ownership when the return value is a pointer. + Otherwise, it uses return_value::move or return_value::copy for rvalue + and lvalue references, respectively. See below for a description of what + all of these different policies do. */ + automatic = 0, + + /** As above, but use policy return_value_policy::reference when the return + value is a pointer. This is the default conversion policy for function + arguments when calling Python functions manually from C++ code (i.e. via + handle::operator()). You probably won't need to use this. */ + automatic_reference, + + /** Reference an existing object (i.e. do not create a new copy) and take + ownership. Python will call the destructor and delete operator when the + object’s reference count reaches zero. Undefined behavior ensues when + the C++ side does the same.. */ + take_ownership, + + /** Create a new copy of the returned object, which will be owned by + Python. 
This policy is comparably safe because the lifetimes of the two + instances are decoupled. */ + copy, + + /** Use std::move to move the return value contents into a new instance + that will be owned by Python. This policy is comparably safe because the + lifetimes of the two instances (move source and destination) are + decoupled. */ + move, + + /** Reference an existing object, but do not take ownership. The C++ side + is responsible for managing the object’s lifetime and deallocating it + when it is no longer used. Warning: undefined behavior will ensue when + the C++ side deletes an object that is still referenced and used by + Python. */ + reference, + + /** This policy only applies to methods and properties. It references the + object without taking ownership similar to the above + return_value_policy::reference policy. In contrast to that policy, the + function or property’s implicit this argument (called the parent) is + considered to be the the owner of the return value (the child). + pybind11 then couples the lifetime of the parent to the child via a + reference relationship that ensures that the parent cannot be garbage + collected while Python is still using the child. More advanced + variations of this scheme are also possible using combinations of + return_value_policy::reference and the keep_alive call policy */ + reference_internal +}; + +NAMESPACE_BEGIN(detail) + +inline static constexpr int log2(size_t n, int k = 0) { return (n <= 1) ? k : log2(n >> 1, k + 1); } + +// Returns the size as a multiple of sizeof(void *), rounded up. +inline static constexpr size_t size_in_ptrs(size_t s) { return 1 + ((s - 1) >> log2(sizeof(void *))); } + +/** + * The space to allocate for simple layout instance holders (see below) in multiple of the size of + * a pointer (e.g. 2 means 16 bytes on 64-bit architectures). The default is the minimum required + * to holder either a std::unique_ptr or std::shared_ptr (which is almost always + * sizeof(std::shared_ptr)). 
+ */ +constexpr size_t instance_simple_holder_in_ptrs() { + static_assert(sizeof(std::shared_ptr) >= sizeof(std::unique_ptr), + "pybind assumes std::shared_ptrs are at least as big as std::unique_ptrs"); + return size_in_ptrs(sizeof(std::shared_ptr)); +} + +// Forward declarations +struct type_info; +struct value_and_holder; + +struct nonsimple_values_and_holders { + void **values_and_holders; + uint8_t *status; +}; + +/// The 'instance' type which needs to be standard layout (need to be able to use 'offsetof') +struct instance { + PyObject_HEAD + /// Storage for pointers and holder; see simple_layout, below, for a description + union { + void *simple_value_holder[1 + instance_simple_holder_in_ptrs()]; + nonsimple_values_and_holders nonsimple; + }; + /// Weak references + PyObject *weakrefs; + /// If true, the pointer is owned which means we're free to manage it with a holder. + bool owned : 1; + /** + * An instance has two possible value/holder layouts. + * + * Simple layout (when this flag is true), means the `simple_value_holder` is set with a pointer + * and the holder object governing that pointer, i.e. [val1*][holder]. This layout is applied + * whenever there is no python-side multiple inheritance of bound C++ types *and* the type's + * holder will fit in the default space (which is large enough to hold either a std::unique_ptr + * or std::shared_ptr). + * + * Non-simple layout applies when using custom holders that require more space than `shared_ptr` + * (which is typically the size of two pointers), or when multiple inheritance is used on the + * python side. Non-simple layout allocates the required amount of memory to have multiple + * bound C++ classes as parents. Under this layout, `nonsimple.values_and_holders` is set to a + * pointer to allocated space of the required space to hold a sequence of value pointers and + * holders followed `status`, a set of bit flags (1 byte each), i.e. + * [val1*][holder1][val2*][holder2]...[bb...] 
where each [block] is rounded up to a multiple of + * `sizeof(void *)`. `nonsimple.status` is, for convenience, a pointer to the + * beginning of the [bb...] block (but not independently allocated). + * + * Status bits indicate whether the associated holder is constructed (& + * status_holder_constructed) and whether the value pointer is registered (& + * status_instance_registered) in `registered_instances`. + */ + bool simple_layout : 1; + /// For simple layout, tracks whether the holder has been constructed + bool simple_holder_constructed : 1; + /// For simple layout, tracks whether the instance is registered in `registered_instances` + bool simple_instance_registered : 1; + /// If true, get_internals().patients has an entry for this object + bool has_patients : 1; + + /// Initializes all of the above type/values/holders data (but not the instance values themselves) + void allocate_layout(); + + /// Destroys/deallocates all of the above + void deallocate_layout(); + + /// Returns the value_and_holder wrapper for the given type (or the first, if `find_type` + /// omitted). Returns a default-constructed (with `.inst = nullptr`) object on failure if + /// `throw_if_missing` is false. 
+ value_and_holder get_value_and_holder(const type_info *find_type = nullptr, bool throw_if_missing = true); + + /// Bit values for the non-simple status flags + static constexpr uint8_t status_holder_constructed = 1; + static constexpr uint8_t status_instance_registered = 2; +}; + +static_assert(std::is_standard_layout::value, "Internal error: `pybind11::detail::instance` is not standard layout!"); + +/// from __cpp_future__ import (convenient aliases from C++14/17) +#if defined(PYBIND11_CPP14) && (!defined(_MSC_VER) || _MSC_VER >= 1910) +using std::enable_if_t; +using std::conditional_t; +using std::remove_cv_t; +using std::remove_reference_t; +#else +template using enable_if_t = typename std::enable_if::type; +template using conditional_t = typename std::conditional::type; +template using remove_cv_t = typename std::remove_cv::type; +template using remove_reference_t = typename std::remove_reference::type; +#endif + +/// Index sequences +#if defined(PYBIND11_CPP14) +using std::index_sequence; +using std::make_index_sequence; +#else +template struct index_sequence { }; +template struct make_index_sequence_impl : make_index_sequence_impl { }; +template struct make_index_sequence_impl <0, S...> { typedef index_sequence type; }; +template using make_index_sequence = typename make_index_sequence_impl::type; +#endif + +/// Make an index sequence of the indices of true arguments +template struct select_indices_impl { using type = ISeq; }; +template struct select_indices_impl, I, B, Bs...> + : select_indices_impl, index_sequence>, I + 1, Bs...> {}; +template using select_indices = typename select_indices_impl, 0, Bs...>::type; + +/// Backports of std::bool_constant and std::negation to accommodate older compilers +template using bool_constant = std::integral_constant; +template struct negation : bool_constant { }; + +template struct void_t_impl { using type = void; }; +template using void_t = typename void_t_impl::type; + +/// Compile-time all/any/none of that check the 
boolean value of all template types +#if defined(__cpp_fold_expressions) && !(defined(_MSC_VER) && (_MSC_VER < 1916)) +template using all_of = bool_constant<(Ts::value && ...)>; +template using any_of = bool_constant<(Ts::value || ...)>; +#elif !defined(_MSC_VER) +template struct bools {}; +template using all_of = std::is_same< + bools, + bools>; +template using any_of = negation...>>; +#else +// MSVC has trouble with the above, but supports std::conjunction, which we can use instead (albeit +// at a slight loss of compilation efficiency). +template using all_of = std::conjunction; +template using any_of = std::disjunction; +#endif +template using none_of = negation>; + +template class... Predicates> using satisfies_all_of = all_of...>; +template class... Predicates> using satisfies_any_of = any_of...>; +template class... Predicates> using satisfies_none_of = none_of...>; + +/// Strip the class from a method type +template struct remove_class { }; +template struct remove_class { typedef R type(A...); }; +template struct remove_class { typedef R type(A...); }; + +/// Helper template to strip away type modifiers +template struct intrinsic_type { typedef T type; }; +template struct intrinsic_type { typedef typename intrinsic_type::type type; }; +template struct intrinsic_type { typedef typename intrinsic_type::type type; }; +template struct intrinsic_type { typedef typename intrinsic_type::type type; }; +template struct intrinsic_type { typedef typename intrinsic_type::type type; }; +template struct intrinsic_type { typedef typename intrinsic_type::type type; }; +template struct intrinsic_type { typedef typename intrinsic_type::type type; }; +template using intrinsic_t = typename intrinsic_type::type; + +/// Helper type to replace 'void' in some expressions +struct void_type { }; + +/// Helper template which holds a list of types +template struct type_list { }; + +/// Compile-time integer sum +#ifdef __cpp_fold_expressions +template constexpr size_t 
constexpr_sum(Ts... ns) { return (0 + ... + size_t{ns}); } +#else +constexpr size_t constexpr_sum() { return 0; } +template +constexpr size_t constexpr_sum(T n, Ts... ns) { return size_t{n} + constexpr_sum(ns...); } +#endif + +NAMESPACE_BEGIN(constexpr_impl) +/// Implementation details for constexpr functions +constexpr int first(int i) { return i; } +template +constexpr int first(int i, T v, Ts... vs) { return v ? i : first(i + 1, vs...); } + +constexpr int last(int /*i*/, int result) { return result; } +template +constexpr int last(int i, int result, T v, Ts... vs) { return last(i + 1, v ? i : result, vs...); } +NAMESPACE_END(constexpr_impl) + +/// Return the index of the first type in Ts which satisfies Predicate. Returns sizeof...(Ts) if +/// none match. +template class Predicate, typename... Ts> +constexpr int constexpr_first() { return constexpr_impl::first(0, Predicate::value...); } + +/// Return the index of the last type in Ts which satisfies Predicate, or -1 if none match. +template class Predicate, typename... Ts> +constexpr int constexpr_last() { return constexpr_impl::last(0, -1, Predicate::value...); } + +/// Return the Nth element from the parameter pack +template +struct pack_element { using type = typename pack_element::type; }; +template +struct pack_element<0, T, Ts...> { using type = T; }; + +/// Return the one and only type which matches the predicate, or Default if none match. +/// If more than one type matches the predicate, fail at compile-time. +template class Predicate, typename Default, typename... Ts> +struct exactly_one { + static constexpr auto found = constexpr_sum(Predicate::value...); + static_assert(found <= 1, "Found more than one type matching the predicate"); + + static constexpr auto index = found ? constexpr_first() : 0; + using type = conditional_t::type, Default>; +}; +template class P, typename Default> +struct exactly_one { using type = Default; }; + +template class Predicate, typename Default, typename... 
Ts> +using exactly_one_t = typename exactly_one::type; + +/// Defer the evaluation of type T until types Us are instantiated +template struct deferred_type { using type = T; }; +template using deferred_t = typename deferred_type::type; + +/// Like is_base_of, but requires a strict base (i.e. `is_strict_base_of::value == false`, +/// unlike `std::is_base_of`) +template using is_strict_base_of = bool_constant< + std::is_base_of::value && !std::is_same::value>; + +/// Like is_base_of, but also requires that the base type is accessible (i.e. that a Derived pointer +/// can be converted to a Base pointer) +template using is_accessible_base_of = bool_constant< + std::is_base_of::value && std::is_convertible::value>; + +template class Base> +struct is_template_base_of_impl { + template static std::true_type check(Base *); + static std::false_type check(...); +}; + +/// Check if a template is the base of a type. For example: +/// `is_template_base_of` is true if `struct T : Base {}` where U can be anything +template class Base, typename T> +#if !defined(_MSC_VER) +using is_template_base_of = decltype(is_template_base_of_impl::check((intrinsic_t*)nullptr)); +#else // MSVC2015 has trouble with decltype in template aliases +struct is_template_base_of : decltype(is_template_base_of_impl::check((intrinsic_t*)nullptr)) { }; +#endif + +/// Check if T is an instantiation of the template `Class`. For example: +/// `is_instantiation` is true if `T == shared_ptr` where U can be anything. +template class Class, typename T> +struct is_instantiation : std::false_type { }; +template class Class, typename... 
Us> +struct is_instantiation> : std::true_type { }; + +/// Check if T is std::shared_ptr where U can be anything +template using is_shared_ptr = is_instantiation; + +/// Check if T looks like an input iterator +template struct is_input_iterator : std::false_type {}; +template +struct is_input_iterator()), decltype(++std::declval())>> + : std::true_type {}; + +template using is_function_pointer = bool_constant< + std::is_pointer::value && std::is_function::type>::value>; + +template struct strip_function_object { + using type = typename remove_class::type; +}; + +// Extracts the function signature from a function, function pointer or lambda. +template > +using function_signature_t = conditional_t< + std::is_function::value, + F, + typename conditional_t< + std::is_pointer::value || std::is_member_pointer::value, + std::remove_pointer, + strip_function_object + >::type +>; + +/// Returns true if the type looks like a lambda: that is, isn't a function, pointer or member +/// pointer. Note that this can catch all sorts of other things, too; this is intended to be used +/// in a place where passing a lambda makes sense. +template using is_lambda = satisfies_none_of, + std::is_function, std::is_pointer, std::is_member_pointer>; + +/// Ignore that a variable is unused in compiler warnings +inline void ignore_unused(const int *) { } + +/// Apply a function over each element of a parameter pack +#ifdef __cpp_fold_expressions +#define PYBIND11_EXPAND_SIDE_EFFECTS(PATTERN) (((PATTERN), void()), ...) 
+#else +using expand_side_effects = bool[]; +#define PYBIND11_EXPAND_SIDE_EFFECTS(PATTERN) pybind11::detail::expand_side_effects{ ((PATTERN), void(), false)..., false } +#endif + +NAMESPACE_END(detail) + +/// C++ bindings of builtin Python exceptions +class builtin_exception : public std::runtime_error { +public: + using std::runtime_error::runtime_error; + /// Set the error using the Python C API + virtual void set_error() const = 0; +}; + +#define PYBIND11_RUNTIME_EXCEPTION(name, type) \ + class name : public builtin_exception { public: \ + using builtin_exception::builtin_exception; \ + name() : name("") { } \ + void set_error() const override { PyErr_SetString(type, what()); } \ + }; + +PYBIND11_RUNTIME_EXCEPTION(stop_iteration, PyExc_StopIteration) +PYBIND11_RUNTIME_EXCEPTION(index_error, PyExc_IndexError) +PYBIND11_RUNTIME_EXCEPTION(key_error, PyExc_KeyError) +PYBIND11_RUNTIME_EXCEPTION(value_error, PyExc_ValueError) +PYBIND11_RUNTIME_EXCEPTION(type_error, PyExc_TypeError) +PYBIND11_RUNTIME_EXCEPTION(buffer_error, PyExc_BufferError) +PYBIND11_RUNTIME_EXCEPTION(import_error, PyExc_ImportError) +PYBIND11_RUNTIME_EXCEPTION(cast_error, PyExc_RuntimeError) /// Thrown when pybind11::cast or handle::call fail due to a type casting error +PYBIND11_RUNTIME_EXCEPTION(reference_cast_error, PyExc_RuntimeError) /// Used internally + +[[noreturn]] PYBIND11_NOINLINE inline void pybind11_fail(const char *reason) { throw std::runtime_error(reason); } +[[noreturn]] PYBIND11_NOINLINE inline void pybind11_fail(const std::string &reason) { throw std::runtime_error(reason); } + +template struct format_descriptor { }; + +NAMESPACE_BEGIN(detail) +// Returns the index of the given type in the type char array below, and in the list in numpy.h +// The order here is: bool; 8 ints ((signed,unsigned)x(8,16,32,64)bits); float,double,long double; +// complex float,double,long double. 
Note that the long double types only participate when long +// double is actually longer than double (it isn't under MSVC). +// NB: not only the string below but also complex.h and numpy.h rely on this order. +template struct is_fmt_numeric { static constexpr bool value = false; }; +template struct is_fmt_numeric::value>> { + static constexpr bool value = true; + static constexpr int index = std::is_same::value ? 0 : 1 + ( + std::is_integral::value ? detail::log2(sizeof(T))*2 + std::is_unsigned::value : 8 + ( + std::is_same::value ? 1 : std::is_same::value ? 2 : 0)); +}; +NAMESPACE_END(detail) + +template struct format_descriptor::value>> { + static constexpr const char c = "?bBhHiIqQfdg"[detail::is_fmt_numeric::index]; + static constexpr const char value[2] = { c, '\0' }; + static std::string format() { return std::string(1, c); } +}; + +#if !defined(PYBIND11_CPP17) + +template constexpr const char format_descriptor< + T, detail::enable_if_t::value>>::value[2]; + +#endif + +/// RAII wrapper that temporarily clears any Python error state +struct error_scope { + PyObject *type, *value, *trace; + error_scope() { PyErr_Fetch(&type, &value, &trace); } + ~error_scope() { PyErr_Restore(type, value, trace); } +}; + +/// Dummy destructor wrapper that can be used to expose classes with a private destructor +struct nodelete { template void operator()(T*) { } }; + +NAMESPACE_BEGIN(detail) +template +struct overload_cast_impl { + constexpr overload_cast_impl() {} // MSVC 2015 needs this + + template + constexpr auto operator()(Return (*pf)(Args...)) const noexcept + -> decltype(pf) { return pf; } + + template + constexpr auto operator()(Return (Class::*pmf)(Args...), std::false_type = {}) const noexcept + -> decltype(pmf) { return pmf; } + + template + constexpr auto operator()(Return (Class::*pmf)(Args...) 
const, std::true_type) const noexcept + -> decltype(pmf) { return pmf; } +}; +NAMESPACE_END(detail) + +// overload_cast requires variable templates: C++14 +#if defined(PYBIND11_CPP14) +#define PYBIND11_OVERLOAD_CAST 1 +/// Syntax sugar for resolving overloaded function pointers: +/// - regular: static_cast(&Class::func) +/// - sweet: overload_cast(&Class::func) +template +static constexpr detail::overload_cast_impl overload_cast = {}; +// MSVC 2015 only accepts this particular initialization syntax for this variable template. +#endif + +/// Const member function selector for overload_cast +/// - regular: static_cast(&Class::func) +/// - sweet: overload_cast(&Class::func, const_) +static constexpr auto const_ = std::true_type{}; + +#if !defined(PYBIND11_CPP14) // no overload_cast: providing something that static_assert-fails: +template struct overload_cast { + static_assert(detail::deferred_t::value, + "pybind11::overload_cast<...> requires compiling in C++14 mode"); +}; +#endif // overload_cast + +NAMESPACE_BEGIN(detail) + +// Adaptor for converting arbitrary container arguments into a vector; implicitly convertible from +// any standard container (or C-style array) supporting std::begin/std::end, any singleton +// arithmetic type (if T is arithmetic), or explicitly constructible from an iterator pair. 
+template +class any_container { + std::vector v; +public: + any_container() = default; + + // Can construct from a pair of iterators + template ::value>> + any_container(It first, It last) : v(first, last) { } + + // Implicit conversion constructor from any arbitrary container type with values convertible to T + template ())), T>::value>> + any_container(const Container &c) : any_container(std::begin(c), std::end(c)) { } + + // initializer_list's aren't deducible, so don't get matched by the above template; we need this + // to explicitly allow implicit conversion from one: + template ::value>> + any_container(const std::initializer_list &c) : any_container(c.begin(), c.end()) { } + + // Avoid copying if given an rvalue vector of the correct type. + any_container(std::vector &&v) : v(std::move(v)) { } + + // Moves the vector out of an rvalue any_container + operator std::vector &&() && { return std::move(v); } + + // Dereferencing obtains a reference to the underlying vector + std::vector &operator*() { return v; } + const std::vector &operator*() const { return v; } + + // -> lets you call methods on the underlying vector + std::vector *operator->() { return &v; } + const std::vector *operator->() const { return &v; } +}; + +NAMESPACE_END(detail) + + + +NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/thirdparty/pybind11/include/pybind11/detail/descr.h b/thirdparty/pybind11/include/pybind11/detail/descr.h new file mode 100644 index 000000000..8d404e534 --- /dev/null +++ b/thirdparty/pybind11/include/pybind11/detail/descr.h @@ -0,0 +1,100 @@ +/* + pybind11/detail/descr.h: Helper type for concatenating type signatures at compile time + + Copyright (c) 2016 Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. 
+*/ + +#pragma once + +#include "common.h" + +NAMESPACE_BEGIN(PYBIND11_NAMESPACE) +NAMESPACE_BEGIN(detail) + +#if !defined(_MSC_VER) +# define PYBIND11_DESCR_CONSTEXPR static constexpr +#else +# define PYBIND11_DESCR_CONSTEXPR const +#endif + +/* Concatenate type signatures at compile time */ +template +struct descr { + char text[N + 1]; + + constexpr descr() : text{'\0'} { } + constexpr descr(char const (&s)[N+1]) : descr(s, make_index_sequence()) { } + + template + constexpr descr(char const (&s)[N+1], index_sequence) : text{s[Is]..., '\0'} { } + + template + constexpr descr(char c, Chars... cs) : text{c, static_cast(cs)..., '\0'} { } + + static constexpr std::array types() { + return {{&typeid(Ts)..., nullptr}}; + } +}; + +template +constexpr descr plus_impl(const descr &a, const descr &b, + index_sequence, index_sequence) { + return {a.text[Is1]..., b.text[Is2]...}; +} + +template +constexpr descr operator+(const descr &a, const descr &b) { + return plus_impl(a, b, make_index_sequence(), make_index_sequence()); +} + +template +constexpr descr _(char const(&text)[N]) { return descr(text); } +constexpr descr<0> _(char const(&)[1]) { return {}; } + +template struct int_to_str : int_to_str { }; +template struct int_to_str<0, Digits...> { + static constexpr auto digits = descr(('0' + Digits)...); +}; + +// Ternary description (like std::conditional) +template +constexpr enable_if_t> _(char const(&text1)[N1], char const(&)[N2]) { + return _(text1); +} +template +constexpr enable_if_t> _(char const(&)[N1], char const(&text2)[N2]) { + return _(text2); +} + +template +constexpr enable_if_t _(const T1 &d, const T2 &) { return d; } +template +constexpr enable_if_t _(const T1 &, const T2 &d) { return d; } + +template auto constexpr _() -> decltype(int_to_str::digits) { + return int_to_str::digits; +} + +template constexpr descr<1, Type> _() { return {'%'}; } + +constexpr descr<0> concat() { return {}; } + +template +constexpr descr concat(const descr &descr) { return 
descr; } + +template +constexpr auto concat(const descr &d, const Args &...args) + -> decltype(std::declval>() + concat(args...)) { + return d + _(", ") + concat(args...); +} + +template +constexpr descr type_descr(const descr &descr) { + return _("{") + descr + _("}"); +} + +NAMESPACE_END(detail) +NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/thirdparty/pybind11/include/pybind11/detail/init.h b/thirdparty/pybind11/include/pybind11/detail/init.h new file mode 100644 index 000000000..acfe00bdb --- /dev/null +++ b/thirdparty/pybind11/include/pybind11/detail/init.h @@ -0,0 +1,335 @@ +/* + pybind11/detail/init.h: init factory function implementation and support code. + + Copyright (c) 2017 Jason Rhinelander + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. +*/ + +#pragma once + +#include "class.h" + +NAMESPACE_BEGIN(PYBIND11_NAMESPACE) +NAMESPACE_BEGIN(detail) + +template <> +class type_caster { +public: + bool load(handle h, bool) { + value = reinterpret_cast(h.ptr()); + return true; + } + + template using cast_op_type = value_and_holder &; + operator value_and_holder &() { return *value; } + static constexpr auto name = _(); + +private: + value_and_holder *value = nullptr; +}; + +NAMESPACE_BEGIN(initimpl) + +inline void no_nullptr(void *ptr) { + if (!ptr) throw type_error("pybind11::init(): factory function returned nullptr"); +} + +// Implementing functions for all forms of py::init<...> and py::init(...) +template using Cpp = typename Class::type; +template using Alias = typename Class::type_alias; +template using Holder = typename Class::holder_type; + +template using is_alias_constructible = std::is_constructible, Cpp &&>; + +// Takes a Cpp pointer and returns true if it actually is a polymorphic Alias instance. 
+template = 0> +bool is_alias(Cpp *ptr) { + return dynamic_cast *>(ptr) != nullptr; +} +// Failing fallback version of the above for a no-alias class (always returns false) +template +constexpr bool is_alias(void *) { return false; } + +// Constructs and returns a new object; if the given arguments don't map to a constructor, we fall +// back to brace aggregate initiailization so that for aggregate initialization can be used with +// py::init, e.g. `py::init` to initialize a `struct T { int a; int b; }`. For +// non-aggregate types, we need to use an ordinary T(...) constructor (invoking as `T{...}` usually +// works, but will not do the expected thing when `T` has an `initializer_list` constructor). +template ::value, int> = 0> +inline Class *construct_or_initialize(Args &&...args) { return new Class(std::forward(args)...); } +template ::value, int> = 0> +inline Class *construct_or_initialize(Args &&...args) { return new Class{std::forward(args)...}; } + +// Attempts to constructs an alias using a `Alias(Cpp &&)` constructor. This allows types with +// an alias to provide only a single Cpp factory function as long as the Alias can be +// constructed from an rvalue reference of the base Cpp type. This means that Alias classes +// can, when appropriate, simply define a `Alias(Cpp &&)` constructor rather than needing to +// inherit all the base class constructors. +template +void construct_alias_from_cpp(std::true_type /*is_alias_constructible*/, + value_and_holder &v_h, Cpp &&base) { + v_h.value_ptr() = new Alias(std::move(base)); +} +template +[[noreturn]] void construct_alias_from_cpp(std::false_type /*!is_alias_constructible*/, + value_and_holder &, Cpp &&) { + throw type_error("pybind11::init(): unable to convert returned instance to required " + "alias class: no `Alias(Class &&)` constructor available"); +} + +// Error-generating fallback for factories that don't match one of the below construction +// mechanisms. +template +void construct(...) 
{ + static_assert(!std::is_same::value /* always false */, + "pybind11::init(): init function must return a compatible pointer, " + "holder, or value"); +} + +// Pointer return v1: the factory function returns a class pointer for a registered class. +// If we don't need an alias (because this class doesn't have one, or because the final type is +// inherited on the Python side) we can simply take over ownership. Otherwise we need to try to +// construct an Alias from the returned base instance. +template +void construct(value_and_holder &v_h, Cpp *ptr, bool need_alias) { + no_nullptr(ptr); + if (Class::has_alias && need_alias && !is_alias(ptr)) { + // We're going to try to construct an alias by moving the cpp type. Whether or not + // that succeeds, we still need to destroy the original cpp pointer (either the + // moved away leftover, if the alias construction works, or the value itself if we + // throw an error), but we can't just call `delete ptr`: it might have a special + // deleter, or might be shared_from_this. So we construct a holder around it as if + // it was a normal instance, then steal the holder away into a local variable; thus + // the holder and destruction happens when we leave the C++ scope, and the holder + // class gets to handle the destruction however it likes. + v_h.value_ptr() = ptr; + v_h.set_instance_registered(true); // To prevent init_instance from registering it + v_h.type->init_instance(v_h.inst, nullptr); // Set up the holder + Holder temp_holder(std::move(v_h.holder>())); // Steal the holder + v_h.type->dealloc(v_h); // Destroys the moved-out holder remains, resets value ptr to null + v_h.set_instance_registered(false); + + construct_alias_from_cpp(is_alias_constructible{}, v_h, std::move(*ptr)); + } else { + // Otherwise the type isn't inherited, so we don't need an Alias + v_h.value_ptr() = ptr; + } +} + +// Pointer return v2: a factory that always returns an alias instance ptr. We simply take over +// ownership of the pointer. 
+template = 0> +void construct(value_and_holder &v_h, Alias *alias_ptr, bool) { + no_nullptr(alias_ptr); + v_h.value_ptr() = static_cast *>(alias_ptr); +} + +// Holder return: copy its pointer, and move or copy the returned holder into the new instance's +// holder. This also handles types like std::shared_ptr and std::unique_ptr where T is a +// derived type (through those holder's implicit conversion from derived class holder constructors). +template +void construct(value_and_holder &v_h, Holder holder, bool need_alias) { + auto *ptr = holder_helper>::get(holder); + // If we need an alias, check that the held pointer is actually an alias instance + if (Class::has_alias && need_alias && !is_alias(ptr)) + throw type_error("pybind11::init(): construction failed: returned holder-wrapped instance " + "is not an alias instance"); + + v_h.value_ptr() = ptr; + v_h.type->init_instance(v_h.inst, &holder); +} + +// return-by-value version 1: returning a cpp class by value. If the class has an alias and an +// alias is required the alias must have an `Alias(Cpp &&)` constructor so that we can construct +// the alias from the base when needed (i.e. because of Python-side inheritance). When we don't +// need it, we simply move-construct the cpp value into a new instance. +template +void construct(value_and_holder &v_h, Cpp &&result, bool need_alias) { + static_assert(std::is_move_constructible>::value, + "pybind11::init() return-by-value factory function requires a movable class"); + if (Class::has_alias && need_alias) + construct_alias_from_cpp(is_alias_constructible{}, v_h, std::move(result)); + else + v_h.value_ptr() = new Cpp(std::move(result)); +} + +// return-by-value version 2: returning a value of the alias type itself. We move-construct an +// Alias instance (even if no the python-side inheritance is involved). The is intended for +// cases where Alias initialization is always desired. 
+template +void construct(value_and_holder &v_h, Alias &&result, bool) { + static_assert(std::is_move_constructible>::value, + "pybind11::init() return-by-alias-value factory function requires a movable alias class"); + v_h.value_ptr() = new Alias(std::move(result)); +} + +// Implementing class for py::init<...>() +template +struct constructor { + template = 0> + static void execute(Class &cl, const Extra&... extra) { + cl.def("__init__", [](value_and_holder &v_h, Args... args) { + v_h.value_ptr() = construct_or_initialize>(std::forward(args)...); + }, is_new_style_constructor(), extra...); + } + + template , Args...>::value, int> = 0> + static void execute(Class &cl, const Extra&... extra) { + cl.def("__init__", [](value_and_holder &v_h, Args... args) { + if (Py_TYPE(v_h.inst) == v_h.type->type) + v_h.value_ptr() = construct_or_initialize>(std::forward(args)...); + else + v_h.value_ptr() = construct_or_initialize>(std::forward(args)...); + }, is_new_style_constructor(), extra...); + } + + template , Args...>::value, int> = 0> + static void execute(Class &cl, const Extra&... extra) { + cl.def("__init__", [](value_and_holder &v_h, Args... args) { + v_h.value_ptr() = construct_or_initialize>(std::forward(args)...); + }, is_new_style_constructor(), extra...); + } +}; + +// Implementing class for py::init_alias<...>() +template struct alias_constructor { + template , Args...>::value, int> = 0> + static void execute(Class &cl, const Extra&... extra) { + cl.def("__init__", [](value_and_holder &v_h, Args... 
args) { + v_h.value_ptr() = construct_or_initialize>(std::forward(args)...); + }, is_new_style_constructor(), extra...); + } +}; + +// Implementation class for py::init(Func) and py::init(Func, AliasFunc) +template , typename = function_signature_t> +struct factory; + +// Specialization for py::init(Func) +template +struct factory { + remove_reference_t class_factory; + + factory(Func &&f) : class_factory(std::forward(f)) { } + + // The given class either has no alias or has no separate alias factory; + // this always constructs the class itself. If the class is registered with an alias + // type and an alias instance is needed (i.e. because the final type is a Python class + // inheriting from the C++ type) the returned value needs to either already be an alias + // instance, or the alias needs to be constructible from a `Class &&` argument. + template + void execute(Class &cl, const Extra &...extra) && { + #if defined(PYBIND11_CPP14) + cl.def("__init__", [func = std::move(class_factory)] + #else + auto &func = class_factory; + cl.def("__init__", [func] + #endif + (value_and_holder &v_h, Args... args) { + construct(v_h, func(std::forward(args)...), + Py_TYPE(v_h.inst) != v_h.type->type); + }, is_new_style_constructor(), extra...); + } +}; + +// Specialization for py::init(Func, AliasFunc) +template +struct factory { + static_assert(sizeof...(CArgs) == sizeof...(AArgs), + "pybind11::init(class_factory, alias_factory): class and alias factories " + "must have identical argument signatures"); + static_assert(all_of...>::value, + "pybind11::init(class_factory, alias_factory): class and alias factories " + "must have identical argument signatures"); + + remove_reference_t class_factory; + remove_reference_t alias_factory; + + factory(CFunc &&c, AFunc &&a) + : class_factory(std::forward(c)), alias_factory(std::forward(a)) { } + + // The class factory is called when the `self` type passed to `__init__` is the direct + // class (i.e. 
not inherited), the alias factory when `self` is a Python-side subtype. + template + void execute(Class &cl, const Extra&... extra) && { + static_assert(Class::has_alias, "The two-argument version of `py::init()` can " + "only be used if the class has an alias"); + #if defined(PYBIND11_CPP14) + cl.def("__init__", [class_func = std::move(class_factory), alias_func = std::move(alias_factory)] + #else + auto &class_func = class_factory; + auto &alias_func = alias_factory; + cl.def("__init__", [class_func, alias_func] + #endif + (value_and_holder &v_h, CArgs... args) { + if (Py_TYPE(v_h.inst) == v_h.type->type) + // If the instance type equals the registered type we don't have inheritance, so + // don't need the alias and can construct using the class function: + construct(v_h, class_func(std::forward(args)...), false); + else + construct(v_h, alias_func(std::forward(args)...), true); + }, is_new_style_constructor(), extra...); + } +}; + +/// Set just the C++ state. Same as `__init__`. +template +void setstate(value_and_holder &v_h, T &&result, bool need_alias) { + construct(v_h, std::forward(result), need_alias); +} + +/// Set both the C++ and Python states +template ::value, int> = 0> +void setstate(value_and_holder &v_h, std::pair &&result, bool need_alias) { + construct(v_h, std::move(result.first), need_alias); + setattr((PyObject *) v_h.inst, "__dict__", result.second); +} + +/// Implementation for py::pickle(GetState, SetState) +template , typename = function_signature_t> +struct pickle_factory; + +template +struct pickle_factory { + static_assert(std::is_same, intrinsic_t>::value, + "The type returned by `__getstate__` must be the same " + "as the argument accepted by `__setstate__`"); + + remove_reference_t get; + remove_reference_t set; + + pickle_factory(Get get, Set set) + : get(std::forward(get)), set(std::forward(set)) { } + + template + void execute(Class &cl, const Extra &...extra) && { + cl.def("__getstate__", std::move(get)); + +#if 
defined(PYBIND11_CPP14) + cl.def("__setstate__", [func = std::move(set)] +#else + auto &func = set; + cl.def("__setstate__", [func] +#endif + (value_and_holder &v_h, ArgState state) { + setstate(v_h, func(std::forward(state)), + Py_TYPE(v_h.inst) != v_h.type->type); + }, is_new_style_constructor(), extra...); + } +}; + +NAMESPACE_END(initimpl) +NAMESPACE_END(detail) +NAMESPACE_END(pybind11) diff --git a/thirdparty/pybind11/include/pybind11/detail/internals.h b/thirdparty/pybind11/include/pybind11/detail/internals.h new file mode 100644 index 000000000..6224dfb22 --- /dev/null +++ b/thirdparty/pybind11/include/pybind11/detail/internals.h @@ -0,0 +1,349 @@ +/* + pybind11/detail/internals.h: Internal data structure and related functions + + Copyright (c) 2017 Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. +*/ + +#pragma once + +#include "../pytypes.h" + +NAMESPACE_BEGIN(PYBIND11_NAMESPACE) +NAMESPACE_BEGIN(detail) +// Forward declarations +inline PyTypeObject *make_static_property_type(); +inline PyTypeObject *make_default_metaclass(); +inline PyObject *make_object_base_type(PyTypeObject *metaclass); + +// The old Python Thread Local Storage (TLS) API is deprecated in Python 3.7 in favor of the new +// Thread Specific Storage (TSS) API. 
+#if PY_VERSION_HEX >= 0x03070000 +# define PYBIND11_TLS_KEY_INIT(var) Py_tss_t *var = nullptr +# define PYBIND11_TLS_GET_VALUE(key) PyThread_tss_get((key)) +# define PYBIND11_TLS_REPLACE_VALUE(key, value) PyThread_tss_set((key), (value)) +# define PYBIND11_TLS_DELETE_VALUE(key) PyThread_tss_set((key), nullptr) +# define PYBIND11_TLS_FREE(key) PyThread_tss_free(key) +#else + // Usually an int but a long on Cygwin64 with Python 3.x +# define PYBIND11_TLS_KEY_INIT(var) decltype(PyThread_create_key()) var = 0 +# define PYBIND11_TLS_GET_VALUE(key) PyThread_get_key_value((key)) +# if PY_MAJOR_VERSION < 3 +# define PYBIND11_TLS_DELETE_VALUE(key) \ + PyThread_delete_key_value(key) +# define PYBIND11_TLS_REPLACE_VALUE(key, value) \ + do { \ + PyThread_delete_key_value((key)); \ + PyThread_set_key_value((key), (value)); \ + } while (false) +# else +# define PYBIND11_TLS_DELETE_VALUE(key) \ + PyThread_set_key_value((key), nullptr) +# define PYBIND11_TLS_REPLACE_VALUE(key, value) \ + PyThread_set_key_value((key), (value)) +# endif +# define PYBIND11_TLS_FREE(key) (void)key +#endif + +// Python loads modules by default with dlopen with the RTLD_LOCAL flag; under libc++ and possibly +// other STLs, this means `typeid(A)` from one module won't equal `typeid(A)` from another module +// even when `A` is the same, non-hidden-visibility type (e.g. from a common include). Under +// libstdc++, this doesn't happen: equality and the type_index hash are based on the type name, +// which works. If not under a known-good stl, provide our own name-based hash and equality +// functions that use the type name. 
+#if defined(__GLIBCXX__) +inline bool same_type(const std::type_info &lhs, const std::type_info &rhs) { return lhs == rhs; } +using type_hash = std::hash; +using type_equal_to = std::equal_to; +#else +inline bool same_type(const std::type_info &lhs, const std::type_info &rhs) { + return lhs.name() == rhs.name() || std::strcmp(lhs.name(), rhs.name()) == 0; +} + +struct type_hash { + size_t operator()(const std::type_index &t) const { + size_t hash = 5381; + const char *ptr = t.name(); + while (auto c = static_cast(*ptr++)) + hash = (hash * 33) ^ c; + return hash; + } +}; + +struct type_equal_to { + bool operator()(const std::type_index &lhs, const std::type_index &rhs) const { + return lhs.name() == rhs.name() || std::strcmp(lhs.name(), rhs.name()) == 0; + } +}; +#endif + +template +using type_map = std::unordered_map; + +struct overload_hash { + inline size_t operator()(const std::pair& v) const { + size_t value = std::hash()(v.first); + value ^= std::hash()(v.second) + 0x9e3779b9 + (value<<6) + (value>>2); + return value; + } +}; + +/// Internal data structure used to track registered instances and types. +/// Whenever binary incompatible changes are made to this structure, +/// `PYBIND11_INTERNALS_VERSION` must be incremented. 
+struct internals { + type_map registered_types_cpp; // std::type_index -> pybind11's type information + std::unordered_map> registered_types_py; // PyTypeObject* -> base type_info(s) + std::unordered_multimap registered_instances; // void * -> instance* + std::unordered_set, overload_hash> inactive_overload_cache; + type_map> direct_conversions; + std::unordered_map> patients; + std::forward_list registered_exception_translators; + std::unordered_map shared_data; // Custom data to be shared across extensions + std::vector loader_patient_stack; // Used by `loader_life_support` + std::forward_list static_strings; // Stores the std::strings backing detail::c_str() + PyTypeObject *static_property_type; + PyTypeObject *default_metaclass; + PyObject *instance_base; +#if defined(WITH_THREAD) + PYBIND11_TLS_KEY_INIT(tstate); + PyInterpreterState *istate = nullptr; + ~internals() { + // This destructor is called *after* Py_Finalize() in finalize_interpreter(). + // That *SHOULD BE* fine. The following details what happens whe PyThread_tss_free is called. + // PYBIND11_TLS_FREE is PyThread_tss_free on python 3.7+. On older python, it does nothing. + // PyThread_tss_free calls PyThread_tss_delete and PyMem_RawFree. + // PyThread_tss_delete just calls TlsFree (on Windows) or pthread_key_delete (on *NIX). Neither + // of those have anything to do with CPython internals. + // PyMem_RawFree *requires* that the `tstate` be allocated with the CPython allocator. + PYBIND11_TLS_FREE(tstate); + } +#endif +}; + +/// Additional type information which does not fit into the PyTypeObject. +/// Changes to this struct also require bumping `PYBIND11_INTERNALS_VERSION`. 
+struct type_info { + PyTypeObject *type; + const std::type_info *cpptype; + size_t type_size, type_align, holder_size_in_ptrs; + void *(*operator_new)(size_t); + void (*init_instance)(instance *, const void *); + void (*dealloc)(value_and_holder &v_h); + std::vector implicit_conversions; + std::vector> implicit_casts; + std::vector *direct_conversions; + buffer_info *(*get_buffer)(PyObject *, void *) = nullptr; + void *get_buffer_data = nullptr; + void *(*module_local_load)(PyObject *, const type_info *) = nullptr; + /* A simple type never occurs as a (direct or indirect) parent + * of a class that makes use of multiple inheritance */ + bool simple_type : 1; + /* True if there is no multiple inheritance in this type's inheritance tree */ + bool simple_ancestors : 1; + /* for base vs derived holder_type checks */ + bool default_holder : 1; + /* true if this is a type registered with py::module_local */ + bool module_local : 1; +}; + +/// Tracks the `internals` and `type_info` ABI version independent of the main library version +#define PYBIND11_INTERNALS_VERSION 4 + +/// On MSVC, debug and release builds are not ABI-compatible! +#if defined(_MSC_VER) && defined(_DEBUG) +# define PYBIND11_BUILD_TYPE "_debug" +#else +# define PYBIND11_BUILD_TYPE "" +#endif + +/// Let's assume that different compilers are ABI-incompatible. 
+#if defined(_MSC_VER) +# define PYBIND11_COMPILER_TYPE "_msvc" +#elif defined(__INTEL_COMPILER) +# define PYBIND11_COMPILER_TYPE "_icc" +#elif defined(__clang__) +# define PYBIND11_COMPILER_TYPE "_clang" +#elif defined(__PGI) +# define PYBIND11_COMPILER_TYPE "_pgi" +#elif defined(__MINGW32__) +# define PYBIND11_COMPILER_TYPE "_mingw" +#elif defined(__CYGWIN__) +# define PYBIND11_COMPILER_TYPE "_gcc_cygwin" +#elif defined(__GNUC__) +# define PYBIND11_COMPILER_TYPE "_gcc" +#else +# define PYBIND11_COMPILER_TYPE "_unknown" +#endif + +#if defined(_LIBCPP_VERSION) +# define PYBIND11_STDLIB "_libcpp" +#elif defined(__GLIBCXX__) || defined(__GLIBCPP__) +# define PYBIND11_STDLIB "_libstdcpp" +#else +# define PYBIND11_STDLIB "" +#endif + +/// On Linux/OSX, changes in __GXX_ABI_VERSION__ indicate ABI incompatibility. +#if defined(__GXX_ABI_VERSION) +# define PYBIND11_BUILD_ABI "_cxxabi" PYBIND11_TOSTRING(__GXX_ABI_VERSION) +#else +# define PYBIND11_BUILD_ABI "" +#endif + +#if defined(WITH_THREAD) +# define PYBIND11_INTERNALS_KIND "" +#else +# define PYBIND11_INTERNALS_KIND "_without_thread" +#endif + +#define PYBIND11_INTERNALS_ID "__pybind11_internals_v" \ + PYBIND11_TOSTRING(PYBIND11_INTERNALS_VERSION) PYBIND11_INTERNALS_KIND PYBIND11_COMPILER_TYPE PYBIND11_STDLIB PYBIND11_BUILD_ABI PYBIND11_BUILD_TYPE "__" + +#define PYBIND11_MODULE_LOCAL_ID "__pybind11_module_local_v" \ + PYBIND11_TOSTRING(PYBIND11_INTERNALS_VERSION) PYBIND11_INTERNALS_KIND PYBIND11_COMPILER_TYPE PYBIND11_STDLIB PYBIND11_BUILD_ABI PYBIND11_BUILD_TYPE "__" + +/// Each module locally stores a pointer to the `internals` data. The data +/// itself is shared among modules with the same `PYBIND11_INTERNALS_ID`. 
+inline internals **&get_internals_pp() { + static internals **internals_pp = nullptr; + return internals_pp; +} + +inline void translate_exception(std::exception_ptr p) { + try { + if (p) std::rethrow_exception(p); + } catch (error_already_set &e) { e.restore(); return; + } catch (const builtin_exception &e) { e.set_error(); return; + } catch (const std::bad_alloc &e) { PyErr_SetString(PyExc_MemoryError, e.what()); return; + } catch (const std::domain_error &e) { PyErr_SetString(PyExc_ValueError, e.what()); return; + } catch (const std::invalid_argument &e) { PyErr_SetString(PyExc_ValueError, e.what()); return; + } catch (const std::length_error &e) { PyErr_SetString(PyExc_ValueError, e.what()); return; + } catch (const std::out_of_range &e) { PyErr_SetString(PyExc_IndexError, e.what()); return; + } catch (const std::range_error &e) { PyErr_SetString(PyExc_ValueError, e.what()); return; + } catch (const std::overflow_error &e) { PyErr_SetString(PyExc_OverflowError, e.what()); return; + } catch (const std::exception &e) { PyErr_SetString(PyExc_RuntimeError, e.what()); return; + } catch (...) { + PyErr_SetString(PyExc_RuntimeError, "Caught an unknown exception!"); + return; + } +} + +#if !defined(__GLIBCXX__) +inline void translate_local_exception(std::exception_ptr p) { + try { + if (p) std::rethrow_exception(p); + } catch (error_already_set &e) { e.restore(); return; + } catch (const builtin_exception &e) { e.set_error(); return; + } +} +#endif + +/// Return a reference to the current `internals` data +PYBIND11_NOINLINE inline internals &get_internals() { + auto **&internals_pp = get_internals_pp(); + if (internals_pp && *internals_pp) + return **internals_pp; + + // Ensure that the GIL is held since we will need to make Python calls. + // Cannot use py::gil_scoped_acquire here since that constructor calls get_internals. 
+ struct gil_scoped_acquire_local { + gil_scoped_acquire_local() : state (PyGILState_Ensure()) {} + ~gil_scoped_acquire_local() { PyGILState_Release(state); } + const PyGILState_STATE state; + } gil; + + constexpr auto *id = PYBIND11_INTERNALS_ID; + auto builtins = handle(PyEval_GetBuiltins()); + if (builtins.contains(id) && isinstance(builtins[id])) { + internals_pp = static_cast(capsule(builtins[id])); + + // We loaded builtins through python's builtins, which means that our `error_already_set` + // and `builtin_exception` may be different local classes than the ones set up in the + // initial exception translator, below, so add another for our local exception classes. + // + // libstdc++ doesn't require this (types there are identified only by name) +#if !defined(__GLIBCXX__) + (*internals_pp)->registered_exception_translators.push_front(&translate_local_exception); +#endif + } else { + if (!internals_pp) internals_pp = new internals*(); + auto *&internals_ptr = *internals_pp; + internals_ptr = new internals(); +#if defined(WITH_THREAD) + PyEval_InitThreads(); + PyThreadState *tstate = PyThreadState_Get(); + #if PY_VERSION_HEX >= 0x03070000 + internals_ptr->tstate = PyThread_tss_alloc(); + if (!internals_ptr->tstate || PyThread_tss_create(internals_ptr->tstate)) + pybind11_fail("get_internals: could not successfully initialize the TSS key!"); + PyThread_tss_set(internals_ptr->tstate, tstate); + #else + internals_ptr->tstate = PyThread_create_key(); + if (internals_ptr->tstate == -1) + pybind11_fail("get_internals: could not successfully initialize the TLS key!"); + PyThread_set_key_value(internals_ptr->tstate, tstate); + #endif + internals_ptr->istate = tstate->interp; +#endif + builtins[id] = capsule(internals_pp); + internals_ptr->registered_exception_translators.push_front(&translate_exception); + internals_ptr->static_property_type = make_static_property_type(); + internals_ptr->default_metaclass = make_default_metaclass(); + internals_ptr->instance_base = 
make_object_base_type(internals_ptr->default_metaclass); + } + return **internals_pp; +} + +/// Works like `internals.registered_types_cpp`, but for module-local registered types: +inline type_map ®istered_local_types_cpp() { + static type_map locals{}; + return locals; +} + +/// Constructs a std::string with the given arguments, stores it in `internals`, and returns its +/// `c_str()`. Such strings objects have a long storage duration -- the internal strings are only +/// cleared when the program exits or after interpreter shutdown (when embedding), and so are +/// suitable for c-style strings needed by Python internals (such as PyTypeObject's tp_name). +template +const char *c_str(Args &&...args) { + auto &strings = get_internals().static_strings; + strings.emplace_front(std::forward(args)...); + return strings.front().c_str(); +} + +NAMESPACE_END(detail) + +/// Returns a named pointer that is shared among all extension modules (using the same +/// pybind11 version) running in the current interpreter. Names starting with underscores +/// are reserved for internal usage. Returns `nullptr` if no matching entry was found. +inline PYBIND11_NOINLINE void *get_shared_data(const std::string &name) { + auto &internals = detail::get_internals(); + auto it = internals.shared_data.find(name); + return it != internals.shared_data.end() ? it->second : nullptr; +} + +/// Set the shared data that can be later recovered by `get_shared_data()`. +inline PYBIND11_NOINLINE void *set_shared_data(const std::string &name, void *data) { + detail::get_internals().shared_data[name] = data; + return data; +} + +/// Returns a typed reference to a shared data entry (by using `get_shared_data()`) if +/// such entry exists. Otherwise, a new object of default-constructible type `T` is +/// added to the shared data under the given name and a reference to it is returned. 
+template +T &get_or_create_shared_data(const std::string &name) { + auto &internals = detail::get_internals(); + auto it = internals.shared_data.find(name); + T *ptr = (T *) (it != internals.shared_data.end() ? it->second : nullptr); + if (!ptr) { + ptr = new T(); + internals.shared_data[name] = ptr; + } + return *ptr; +} + +NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/thirdparty/pybind11/include/pybind11/detail/typeid.h b/thirdparty/pybind11/include/pybind11/detail/typeid.h new file mode 100644 index 000000000..9c8a4fc69 --- /dev/null +++ b/thirdparty/pybind11/include/pybind11/detail/typeid.h @@ -0,0 +1,55 @@ +/* + pybind11/detail/typeid.h: Compiler-independent access to type identifiers + + Copyright (c) 2016 Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. +*/ + +#pragma once + +#include +#include + +#if defined(__GNUG__) +#include +#endif + +#include "common.h" + +NAMESPACE_BEGIN(PYBIND11_NAMESPACE) +NAMESPACE_BEGIN(detail) +/// Erase all occurrences of a substring +inline void erase_all(std::string &string, const std::string &search) { + for (size_t pos = 0;;) { + pos = string.find(search, pos); + if (pos == std::string::npos) break; + string.erase(pos, search.length()); + } +} + +PYBIND11_NOINLINE inline void clean_type_id(std::string &name) { +#if defined(__GNUG__) + int status = 0; + std::unique_ptr res { + abi::__cxa_demangle(name.c_str(), nullptr, nullptr, &status), std::free }; + if (status == 0) + name = res.get(); +#else + detail::erase_all(name, "class "); + detail::erase_all(name, "struct "); + detail::erase_all(name, "enum "); +#endif + detail::erase_all(name, "pybind11::"); +} +NAMESPACE_END(detail) + +/// Return a string representation of a C++ type +template static std::string type_id() { + std::string name(typeid(T).name()); + detail::clean_type_id(name); + return name; +} + +NAMESPACE_END(PYBIND11_NAMESPACE) diff --git 
a/thirdparty/pybind11/include/pybind11/eigen.h b/thirdparty/pybind11/include/pybind11/eigen.h new file mode 100644 index 000000000..d963d9650 --- /dev/null +++ b/thirdparty/pybind11/include/pybind11/eigen.h @@ -0,0 +1,607 @@ +/* + pybind11/eigen.h: Transparent conversion for dense and sparse Eigen matrices + + Copyright (c) 2016 Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. +*/ + +#pragma once + +#include "numpy.h" + +#if defined(__INTEL_COMPILER) +# pragma warning(disable: 1682) // implicit conversion of a 64-bit integral type to a smaller integral type (potential portability problem) +#elif defined(__GNUG__) || defined(__clang__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wconversion" +# pragma GCC diagnostic ignored "-Wdeprecated-declarations" +# ifdef __clang__ +// Eigen generates a bunch of implicit-copy-constructor-is-deprecated warnings with -Wdeprecated +// under Clang, so disable that warning here: +# pragma GCC diagnostic ignored "-Wdeprecated" +# endif +# if __GNUC__ >= 7 +# pragma GCC diagnostic ignored "-Wint-in-bool-context" +# endif +#endif + +#if defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable: 4127) // warning C4127: Conditional expression is constant +# pragma warning(disable: 4996) // warning C4996: std::unary_negate is deprecated in C++17 +#endif + +#include +#include + +// Eigen prior to 3.2.7 doesn't have proper move constructors--but worse, some classes get implicit +// move constructors that break things. We could detect this an explicitly copy, but an extra copy +// of matrices seems highly undesirable. 
+static_assert(EIGEN_VERSION_AT_LEAST(3,2,7), "Eigen support in pybind11 requires Eigen >= 3.2.7"); + +NAMESPACE_BEGIN(PYBIND11_NAMESPACE) + +// Provide a convenience alias for easier pass-by-ref usage with fully dynamic strides: +using EigenDStride = Eigen::Stride; +template using EigenDRef = Eigen::Ref; +template using EigenDMap = Eigen::Map; + +NAMESPACE_BEGIN(detail) + +#if EIGEN_VERSION_AT_LEAST(3,3,0) +using EigenIndex = Eigen::Index; +#else +using EigenIndex = EIGEN_DEFAULT_DENSE_INDEX_TYPE; +#endif + +// Matches Eigen::Map, Eigen::Ref, blocks, etc: +template using is_eigen_dense_map = all_of, std::is_base_of, T>>; +template using is_eigen_mutable_map = std::is_base_of, T>; +template using is_eigen_dense_plain = all_of>, is_template_base_of>; +template using is_eigen_sparse = is_template_base_of; +// Test for objects inheriting from EigenBase that aren't captured by the above. This +// basically covers anything that can be assigned to a dense matrix but that don't have a typical +// matrix data layout that can be copied from their .data(). For example, DiagonalMatrix and +// SelfAdjointView fall into this category. +template using is_eigen_other = all_of< + is_template_base_of, + negation, is_eigen_dense_plain, is_eigen_sparse>> +>; + +// Captures numpy/eigen conformability status (returned by EigenProps::conformable()): +template struct EigenConformable { + bool conformable = false; + EigenIndex rows = 0, cols = 0; + EigenDStride stride{0, 0}; // Only valid if negativestrides is false! + bool negativestrides = false; // If true, do not use stride! + + EigenConformable(bool fits = false) : conformable{fits} {} + // Matrix type: + EigenConformable(EigenIndex r, EigenIndex c, + EigenIndex rstride, EigenIndex cstride) : + conformable{true}, rows{r}, cols{c} { + // TODO: when Eigen bug #747 is fixed, remove the tests for non-negativity. 
http://eigen.tuxfamily.org/bz/show_bug.cgi?id=747 + if (rstride < 0 || cstride < 0) { + negativestrides = true; + } else { + stride = {EigenRowMajor ? rstride : cstride /* outer stride */, + EigenRowMajor ? cstride : rstride /* inner stride */ }; + } + } + // Vector type: + EigenConformable(EigenIndex r, EigenIndex c, EigenIndex stride) + : EigenConformable(r, c, r == 1 ? c*stride : stride, c == 1 ? r : r*stride) {} + + template bool stride_compatible() const { + // To have compatible strides, we need (on both dimensions) one of fully dynamic strides, + // matching strides, or a dimension size of 1 (in which case the stride value is irrelevant) + return + !negativestrides && + (props::inner_stride == Eigen::Dynamic || props::inner_stride == stride.inner() || + (EigenRowMajor ? cols : rows) == 1) && + (props::outer_stride == Eigen::Dynamic || props::outer_stride == stride.outer() || + (EigenRowMajor ? rows : cols) == 1); + } + operator bool() const { return conformable; } +}; + +template struct eigen_extract_stride { using type = Type; }; +template +struct eigen_extract_stride> { using type = StrideType; }; +template +struct eigen_extract_stride> { using type = StrideType; }; + +// Helper struct for extracting information from an Eigen type +template struct EigenProps { + using Type = Type_; + using Scalar = typename Type::Scalar; + using StrideType = typename eigen_extract_stride::type; + static constexpr EigenIndex + rows = Type::RowsAtCompileTime, + cols = Type::ColsAtCompileTime, + size = Type::SizeAtCompileTime; + static constexpr bool + row_major = Type::IsRowMajor, + vector = Type::IsVectorAtCompileTime, // At least one dimension has fixed size 1 + fixed_rows = rows != Eigen::Dynamic, + fixed_cols = cols != Eigen::Dynamic, + fixed = size != Eigen::Dynamic, // Fully-fixed size + dynamic = !fixed_rows && !fixed_cols; // Fully-dynamic size + + template using if_zero = std::integral_constant; + static constexpr EigenIndex inner_stride = if_zero::value, + 
outer_stride = if_zero::value; + static constexpr bool dynamic_stride = inner_stride == Eigen::Dynamic && outer_stride == Eigen::Dynamic; + static constexpr bool requires_row_major = !dynamic_stride && !vector && (row_major ? inner_stride : outer_stride) == 1; + static constexpr bool requires_col_major = !dynamic_stride && !vector && (row_major ? outer_stride : inner_stride) == 1; + + // Takes an input array and determines whether we can make it fit into the Eigen type. If + // the array is a vector, we attempt to fit it into either an Eigen 1xN or Nx1 vector + // (preferring the latter if it will fit in either, i.e. for a fully dynamic matrix type). + static EigenConformable conformable(const array &a) { + const auto dims = a.ndim(); + if (dims < 1 || dims > 2) + return false; + + if (dims == 2) { // Matrix type: require exact match (or dynamic) + + EigenIndex + np_rows = a.shape(0), + np_cols = a.shape(1), + np_rstride = a.strides(0) / static_cast(sizeof(Scalar)), + np_cstride = a.strides(1) / static_cast(sizeof(Scalar)); + if ((fixed_rows && np_rows != rows) || (fixed_cols && np_cols != cols)) + return false; + + return {np_rows, np_cols, np_rstride, np_cstride}; + } + + // Otherwise we're storing an n-vector. Only one of the strides will be used, but whichever + // is used, we want the (single) numpy stride value. + const EigenIndex n = a.shape(0), + stride = a.strides(0) / static_cast(sizeof(Scalar)); + + if (vector) { // Eigen type is a compile-time vector + if (fixed && size != n) + return false; // Vector size mismatch + return {rows == 1 ? 1 : n, cols == 1 ? 1 : n, stride}; + } + else if (fixed) { + // The type has a fixed size, but is not a vector: abort + return false; + } + else if (fixed_cols) { + // Since this isn't a vector, cols must be != 1. We allow this only if it exactly + // equals the number of elements (rows is Dynamic, and so 1 row is allowed). 
+ if (cols != n) return false; + return {1, n, stride}; + } + else { + // Otherwise it's either fully dynamic, or column dynamic; both become a column vector + if (fixed_rows && rows != n) return false; + return {n, 1, stride}; + } + } + + static constexpr bool show_writeable = is_eigen_dense_map::value && is_eigen_mutable_map::value; + static constexpr bool show_order = is_eigen_dense_map::value; + static constexpr bool show_c_contiguous = show_order && requires_row_major; + static constexpr bool show_f_contiguous = !show_c_contiguous && show_order && requires_col_major; + + static constexpr auto descriptor = + _("numpy.ndarray[") + npy_format_descriptor::name + + _("[") + _(_<(size_t) rows>(), _("m")) + + _(", ") + _(_<(size_t) cols>(), _("n")) + + _("]") + + // For a reference type (e.g. Ref) we have other constraints that might need to be + // satisfied: writeable=True (for a mutable reference), and, depending on the map's stride + // options, possibly f_contiguous or c_contiguous. We include them in the descriptor output + // to provide some hint as to why a TypeError is occurring (otherwise it can be confusing to + // see that a function accepts a 'numpy.ndarray[float64[3,2]]' and an error message that you + // *gave* a numpy.ndarray of the right type and dimensions. + _(", flags.writeable", "") + + _(", flags.c_contiguous", "") + + _(", flags.f_contiguous", "") + + _("]"); +}; + +// Casts an Eigen type to numpy array. If given a base, the numpy array references the src data, +// otherwise it'll make a copy. writeable lets you turn off the writeable flag for the array. 
+template handle eigen_array_cast(typename props::Type const &src, handle base = handle(), bool writeable = true) { + constexpr ssize_t elem_size = sizeof(typename props::Scalar); + array a; + if (props::vector) + a = array({ src.size() }, { elem_size * src.innerStride() }, src.data(), base); + else + a = array({ src.rows(), src.cols() }, { elem_size * src.rowStride(), elem_size * src.colStride() }, + src.data(), base); + + if (!writeable) + array_proxy(a.ptr())->flags &= ~detail::npy_api::NPY_ARRAY_WRITEABLE_; + + return a.release(); +} + +// Takes an lvalue ref to some Eigen type and a (python) base object, creating a numpy array that +// reference the Eigen object's data with `base` as the python-registered base class (if omitted, +// the base will be set to None, and lifetime management is up to the caller). The numpy array is +// non-writeable if the given type is const. +template +handle eigen_ref_array(Type &src, handle parent = none()) { + // none here is to get past array's should-we-copy detection, which currently always + // copies when there is no base. Setting the base to None should be harmless. + return eigen_array_cast(src, parent, !std::is_const::value); +} + +// Takes a pointer to some dense, plain Eigen type, builds a capsule around it, then returns a numpy +// array that references the encapsulated data with a python-side reference to the capsule to tie +// its destruction to that of any dependent python objects. Const-ness is determined by whether or +// not the Type of the pointer given is const. +template ::value>> +handle eigen_encapsulate(Type *src) { + capsule base(src, [](void *o) { delete static_cast(o); }); + return eigen_ref_array(*src, base); +} + +// Type caster for regular, dense matrix types (e.g. MatrixXd), but not maps/refs/etc. of dense +// types. 
+template +struct type_caster::value>> { + using Scalar = typename Type::Scalar; + using props = EigenProps; + + bool load(handle src, bool convert) { + // If we're in no-convert mode, only load if given an array of the correct type + if (!convert && !isinstance>(src)) + return false; + + // Coerce into an array, but don't do type conversion yet; the copy below handles it. + auto buf = array::ensure(src); + + if (!buf) + return false; + + auto dims = buf.ndim(); + if (dims < 1 || dims > 2) + return false; + + auto fits = props::conformable(buf); + if (!fits) + return false; + + // Allocate the new type, then build a numpy reference into it + value = Type(fits.rows, fits.cols); + auto ref = reinterpret_steal(eigen_ref_array(value)); + if (dims == 1) ref = ref.squeeze(); + else if (ref.ndim() == 1) buf = buf.squeeze(); + + int result = detail::npy_api::get().PyArray_CopyInto_(ref.ptr(), buf.ptr()); + + if (result < 0) { // Copy failed! + PyErr_Clear(); + return false; + } + + return true; + } + +private: + + // Cast implementation + template + static handle cast_impl(CType *src, return_value_policy policy, handle parent) { + switch (policy) { + case return_value_policy::take_ownership: + case return_value_policy::automatic: + return eigen_encapsulate(src); + case return_value_policy::move: + return eigen_encapsulate(new CType(std::move(*src))); + case return_value_policy::copy: + return eigen_array_cast(*src); + case return_value_policy::reference: + case return_value_policy::automatic_reference: + return eigen_ref_array(*src); + case return_value_policy::reference_internal: + return eigen_ref_array(*src, parent); + default: + throw cast_error("unhandled return_value_policy: should not happen!"); + }; + } + +public: + + // Normal returned non-reference, non-const value: + static handle cast(Type &&src, return_value_policy /* policy */, handle parent) { + return cast_impl(&src, return_value_policy::move, parent); + } + // If you return a non-reference const, we mark 
the numpy array readonly: + static handle cast(const Type &&src, return_value_policy /* policy */, handle parent) { + return cast_impl(&src, return_value_policy::move, parent); + } + // lvalue reference return; default (automatic) becomes copy + static handle cast(Type &src, return_value_policy policy, handle parent) { + if (policy == return_value_policy::automatic || policy == return_value_policy::automatic_reference) + policy = return_value_policy::copy; + return cast_impl(&src, policy, parent); + } + // const lvalue reference return; default (automatic) becomes copy + static handle cast(const Type &src, return_value_policy policy, handle parent) { + if (policy == return_value_policy::automatic || policy == return_value_policy::automatic_reference) + policy = return_value_policy::copy; + return cast(&src, policy, parent); + } + // non-const pointer return + static handle cast(Type *src, return_value_policy policy, handle parent) { + return cast_impl(src, policy, parent); + } + // const pointer return + static handle cast(const Type *src, return_value_policy policy, handle parent) { + return cast_impl(src, policy, parent); + } + + static constexpr auto name = props::descriptor; + + operator Type*() { return &value; } + operator Type&() { return value; } + operator Type&&() && { return std::move(value); } + template using cast_op_type = movable_cast_op_type; + +private: + Type value; +}; + +// Base class for casting reference/map/block/etc. objects back to python. +template struct eigen_map_caster { +private: + using props = EigenProps; + +public: + + // Directly referencing a ref/map's data is a bit dangerous (whatever the map/ref points to has + // to stay around), but we'll allow it under the assumption that you know what you're doing (and + // have an appropriate keep_alive in place). We return a numpy array pointing directly at the + // ref's data (The numpy array ends up read-only if the ref was to a const matrix type.) 
Note + // that this means you need to ensure you don't destroy the object in some other way (e.g. with + // an appropriate keep_alive, or with a reference to a statically allocated matrix). + static handle cast(const MapType &src, return_value_policy policy, handle parent) { + switch (policy) { + case return_value_policy::copy: + return eigen_array_cast(src); + case return_value_policy::reference_internal: + return eigen_array_cast(src, parent, is_eigen_mutable_map::value); + case return_value_policy::reference: + case return_value_policy::automatic: + case return_value_policy::automatic_reference: + return eigen_array_cast(src, none(), is_eigen_mutable_map::value); + default: + // move, take_ownership don't make any sense for a ref/map: + pybind11_fail("Invalid return_value_policy for Eigen Map/Ref/Block type"); + } + } + + static constexpr auto name = props::descriptor; + + // Explicitly delete these: support python -> C++ conversion on these (i.e. these can be return + // types but not bound arguments). We still provide them (with an explicitly delete) so that + // you end up here if you try anyway. + bool load(handle, bool) = delete; + operator MapType() = delete; + template using cast_op_type = MapType; +}; + +// We can return any map-like object (but can only load Refs, specialized next): +template struct type_caster::value>> + : eigen_map_caster {}; + +// Loader for Ref<...> arguments. See the documentation for info on how to make this work without +// copying (it requires some extra effort in many cases). +template +struct type_caster< + Eigen::Ref, + enable_if_t>::value> +> : public eigen_map_caster> { +private: + using Type = Eigen::Ref; + using props = EigenProps; + using Scalar = typename props::Scalar; + using MapType = Eigen::Map; + using Array = array_t; + static constexpr bool need_writeable = is_eigen_mutable_map::value; + // Delay construction (these have no default constructor) + std::unique_ptr map; + std::unique_ptr ref; + // Our array. 
When possible, this is just a numpy array pointing to the source data, but + // sometimes we can't avoid copying (e.g. input is not a numpy array at all, has an incompatible + // layout, or is an array of a type that needs to be converted). Using a numpy temporary + // (rather than an Eigen temporary) saves an extra copy when we need both type conversion and + // storage order conversion. (Note that we refuse to use this temporary copy when loading an + // argument for a Ref with M non-const, i.e. a read-write reference). + Array copy_or_ref; +public: + bool load(handle src, bool convert) { + // First check whether what we have is already an array of the right type. If not, we can't + // avoid a copy (because the copy is also going to do type conversion). + bool need_copy = !isinstance(src); + + EigenConformable fits; + if (!need_copy) { + // We don't need a converting copy, but we also need to check whether the strides are + // compatible with the Ref's stride requirements + Array aref = reinterpret_borrow(src); + + if (aref && (!need_writeable || aref.writeable())) { + fits = props::conformable(aref); + if (!fits) return false; // Incompatible dimensions + if (!fits.template stride_compatible()) + need_copy = true; + else + copy_or_ref = std::move(aref); + } + else { + need_copy = true; + } + } + + if (need_copy) { + // We need to copy: If we need a mutable reference, or we're not supposed to convert + // (either because we're in the no-convert overload pass, or because we're explicitly + // instructed not to copy (via `py::arg().noconvert()`) we have to fail loading. 
+ if (!convert || need_writeable) return false; + + Array copy = Array::ensure(src); + if (!copy) return false; + fits = props::conformable(copy); + if (!fits || !fits.template stride_compatible()) + return false; + copy_or_ref = std::move(copy); + loader_life_support::add_patient(copy_or_ref); + } + + ref.reset(); + map.reset(new MapType(data(copy_or_ref), fits.rows, fits.cols, make_stride(fits.stride.outer(), fits.stride.inner()))); + ref.reset(new Type(*map)); + + return true; + } + + operator Type*() { return ref.get(); } + operator Type&() { return *ref; } + template using cast_op_type = pybind11::detail::cast_op_type<_T>; + +private: + template ::value, int> = 0> + Scalar *data(Array &a) { return a.mutable_data(); } + + template ::value, int> = 0> + const Scalar *data(Array &a) { return a.data(); } + + // Attempt to figure out a constructor of `Stride` that will work. + // If both strides are fixed, use a default constructor: + template using stride_ctor_default = bool_constant< + S::InnerStrideAtCompileTime != Eigen::Dynamic && S::OuterStrideAtCompileTime != Eigen::Dynamic && + std::is_default_constructible::value>; + // Otherwise, if there is a two-index constructor, assume it is (outer,inner) like + // Eigen::Stride, and use it: + template using stride_ctor_dual = bool_constant< + !stride_ctor_default::value && std::is_constructible::value>; + // Otherwise, if there is a one-index constructor, and just one of the strides is dynamic, use + // it (passing whichever stride is dynamic). 
+ template using stride_ctor_outer = bool_constant< + !any_of, stride_ctor_dual>::value && + S::OuterStrideAtCompileTime == Eigen::Dynamic && S::InnerStrideAtCompileTime != Eigen::Dynamic && + std::is_constructible::value>; + template using stride_ctor_inner = bool_constant< + !any_of, stride_ctor_dual>::value && + S::InnerStrideAtCompileTime == Eigen::Dynamic && S::OuterStrideAtCompileTime != Eigen::Dynamic && + std::is_constructible::value>; + + template ::value, int> = 0> + static S make_stride(EigenIndex, EigenIndex) { return S(); } + template ::value, int> = 0> + static S make_stride(EigenIndex outer, EigenIndex inner) { return S(outer, inner); } + template ::value, int> = 0> + static S make_stride(EigenIndex outer, EigenIndex) { return S(outer); } + template ::value, int> = 0> + static S make_stride(EigenIndex, EigenIndex inner) { return S(inner); } + +}; + +// type_caster for special matrix types (e.g. DiagonalMatrix), which are EigenBase, but not +// EigenDense (i.e. they don't have a data(), at least not with the usual matrix layout). +// load() is not supported, but we can cast them into the python domain by first copying to a +// regular Eigen::Matrix, then casting that. +template +struct type_caster::value>> { +protected: + using Matrix = Eigen::Matrix; + using props = EigenProps; +public: + static handle cast(const Type &src, return_value_policy /* policy */, handle /* parent */) { + handle h = eigen_encapsulate(new Matrix(src)); + return h; + } + static handle cast(const Type *src, return_value_policy policy, handle parent) { return cast(*src, policy, parent); } + + static constexpr auto name = props::descriptor; + + // Explicitly delete these: support python -> C++ conversion on these (i.e. these can be return + // types but not bound arguments). We still provide them (with an explicitly delete) so that + // you end up here if you try anyway. 
+ bool load(handle, bool) = delete; + operator Type() = delete; + template using cast_op_type = Type; +}; + +template +struct type_caster::value>> { + typedef typename Type::Scalar Scalar; + typedef remove_reference_t().outerIndexPtr())> StorageIndex; + typedef typename Type::Index Index; + static constexpr bool rowMajor = Type::IsRowMajor; + + bool load(handle src, bool) { + if (!src) + return false; + + auto obj = reinterpret_borrow(src); + object sparse_module = module::import("scipy.sparse"); + object matrix_type = sparse_module.attr( + rowMajor ? "csr_matrix" : "csc_matrix"); + + if (!obj.get_type().is(matrix_type)) { + try { + obj = matrix_type(obj); + } catch (const error_already_set &) { + return false; + } + } + + auto values = array_t((object) obj.attr("data")); + auto innerIndices = array_t((object) obj.attr("indices")); + auto outerIndices = array_t((object) obj.attr("indptr")); + auto shape = pybind11::tuple((pybind11::object) obj.attr("shape")); + auto nnz = obj.attr("nnz").cast(); + + if (!values || !innerIndices || !outerIndices) + return false; + + value = Eigen::MappedSparseMatrix( + shape[0].cast(), shape[1].cast(), nnz, + outerIndices.mutable_data(), innerIndices.mutable_data(), values.mutable_data()); + + return true; + } + + static handle cast(const Type &src, return_value_policy /* policy */, handle /* parent */) { + const_cast(src).makeCompressed(); + + object matrix_type = module::import("scipy.sparse").attr( + rowMajor ? "csr_matrix" : "csc_matrix"); + + array data(src.nonZeros(), src.valuePtr()); + array outerIndices((rowMajor ? 
src.rows() : src.cols()) + 1, src.outerIndexPtr()); + array innerIndices(src.nonZeros(), src.innerIndexPtr()); + + return matrix_type( + std::make_tuple(data, innerIndices, outerIndices), + std::make_pair(src.rows(), src.cols()) + ).release(); + } + + PYBIND11_TYPE_CASTER(Type, _<(Type::IsRowMajor) != 0>("scipy.sparse.csr_matrix[", "scipy.sparse.csc_matrix[") + + npy_format_descriptor::name + _("]")); +}; + +NAMESPACE_END(detail) +NAMESPACE_END(PYBIND11_NAMESPACE) + +#if defined(__GNUG__) || defined(__clang__) +# pragma GCC diagnostic pop +#elif defined(_MSC_VER) +# pragma warning(pop) +#endif diff --git a/thirdparty/pybind11/include/pybind11/embed.h b/thirdparty/pybind11/include/pybind11/embed.h new file mode 100644 index 000000000..f814c783e --- /dev/null +++ b/thirdparty/pybind11/include/pybind11/embed.h @@ -0,0 +1,202 @@ +/* + pybind11/embed.h: Support for embedding the interpreter + + Copyright (c) 2017 Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. +*/ + +#pragma once + +#include "pybind11.h" +#include "eval.h" + +#if defined(PYPY_VERSION) +# error Embedding the interpreter is not supported with PyPy +#endif + +#if PY_MAJOR_VERSION >= 3 +# define PYBIND11_EMBEDDED_MODULE_IMPL(name) \ + extern "C" PyObject *pybind11_init_impl_##name(); \ + extern "C" PyObject *pybind11_init_impl_##name() { \ + return pybind11_init_wrapper_##name(); \ + } +#else +# define PYBIND11_EMBEDDED_MODULE_IMPL(name) \ + extern "C" void pybind11_init_impl_##name(); \ + extern "C" void pybind11_init_impl_##name() { \ + pybind11_init_wrapper_##name(); \ + } +#endif + +/** \rst + Add a new module to the table of builtins for the interpreter. Must be + defined in global scope. The first macro parameter is the name of the + module (without quotes). The second parameter is the variable which will + be used as the interface to add functions and classes to the module. + + .. 
code-block:: cpp + + PYBIND11_EMBEDDED_MODULE(example, m) { + // ... initialize functions and classes here + m.def("foo", []() { + return "Hello, World!"; + }); + } + \endrst */ +#define PYBIND11_EMBEDDED_MODULE(name, variable) \ + static void PYBIND11_CONCAT(pybind11_init_, name)(pybind11::module &); \ + static PyObject PYBIND11_CONCAT(*pybind11_init_wrapper_, name)() { \ + auto m = pybind11::module(PYBIND11_TOSTRING(name)); \ + try { \ + PYBIND11_CONCAT(pybind11_init_, name)(m); \ + return m.ptr(); \ + } catch (pybind11::error_already_set &e) { \ + PyErr_SetString(PyExc_ImportError, e.what()); \ + return nullptr; \ + } catch (const std::exception &e) { \ + PyErr_SetString(PyExc_ImportError, e.what()); \ + return nullptr; \ + } \ + } \ + PYBIND11_EMBEDDED_MODULE_IMPL(name) \ + pybind11::detail::embedded_module name(PYBIND11_TOSTRING(name), \ + PYBIND11_CONCAT(pybind11_init_impl_, name)); \ + void PYBIND11_CONCAT(pybind11_init_, name)(pybind11::module &variable) + + +NAMESPACE_BEGIN(PYBIND11_NAMESPACE) +NAMESPACE_BEGIN(detail) + +/// Python 2.7/3.x compatible version of `PyImport_AppendInittab` and error checks. +struct embedded_module { +#if PY_MAJOR_VERSION >= 3 + using init_t = PyObject *(*)(); +#else + using init_t = void (*)(); +#endif + embedded_module(const char *name, init_t init) { + if (Py_IsInitialized()) + pybind11_fail("Can't add new modules after the interpreter has been initialized"); + + auto result = PyImport_AppendInittab(name, init); + if (result == -1) + pybind11_fail("Insufficient memory to add a new module"); + } +}; + +NAMESPACE_END(detail) + +/** \rst + Initialize the Python interpreter. No other pybind11 or CPython API functions can be + called before this is done; with the exception of `PYBIND11_EMBEDDED_MODULE`. The + optional parameter can be used to skip the registration of signal handlers (see the + `Python documentation`_ for details). Calling this function again after the interpreter + has already been initialized is a fatal error. 
+ + If initializing the Python interpreter fails, then the program is terminated. (This + is controlled by the CPython runtime and is an exception to pybind11's normal behavior + of throwing exceptions on errors.) + + .. _Python documentation: https://docs.python.org/3/c-api/init.html#c.Py_InitializeEx + \endrst */ +inline void initialize_interpreter(bool init_signal_handlers = true) { + if (Py_IsInitialized()) + pybind11_fail("The interpreter is already running"); + + Py_InitializeEx(init_signal_handlers ? 1 : 0); + + // Make .py files in the working directory available by default + module::import("sys").attr("path").cast().append("."); +} + +/** \rst + Shut down the Python interpreter. No pybind11 or CPython API functions can be called + after this. In addition, pybind11 objects must not outlive the interpreter: + + .. code-block:: cpp + + { // BAD + py::initialize_interpreter(); + auto hello = py::str("Hello, World!"); + py::finalize_interpreter(); + } // <-- BOOM, hello's destructor is called after interpreter shutdown + + { // GOOD + py::initialize_interpreter(); + { // scoped + auto hello = py::str("Hello, World!"); + } // <-- OK, hello is cleaned up properly + py::finalize_interpreter(); + } + + { // BETTER + py::scoped_interpreter guard{}; + auto hello = py::str("Hello, World!"); + } + + .. warning:: + + The interpreter can be restarted by calling `initialize_interpreter` again. + Modules created using pybind11 can be safely re-initialized. However, Python + itself cannot completely unload binary extension modules and there are several + caveats with regard to interpreter restarting. All the details can be found + in the CPython documentation. In short, not all interpreter memory may be + freed, either due to reference cycles or user-created global data. 
+ + \endrst */ +inline void finalize_interpreter() { + handle builtins(PyEval_GetBuiltins()); + const char *id = PYBIND11_INTERNALS_ID; + + // Get the internals pointer (without creating it if it doesn't exist). It's possible for the + // internals to be created during Py_Finalize() (e.g. if a py::capsule calls `get_internals()` + // during destruction), so we get the pointer-pointer here and check it after Py_Finalize(). + detail::internals **internals_ptr_ptr = detail::get_internals_pp(); + // It could also be stashed in builtins, so look there too: + if (builtins.contains(id) && isinstance(builtins[id])) + internals_ptr_ptr = capsule(builtins[id]); + + Py_Finalize(); + + if (internals_ptr_ptr) { + delete *internals_ptr_ptr; + *internals_ptr_ptr = nullptr; + } +} + +/** \rst + Scope guard version of `initialize_interpreter` and `finalize_interpreter`. + This a move-only guard and only a single instance can exist. + + .. code-block:: cpp + + #include + + int main() { + py::scoped_interpreter guard{}; + py::print(Hello, World!); + } // <-- interpreter shutdown + \endrst */ +class scoped_interpreter { +public: + scoped_interpreter(bool init_signal_handlers = true) { + initialize_interpreter(init_signal_handlers); + } + + scoped_interpreter(const scoped_interpreter &) = delete; + scoped_interpreter(scoped_interpreter &&other) noexcept { other.is_valid = false; } + scoped_interpreter &operator=(const scoped_interpreter &) = delete; + scoped_interpreter &operator=(scoped_interpreter &&) = delete; + + ~scoped_interpreter() { + if (is_valid) + finalize_interpreter(); + } + +private: + bool is_valid = true; +}; + +NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/thirdparty/pybind11/include/pybind11/eval.h b/thirdparty/pybind11/include/pybind11/eval.h new file mode 100644 index 000000000..ea85ba1db --- /dev/null +++ b/thirdparty/pybind11/include/pybind11/eval.h @@ -0,0 +1,117 @@ +/* + pybind11/exec.h: Support for evaluating Python expressions and statements + from strings 
and files + + Copyright (c) 2016 Klemens Morgenstern and + Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. +*/ + +#pragma once + +#include "pybind11.h" + +NAMESPACE_BEGIN(PYBIND11_NAMESPACE) + +enum eval_mode { + /// Evaluate a string containing an isolated expression + eval_expr, + + /// Evaluate a string containing a single statement. Returns \c none + eval_single_statement, + + /// Evaluate a string containing a sequence of statement. Returns \c none + eval_statements +}; + +template +object eval(str expr, object global = globals(), object local = object()) { + if (!local) + local = global; + + /* PyRun_String does not accept a PyObject / encoding specifier, + this seems to be the only alternative */ + std::string buffer = "# -*- coding: utf-8 -*-\n" + (std::string) expr; + + int start; + switch (mode) { + case eval_expr: start = Py_eval_input; break; + case eval_single_statement: start = Py_single_input; break; + case eval_statements: start = Py_file_input; break; + default: pybind11_fail("invalid evaluation mode"); + } + + PyObject *result = PyRun_String(buffer.c_str(), start, global.ptr(), local.ptr()); + if (!result) + throw error_already_set(); + return reinterpret_steal(result); +} + +template +object eval(const char (&s)[N], object global = globals(), object local = object()) { + /* Support raw string literals by removing common leading whitespace */ + auto expr = (s[0] == '\n') ? 
str(module::import("textwrap").attr("dedent")(s)) + : str(s); + return eval(expr, global, local); +} + +inline void exec(str expr, object global = globals(), object local = object()) { + eval(expr, global, local); +} + +template +void exec(const char (&s)[N], object global = globals(), object local = object()) { + eval(s, global, local); +} + +template +object eval_file(str fname, object global = globals(), object local = object()) { + if (!local) + local = global; + + int start; + switch (mode) { + case eval_expr: start = Py_eval_input; break; + case eval_single_statement: start = Py_single_input; break; + case eval_statements: start = Py_file_input; break; + default: pybind11_fail("invalid evaluation mode"); + } + + int closeFile = 1; + std::string fname_str = (std::string) fname; +#if PY_VERSION_HEX >= 0x03040000 + FILE *f = _Py_fopen_obj(fname.ptr(), "r"); +#elif PY_VERSION_HEX >= 0x03000000 + FILE *f = _Py_fopen(fname.ptr(), "r"); +#else + /* No unicode support in open() :( */ + auto fobj = reinterpret_steal(PyFile_FromString( + const_cast(fname_str.c_str()), + const_cast("r"))); + FILE *f = nullptr; + if (fobj) + f = PyFile_AsFile(fobj.ptr()); + closeFile = 0; +#endif + if (!f) { + PyErr_Clear(); + pybind11_fail("File \"" + fname_str + "\" could not be opened!"); + } + +#if PY_VERSION_HEX < 0x03000000 && defined(PYPY_VERSION) + PyObject *result = PyRun_File(f, fname_str.c_str(), start, global.ptr(), + local.ptr()); + (void) closeFile; +#else + PyObject *result = PyRun_FileEx(f, fname_str.c_str(), start, global.ptr(), + local.ptr(), closeFile); +#endif + + if (!result) + throw error_already_set(); + return reinterpret_steal(result); +} + +NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/thirdparty/pybind11/include/pybind11/functional.h b/thirdparty/pybind11/include/pybind11/functional.h new file mode 100644 index 000000000..f8bda6483 --- /dev/null +++ b/thirdparty/pybind11/include/pybind11/functional.h @@ -0,0 +1,101 @@ +/* + pybind11/functional.h: 
std::function<> support + + Copyright (c) 2016 Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. +*/ + +#pragma once + +#include "pybind11.h" +#include + +NAMESPACE_BEGIN(PYBIND11_NAMESPACE) +NAMESPACE_BEGIN(detail) + +template +struct type_caster> { + using type = std::function; + using retval_type = conditional_t::value, void_type, Return>; + using function_type = Return (*) (Args...); + +public: + bool load(handle src, bool convert) { + if (src.is_none()) { + // Defer accepting None to other overloads (if we aren't in convert mode): + if (!convert) return false; + return true; + } + + if (!isinstance(src)) + return false; + + auto func = reinterpret_borrow(src); + + /* + When passing a C++ function as an argument to another C++ + function via Python, every function call would normally involve + a full C++ -> Python -> C++ roundtrip, which can be prohibitive. + Here, we try to at least detect the case where the function is + stateless (i.e. function pointer or lambda function without + captured variables), in which case the roundtrip can be avoided. + */ + if (auto cfunc = func.cpp_function()) { + auto c = reinterpret_borrow(PyCFunction_GET_SELF(cfunc.ptr())); + auto rec = (function_record *) c; + + if (rec && rec->is_stateless && + same_type(typeid(function_type), *reinterpret_cast(rec->data[1]))) { + struct capture { function_type f; }; + value = ((capture *) &rec->data)->f; + return true; + } + } + + // ensure GIL is held during functor destruction + struct func_handle { + function f; + func_handle(function&& f_) : f(std::move(f_)) {} + func_handle(const func_handle&) = default; + ~func_handle() { + gil_scoped_acquire acq; + function kill_f(std::move(f)); + } + }; + + // to emulate 'move initialization capture' in C++11 + struct func_wrapper { + func_handle hfunc; + func_wrapper(func_handle&& hf): hfunc(std::move(hf)) {} + Return operator()(Args... 
args) const { + gil_scoped_acquire acq; + object retval(hfunc.f(std::forward(args)...)); + /* Visual studio 2015 parser issue: need parentheses around this expression */ + return (retval.template cast()); + } + }; + + value = func_wrapper(func_handle(std::move(func))); + return true; + } + + template + static handle cast(Func &&f_, return_value_policy policy, handle /* parent */) { + if (!f_) + return none().inc_ref(); + + auto result = f_.template target(); + if (result) + return cpp_function(*result, policy).release(); + else + return cpp_function(std::forward(f_), policy).release(); + } + + PYBIND11_TYPE_CASTER(type, _("Callable[[") + concat(make_caster::name...) + _("], ") + + make_caster::name + _("]")); +}; + +NAMESPACE_END(detail) +NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/thirdparty/pybind11/include/pybind11/iostream.h b/thirdparty/pybind11/include/pybind11/iostream.h new file mode 100644 index 000000000..c43b7c93a --- /dev/null +++ b/thirdparty/pybind11/include/pybind11/iostream.h @@ -0,0 +1,209 @@ +/* + pybind11/iostream.h -- Tools to assist with redirecting cout and cerr to Python + + Copyright (c) 2017 Henry F. Schreiner + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. +*/ + +#pragma once + +#include "pybind11.h" + +#include +#include +#include +#include +#include + +NAMESPACE_BEGIN(PYBIND11_NAMESPACE) +NAMESPACE_BEGIN(detail) + +// Buffer that writes to Python instead of C++ +class pythonbuf : public std::streambuf { +private: + using traits_type = std::streambuf::traits_type; + + const size_t buf_size; + std::unique_ptr d_buffer; + object pywrite; + object pyflush; + + int overflow(int c) { + if (!traits_type::eq_int_type(c, traits_type::eof())) { + *pptr() = traits_type::to_char_type(c); + pbump(1); + } + return sync() == 0 ? 
traits_type::not_eof(c) : traits_type::eof(); + } + + int sync() { + if (pbase() != pptr()) { + // This subtraction cannot be negative, so dropping the sign + str line(pbase(), static_cast(pptr() - pbase())); + + { + gil_scoped_acquire tmp; + pywrite(line); + pyflush(); + } + + setp(pbase(), epptr()); + } + return 0; + } + +public: + + pythonbuf(object pyostream, size_t buffer_size = 1024) + : buf_size(buffer_size), + d_buffer(new char[buf_size]), + pywrite(pyostream.attr("write")), + pyflush(pyostream.attr("flush")) { + setp(d_buffer.get(), d_buffer.get() + buf_size - 1); + } + + pythonbuf(pythonbuf&&) = default; + + /// Sync before destroy + ~pythonbuf() { + sync(); + } +}; + +NAMESPACE_END(detail) + + +/** \rst + This a move-only guard that redirects output. + + .. code-block:: cpp + + #include + + ... + + { + py::scoped_ostream_redirect output; + std::cout << "Hello, World!"; // Python stdout + } // <-- return std::cout to normal + + You can explicitly pass the c++ stream and the python object, + for example to guard stderr instead. + + .. code-block:: cpp + + { + py::scoped_ostream_redirect output{std::cerr, py::module::import("sys").attr("stderr")}; + std::cerr << "Hello, World!"; + } + \endrst */ +class scoped_ostream_redirect { +protected: + std::streambuf *old; + std::ostream &costream; + detail::pythonbuf buffer; + +public: + scoped_ostream_redirect( + std::ostream &costream = std::cout, + object pyostream = module::import("sys").attr("stdout")) + : costream(costream), buffer(pyostream) { + old = costream.rdbuf(&buffer); + } + + ~scoped_ostream_redirect() { + costream.rdbuf(old); + } + + scoped_ostream_redirect(const scoped_ostream_redirect &) = delete; + scoped_ostream_redirect(scoped_ostream_redirect &&other) = default; + scoped_ostream_redirect &operator=(const scoped_ostream_redirect &) = delete; + scoped_ostream_redirect &operator=(scoped_ostream_redirect &&) = delete; +}; + + +/** \rst + Like `scoped_ostream_redirect`, but redirects cerr by default. 
This class + is provided primary to make ``py::call_guard`` easier to make. + + .. code-block:: cpp + + m.def("noisy_func", &noisy_func, + py::call_guard()); + +\endrst */ +class scoped_estream_redirect : public scoped_ostream_redirect { +public: + scoped_estream_redirect( + std::ostream &costream = std::cerr, + object pyostream = module::import("sys").attr("stderr")) + : scoped_ostream_redirect(costream,pyostream) {} +}; + + +NAMESPACE_BEGIN(detail) + +// Class to redirect output as a context manager. C++ backend. +class OstreamRedirect { + bool do_stdout_; + bool do_stderr_; + std::unique_ptr redirect_stdout; + std::unique_ptr redirect_stderr; + +public: + OstreamRedirect(bool do_stdout = true, bool do_stderr = true) + : do_stdout_(do_stdout), do_stderr_(do_stderr) {} + + void enter() { + if (do_stdout_) + redirect_stdout.reset(new scoped_ostream_redirect()); + if (do_stderr_) + redirect_stderr.reset(new scoped_estream_redirect()); + } + + void exit() { + redirect_stdout.reset(); + redirect_stderr.reset(); + } +}; + +NAMESPACE_END(detail) + +/** \rst + This is a helper function to add a C++ redirect context manager to Python + instead of using a C++ guard. To use it, add the following to your binding code: + + .. code-block:: cpp + + #include + + ... + + py::add_ostream_redirect(m, "ostream_redirect"); + + You now have a Python context manager that redirects your output: + + .. code-block:: python + + with m.ostream_redirect(): + m.print_to_cout_function() + + This manager can optionally be told which streams to operate on: + + .. 
code-block:: python + + with m.ostream_redirect(stdout=true, stderr=true): + m.noisy_function_with_error_printing() + + \endrst */ +inline class_ add_ostream_redirect(module m, std::string name = "ostream_redirect") { + return class_(m, name.c_str(), module_local()) + .def(init(), arg("stdout")=true, arg("stderr")=true) + .def("__enter__", &detail::OstreamRedirect::enter) + .def("__exit__", [](detail::OstreamRedirect &self_, args) { self_.exit(); }); +} + +NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/thirdparty/pybind11/include/pybind11/numpy.h b/thirdparty/pybind11/include/pybind11/numpy.h new file mode 100644 index 000000000..a67452105 --- /dev/null +++ b/thirdparty/pybind11/include/pybind11/numpy.h @@ -0,0 +1,1643 @@ +/* + pybind11/numpy.h: Basic NumPy support, vectorize() wrapper + + Copyright (c) 2016 Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. +*/ + +#pragma once + +#include "pybind11.h" +#include "complex.h" +#include "detail/common.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable: 4127) // warning C4127: Conditional expression is constant +#endif + +/* This will be true on all flat address space platforms and allows us to reduce the + whole npy_intp / ssize_t / Py_intptr_t business down to just ssize_t for all size + and dimension types (e.g. shape, strides, indexing), instead of inflicting this + upon the library user. 
*/ +static_assert(sizeof(ssize_t) == sizeof(Py_intptr_t), "ssize_t != Py_intptr_t"); + +NAMESPACE_BEGIN(PYBIND11_NAMESPACE) + +class array; // Forward declaration + +NAMESPACE_BEGIN(detail) +template struct npy_format_descriptor; + +struct PyArrayDescr_Proxy { + PyObject_HEAD + PyObject *typeobj; + char kind; + char type; + char byteorder; + char flags; + int type_num; + int elsize; + int alignment; + char *subarray; + PyObject *fields; + PyObject *names; +}; + +struct PyArray_Proxy { + PyObject_HEAD + char *data; + int nd; + ssize_t *dimensions; + ssize_t *strides; + PyObject *base; + PyObject *descr; + int flags; +}; + +struct PyVoidScalarObject_Proxy { + PyObject_VAR_HEAD + char *obval; + PyArrayDescr_Proxy *descr; + int flags; + PyObject *base; +}; + +struct numpy_type_info { + PyObject* dtype_ptr; + std::string format_str; +}; + +struct numpy_internals { + std::unordered_map registered_dtypes; + + numpy_type_info *get_type_info(const std::type_info& tinfo, bool throw_if_missing = true) { + auto it = registered_dtypes.find(std::type_index(tinfo)); + if (it != registered_dtypes.end()) + return &(it->second); + if (throw_if_missing) + pybind11_fail(std::string("NumPy type info missing for ") + tinfo.name()); + return nullptr; + } + + template numpy_type_info *get_type_info(bool throw_if_missing = true) { + return get_type_info(typeid(typename std::remove_cv::type), throw_if_missing); + } +}; + +inline PYBIND11_NOINLINE void load_numpy_internals(numpy_internals* &ptr) { + ptr = &get_or_create_shared_data("_numpy_internals"); +} + +inline numpy_internals& get_numpy_internals() { + static numpy_internals* ptr = nullptr; + if (!ptr) + load_numpy_internals(ptr); + return *ptr; +} + +template struct same_size { + template using as = bool_constant; +}; + +template constexpr int platform_lookup() { return -1; } + +// Lookup a type according to its size, and return a value corresponding to the NumPy typenum. +template +constexpr int platform_lookup(int I, Ints... 
Is) { + return sizeof(Concrete) == sizeof(T) ? I : platform_lookup(Is...); +} + +struct npy_api { + enum constants { + NPY_ARRAY_C_CONTIGUOUS_ = 0x0001, + NPY_ARRAY_F_CONTIGUOUS_ = 0x0002, + NPY_ARRAY_OWNDATA_ = 0x0004, + NPY_ARRAY_FORCECAST_ = 0x0010, + NPY_ARRAY_ENSUREARRAY_ = 0x0040, + NPY_ARRAY_ALIGNED_ = 0x0100, + NPY_ARRAY_WRITEABLE_ = 0x0400, + NPY_BOOL_ = 0, + NPY_BYTE_, NPY_UBYTE_, + NPY_SHORT_, NPY_USHORT_, + NPY_INT_, NPY_UINT_, + NPY_LONG_, NPY_ULONG_, + NPY_LONGLONG_, NPY_ULONGLONG_, + NPY_FLOAT_, NPY_DOUBLE_, NPY_LONGDOUBLE_, + NPY_CFLOAT_, NPY_CDOUBLE_, NPY_CLONGDOUBLE_, + NPY_OBJECT_ = 17, + NPY_STRING_, NPY_UNICODE_, NPY_VOID_, + // Platform-dependent normalization + NPY_INT8_ = NPY_BYTE_, + NPY_UINT8_ = NPY_UBYTE_, + NPY_INT16_ = NPY_SHORT_, + NPY_UINT16_ = NPY_USHORT_, + // `npy_common.h` defines the integer aliases. In order, it checks: + // NPY_BITSOF_LONG, NPY_BITSOF_LONGLONG, NPY_BITSOF_INT, NPY_BITSOF_SHORT, NPY_BITSOF_CHAR + // and assigns the alias to the first matching size, so we should check in this order. 
+ NPY_INT32_ = platform_lookup( + NPY_LONG_, NPY_INT_, NPY_SHORT_), + NPY_UINT32_ = platform_lookup( + NPY_ULONG_, NPY_UINT_, NPY_USHORT_), + NPY_INT64_ = platform_lookup( + NPY_LONG_, NPY_LONGLONG_, NPY_INT_), + NPY_UINT64_ = platform_lookup( + NPY_ULONG_, NPY_ULONGLONG_, NPY_UINT_), + }; + + typedef struct { + Py_intptr_t *ptr; + int len; + } PyArray_Dims; + + static npy_api& get() { + static npy_api api = lookup(); + return api; + } + + bool PyArray_Check_(PyObject *obj) const { + return (bool) PyObject_TypeCheck(obj, PyArray_Type_); + } + bool PyArrayDescr_Check_(PyObject *obj) const { + return (bool) PyObject_TypeCheck(obj, PyArrayDescr_Type_); + } + + unsigned int (*PyArray_GetNDArrayCFeatureVersion_)(); + PyObject *(*PyArray_DescrFromType_)(int); + PyObject *(*PyArray_NewFromDescr_) + (PyTypeObject *, PyObject *, int, Py_intptr_t *, + Py_intptr_t *, void *, int, PyObject *); + PyObject *(*PyArray_DescrNewFromType_)(int); + int (*PyArray_CopyInto_)(PyObject *, PyObject *); + PyObject *(*PyArray_NewCopy_)(PyObject *, int); + PyTypeObject *PyArray_Type_; + PyTypeObject *PyVoidArrType_Type_; + PyTypeObject *PyArrayDescr_Type_; + PyObject *(*PyArray_DescrFromScalar_)(PyObject *); + PyObject *(*PyArray_FromAny_) (PyObject *, PyObject *, int, int, int, PyObject *); + int (*PyArray_DescrConverter_) (PyObject *, PyObject **); + bool (*PyArray_EquivTypes_) (PyObject *, PyObject *); + int (*PyArray_GetArrayParamsFromObject_)(PyObject *, PyObject *, char, PyObject **, int *, + Py_ssize_t *, PyObject **, PyObject *); + PyObject *(*PyArray_Squeeze_)(PyObject *); + int (*PyArray_SetBaseObject_)(PyObject *, PyObject *); + PyObject* (*PyArray_Resize_)(PyObject*, PyArray_Dims*, int, int); +private: + enum functions { + API_PyArray_GetNDArrayCFeatureVersion = 211, + API_PyArray_Type = 2, + API_PyArrayDescr_Type = 3, + API_PyVoidArrType_Type = 39, + API_PyArray_DescrFromType = 45, + API_PyArray_DescrFromScalar = 57, + API_PyArray_FromAny = 69, + API_PyArray_Resize = 80, + 
API_PyArray_CopyInto = 82, + API_PyArray_NewCopy = 85, + API_PyArray_NewFromDescr = 94, + API_PyArray_DescrNewFromType = 9, + API_PyArray_DescrConverter = 174, + API_PyArray_EquivTypes = 182, + API_PyArray_GetArrayParamsFromObject = 278, + API_PyArray_Squeeze = 136, + API_PyArray_SetBaseObject = 282 + }; + + static npy_api lookup() { + module m = module::import("numpy.core.multiarray"); + auto c = m.attr("_ARRAY_API"); +#if PY_MAJOR_VERSION >= 3 + void **api_ptr = (void **) PyCapsule_GetPointer(c.ptr(), NULL); +#else + void **api_ptr = (void **) PyCObject_AsVoidPtr(c.ptr()); +#endif + npy_api api; +#define DECL_NPY_API(Func) api.Func##_ = (decltype(api.Func##_)) api_ptr[API_##Func]; + DECL_NPY_API(PyArray_GetNDArrayCFeatureVersion); + if (api.PyArray_GetNDArrayCFeatureVersion_() < 0x7) + pybind11_fail("pybind11 numpy support requires numpy >= 1.7.0"); + DECL_NPY_API(PyArray_Type); + DECL_NPY_API(PyVoidArrType_Type); + DECL_NPY_API(PyArrayDescr_Type); + DECL_NPY_API(PyArray_DescrFromType); + DECL_NPY_API(PyArray_DescrFromScalar); + DECL_NPY_API(PyArray_FromAny); + DECL_NPY_API(PyArray_Resize); + DECL_NPY_API(PyArray_CopyInto); + DECL_NPY_API(PyArray_NewCopy); + DECL_NPY_API(PyArray_NewFromDescr); + DECL_NPY_API(PyArray_DescrNewFromType); + DECL_NPY_API(PyArray_DescrConverter); + DECL_NPY_API(PyArray_EquivTypes); + DECL_NPY_API(PyArray_GetArrayParamsFromObject); + DECL_NPY_API(PyArray_Squeeze); + DECL_NPY_API(PyArray_SetBaseObject); +#undef DECL_NPY_API + return api; + } +}; + +inline PyArray_Proxy* array_proxy(void* ptr) { + return reinterpret_cast(ptr); +} + +inline const PyArray_Proxy* array_proxy(const void* ptr) { + return reinterpret_cast(ptr); +} + +inline PyArrayDescr_Proxy* array_descriptor_proxy(PyObject* ptr) { + return reinterpret_cast(ptr); +} + +inline const PyArrayDescr_Proxy* array_descriptor_proxy(const PyObject* ptr) { + return reinterpret_cast(ptr); +} + +inline bool check_flags(const void* ptr, int flag) { + return (flag == 
(array_proxy(ptr)->flags & flag)); +} + +template struct is_std_array : std::false_type { }; +template struct is_std_array> : std::true_type { }; +template struct is_complex : std::false_type { }; +template struct is_complex> : std::true_type { }; + +template struct array_info_scalar { + typedef T type; + static constexpr bool is_array = false; + static constexpr bool is_empty = false; + static constexpr auto extents = _(""); + static void append_extents(list& /* shape */) { } +}; +// Computes underlying type and a comma-separated list of extents for array +// types (any mix of std::array and built-in arrays). An array of char is +// treated as scalar because it gets special handling. +template struct array_info : array_info_scalar { }; +template struct array_info> { + using type = typename array_info::type; + static constexpr bool is_array = true; + static constexpr bool is_empty = (N == 0) || array_info::is_empty; + static constexpr size_t extent = N; + + // appends the extents to shape + static void append_extents(list& shape) { + shape.append(N); + array_info::append_extents(shape); + } + + static constexpr auto extents = _::is_array>( + concat(_(), array_info::extents), _() + ); +}; +// For numpy we have special handling for arrays of characters, so we don't include +// the size in the array extents. +template struct array_info : array_info_scalar { }; +template struct array_info> : array_info_scalar> { }; +template struct array_info : array_info> { }; +template using remove_all_extents_t = typename array_info::type; + +template using is_pod_struct = all_of< + std::is_standard_layout, // since we're accessing directly in memory we need a standard layout type +#if !defined(__GNUG__) || defined(_LIBCPP_VERSION) || defined(_GLIBCXX_USE_CXX11_ABI) + // _GLIBCXX_USE_CXX11_ABI indicates that we're using libstdc++ from GCC 5 or newer, independent + // of the actual compiler (Clang can also use libstdc++, but it always defines __GNUC__ == 4). 
+ std::is_trivially_copyable, +#else + // GCC 4 doesn't implement is_trivially_copyable, so approximate it + std::is_trivially_destructible, + satisfies_any_of, +#endif + satisfies_none_of +>; + +template ssize_t byte_offset_unsafe(const Strides &) { return 0; } +template +ssize_t byte_offset_unsafe(const Strides &strides, ssize_t i, Ix... index) { + return i * strides[Dim] + byte_offset_unsafe(strides, index...); +} + +/** + * Proxy class providing unsafe, unchecked const access to array data. This is constructed through + * the `unchecked()` method of `array` or the `unchecked()` method of `array_t`. `Dims` + * will be -1 for dimensions determined at runtime. + */ +template +class unchecked_reference { +protected: + static constexpr bool Dynamic = Dims < 0; + const unsigned char *data_; + // Storing the shape & strides in local variables (i.e. these arrays) allows the compiler to + // make large performance gains on big, nested loops, but requires compile-time dimensions + conditional_t> + shape_, strides_; + const ssize_t dims_; + + friend class pybind11::array; + // Constructor for compile-time dimensions: + template + unchecked_reference(const void *data, const ssize_t *shape, const ssize_t *strides, enable_if_t) + : data_{reinterpret_cast(data)}, dims_{Dims} { + for (size_t i = 0; i < (size_t) dims_; i++) { + shape_[i] = shape[i]; + strides_[i] = strides[i]; + } + } + // Constructor for runtime dimensions: + template + unchecked_reference(const void *data, const ssize_t *shape, const ssize_t *strides, enable_if_t dims) + : data_{reinterpret_cast(data)}, shape_{shape}, strides_{strides}, dims_{dims} {} + +public: + /** + * Unchecked const reference access to data at the given indices. For a compile-time known + * number of dimensions, this requires the correct number of arguments; for run-time + * dimensionality, this is not checked (and so is up to the caller to use safely). + */ + template const T &operator()(Ix... 
index) const { + static_assert(ssize_t{sizeof...(Ix)} == Dims || Dynamic, + "Invalid number of indices for unchecked array reference"); + return *reinterpret_cast(data_ + byte_offset_unsafe(strides_, ssize_t(index)...)); + } + /** + * Unchecked const reference access to data; this operator only participates if the reference + * is to a 1-dimensional array. When present, this is exactly equivalent to `obj(index)`. + */ + template > + const T &operator[](ssize_t index) const { return operator()(index); } + + /// Pointer access to the data at the given indices. + template const T *data(Ix... ix) const { return &operator()(ssize_t(ix)...); } + + /// Returns the item size, i.e. sizeof(T) + constexpr static ssize_t itemsize() { return sizeof(T); } + + /// Returns the shape (i.e. size) of dimension `dim` + ssize_t shape(ssize_t dim) const { return shape_[(size_t) dim]; } + + /// Returns the number of dimensions of the array + ssize_t ndim() const { return dims_; } + + /// Returns the total number of elements in the referenced array, i.e. the product of the shapes + template + enable_if_t size() const { + return std::accumulate(shape_.begin(), shape_.end(), (ssize_t) 1, std::multiplies()); + } + template + enable_if_t size() const { + return std::accumulate(shape_, shape_ + ndim(), (ssize_t) 1, std::multiplies()); + } + + /// Returns the total number of bytes used by the referenced data. Note that the actual span in + /// memory may be larger if the referenced array has non-contiguous strides (e.g. for a slice). + ssize_t nbytes() const { + return size() * itemsize(); + } +}; + +template +class unchecked_mutable_reference : public unchecked_reference { + friend class pybind11::array; + using ConstBase = unchecked_reference; + using ConstBase::ConstBase; + using ConstBase::Dynamic; +public: + /// Mutable, unchecked access to data at the given indices. + template T& operator()(Ix... 
index) { + static_assert(ssize_t{sizeof...(Ix)} == Dims || Dynamic, + "Invalid number of indices for unchecked array reference"); + return const_cast(ConstBase::operator()(index...)); + } + /** + * Mutable, unchecked access data at the given index; this operator only participates if the + * reference is to a 1-dimensional array (or has runtime dimensions). When present, this is + * exactly equivalent to `obj(index)`. + */ + template > + T &operator[](ssize_t index) { return operator()(index); } + + /// Mutable pointer access to the data at the given indices. + template T *mutable_data(Ix... ix) { return &operator()(ssize_t(ix)...); } +}; + +template +struct type_caster> { + static_assert(Dim == 0 && Dim > 0 /* always fail */, "unchecked array proxy object is not castable"); +}; +template +struct type_caster> : type_caster> {}; + +NAMESPACE_END(detail) + +class dtype : public object { +public: + PYBIND11_OBJECT_DEFAULT(dtype, object, detail::npy_api::get().PyArrayDescr_Check_); + + explicit dtype(const buffer_info &info) { + dtype descr(_dtype_from_pep3118()(PYBIND11_STR_TYPE(info.format))); + // If info.itemsize == 0, use the value calculated from the format string + m_ptr = descr.strip_padding(info.itemsize ? info.itemsize : descr.itemsize()).release().ptr(); + } + + explicit dtype(const std::string &format) { + m_ptr = from_args(pybind11::str(format)).release().ptr(); + } + + dtype(const char *format) : dtype(std::string(format)) { } + + dtype(list names, list formats, list offsets, ssize_t itemsize) { + dict args; + args["names"] = names; + args["formats"] = formats; + args["offsets"] = offsets; + args["itemsize"] = pybind11::int_(itemsize); + m_ptr = from_args(args).release().ptr(); + } + + /// This is essentially the same as calling numpy.dtype(args) in Python. 
+ static dtype from_args(object args) { + PyObject *ptr = nullptr; + if (!detail::npy_api::get().PyArray_DescrConverter_(args.ptr(), &ptr) || !ptr) + throw error_already_set(); + return reinterpret_steal(ptr); + } + + /// Return dtype associated with a C++ type. + template static dtype of() { + return detail::npy_format_descriptor::type>::dtype(); + } + + /// Size of the data type in bytes. + ssize_t itemsize() const { + return detail::array_descriptor_proxy(m_ptr)->elsize; + } + + /// Returns true for structured data types. + bool has_fields() const { + return detail::array_descriptor_proxy(m_ptr)->names != nullptr; + } + + /// Single-character type code. + char kind() const { + return detail::array_descriptor_proxy(m_ptr)->kind; + } + +private: + static object _dtype_from_pep3118() { + static PyObject *obj = module::import("numpy.core._internal") + .attr("_dtype_from_pep3118").cast().release().ptr(); + return reinterpret_borrow(obj); + } + + dtype strip_padding(ssize_t itemsize) { + // Recursively strip all void fields with empty names that are generated for + // padding fields (as of NumPy v1.11). 
+ if (!has_fields()) + return *this; + + struct field_descr { PYBIND11_STR_TYPE name; object format; pybind11::int_ offset; }; + std::vector field_descriptors; + + for (auto field : attr("fields").attr("items")()) { + auto spec = field.cast(); + auto name = spec[0].cast(); + auto format = spec[1].cast()[0].cast(); + auto offset = spec[1].cast()[1].cast(); + if (!len(name) && format.kind() == 'V') + continue; + field_descriptors.push_back({(PYBIND11_STR_TYPE) name, format.strip_padding(format.itemsize()), offset}); + } + + std::sort(field_descriptors.begin(), field_descriptors.end(), + [](const field_descr& a, const field_descr& b) { + return a.offset.cast() < b.offset.cast(); + }); + + list names, formats, offsets; + for (auto& descr : field_descriptors) { + names.append(descr.name); + formats.append(descr.format); + offsets.append(descr.offset); + } + return dtype(names, formats, offsets, itemsize); + } +}; + +class array : public buffer { +public: + PYBIND11_OBJECT_CVT(array, buffer, detail::npy_api::get().PyArray_Check_, raw_array) + + enum { + c_style = detail::npy_api::NPY_ARRAY_C_CONTIGUOUS_, + f_style = detail::npy_api::NPY_ARRAY_F_CONTIGUOUS_, + forcecast = detail::npy_api::NPY_ARRAY_FORCECAST_ + }; + + array() : array({{0}}, static_cast(nullptr)) {} + + using ShapeContainer = detail::any_container; + using StridesContainer = detail::any_container; + + // Constructs an array taking shape/strides from arbitrary container types + array(const pybind11::dtype &dt, ShapeContainer shape, StridesContainer strides, + const void *ptr = nullptr, handle base = handle()) { + + if (strides->empty()) + *strides = c_strides(*shape, dt.itemsize()); + + auto ndim = shape->size(); + if (ndim != strides->size()) + pybind11_fail("NumPy: shape ndim doesn't match strides ndim"); + auto descr = dt; + + int flags = 0; + if (base && ptr) { + if (isinstance(base)) + /* Copy flags from base (except ownership bit) */ + flags = reinterpret_borrow(base).flags() & 
~detail::npy_api::NPY_ARRAY_OWNDATA_; + else + /* Writable by default, easy to downgrade later on if needed */ + flags = detail::npy_api::NPY_ARRAY_WRITEABLE_; + } + + auto &api = detail::npy_api::get(); + auto tmp = reinterpret_steal(api.PyArray_NewFromDescr_( + api.PyArray_Type_, descr.release().ptr(), (int) ndim, shape->data(), strides->data(), + const_cast(ptr), flags, nullptr)); + if (!tmp) + throw error_already_set(); + if (ptr) { + if (base) { + api.PyArray_SetBaseObject_(tmp.ptr(), base.inc_ref().ptr()); + } else { + tmp = reinterpret_steal(api.PyArray_NewCopy_(tmp.ptr(), -1 /* any order */)); + } + } + m_ptr = tmp.release().ptr(); + } + + array(const pybind11::dtype &dt, ShapeContainer shape, const void *ptr = nullptr, handle base = handle()) + : array(dt, std::move(shape), {}, ptr, base) { } + + template ::value && !std::is_same::value>> + array(const pybind11::dtype &dt, T count, const void *ptr = nullptr, handle base = handle()) + : array(dt, {{count}}, ptr, base) { } + + template + array(ShapeContainer shape, StridesContainer strides, const T *ptr, handle base = handle()) + : array(pybind11::dtype::of(), std::move(shape), std::move(strides), ptr, base) { } + + template + array(ShapeContainer shape, const T *ptr, handle base = handle()) + : array(std::move(shape), {}, ptr, base) { } + + template + explicit array(ssize_t count, const T *ptr, handle base = handle()) : array({count}, {}, ptr, base) { } + + explicit array(const buffer_info &info) + : array(pybind11::dtype(info), info.shape, info.strides, info.ptr) { } + + /// Array descriptor (dtype) + pybind11::dtype dtype() const { + return reinterpret_borrow(detail::array_proxy(m_ptr)->descr); + } + + /// Total number of elements + ssize_t size() const { + return std::accumulate(shape(), shape() + ndim(), (ssize_t) 1, std::multiplies()); + } + + /// Byte size of a single element + ssize_t itemsize() const { + return detail::array_descriptor_proxy(detail::array_proxy(m_ptr)->descr)->elsize; + } + + /// 
Total number of bytes + ssize_t nbytes() const { + return size() * itemsize(); + } + + /// Number of dimensions + ssize_t ndim() const { + return detail::array_proxy(m_ptr)->nd; + } + + /// Base object + object base() const { + return reinterpret_borrow(detail::array_proxy(m_ptr)->base); + } + + /// Dimensions of the array + const ssize_t* shape() const { + return detail::array_proxy(m_ptr)->dimensions; + } + + /// Dimension along a given axis + ssize_t shape(ssize_t dim) const { + if (dim >= ndim()) + fail_dim_check(dim, "invalid axis"); + return shape()[dim]; + } + + /// Strides of the array + const ssize_t* strides() const { + return detail::array_proxy(m_ptr)->strides; + } + + /// Stride along a given axis + ssize_t strides(ssize_t dim) const { + if (dim >= ndim()) + fail_dim_check(dim, "invalid axis"); + return strides()[dim]; + } + + /// Return the NumPy array flags + int flags() const { + return detail::array_proxy(m_ptr)->flags; + } + + /// If set, the array is writeable (otherwise the buffer is read-only) + bool writeable() const { + return detail::check_flags(m_ptr, detail::npy_api::NPY_ARRAY_WRITEABLE_); + } + + /// If set, the array owns the data (will be freed when the array is deleted) + bool owndata() const { + return detail::check_flags(m_ptr, detail::npy_api::NPY_ARRAY_OWNDATA_); + } + + /// Pointer to the contained data. If index is not provided, points to the + /// beginning of the buffer. May throw if the index would lead to out of bounds access. + template const void* data(Ix... index) const { + return static_cast(detail::array_proxy(m_ptr)->data + offset_at(index...)); + } + + /// Mutable pointer to the contained data. If index is not provided, points to the + /// beginning of the buffer. May throw if the index would lead to out of bounds access. + /// May throw if the array is not writeable. + template void* mutable_data(Ix... 
index) { + check_writeable(); + return static_cast(detail::array_proxy(m_ptr)->data + offset_at(index...)); + } + + /// Byte offset from beginning of the array to a given index (full or partial). + /// May throw if the index would lead to out of bounds access. + template ssize_t offset_at(Ix... index) const { + if ((ssize_t) sizeof...(index) > ndim()) + fail_dim_check(sizeof...(index), "too many indices for an array"); + return byte_offset(ssize_t(index)...); + } + + ssize_t offset_at() const { return 0; } + + /// Item count from beginning of the array to a given index (full or partial). + /// May throw if the index would lead to out of bounds access. + template ssize_t index_at(Ix... index) const { + return offset_at(index...) / itemsize(); + } + + /** + * Returns a proxy object that provides access to the array's data without bounds or + * dimensionality checking. Will throw if the array is missing the `writeable` flag. Use with + * care: the array must not be destroyed or reshaped for the duration of the returned object, + * and the caller must take care not to access invalid dimensions or dimension indices. + */ + template detail::unchecked_mutable_reference mutable_unchecked() & { + if (Dims >= 0 && ndim() != Dims) + throw std::domain_error("array has incorrect number of dimensions: " + std::to_string(ndim()) + + "; expected " + std::to_string(Dims)); + return detail::unchecked_mutable_reference(mutable_data(), shape(), strides(), ndim()); + } + + /** + * Returns a proxy object that provides const access to the array's data without bounds or + * dimensionality checking. Unlike `mutable_unchecked()`, this does not require that the + * underlying array have the `writable` flag. Use with care: the array must not be destroyed or + * reshaped for the duration of the returned object, and the caller must take care not to access + * invalid dimensions or dimension indices. 
+ */ + template detail::unchecked_reference unchecked() const & { + if (Dims >= 0 && ndim() != Dims) + throw std::domain_error("array has incorrect number of dimensions: " + std::to_string(ndim()) + + "; expected " + std::to_string(Dims)); + return detail::unchecked_reference(data(), shape(), strides(), ndim()); + } + + /// Return a new view with all of the dimensions of length 1 removed + array squeeze() { + auto& api = detail::npy_api::get(); + return reinterpret_steal(api.PyArray_Squeeze_(m_ptr)); + } + + /// Resize array to given shape + /// If refcheck is true and more that one reference exist to this array + /// then resize will succeed only if it makes a reshape, i.e. original size doesn't change + void resize(ShapeContainer new_shape, bool refcheck = true) { + detail::npy_api::PyArray_Dims d = { + new_shape->data(), int(new_shape->size()) + }; + // try to resize, set ordering param to -1 cause it's not used anyway + object new_array = reinterpret_steal( + detail::npy_api::get().PyArray_Resize_(m_ptr, &d, int(refcheck), -1) + ); + if (!new_array) throw error_already_set(); + if (isinstance(new_array)) { *this = std::move(new_array); } + } + + /// Ensure that the argument is a NumPy array + /// In case of an error, nullptr is returned and the Python error is cleared. + static array ensure(handle h, int ExtraFlags = 0) { + auto result = reinterpret_steal(raw_array(h.ptr(), ExtraFlags)); + if (!result) + PyErr_Clear(); + return result; + } + +protected: + template friend struct detail::npy_format_descriptor; + + void fail_dim_check(ssize_t dim, const std::string& msg) const { + throw index_error(msg + ": " + std::to_string(dim) + + " (ndim = " + std::to_string(ndim()) + ")"); + } + + template ssize_t byte_offset(Ix... 
index) const { + check_dimensions(index...); + return detail::byte_offset_unsafe(strides(), ssize_t(index)...); + } + + void check_writeable() const { + if (!writeable()) + throw std::domain_error("array is not writeable"); + } + + // Default, C-style strides + static std::vector c_strides(const std::vector &shape, ssize_t itemsize) { + auto ndim = shape.size(); + std::vector strides(ndim, itemsize); + if (ndim > 0) + for (size_t i = ndim - 1; i > 0; --i) + strides[i - 1] = strides[i] * shape[i]; + return strides; + } + + // F-style strides; default when constructing an array_t with `ExtraFlags & f_style` + static std::vector f_strides(const std::vector &shape, ssize_t itemsize) { + auto ndim = shape.size(); + std::vector strides(ndim, itemsize); + for (size_t i = 1; i < ndim; ++i) + strides[i] = strides[i - 1] * shape[i - 1]; + return strides; + } + + template void check_dimensions(Ix... index) const { + check_dimensions_impl(ssize_t(0), shape(), ssize_t(index)...); + } + + void check_dimensions_impl(ssize_t, const ssize_t*) const { } + + template void check_dimensions_impl(ssize_t axis, const ssize_t* shape, ssize_t i, Ix... 
index) const { + if (i >= *shape) { + throw index_error(std::string("index ") + std::to_string(i) + + " is out of bounds for axis " + std::to_string(axis) + + " with size " + std::to_string(*shape)); + } + check_dimensions_impl(axis + 1, shape + 1, index...); + } + + /// Create array from any object -- always returns a new reference + static PyObject *raw_array(PyObject *ptr, int ExtraFlags = 0) { + if (ptr == nullptr) { + PyErr_SetString(PyExc_ValueError, "cannot create a pybind11::array from a nullptr"); + return nullptr; + } + return detail::npy_api::get().PyArray_FromAny_( + ptr, nullptr, 0, 0, detail::npy_api::NPY_ARRAY_ENSUREARRAY_ | ExtraFlags, nullptr); + } +}; + +template class array_t : public array { +private: + struct private_ctor {}; + // Delegating constructor needed when both moving and accessing in the same constructor + array_t(private_ctor, ShapeContainer &&shape, StridesContainer &&strides, const T *ptr, handle base) + : array(std::move(shape), std::move(strides), ptr, base) {} +public: + static_assert(!detail::array_info::is_array, "Array types cannot be used with array_t"); + + using value_type = T; + + array_t() : array(0, static_cast(nullptr)) {} + array_t(handle h, borrowed_t) : array(h, borrowed_t{}) { } + array_t(handle h, stolen_t) : array(h, stolen_t{}) { } + + PYBIND11_DEPRECATED("Use array_t::ensure() instead") + array_t(handle h, bool is_borrowed) : array(raw_array_t(h.ptr()), stolen_t{}) { + if (!m_ptr) PyErr_Clear(); + if (!is_borrowed) Py_XDECREF(h.ptr()); + } + + array_t(const object &o) : array(raw_array_t(o.ptr()), stolen_t{}) { + if (!m_ptr) throw error_already_set(); + } + + explicit array_t(const buffer_info& info) : array(info) { } + + array_t(ShapeContainer shape, StridesContainer strides, const T *ptr = nullptr, handle base = handle()) + : array(std::move(shape), std::move(strides), ptr, base) { } + + explicit array_t(ShapeContainer shape, const T *ptr = nullptr, handle base = handle()) + : array_t(private_ctor{}, 
std::move(shape), + ExtraFlags & f_style ? f_strides(*shape, itemsize()) : c_strides(*shape, itemsize()), + ptr, base) { } + + explicit array_t(size_t count, const T *ptr = nullptr, handle base = handle()) + : array({count}, {}, ptr, base) { } + + constexpr ssize_t itemsize() const { + return sizeof(T); + } + + template ssize_t index_at(Ix... index) const { + return offset_at(index...) / itemsize(); + } + + template const T* data(Ix... index) const { + return static_cast(array::data(index...)); + } + + template T* mutable_data(Ix... index) { + return static_cast(array::mutable_data(index...)); + } + + // Reference to element at a given index + template const T& at(Ix... index) const { + if ((ssize_t) sizeof...(index) != ndim()) + fail_dim_check(sizeof...(index), "index dimension mismatch"); + return *(static_cast(array::data()) + byte_offset(ssize_t(index)...) / itemsize()); + } + + // Mutable reference to element at a given index + template T& mutable_at(Ix... index) { + if ((ssize_t) sizeof...(index) != ndim()) + fail_dim_check(sizeof...(index), "index dimension mismatch"); + return *(static_cast(array::mutable_data()) + byte_offset(ssize_t(index)...) / itemsize()); + } + + /** + * Returns a proxy object that provides access to the array's data without bounds or + * dimensionality checking. Will throw if the array is missing the `writeable` flag. Use with + * care: the array must not be destroyed or reshaped for the duration of the returned object, + * and the caller must take care not to access invalid dimensions or dimension indices. + */ + template detail::unchecked_mutable_reference mutable_unchecked() & { + return array::mutable_unchecked(); + } + + /** + * Returns a proxy object that provides const access to the array's data without bounds or + * dimensionality checking. Unlike `unchecked()`, this does not require that the underlying + * array have the `writable` flag. 
Use with care: the array must not be destroyed or reshaped + * for the duration of the returned object, and the caller must take care not to access invalid + * dimensions or dimension indices. + */ + template detail::unchecked_reference unchecked() const & { + return array::unchecked(); + } + + /// Ensure that the argument is a NumPy array of the correct dtype (and if not, try to convert + /// it). In case of an error, nullptr is returned and the Python error is cleared. + static array_t ensure(handle h) { + auto result = reinterpret_steal(raw_array_t(h.ptr())); + if (!result) + PyErr_Clear(); + return result; + } + + static bool check_(handle h) { + const auto &api = detail::npy_api::get(); + return api.PyArray_Check_(h.ptr()) + && api.PyArray_EquivTypes_(detail::array_proxy(h.ptr())->descr, dtype::of().ptr()); + } + +protected: + /// Create array from any object -- always returns a new reference + static PyObject *raw_array_t(PyObject *ptr) { + if (ptr == nullptr) { + PyErr_SetString(PyExc_ValueError, "cannot create a pybind11::array_t from a nullptr"); + return nullptr; + } + return detail::npy_api::get().PyArray_FromAny_( + ptr, dtype::of().release().ptr(), 0, 0, + detail::npy_api::NPY_ARRAY_ENSUREARRAY_ | ExtraFlags, nullptr); + } +}; + +template +struct format_descriptor::value>> { + static std::string format() { + return detail::npy_format_descriptor::type>::format(); + } +}; + +template struct format_descriptor { + static std::string format() { return std::to_string(N) + "s"; } +}; +template struct format_descriptor> { + static std::string format() { return std::to_string(N) + "s"; } +}; + +template +struct format_descriptor::value>> { + static std::string format() { + return format_descriptor< + typename std::remove_cv::type>::type>::format(); + } +}; + +template +struct format_descriptor::is_array>> { + static std::string format() { + using namespace detail; + static constexpr auto extents = _("(") + array_info::extents + _(")"); + return extents.text + 
format_descriptor>::format(); + } +}; + +NAMESPACE_BEGIN(detail) +template +struct pyobject_caster> { + using type = array_t; + + bool load(handle src, bool convert) { + if (!convert && !type::check_(src)) + return false; + value = type::ensure(src); + return static_cast(value); + } + + static handle cast(const handle &src, return_value_policy /* policy */, handle /* parent */) { + return src.inc_ref(); + } + PYBIND11_TYPE_CASTER(type, handle_type_name::name); +}; + +template +struct compare_buffer_info::value>> { + static bool compare(const buffer_info& b) { + return npy_api::get().PyArray_EquivTypes_(dtype::of().ptr(), dtype(b).ptr()); + } +}; + +template +struct npy_format_descriptor_name; + +template +struct npy_format_descriptor_name::value>> { + static constexpr auto name = _::value>( + _("bool"), _::value>("int", "uint") + _() + ); +}; + +template +struct npy_format_descriptor_name::value>> { + static constexpr auto name = _::value || std::is_same::value>( + _("float") + _(), _("longdouble") + ); +}; + +template +struct npy_format_descriptor_name::value>> { + static constexpr auto name = _::value + || std::is_same::value>( + _("complex") + _(), _("longcomplex") + ); +}; + +template +struct npy_format_descriptor::value>> + : npy_format_descriptor_name { +private: + // NB: the order here must match the one in common.h + constexpr static const int values[15] = { + npy_api::NPY_BOOL_, + npy_api::NPY_BYTE_, npy_api::NPY_UBYTE_, npy_api::NPY_INT16_, npy_api::NPY_UINT16_, + npy_api::NPY_INT32_, npy_api::NPY_UINT32_, npy_api::NPY_INT64_, npy_api::NPY_UINT64_, + npy_api::NPY_FLOAT_, npy_api::NPY_DOUBLE_, npy_api::NPY_LONGDOUBLE_, + npy_api::NPY_CFLOAT_, npy_api::NPY_CDOUBLE_, npy_api::NPY_CLONGDOUBLE_ + }; + +public: + static constexpr int value = values[detail::is_fmt_numeric::index]; + + static pybind11::dtype dtype() { + if (auto ptr = npy_api::get().PyArray_DescrFromType_(value)) + return reinterpret_steal(ptr); + pybind11_fail("Unsupported buffer format!"); + } 
+}; + +#define PYBIND11_DECL_CHAR_FMT \ + static constexpr auto name = _("S") + _(); \ + static pybind11::dtype dtype() { return pybind11::dtype(std::string("S") + std::to_string(N)); } +template struct npy_format_descriptor { PYBIND11_DECL_CHAR_FMT }; +template struct npy_format_descriptor> { PYBIND11_DECL_CHAR_FMT }; +#undef PYBIND11_DECL_CHAR_FMT + +template struct npy_format_descriptor::is_array>> { +private: + using base_descr = npy_format_descriptor::type>; +public: + static_assert(!array_info::is_empty, "Zero-sized arrays are not supported"); + + static constexpr auto name = _("(") + array_info::extents + _(")") + base_descr::name; + static pybind11::dtype dtype() { + list shape; + array_info::append_extents(shape); + return pybind11::dtype::from_args(pybind11::make_tuple(base_descr::dtype(), shape)); + } +}; + +template struct npy_format_descriptor::value>> { +private: + using base_descr = npy_format_descriptor::type>; +public: + static constexpr auto name = base_descr::name; + static pybind11::dtype dtype() { return base_descr::dtype(); } +}; + +struct field_descriptor { + const char *name; + ssize_t offset; + ssize_t size; + std::string format; + dtype descr; +}; + +inline PYBIND11_NOINLINE void register_structured_dtype( + any_container fields, + const std::type_info& tinfo, ssize_t itemsize, + bool (*direct_converter)(PyObject *, void *&)) { + + auto& numpy_internals = get_numpy_internals(); + if (numpy_internals.get_type_info(tinfo, false)) + pybind11_fail("NumPy: dtype is already registered"); + + // Use ordered fields because order matters as of NumPy 1.14: + // https://docs.scipy.org/doc/numpy/release.html#multiple-field-indexing-assignment-of-structured-arrays + std::vector ordered_fields(std::move(fields)); + std::sort(ordered_fields.begin(), ordered_fields.end(), + [](const field_descriptor &a, const field_descriptor &b) { return a.offset < b.offset; }); + + list names, formats, offsets; + for (auto& field : ordered_fields) { + if (!field.descr) 
+ pybind11_fail(std::string("NumPy: unsupported field dtype: `") + + field.name + "` @ " + tinfo.name()); + names.append(PYBIND11_STR_TYPE(field.name)); + formats.append(field.descr); + offsets.append(pybind11::int_(field.offset)); + } + auto dtype_ptr = pybind11::dtype(names, formats, offsets, itemsize).release().ptr(); + + // There is an existing bug in NumPy (as of v1.11): trailing bytes are + // not encoded explicitly into the format string. This will supposedly + // get fixed in v1.12; for further details, see these: + // - https://github.com/numpy/numpy/issues/7797 + // - https://github.com/numpy/numpy/pull/7798 + // Because of this, we won't use numpy's logic to generate buffer format + // strings and will just do it ourselves. + ssize_t offset = 0; + std::ostringstream oss; + // mark the structure as unaligned with '^', because numpy and C++ don't + // always agree about alignment (particularly for complex), and we're + // explicitly listing all our padding. This depends on none of the fields + // overriding the endianness. 
Putting the ^ in front of individual fields + // isn't guaranteed to work due to https://github.com/numpy/numpy/issues/9049 + oss << "^T{"; + for (auto& field : ordered_fields) { + if (field.offset > offset) + oss << (field.offset - offset) << 'x'; + oss << field.format << ':' << field.name << ':'; + offset = field.offset + field.size; + } + if (itemsize > offset) + oss << (itemsize - offset) << 'x'; + oss << '}'; + auto format_str = oss.str(); + + // Sanity check: verify that NumPy properly parses our buffer format string + auto& api = npy_api::get(); + auto arr = array(buffer_info(nullptr, itemsize, format_str, 1)); + if (!api.PyArray_EquivTypes_(dtype_ptr, arr.dtype().ptr())) + pybind11_fail("NumPy: invalid buffer descriptor!"); + + auto tindex = std::type_index(tinfo); + numpy_internals.registered_dtypes[tindex] = { dtype_ptr, format_str }; + get_internals().direct_conversions[tindex].push_back(direct_converter); +} + +template struct npy_format_descriptor { + static_assert(is_pod_struct::value, "Attempt to use a non-POD or unimplemented POD type as a numpy dtype"); + + static constexpr auto name = make_caster::name; + + static pybind11::dtype dtype() { + return reinterpret_borrow(dtype_ptr()); + } + + static std::string format() { + static auto format_str = get_numpy_internals().get_type_info(true)->format_str; + return format_str; + } + + static void register_dtype(any_container fields) { + register_structured_dtype(std::move(fields), typeid(typename std::remove_cv::type), + sizeof(T), &direct_converter); + } + +private: + static PyObject* dtype_ptr() { + static PyObject* ptr = get_numpy_internals().get_type_info(true)->dtype_ptr; + return ptr; + } + + static bool direct_converter(PyObject *obj, void*& value) { + auto& api = npy_api::get(); + if (!PyObject_TypeCheck(obj, api.PyVoidArrType_Type_)) + return false; + if (auto descr = reinterpret_steal(api.PyArray_DescrFromScalar_(obj))) { + if (api.PyArray_EquivTypes_(dtype_ptr(), descr.ptr())) { + value = 
((PyVoidScalarObject_Proxy *) obj)->obval; + return true; + } + } + return false; + } +}; + +#ifdef __CLION_IDE__ // replace heavy macro with dummy code for the IDE (doesn't affect code) +# define PYBIND11_NUMPY_DTYPE(Type, ...) ((void)0) +# define PYBIND11_NUMPY_DTYPE_EX(Type, ...) ((void)0) +#else + +#define PYBIND11_FIELD_DESCRIPTOR_EX(T, Field, Name) \ + ::pybind11::detail::field_descriptor { \ + Name, offsetof(T, Field), sizeof(decltype(std::declval().Field)), \ + ::pybind11::format_descriptor().Field)>::format(), \ + ::pybind11::detail::npy_format_descriptor().Field)>::dtype() \ + } + +// Extract name, offset and format descriptor for a struct field +#define PYBIND11_FIELD_DESCRIPTOR(T, Field) PYBIND11_FIELD_DESCRIPTOR_EX(T, Field, #Field) + +// The main idea of this macro is borrowed from https://github.com/swansontec/map-macro +// (C) William Swanson, Paul Fultz +#define PYBIND11_EVAL0(...) __VA_ARGS__ +#define PYBIND11_EVAL1(...) PYBIND11_EVAL0 (PYBIND11_EVAL0 (PYBIND11_EVAL0 (__VA_ARGS__))) +#define PYBIND11_EVAL2(...) PYBIND11_EVAL1 (PYBIND11_EVAL1 (PYBIND11_EVAL1 (__VA_ARGS__))) +#define PYBIND11_EVAL3(...) PYBIND11_EVAL2 (PYBIND11_EVAL2 (PYBIND11_EVAL2 (__VA_ARGS__))) +#define PYBIND11_EVAL4(...) PYBIND11_EVAL3 (PYBIND11_EVAL3 (PYBIND11_EVAL3 (__VA_ARGS__))) +#define PYBIND11_EVAL(...) PYBIND11_EVAL4 (PYBIND11_EVAL4 (PYBIND11_EVAL4 (__VA_ARGS__))) +#define PYBIND11_MAP_END(...) +#define PYBIND11_MAP_OUT +#define PYBIND11_MAP_COMMA , +#define PYBIND11_MAP_GET_END() 0, PYBIND11_MAP_END +#define PYBIND11_MAP_NEXT0(test, next, ...) 
next PYBIND11_MAP_OUT +#define PYBIND11_MAP_NEXT1(test, next) PYBIND11_MAP_NEXT0 (test, next, 0) +#define PYBIND11_MAP_NEXT(test, next) PYBIND11_MAP_NEXT1 (PYBIND11_MAP_GET_END test, next) +#ifdef _MSC_VER // MSVC is not as eager to expand macros, hence this workaround +#define PYBIND11_MAP_LIST_NEXT1(test, next) \ + PYBIND11_EVAL0 (PYBIND11_MAP_NEXT0 (test, PYBIND11_MAP_COMMA next, 0)) +#else +#define PYBIND11_MAP_LIST_NEXT1(test, next) \ + PYBIND11_MAP_NEXT0 (test, PYBIND11_MAP_COMMA next, 0) +#endif +#define PYBIND11_MAP_LIST_NEXT(test, next) \ + PYBIND11_MAP_LIST_NEXT1 (PYBIND11_MAP_GET_END test, next) +#define PYBIND11_MAP_LIST0(f, t, x, peek, ...) \ + f(t, x) PYBIND11_MAP_LIST_NEXT (peek, PYBIND11_MAP_LIST1) (f, t, peek, __VA_ARGS__) +#define PYBIND11_MAP_LIST1(f, t, x, peek, ...) \ + f(t, x) PYBIND11_MAP_LIST_NEXT (peek, PYBIND11_MAP_LIST0) (f, t, peek, __VA_ARGS__) +// PYBIND11_MAP_LIST(f, t, a1, a2, ...) expands to f(t, a1), f(t, a2), ... +#define PYBIND11_MAP_LIST(f, t, ...) \ + PYBIND11_EVAL (PYBIND11_MAP_LIST1 (f, t, __VA_ARGS__, (), 0)) + +#define PYBIND11_NUMPY_DTYPE(Type, ...) \ + ::pybind11::detail::npy_format_descriptor::register_dtype \ + (::std::vector<::pybind11::detail::field_descriptor> \ + {PYBIND11_MAP_LIST (PYBIND11_FIELD_DESCRIPTOR, Type, __VA_ARGS__)}) + +#ifdef _MSC_VER +#define PYBIND11_MAP2_LIST_NEXT1(test, next) \ + PYBIND11_EVAL0 (PYBIND11_MAP_NEXT0 (test, PYBIND11_MAP_COMMA next, 0)) +#else +#define PYBIND11_MAP2_LIST_NEXT1(test, next) \ + PYBIND11_MAP_NEXT0 (test, PYBIND11_MAP_COMMA next, 0) +#endif +#define PYBIND11_MAP2_LIST_NEXT(test, next) \ + PYBIND11_MAP2_LIST_NEXT1 (PYBIND11_MAP_GET_END test, next) +#define PYBIND11_MAP2_LIST0(f, t, x1, x2, peek, ...) \ + f(t, x1, x2) PYBIND11_MAP2_LIST_NEXT (peek, PYBIND11_MAP2_LIST1) (f, t, peek, __VA_ARGS__) +#define PYBIND11_MAP2_LIST1(f, t, x1, x2, peek, ...) 
\ + f(t, x1, x2) PYBIND11_MAP2_LIST_NEXT (peek, PYBIND11_MAP2_LIST0) (f, t, peek, __VA_ARGS__) +// PYBIND11_MAP2_LIST(f, t, a1, a2, ...) expands to f(t, a1, a2), f(t, a3, a4), ... +#define PYBIND11_MAP2_LIST(f, t, ...) \ + PYBIND11_EVAL (PYBIND11_MAP2_LIST1 (f, t, __VA_ARGS__, (), 0)) + +#define PYBIND11_NUMPY_DTYPE_EX(Type, ...) \ + ::pybind11::detail::npy_format_descriptor::register_dtype \ + (::std::vector<::pybind11::detail::field_descriptor> \ + {PYBIND11_MAP2_LIST (PYBIND11_FIELD_DESCRIPTOR_EX, Type, __VA_ARGS__)}) + +#endif // __CLION_IDE__ + +template +using array_iterator = typename std::add_pointer::type; + +template +array_iterator array_begin(const buffer_info& buffer) { + return array_iterator(reinterpret_cast(buffer.ptr)); +} + +template +array_iterator array_end(const buffer_info& buffer) { + return array_iterator(reinterpret_cast(buffer.ptr) + buffer.size); +} + +class common_iterator { +public: + using container_type = std::vector; + using value_type = container_type::value_type; + using size_type = container_type::size_type; + + common_iterator() : p_ptr(0), m_strides() {} + + common_iterator(void* ptr, const container_type& strides, const container_type& shape) + : p_ptr(reinterpret_cast(ptr)), m_strides(strides.size()) { + m_strides.back() = static_cast(strides.back()); + for (size_type i = m_strides.size() - 1; i != 0; --i) { + size_type j = i - 1; + value_type s = static_cast(shape[i]); + m_strides[j] = strides[j] + m_strides[i] - strides[i] * s; + } + } + + void increment(size_type dim) { + p_ptr += m_strides[dim]; + } + + void* data() const { + return p_ptr; + } + +private: + char* p_ptr; + container_type m_strides; +}; + +template class multi_array_iterator { +public: + using container_type = std::vector; + + multi_array_iterator(const std::array &buffers, + const container_type &shape) + : m_shape(shape.size()), m_index(shape.size(), 0), + m_common_iterator() { + + // Manual copy to avoid conversion warning if using std::copy + for (size_t 
i = 0; i < shape.size(); ++i) + m_shape[i] = shape[i]; + + container_type strides(shape.size()); + for (size_t i = 0; i < N; ++i) + init_common_iterator(buffers[i], shape, m_common_iterator[i], strides); + } + + multi_array_iterator& operator++() { + for (size_t j = m_index.size(); j != 0; --j) { + size_t i = j - 1; + if (++m_index[i] != m_shape[i]) { + increment_common_iterator(i); + break; + } else { + m_index[i] = 0; + } + } + return *this; + } + + template T* data() const { + return reinterpret_cast(m_common_iterator[K].data()); + } + +private: + + using common_iter = common_iterator; + + void init_common_iterator(const buffer_info &buffer, + const container_type &shape, + common_iter &iterator, + container_type &strides) { + auto buffer_shape_iter = buffer.shape.rbegin(); + auto buffer_strides_iter = buffer.strides.rbegin(); + auto shape_iter = shape.rbegin(); + auto strides_iter = strides.rbegin(); + + while (buffer_shape_iter != buffer.shape.rend()) { + if (*shape_iter == *buffer_shape_iter) + *strides_iter = *buffer_strides_iter; + else + *strides_iter = 0; + + ++buffer_shape_iter; + ++buffer_strides_iter; + ++shape_iter; + ++strides_iter; + } + + std::fill(strides_iter, strides.rend(), 0); + iterator = common_iter(buffer.ptr, strides, shape); + } + + void increment_common_iterator(size_t dim) { + for (auto &iter : m_common_iterator) + iter.increment(dim); + } + + container_type m_shape; + container_type m_index; + std::array m_common_iterator; +}; + +enum class broadcast_trivial { non_trivial, c_trivial, f_trivial }; + +// Populates the shape and number of dimensions for the set of buffers. Returns a broadcast_trivial +// enum value indicating whether the broadcast is "trivial"--that is, has each buffer being either a +// singleton or a full-size, C-contiguous (`c_trivial`) or Fortran-contiguous (`f_trivial`) storage +// buffer; returns `non_trivial` otherwise. 
+template +broadcast_trivial broadcast(const std::array &buffers, ssize_t &ndim, std::vector &shape) { + ndim = std::accumulate(buffers.begin(), buffers.end(), ssize_t(0), [](ssize_t res, const buffer_info &buf) { + return std::max(res, buf.ndim); + }); + + shape.clear(); + shape.resize((size_t) ndim, 1); + + // Figure out the output size, and make sure all input arrays conform (i.e. are either size 1 or + // the full size). + for (size_t i = 0; i < N; ++i) { + auto res_iter = shape.rbegin(); + auto end = buffers[i].shape.rend(); + for (auto shape_iter = buffers[i].shape.rbegin(); shape_iter != end; ++shape_iter, ++res_iter) { + const auto &dim_size_in = *shape_iter; + auto &dim_size_out = *res_iter; + + // Each input dimension can either be 1 or `n`, but `n` values must match across buffers + if (dim_size_out == 1) + dim_size_out = dim_size_in; + else if (dim_size_in != 1 && dim_size_in != dim_size_out) + pybind11_fail("pybind11::vectorize: incompatible size/dimension of inputs!"); + } + } + + bool trivial_broadcast_c = true; + bool trivial_broadcast_f = true; + for (size_t i = 0; i < N && (trivial_broadcast_c || trivial_broadcast_f); ++i) { + if (buffers[i].size == 1) + continue; + + // Require the same number of dimensions: + if (buffers[i].ndim != ndim) + return broadcast_trivial::non_trivial; + + // Require all dimensions be full-size: + if (!std::equal(buffers[i].shape.cbegin(), buffers[i].shape.cend(), shape.cbegin())) + return broadcast_trivial::non_trivial; + + // Check for C contiguity (but only if previous inputs were also C contiguous) + if (trivial_broadcast_c) { + ssize_t expect_stride = buffers[i].itemsize; + auto end = buffers[i].shape.crend(); + for (auto shape_iter = buffers[i].shape.crbegin(), stride_iter = buffers[i].strides.crbegin(); + trivial_broadcast_c && shape_iter != end; ++shape_iter, ++stride_iter) { + if (expect_stride == *stride_iter) + expect_stride *= *shape_iter; + else + trivial_broadcast_c = false; + } + } + + // Check for 
Fortran contiguity (if previous inputs were also F contiguous) + if (trivial_broadcast_f) { + ssize_t expect_stride = buffers[i].itemsize; + auto end = buffers[i].shape.cend(); + for (auto shape_iter = buffers[i].shape.cbegin(), stride_iter = buffers[i].strides.cbegin(); + trivial_broadcast_f && shape_iter != end; ++shape_iter, ++stride_iter) { + if (expect_stride == *stride_iter) + expect_stride *= *shape_iter; + else + trivial_broadcast_f = false; + } + } + } + + return + trivial_broadcast_c ? broadcast_trivial::c_trivial : + trivial_broadcast_f ? broadcast_trivial::f_trivial : + broadcast_trivial::non_trivial; +} + +template +struct vectorize_arg { + static_assert(!std::is_rvalue_reference::value, "Functions with rvalue reference arguments cannot be vectorized"); + // The wrapped function gets called with this type: + using call_type = remove_reference_t; + // Is this a vectorized argument? + static constexpr bool vectorize = + satisfies_any_of::value && + satisfies_none_of::value && + (!std::is_reference::value || + (std::is_lvalue_reference::value && std::is_const::value)); + // Accept this type: an array for vectorized types, otherwise the type as-is: + using type = conditional_t, array::forcecast>, T>; +}; + +template +struct vectorize_helper { +private: + static constexpr size_t N = sizeof...(Args); + static constexpr size_t NVectorized = constexpr_sum(vectorize_arg::vectorize...); + static_assert(NVectorized >= 1, + "pybind11::vectorize(...) requires a function with at least one vectorizable argument"); + +public: + template + explicit vectorize_helper(T &&f) : f(std::forward(f)) { } + + object operator()(typename vectorize_arg::type... args) { + return run(args..., + make_index_sequence(), + select_indices::vectorize...>(), + make_index_sequence()); + } + +private: + remove_reference_t f; + + // Internal compiler error in MSVC 19.16.27025.1 (Visual Studio 2017 15.9.4), when compiling with "/permissive-" flag + // when arg_call_types is manually inlined. 
+ using arg_call_types = std::tuple::call_type...>; + template using param_n_t = typename std::tuple_element::type; + + // Runs a vectorized function given arguments tuple and three index sequences: + // - Index is the full set of 0 ... (N-1) argument indices; + // - VIndex is the subset of argument indices with vectorized parameters, letting us access + // vectorized arguments (anything not in this sequence is passed through) + // - BIndex is a incremental sequence (beginning at 0) of the same size as VIndex, so that + // we can store vectorized buffer_infos in an array (argument VIndex has its buffer at + // index BIndex in the array). + template object run( + typename vectorize_arg::type &...args, + index_sequence i_seq, index_sequence vi_seq, index_sequence bi_seq) { + + // Pointers to values the function was called with; the vectorized ones set here will start + // out as array_t pointers, but they will be changed them to T pointers before we make + // call the wrapped function. Non-vectorized pointers are left as-is. + std::array params{{ &args... }}; + + // The array of `buffer_info`s of vectorized arguments: + std::array buffers{{ reinterpret_cast(params[VIndex])->request()... }}; + + /* Determine dimensions parameters of output array */ + ssize_t nd = 0; + std::vector shape(0); + auto trivial = broadcast(buffers, nd, shape); + size_t ndim = (size_t) nd; + + size_t size = std::accumulate(shape.begin(), shape.end(), (size_t) 1, std::multiplies()); + + // If all arguments are 0-dimension arrays (i.e. single values) return a plain value (i.e. + // not wrapped in an array). 
+ if (size == 1 && ndim == 0) { + PYBIND11_EXPAND_SIDE_EFFECTS(params[VIndex] = buffers[BIndex].ptr); + return cast(f(*reinterpret_cast *>(params[Index])...)); + } + + array_t result; + if (trivial == broadcast_trivial::f_trivial) result = array_t(shape); + else result = array_t(shape); + + if (size == 0) return std::move(result); + + /* Call the function */ + if (trivial == broadcast_trivial::non_trivial) + apply_broadcast(buffers, params, result, i_seq, vi_seq, bi_seq); + else + apply_trivial(buffers, params, result.mutable_data(), size, i_seq, vi_seq, bi_seq); + + return std::move(result); + } + + template + void apply_trivial(std::array &buffers, + std::array ¶ms, + Return *out, + size_t size, + index_sequence, index_sequence, index_sequence) { + + // Initialize an array of mutable byte references and sizes with references set to the + // appropriate pointer in `params`; as we iterate, we'll increment each pointer by its size + // (except for singletons, which get an increment of 0). + std::array, NVectorized> vecparams{{ + std::pair( + reinterpret_cast(params[VIndex] = buffers[BIndex].ptr), + buffers[BIndex].size == 1 ? 0 : sizeof(param_n_t) + )... 
+ }}; + + for (size_t i = 0; i < size; ++i) { + out[i] = f(*reinterpret_cast *>(params[Index])...); + for (auto &x : vecparams) x.first += x.second; + } + } + + template + void apply_broadcast(std::array &buffers, + std::array ¶ms, + array_t &output_array, + index_sequence, index_sequence, index_sequence) { + + buffer_info output = output_array.request(); + multi_array_iterator input_iter(buffers, output.shape); + + for (array_iterator iter = array_begin(output), end = array_end(output); + iter != end; + ++iter, ++input_iter) { + PYBIND11_EXPAND_SIDE_EFFECTS(( + params[VIndex] = input_iter.template data() + )); + *iter = f(*reinterpret_cast *>(std::get(params))...); + } + } +}; + +template +vectorize_helper +vectorize_extractor(const Func &f, Return (*) (Args ...)) { + return detail::vectorize_helper(f); +} + +template struct handle_type_name> { + static constexpr auto name = _("numpy.ndarray[") + npy_format_descriptor::name + _("]"); +}; + +NAMESPACE_END(detail) + +// Vanilla pointer vectorizer: +template +detail::vectorize_helper +vectorize(Return (*f) (Args ...)) { + return detail::vectorize_helper(f); +} + +// lambda vectorizer: +template ::value, int> = 0> +auto vectorize(Func &&f) -> decltype( + detail::vectorize_extractor(std::forward(f), (detail::function_signature_t *) nullptr)) { + return detail::vectorize_extractor(std::forward(f), (detail::function_signature_t *) nullptr); +} + +// Vectorize a class method (non-const): +template ())), Return, Class *, Args...>> +Helper vectorize(Return (Class::*f)(Args...)) { + return Helper(std::mem_fn(f)); +} + +// Vectorize a class method (const): +template ())), Return, const Class *, Args...>> +Helper vectorize(Return (Class::*f)(Args...) 
const) { + return Helper(std::mem_fn(f)); +} + +NAMESPACE_END(PYBIND11_NAMESPACE) + +#if defined(_MSC_VER) +#pragma warning(pop) +#endif diff --git a/thirdparty/pybind11/include/pybind11/operators.h b/thirdparty/pybind11/include/pybind11/operators.h new file mode 100644 index 000000000..b3dd62c3b --- /dev/null +++ b/thirdparty/pybind11/include/pybind11/operators.h @@ -0,0 +1,168 @@ +/* + pybind11/operator.h: Metatemplates for operator overloading + + Copyright (c) 2016 Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. +*/ + +#pragma once + +#include "pybind11.h" + +#if defined(__clang__) && !defined(__INTEL_COMPILER) +# pragma clang diagnostic ignored "-Wunsequenced" // multiple unsequenced modifications to 'self' (when using def(py::self OP Type())) +#elif defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable: 4127) // warning C4127: Conditional expression is constant +#endif + +NAMESPACE_BEGIN(PYBIND11_NAMESPACE) +NAMESPACE_BEGIN(detail) + +/// Enumeration with all supported operator types +enum op_id : int { + op_add, op_sub, op_mul, op_div, op_mod, op_divmod, op_pow, op_lshift, + op_rshift, op_and, op_xor, op_or, op_neg, op_pos, op_abs, op_invert, + op_int, op_long, op_float, op_str, op_cmp, op_gt, op_ge, op_lt, op_le, + op_eq, op_ne, op_iadd, op_isub, op_imul, op_idiv, op_imod, op_ilshift, + op_irshift, op_iand, op_ixor, op_ior, op_complex, op_bool, op_nonzero, + op_repr, op_truediv, op_itruediv, op_hash +}; + +enum op_type : int { + op_l, /* base type on left */ + op_r, /* base type on right */ + op_u /* unary operator */ +}; + +struct self_t { }; +static const self_t self = self_t(); + +/// Type for an unused type slot +struct undefined_t { }; + +/// Don't warn about an unused variable +inline self_t __self() { return self; } + +/// base template of operator implementations +template struct op_impl { }; + +/// Operator implementation generator +template 
struct op_ { + template void execute(Class &cl, const Extra&... extra) const { + using Base = typename Class::type; + using L_type = conditional_t::value, Base, L>; + using R_type = conditional_t::value, Base, R>; + using op = op_impl; + cl.def(op::name(), &op::execute, is_operator(), extra...); + #if PY_MAJOR_VERSION < 3 + if (id == op_truediv || id == op_itruediv) + cl.def(id == op_itruediv ? "__idiv__" : ot == op_l ? "__div__" : "__rdiv__", + &op::execute, is_operator(), extra...); + #endif + } + template void execute_cast(Class &cl, const Extra&... extra) const { + using Base = typename Class::type; + using L_type = conditional_t::value, Base, L>; + using R_type = conditional_t::value, Base, R>; + using op = op_impl; + cl.def(op::name(), &op::execute_cast, is_operator(), extra...); + #if PY_MAJOR_VERSION < 3 + if (id == op_truediv || id == op_itruediv) + cl.def(id == op_itruediv ? "__idiv__" : ot == op_l ? "__div__" : "__rdiv__", + &op::execute, is_operator(), extra...); + #endif + } +}; + +#define PYBIND11_BINARY_OPERATOR(id, rid, op, expr) \ +template struct op_impl { \ + static char const* name() { return "__" #id "__"; } \ + static auto execute(const L &l, const R &r) -> decltype(expr) { return (expr); } \ + static B execute_cast(const L &l, const R &r) { return B(expr); } \ +}; \ +template struct op_impl { \ + static char const* name() { return "__" #rid "__"; } \ + static auto execute(const R &r, const L &l) -> decltype(expr) { return (expr); } \ + static B execute_cast(const R &r, const L &l) { return B(expr); } \ +}; \ +inline op_ op(const self_t &, const self_t &) { \ + return op_(); \ +} \ +template op_ op(const self_t &, const T &) { \ + return op_(); \ +} \ +template op_ op(const T &, const self_t &) { \ + return op_(); \ +} + +#define PYBIND11_INPLACE_OPERATOR(id, op, expr) \ +template struct op_impl { \ + static char const* name() { return "__" #id "__"; } \ + static auto execute(L &l, const R &r) -> decltype(expr) { return expr; } \ + static B 
execute_cast(L &l, const R &r) { return B(expr); } \ +}; \ +template op_ op(const self_t &, const T &) { \ + return op_(); \ +} + +#define PYBIND11_UNARY_OPERATOR(id, op, expr) \ +template struct op_impl { \ + static char const* name() { return "__" #id "__"; } \ + static auto execute(const L &l) -> decltype(expr) { return expr; } \ + static B execute_cast(const L &l) { return B(expr); } \ +}; \ +inline op_ op(const self_t &) { \ + return op_(); \ +} + +PYBIND11_BINARY_OPERATOR(sub, rsub, operator-, l - r) +PYBIND11_BINARY_OPERATOR(add, radd, operator+, l + r) +PYBIND11_BINARY_OPERATOR(mul, rmul, operator*, l * r) +PYBIND11_BINARY_OPERATOR(truediv, rtruediv, operator/, l / r) +PYBIND11_BINARY_OPERATOR(mod, rmod, operator%, l % r) +PYBIND11_BINARY_OPERATOR(lshift, rlshift, operator<<, l << r) +PYBIND11_BINARY_OPERATOR(rshift, rrshift, operator>>, l >> r) +PYBIND11_BINARY_OPERATOR(and, rand, operator&, l & r) +PYBIND11_BINARY_OPERATOR(xor, rxor, operator^, l ^ r) +PYBIND11_BINARY_OPERATOR(eq, eq, operator==, l == r) +PYBIND11_BINARY_OPERATOR(ne, ne, operator!=, l != r) +PYBIND11_BINARY_OPERATOR(or, ror, operator|, l | r) +PYBIND11_BINARY_OPERATOR(gt, lt, operator>, l > r) +PYBIND11_BINARY_OPERATOR(ge, le, operator>=, l >= r) +PYBIND11_BINARY_OPERATOR(lt, gt, operator<, l < r) +PYBIND11_BINARY_OPERATOR(le, ge, operator<=, l <= r) +//PYBIND11_BINARY_OPERATOR(pow, rpow, pow, std::pow(l, r)) +PYBIND11_INPLACE_OPERATOR(iadd, operator+=, l += r) +PYBIND11_INPLACE_OPERATOR(isub, operator-=, l -= r) +PYBIND11_INPLACE_OPERATOR(imul, operator*=, l *= r) +PYBIND11_INPLACE_OPERATOR(itruediv, operator/=, l /= r) +PYBIND11_INPLACE_OPERATOR(imod, operator%=, l %= r) +PYBIND11_INPLACE_OPERATOR(ilshift, operator<<=, l <<= r) +PYBIND11_INPLACE_OPERATOR(irshift, operator>>=, l >>= r) +PYBIND11_INPLACE_OPERATOR(iand, operator&=, l &= r) +PYBIND11_INPLACE_OPERATOR(ixor, operator^=, l ^= r) +PYBIND11_INPLACE_OPERATOR(ior, operator|=, l |= r) +PYBIND11_UNARY_OPERATOR(neg, operator-, -l) 
+PYBIND11_UNARY_OPERATOR(pos, operator+, +l) +PYBIND11_UNARY_OPERATOR(abs, abs, std::abs(l)) +PYBIND11_UNARY_OPERATOR(hash, hash, std::hash()(l)) +PYBIND11_UNARY_OPERATOR(invert, operator~, (~l)) +PYBIND11_UNARY_OPERATOR(bool, operator!, !!l) +PYBIND11_UNARY_OPERATOR(int, int_, (int) l) +PYBIND11_UNARY_OPERATOR(float, float_, (double) l) + +#undef PYBIND11_BINARY_OPERATOR +#undef PYBIND11_INPLACE_OPERATOR +#undef PYBIND11_UNARY_OPERATOR +NAMESPACE_END(detail) + +using detail::self; + +NAMESPACE_END(PYBIND11_NAMESPACE) + +#if defined(_MSC_VER) +# pragma warning(pop) +#endif diff --git a/thirdparty/pybind11/include/pybind11/options.h b/thirdparty/pybind11/include/pybind11/options.h new file mode 100644 index 000000000..cc1e1f6f0 --- /dev/null +++ b/thirdparty/pybind11/include/pybind11/options.h @@ -0,0 +1,65 @@ +/* + pybind11/options.h: global settings that are configurable at runtime. + + Copyright (c) 2016 Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. +*/ + +#pragma once + +#include "detail/common.h" + +NAMESPACE_BEGIN(PYBIND11_NAMESPACE) + +class options { +public: + + // Default RAII constructor, which leaves settings as they currently are. + options() : previous_state(global_state()) {} + + // Class is non-copyable. + options(const options&) = delete; + options& operator=(const options&) = delete; + + // Destructor, which restores settings that were in effect before. 
+ ~options() { + global_state() = previous_state; + } + + // Setter methods (affect the global state): + + options& disable_user_defined_docstrings() & { global_state().show_user_defined_docstrings = false; return *this; } + + options& enable_user_defined_docstrings() & { global_state().show_user_defined_docstrings = true; return *this; } + + options& disable_function_signatures() & { global_state().show_function_signatures = false; return *this; } + + options& enable_function_signatures() & { global_state().show_function_signatures = true; return *this; } + + // Getter methods (return the global state): + + static bool show_user_defined_docstrings() { return global_state().show_user_defined_docstrings; } + + static bool show_function_signatures() { return global_state().show_function_signatures; } + + // This type is not meant to be allocated on the heap. + void* operator new(size_t) = delete; + +private: + + struct state { + bool show_user_defined_docstrings = true; //< Include user-supplied texts in docstrings. + bool show_function_signatures = true; //< Include auto-generated function signatures in docstrings. + }; + + static state &global_state() { + static state instance; + return instance; + } + + state previous_state; +}; + +NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/thirdparty/pybind11/include/pybind11/pybind11.h b/thirdparty/pybind11/include/pybind11/pybind11.h new file mode 100644 index 000000000..d95d61f7b --- /dev/null +++ b/thirdparty/pybind11/include/pybind11/pybind11.h @@ -0,0 +1,2183 @@ +/* + pybind11/pybind11.h: Main header file of the C++11 python + binding generator library + + Copyright (c) 2016 Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. 
+*/ + +#pragma once + +#if defined(__INTEL_COMPILER) +# pragma warning push +# pragma warning disable 68 // integer conversion resulted in a change of sign +# pragma warning disable 186 // pointless comparison of unsigned integer with zero +# pragma warning disable 878 // incompatible exception specifications +# pragma warning disable 1334 // the "template" keyword used for syntactic disambiguation may only be used within a template +# pragma warning disable 1682 // implicit conversion of a 64-bit integral type to a smaller integral type (potential portability problem) +# pragma warning disable 1786 // function "strdup" was declared deprecated +# pragma warning disable 1875 // offsetof applied to non-POD (Plain Old Data) types is nonstandard +# pragma warning disable 2196 // warning #2196: routine is both "inline" and "noinline" +#elif defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable: 4100) // warning C4100: Unreferenced formal parameter +# pragma warning(disable: 4127) // warning C4127: Conditional expression is constant +# pragma warning(disable: 4512) // warning C4512: Assignment operator was implicitly defined as deleted +# pragma warning(disable: 4800) // warning C4800: 'int': forcing value to bool 'true' or 'false' (performance warning) +# pragma warning(disable: 4996) // warning C4996: The POSIX name for this item is deprecated. 
Instead, use the ISO C and C++ conformant name +# pragma warning(disable: 4702) // warning C4702: unreachable code +# pragma warning(disable: 4522) // warning C4522: multiple assignment operators specified +#elif defined(__GNUG__) && !defined(__clang__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wunused-but-set-parameter" +# pragma GCC diagnostic ignored "-Wunused-but-set-variable" +# pragma GCC diagnostic ignored "-Wmissing-field-initializers" +# pragma GCC diagnostic ignored "-Wstrict-aliasing" +# pragma GCC diagnostic ignored "-Wattributes" +# if __GNUC__ >= 7 +# pragma GCC diagnostic ignored "-Wnoexcept-type" +# endif +#endif + +#include "attr.h" +#include "options.h" +#include "detail/class.h" +#include "detail/init.h" + +#if defined(__GNUG__) && !defined(__clang__) +# include +#endif + +NAMESPACE_BEGIN(PYBIND11_NAMESPACE) + +/// Wraps an arbitrary C++ function/method/lambda function/.. into a callable Python object +class cpp_function : public function { +public: + cpp_function() { } + cpp_function(std::nullptr_t) { } + + /// Construct a cpp_function from a vanilla function pointer + template + cpp_function(Return (*f)(Args...), const Extra&... extra) { + initialize(f, f, extra...); + } + + /// Construct a cpp_function from a lambda function (possibly with internal state) + template ::value>> + cpp_function(Func &&f, const Extra&... extra) { + initialize(std::forward(f), + (detail::function_signature_t *) nullptr, extra...); + } + + /// Construct a cpp_function from a class method (non-const) + template + cpp_function(Return (Class::*f)(Arg...), const Extra&... extra) { + initialize([f](Class *c, Arg... args) -> Return { return (c->*f)(args...); }, + (Return (*) (Class *, Arg...)) nullptr, extra...); + } + + /// Construct a cpp_function from a class method (const) + template + cpp_function(Return (Class::*f)(Arg...) const, const Extra&... extra) { + initialize([f](const Class *c, Arg... 
args) -> Return { return (c->*f)(args...); }, + (Return (*)(const Class *, Arg ...)) nullptr, extra...); + } + + /// Return the function name + object name() const { return attr("__name__"); } + +protected: + /// Space optimization: don't inline this frequently instantiated fragment + PYBIND11_NOINLINE detail::function_record *make_function_record() { + return new detail::function_record(); + } + + /// Special internal constructor for functors, lambda functions, etc. + template + void initialize(Func &&f, Return (*)(Args...), const Extra&... extra) { + using namespace detail; + struct capture { remove_reference_t f; }; + + /* Store the function including any extra state it might have (e.g. a lambda capture object) */ + auto rec = make_function_record(); + + /* Store the capture object directly in the function record if there is enough space */ + if (sizeof(capture) <= sizeof(rec->data)) { + /* Without these pragmas, GCC warns that there might not be + enough space to use the placement new operator. However, the + 'if' statement above ensures that this is the case. 
*/ +#if defined(__GNUG__) && !defined(__clang__) && __GNUC__ >= 6 +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wplacement-new" +#endif + new ((capture *) &rec->data) capture { std::forward(f) }; +#if defined(__GNUG__) && !defined(__clang__) && __GNUC__ >= 6 +# pragma GCC diagnostic pop +#endif + if (!std::is_trivially_destructible::value) + rec->free_data = [](function_record *r) { ((capture *) &r->data)->~capture(); }; + } else { + rec->data[0] = new capture { std::forward(f) }; + rec->free_data = [](function_record *r) { delete ((capture *) r->data[0]); }; + } + + /* Type casters for the function arguments and return value */ + using cast_in = argument_loader; + using cast_out = make_caster< + conditional_t::value, void_type, Return> + >; + + static_assert(expected_num_args(sizeof...(Args), cast_in::has_args, cast_in::has_kwargs), + "The number of argument annotations does not match the number of function arguments"); + + /* Dispatch code which converts function arguments and performs the actual function call */ + rec->impl = [](function_call &call) -> handle { + cast_in args_converter; + + /* Try to cast the function arguments into the C++ domain */ + if (!args_converter.load_args(call)) + return PYBIND11_TRY_NEXT_OVERLOAD; + + /* Invoke call policy pre-call hook */ + process_attributes::precall(call); + + /* Get a pointer to the capture object */ + auto data = (sizeof(capture) <= sizeof(call.func.data) + ? 
&call.func.data : call.func.data[0]); + capture *cap = const_cast(reinterpret_cast(data)); + + /* Override policy for rvalues -- usually to enforce rvp::move on an rvalue */ + return_value_policy policy = return_value_policy_override::policy(call.func.policy); + + /* Function scope guard -- defaults to the compile-to-nothing `void_type` */ + using Guard = extract_guard_t; + + /* Perform the function call */ + handle result = cast_out::cast( + std::move(args_converter).template call(cap->f), policy, call.parent); + + /* Invoke call policy post-call hook */ + process_attributes::postcall(call, result); + + return result; + }; + + /* Process any user-provided function attributes */ + process_attributes::init(extra..., rec); + + /* Generate a readable signature describing the function's arguments and return value types */ + static constexpr auto signature = _("(") + cast_in::arg_names + _(") -> ") + cast_out::name; + PYBIND11_DESCR_CONSTEXPR auto types = decltype(signature)::types(); + + /* Register the function with Python from generic (non-templated) code */ + initialize_generic(rec, signature.text, types.data(), sizeof...(Args)); + + if (cast_in::has_args) rec->has_args = true; + if (cast_in::has_kwargs) rec->has_kwargs = true; + + /* Stash some additional information used by an important optimization in 'functional.h' */ + using FunctionType = Return (*)(Args...); + constexpr bool is_function_ptr = + std::is_convertible::value && + sizeof(capture) == sizeof(void *); + if (is_function_ptr) { + rec->is_stateless = true; + rec->data[1] = const_cast(reinterpret_cast(&typeid(FunctionType))); + } + } + + /// Register a function call with Python (generic non-templated code goes here) + void initialize_generic(detail::function_record *rec, const char *text, + const std::type_info *const *types, size_t args) { + + /* Create copies of all referenced C-style strings */ + rec->name = strdup(rec->name ? 
rec->name : ""); + if (rec->doc) rec->doc = strdup(rec->doc); + for (auto &a: rec->args) { + if (a.name) + a.name = strdup(a.name); + if (a.descr) + a.descr = strdup(a.descr); + else if (a.value) + a.descr = strdup(a.value.attr("__repr__")().cast().c_str()); + } + + rec->is_constructor = !strcmp(rec->name, "__init__") || !strcmp(rec->name, "__setstate__"); + +#if !defined(NDEBUG) && !defined(PYBIND11_DISABLE_NEW_STYLE_INIT_WARNING) + if (rec->is_constructor && !rec->is_new_style_constructor) { + const auto class_name = std::string(((PyTypeObject *) rec->scope.ptr())->tp_name); + const auto func_name = std::string(rec->name); + PyErr_WarnEx( + PyExc_FutureWarning, + ("pybind11-bound class '" + class_name + "' is using an old-style " + "placement-new '" + func_name + "' which has been deprecated. See " + "the upgrade guide in pybind11's docs. This message is only visible " + "when compiled in debug mode.").c_str(), 0 + ); + } +#endif + + /* Generate a proper function signature */ + std::string signature; + size_t type_index = 0, arg_index = 0; + for (auto *pc = text; *pc != '\0'; ++pc) { + const auto c = *pc; + + if (c == '{') { + // Write arg name for everything except *args and **kwargs. + if (*(pc + 1) == '*') + continue; + + if (arg_index < rec->args.size() && rec->args[arg_index].name) { + signature += rec->args[arg_index].name; + } else if (arg_index == 0 && rec->is_method) { + signature += "self"; + } else { + signature += "arg" + std::to_string(arg_index - (rec->is_method ? 1 : 0)); + } + signature += ": "; + } else if (c == '}') { + // Write default value if available. 
+ if (arg_index < rec->args.size() && rec->args[arg_index].descr) { + signature += " = "; + signature += rec->args[arg_index].descr; + } + arg_index++; + } else if (c == '%') { + const std::type_info *t = types[type_index++]; + if (!t) + pybind11_fail("Internal error while parsing type signature (1)"); + if (auto tinfo = detail::get_type_info(*t)) { + handle th((PyObject *) tinfo->type); + signature += + th.attr("__module__").cast() + "." + + th.attr("__qualname__").cast(); // Python 3.3+, but we backport it to earlier versions + } else if (rec->is_new_style_constructor && arg_index == 0) { + // A new-style `__init__` takes `self` as `value_and_holder`. + // Rewrite it to the proper class type. + signature += + rec->scope.attr("__module__").cast() + "." + + rec->scope.attr("__qualname__").cast(); + } else { + std::string tname(t->name()); + detail::clean_type_id(tname); + signature += tname; + } + } else { + signature += c; + } + } + if (arg_index != args || types[type_index] != nullptr) + pybind11_fail("Internal error while parsing type signature (2)"); + +#if PY_MAJOR_VERSION < 3 + if (strcmp(rec->name, "__next__") == 0) { + std::free(rec->name); + rec->name = strdup("next"); + } else if (strcmp(rec->name, "__bool__") == 0) { + std::free(rec->name); + rec->name = strdup("__nonzero__"); + } +#endif + rec->signature = strdup(signature.c_str()); + rec->args.shrink_to_fit(); + rec->nargs = (std::uint16_t) args; + + if (rec->sibling && PYBIND11_INSTANCE_METHOD_CHECK(rec->sibling.ptr())) + rec->sibling = PYBIND11_INSTANCE_METHOD_GET_FUNCTION(rec->sibling.ptr()); + + detail::function_record *chain = nullptr, *chain_start = rec; + if (rec->sibling) { + if (PyCFunction_Check(rec->sibling.ptr())) { + auto rec_capsule = reinterpret_borrow(PyCFunction_GET_SELF(rec->sibling.ptr())); + chain = (detail::function_record *) rec_capsule; + /* Never append a method to an overload chain of a parent class; + instead, hide the parent's overloads in this case */ + if 
(!chain->scope.is(rec->scope)) + chain = nullptr; + } + // Don't trigger for things like the default __init__, which are wrapper_descriptors that we are intentionally replacing + else if (!rec->sibling.is_none() && rec->name[0] != '_') + pybind11_fail("Cannot overload existing non-function object \"" + std::string(rec->name) + + "\" with a function of the same name"); + } + + if (!chain) { + /* No existing overload was found, create a new function object */ + rec->def = new PyMethodDef(); + std::memset(rec->def, 0, sizeof(PyMethodDef)); + rec->def->ml_name = rec->name; + rec->def->ml_meth = reinterpret_cast(reinterpret_cast(*dispatcher)); + rec->def->ml_flags = METH_VARARGS | METH_KEYWORDS; + + capsule rec_capsule(rec, [](void *ptr) { + destruct((detail::function_record *) ptr); + }); + + object scope_module; + if (rec->scope) { + if (hasattr(rec->scope, "__module__")) { + scope_module = rec->scope.attr("__module__"); + } else if (hasattr(rec->scope, "__name__")) { + scope_module = rec->scope.attr("__name__"); + } + } + + m_ptr = PyCFunction_NewEx(rec->def, rec_capsule.ptr(), scope_module.ptr()); + if (!m_ptr) + pybind11_fail("cpp_function::cpp_function(): Could not allocate function object"); + } else { + /* Append at the end of the overload chain */ + m_ptr = rec->sibling.ptr(); + inc_ref(); + chain_start = chain; + if (chain->is_method != rec->is_method) + pybind11_fail("overloading a method with both static and instance methods is not supported; " + #if defined(NDEBUG) + "compile in debug mode for more details" + #else + "error while attempting to bind " + std::string(rec->is_method ? "instance" : "static") + " method " + + std::string(pybind11::str(rec->scope.attr("__name__"))) + "." 
+ std::string(rec->name) + signature + #endif + ); + while (chain->next) + chain = chain->next; + chain->next = rec; + } + + std::string signatures; + int index = 0; + /* Create a nice pydoc rec including all signatures and + docstrings of the functions in the overload chain */ + if (chain && options::show_function_signatures()) { + // First a generic signature + signatures += rec->name; + signatures += "(*args, **kwargs)\n"; + signatures += "Overloaded function.\n\n"; + } + // Then specific overload signatures + bool first_user_def = true; + for (auto it = chain_start; it != nullptr; it = it->next) { + if (options::show_function_signatures()) { + if (index > 0) signatures += "\n"; + if (chain) + signatures += std::to_string(++index) + ". "; + signatures += rec->name; + signatures += it->signature; + signatures += "\n"; + } + if (it->doc && strlen(it->doc) > 0 && options::show_user_defined_docstrings()) { + // If we're appending another docstring, and aren't printing function signatures, we + // need to append a newline first: + if (!options::show_function_signatures()) { + if (first_user_def) first_user_def = false; + else signatures += "\n"; + } + if (options::show_function_signatures()) signatures += "\n"; + signatures += it->doc; + if (options::show_function_signatures()) signatures += "\n"; + } + } + + /* Install docstring */ + PyCFunctionObject *func = (PyCFunctionObject *) m_ptr; + if (func->m_ml->ml_doc) + std::free(const_cast(func->m_ml->ml_doc)); + func->m_ml->ml_doc = strdup(signatures.c_str()); + + if (rec->is_method) { + m_ptr = PYBIND11_INSTANCE_METHOD_NEW(m_ptr, rec->scope.ptr()); + if (!m_ptr) + pybind11_fail("cpp_function::cpp_function(): Could not allocate instance method object"); + Py_DECREF(func); + } + } + + /// When a cpp_function is GCed, release any memory allocated by pybind11 + static void destruct(detail::function_record *rec) { + while (rec) { + detail::function_record *next = rec->next; + if (rec->free_data) + rec->free_data(rec); + 
std::free((char *) rec->name); + std::free((char *) rec->doc); + std::free((char *) rec->signature); + for (auto &arg: rec->args) { + std::free(const_cast(arg.name)); + std::free(const_cast(arg.descr)); + arg.value.dec_ref(); + } + if (rec->def) { + std::free(const_cast(rec->def->ml_doc)); + delete rec->def; + } + delete rec; + rec = next; + } + } + + /// Main dispatch logic for calls to functions bound using pybind11 + static PyObject *dispatcher(PyObject *self, PyObject *args_in, PyObject *kwargs_in) { + using namespace detail; + + /* Iterator over the list of potentially admissible overloads */ + const function_record *overloads = (function_record *) PyCapsule_GetPointer(self, nullptr), + *it = overloads; + + /* Need to know how many arguments + keyword arguments there are to pick the right overload */ + const size_t n_args_in = (size_t) PyTuple_GET_SIZE(args_in); + + handle parent = n_args_in > 0 ? PyTuple_GET_ITEM(args_in, 0) : nullptr, + result = PYBIND11_TRY_NEXT_OVERLOAD; + + auto self_value_and_holder = value_and_holder(); + if (overloads->is_constructor) { + const auto tinfo = get_type_info((PyTypeObject *) overloads->scope.ptr()); + const auto pi = reinterpret_cast(parent.ptr()); + self_value_and_holder = pi->get_value_and_holder(tinfo, false); + + if (!self_value_and_holder.type || !self_value_and_holder.inst) { + PyErr_SetString(PyExc_TypeError, "__init__(self, ...) called with invalid `self` argument"); + return nullptr; + } + + // If this value is already registered it must mean __init__ is invoked multiple times; + // we really can't support that in C++, so just ignore the second __init__. + if (self_value_and_holder.instance_registered()) + return none().release().ptr(); + } + + try { + // We do this in two passes: in the first pass, we load arguments with `convert=false`; + // in the second, we allow conversion (except for arguments with an explicit + // py::arg().noconvert()). 
This lets us prefer calls without conversion, with + // conversion as a fallback. + std::vector second_pass; + + // However, if there are no overloads, we can just skip the no-convert pass entirely + const bool overloaded = it != nullptr && it->next != nullptr; + + for (; it != nullptr; it = it->next) { + + /* For each overload: + 1. Copy all positional arguments we were given, also checking to make sure that + named positional arguments weren't *also* specified via kwarg. + 2. If we weren't given enough, try to make up the omitted ones by checking + whether they were provided by a kwarg matching the `py::arg("name")` name. If + so, use it (and remove it from kwargs; if not, see if the function binding + provided a default that we can use. + 3. Ensure that either all keyword arguments were "consumed", or that the function + takes a kwargs argument to accept unconsumed kwargs. + 4. Any positional arguments still left get put into a tuple (for args), and any + leftover kwargs get put into a dict. + 5. Pack everything into a vector; if we have py::args or py::kwargs, they are an + extra tuple or dict at the end of the positional arguments. + 6. Call the function call dispatcher (function_record::impl) + + If one of these fail, move on to the next overload and keep trying until we get a + result other than PYBIND11_TRY_NEXT_OVERLOAD. 
+ */ + + const function_record &func = *it; + size_t pos_args = func.nargs; // Number of positional arguments that we need + if (func.has_args) --pos_args; // (but don't count py::args + if (func.has_kwargs) --pos_args; // or py::kwargs) + + if (!func.has_args && n_args_in > pos_args) + continue; // Too many arguments for this overload + + if (n_args_in < pos_args && func.args.size() < pos_args) + continue; // Not enough arguments given, and not enough defaults to fill in the blanks + + function_call call(func, parent); + + size_t args_to_copy = (std::min)(pos_args, n_args_in); // Protect std::min with parentheses + size_t args_copied = 0; + + // 0. Inject new-style `self` argument + if (func.is_new_style_constructor) { + // The `value` may have been preallocated by an old-style `__init__` + // if it was a preceding candidate for overload resolution. + if (self_value_and_holder) + self_value_and_holder.type->dealloc(self_value_and_holder); + + call.init_self = PyTuple_GET_ITEM(args_in, 0); + call.args.push_back(reinterpret_cast(&self_value_and_holder)); + call.args_convert.push_back(false); + ++args_copied; + } + + // 1. Copy any position arguments given. + bool bad_arg = false; + for (; args_copied < args_to_copy; ++args_copied) { + const argument_record *arg_rec = args_copied < func.args.size() ? &func.args[args_copied] : nullptr; + if (kwargs_in && arg_rec && arg_rec->name && PyDict_GetItemString(kwargs_in, arg_rec->name)) { + bad_arg = true; + break; + } + + handle arg(PyTuple_GET_ITEM(args_in, args_copied)); + if (arg_rec && !arg_rec->none && arg.is_none()) { + bad_arg = true; + break; + } + call.args.push_back(arg); + call.args_convert.push_back(arg_rec ? arg_rec->convert : true); + } + if (bad_arg) + continue; // Maybe it was meant for another overload (issue #688) + + // We'll need to copy this if we steal some kwargs for defaults + dict kwargs = reinterpret_borrow(kwargs_in); + + // 2. 
Check kwargs and, failing that, defaults that may help complete the list + if (args_copied < pos_args) { + bool copied_kwargs = false; + + for (; args_copied < pos_args; ++args_copied) { + const auto &arg = func.args[args_copied]; + + handle value; + if (kwargs_in && arg.name) + value = PyDict_GetItemString(kwargs.ptr(), arg.name); + + if (value) { + // Consume a kwargs value + if (!copied_kwargs) { + kwargs = reinterpret_steal(PyDict_Copy(kwargs.ptr())); + copied_kwargs = true; + } + PyDict_DelItemString(kwargs.ptr(), arg.name); + } else if (arg.value) { + value = arg.value; + } + + if (value) { + call.args.push_back(value); + call.args_convert.push_back(arg.convert); + } + else + break; + } + + if (args_copied < pos_args) + continue; // Not enough arguments, defaults, or kwargs to fill the positional arguments + } + + // 3. Check everything was consumed (unless we have a kwargs arg) + if (kwargs && kwargs.size() > 0 && !func.has_kwargs) + continue; // Unconsumed kwargs, but no py::kwargs argument to accept them + + // 4a. If we have a py::args argument, create a new tuple with leftovers + if (func.has_args) { + tuple extra_args; + if (args_to_copy == 0) { + // We didn't copy out any position arguments from the args_in tuple, so we + // can reuse it directly without copying: + extra_args = reinterpret_borrow(args_in); + } else if (args_copied >= n_args_in) { + extra_args = tuple(0); + } else { + size_t args_size = n_args_in - args_copied; + extra_args = tuple(args_size); + for (size_t i = 0; i < args_size; ++i) { + extra_args[i] = PyTuple_GET_ITEM(args_in, args_copied + i); + } + } + call.args.push_back(extra_args); + call.args_convert.push_back(false); + call.args_ref = std::move(extra_args); + } + + // 4b. 
If we have a py::kwargs, pass on any remaining kwargs + if (func.has_kwargs) { + if (!kwargs.ptr()) + kwargs = dict(); // If we didn't get one, send an empty one + call.args.push_back(kwargs); + call.args_convert.push_back(false); + call.kwargs_ref = std::move(kwargs); + } + + // 5. Put everything in a vector. Not technically step 5, we've been building it + // in `call.args` all along. + #if !defined(NDEBUG) + if (call.args.size() != func.nargs || call.args_convert.size() != func.nargs) + pybind11_fail("Internal error: function call dispatcher inserted wrong number of arguments!"); + #endif + + std::vector second_pass_convert; + if (overloaded) { + // We're in the first no-convert pass, so swap out the conversion flags for a + // set of all-false flags. If the call fails, we'll swap the flags back in for + // the conversion-allowed call below. + second_pass_convert.resize(func.nargs, false); + call.args_convert.swap(second_pass_convert); + } + + // 6. Call the function. + try { + loader_life_support guard{}; + result = func.impl(call); + } catch (reference_cast_error &) { + result = PYBIND11_TRY_NEXT_OVERLOAD; + } + + if (result.ptr() != PYBIND11_TRY_NEXT_OVERLOAD) + break; + + if (overloaded) { + // The (overloaded) call failed; if the call has at least one argument that + // permits conversion (i.e. it hasn't been explicitly specified `.noconvert()`) + // then add this call to the list of second pass overloads to try. + for (size_t i = func.is_method ? 1 : 0; i < pos_args; i++) { + if (second_pass_convert[i]) { + // Found one: swap the converting flags back in and store the call for + // the second pass. 
+ call.args_convert.swap(second_pass_convert); + second_pass.push_back(std::move(call)); + break; + } + } + } + } + + if (overloaded && !second_pass.empty() && result.ptr() == PYBIND11_TRY_NEXT_OVERLOAD) { + // The no-conversion pass finished without success, try again with conversion allowed + for (auto &call : second_pass) { + try { + loader_life_support guard{}; + result = call.func.impl(call); + } catch (reference_cast_error &) { + result = PYBIND11_TRY_NEXT_OVERLOAD; + } + + if (result.ptr() != PYBIND11_TRY_NEXT_OVERLOAD) { + // The error reporting logic below expects 'it' to be valid, as it would be + // if we'd encountered this failure in the first-pass loop. + if (!result) + it = &call.func; + break; + } + } + } + } catch (error_already_set &e) { + e.restore(); + return nullptr; +#if defined(__GNUG__) && !defined(__clang__) + } catch ( abi::__forced_unwind& ) { + throw; +#endif + } catch (...) { + /* When an exception is caught, give each registered exception + translator a chance to translate it to a Python exception + in reverse order of registration. + + A translator may choose to do one of the following: + + - catch the exception and call PyErr_SetString or PyErr_SetObject + to set a standard (or custom) Python exception, or + - do nothing and let the exception fall through to the next translator, or + - delegate translation to the next translator by throwing a new type of exception. */ + + auto last_exception = std::current_exception(); + auto ®istered_exception_translators = get_internals().registered_exception_translators; + for (auto& translator : registered_exception_translators) { + try { + translator(last_exception); + } catch (...) 
{ + last_exception = std::current_exception(); + continue; + } + return nullptr; + } + PyErr_SetString(PyExc_SystemError, "Exception escaped from default exception translator!"); + return nullptr; + } + + auto append_note_if_missing_header_is_suspected = [](std::string &msg) { + if (msg.find("std::") != std::string::npos) { + msg += "\n\n" + "Did you forget to `#include `? Or ,\n" + ", , etc. Some automatic\n" + "conversions are optional and require extra headers to be included\n" + "when compiling your pybind11 module."; + } + }; + + if (result.ptr() == PYBIND11_TRY_NEXT_OVERLOAD) { + if (overloads->is_operator) + return handle(Py_NotImplemented).inc_ref().ptr(); + + std::string msg = std::string(overloads->name) + "(): incompatible " + + std::string(overloads->is_constructor ? "constructor" : "function") + + " arguments. The following argument types are supported:\n"; + + int ctr = 0; + for (const function_record *it2 = overloads; it2 != nullptr; it2 = it2->next) { + msg += " "+ std::to_string(++ctr) + ". "; + + bool wrote_sig = false; + if (overloads->is_constructor) { + // For a constructor, rewrite `(self: Object, arg0, ...) -> NoneType` as `Object(arg0, ...)` + std::string sig = it2->signature; + size_t start = sig.find('(') + 7; // skip "(self: " + if (start < sig.size()) { + // End at the , for the next argument + size_t end = sig.find(", "), next = end + 2; + size_t ret = sig.rfind(" -> "); + // Or the ), if there is no comma: + if (end >= sig.size()) next = end = sig.find(')'); + if (start < end && next < sig.size()) { + msg.append(sig, start, end - start); + msg += '('; + msg.append(sig, next, ret - next); + wrote_sig = true; + } + } + } + if (!wrote_sig) msg += it2->signature; + + msg += "\n"; + } + msg += "\nInvoked with: "; + auto args_ = reinterpret_borrow(args_in); + bool some_args = false; + for (size_t ti = overloads->is_constructor ? 
1 : 0; ti < args_.size(); ++ti) { + if (!some_args) some_args = true; + else msg += ", "; + msg += pybind11::repr(args_[ti]); + } + if (kwargs_in) { + auto kwargs = reinterpret_borrow(kwargs_in); + if (kwargs.size() > 0) { + if (some_args) msg += "; "; + msg += "kwargs: "; + bool first = true; + for (auto kwarg : kwargs) { + if (first) first = false; + else msg += ", "; + msg += pybind11::str("{}={!r}").format(kwarg.first, kwarg.second); + } + } + } + + append_note_if_missing_header_is_suspected(msg); + PyErr_SetString(PyExc_TypeError, msg.c_str()); + return nullptr; + } else if (!result) { + std::string msg = "Unable to convert function return value to a " + "Python type! The signature was\n\t"; + msg += it->signature; + append_note_if_missing_header_is_suspected(msg); + PyErr_SetString(PyExc_TypeError, msg.c_str()); + return nullptr; + } else { + if (overloads->is_constructor && !self_value_and_holder.holder_constructed()) { + auto *pi = reinterpret_cast(parent.ptr()); + self_value_and_holder.type->init_instance(pi, nullptr); + } + return result.ptr(); + } + } +}; + +/// Wrapper for Python extension modules +class module : public object { +public: + PYBIND11_OBJECT_DEFAULT(module, object, PyModule_Check) + + /// Create a new top-level Python module with the given name and docstring + explicit module(const char *name, const char *doc = nullptr) { + if (!options::show_user_defined_docstrings()) doc = nullptr; +#if PY_MAJOR_VERSION >= 3 + PyModuleDef *def = new PyModuleDef(); + std::memset(def, 0, sizeof(PyModuleDef)); + def->m_name = name; + def->m_doc = doc; + def->m_size = -1; + Py_INCREF(def); + m_ptr = PyModule_Create(def); +#else + m_ptr = Py_InitModule3(name, nullptr, doc); +#endif + if (m_ptr == nullptr) + pybind11_fail("Internal error in module::module()"); + inc_ref(); + } + + /** \rst + Create Python binding for a new function within the module scope. ``Func`` + can be a plain C++ function, a function pointer, or a lambda function. 
For + details on the ``Extra&& ... extra`` argument, see section :ref:`extras`. + \endrst */ + template + module &def(const char *name_, Func &&f, const Extra& ... extra) { + cpp_function func(std::forward(f), name(name_), scope(*this), + sibling(getattr(*this, name_, none())), extra...); + // NB: allow overwriting here because cpp_function sets up a chain with the intention of + // overwriting (and has already checked internally that it isn't overwriting non-functions). + add_object(name_, func, true /* overwrite */); + return *this; + } + + /** \rst + Create and return a new Python submodule with the given name and docstring. + This also works recursively, i.e. + + .. code-block:: cpp + + py::module m("example", "pybind11 example plugin"); + py::module m2 = m.def_submodule("sub", "A submodule of 'example'"); + py::module m3 = m2.def_submodule("subsub", "A submodule of 'example.sub'"); + \endrst */ + module def_submodule(const char *name, const char *doc = nullptr) { + std::string full_name = std::string(PyModule_GetName(m_ptr)) + + std::string(".") + std::string(name); + auto result = reinterpret_borrow(PyImport_AddModule(full_name.c_str())); + if (doc && options::show_user_defined_docstrings()) + result.attr("__doc__") = pybind11::str(doc); + attr(name) = result; + return result; + } + + /// Import and return a module or throws `error_already_set`. + static module import(const char *name) { + PyObject *obj = PyImport_ImportModule(name); + if (!obj) + throw error_already_set(); + return reinterpret_steal(obj); + } + + /// Reload the module or throws `error_already_set`. + void reload() { + PyObject *obj = PyImport_ReloadModule(ptr()); + if (!obj) + throw error_already_set(); + *this = reinterpret_steal(obj); + } + + // Adds an object to the module using the given name. Throws if an object with the given name + // already exists. 
+ // + // overwrite should almost always be false: attempting to overwrite objects that pybind11 has + // established will, in most cases, break things. + PYBIND11_NOINLINE void add_object(const char *name, handle obj, bool overwrite = false) { + if (!overwrite && hasattr(*this, name)) + pybind11_fail("Error during initialization: multiple incompatible definitions with name \"" + + std::string(name) + "\""); + + PyModule_AddObject(ptr(), name, obj.inc_ref().ptr() /* steals a reference */); + } +}; + +/// \ingroup python_builtins +/// Return a dictionary representing the global variables in the current execution frame, +/// or ``__main__.__dict__`` if there is no frame (usually when the interpreter is embedded). +inline dict globals() { + PyObject *p = PyEval_GetGlobals(); + return reinterpret_borrow(p ? p : module::import("__main__").attr("__dict__").ptr()); +} + +NAMESPACE_BEGIN(detail) +/// Generic support for creating new Python heap types +class generic_type : public object { + template friend class class_; +public: + PYBIND11_OBJECT_DEFAULT(generic_type, object, PyType_Check) +protected: + void initialize(const type_record &rec) { + if (rec.scope && hasattr(rec.scope, rec.name)) + pybind11_fail("generic_type: cannot initialize type \"" + std::string(rec.name) + + "\": an object with that name is already defined"); + + if (rec.module_local ? 
get_local_type_info(*rec.type) : get_global_type_info(*rec.type)) + pybind11_fail("generic_type: type \"" + std::string(rec.name) + + "\" is already registered!"); + + m_ptr = make_new_python_type(rec); + + /* Register supplemental type information in C++ dict */ + auto *tinfo = new detail::type_info(); + tinfo->type = (PyTypeObject *) m_ptr; + tinfo->cpptype = rec.type; + tinfo->type_size = rec.type_size; + tinfo->type_align = rec.type_align; + tinfo->operator_new = rec.operator_new; + tinfo->holder_size_in_ptrs = size_in_ptrs(rec.holder_size); + tinfo->init_instance = rec.init_instance; + tinfo->dealloc = rec.dealloc; + tinfo->simple_type = true; + tinfo->simple_ancestors = true; + tinfo->default_holder = rec.default_holder; + tinfo->module_local = rec.module_local; + + auto &internals = get_internals(); + auto tindex = std::type_index(*rec.type); + tinfo->direct_conversions = &internals.direct_conversions[tindex]; + if (rec.module_local) + registered_local_types_cpp()[tindex] = tinfo; + else + internals.registered_types_cpp[tindex] = tinfo; + internals.registered_types_py[(PyTypeObject *) m_ptr] = { tinfo }; + + if (rec.bases.size() > 1 || rec.multiple_inheritance) { + mark_parents_nonsimple(tinfo->type); + tinfo->simple_ancestors = false; + } + else if (rec.bases.size() == 1) { + auto parent_tinfo = get_type_info((PyTypeObject *) rec.bases[0].ptr()); + tinfo->simple_ancestors = parent_tinfo->simple_ancestors; + } + + if (rec.module_local) { + // Stash the local typeinfo and loader so that external modules can access it. + tinfo->module_local_load = &type_caster_generic::local_load; + setattr(m_ptr, PYBIND11_MODULE_LOCAL_ID, capsule(tinfo)); + } + } + + /// Helper function which tags all parents of a type using mult. 
inheritance + void mark_parents_nonsimple(PyTypeObject *value) { + auto t = reinterpret_borrow(value->tp_bases); + for (handle h : t) { + auto tinfo2 = get_type_info((PyTypeObject *) h.ptr()); + if (tinfo2) + tinfo2->simple_type = false; + mark_parents_nonsimple((PyTypeObject *) h.ptr()); + } + } + + void install_buffer_funcs( + buffer_info *(*get_buffer)(PyObject *, void *), + void *get_buffer_data) { + PyHeapTypeObject *type = (PyHeapTypeObject*) m_ptr; + auto tinfo = detail::get_type_info(&type->ht_type); + + if (!type->ht_type.tp_as_buffer) + pybind11_fail( + "To be able to register buffer protocol support for the type '" + + std::string(tinfo->type->tp_name) + + "' the associated class<>(..) invocation must " + "include the pybind11::buffer_protocol() annotation!"); + + tinfo->get_buffer = get_buffer; + tinfo->get_buffer_data = get_buffer_data; + } + + // rec_func must be set for either fget or fset. + void def_property_static_impl(const char *name, + handle fget, handle fset, + detail::function_record *rec_func) { + const auto is_static = rec_func && !(rec_func->is_method && rec_func->scope); + const auto has_doc = rec_func && rec_func->doc && pybind11::options::show_user_defined_docstrings(); + auto property = handle((PyObject *) (is_static ? get_internals().static_property_type + : &PyProperty_Type)); + attr(name) = property(fget.ptr() ? fget : none(), + fset.ptr() ? fset : none(), + /*deleter*/none(), + pybind11::str(has_doc ? rec_func->doc : "")); + } +}; + +/// Set the pointer to operator new if it exists. The cast is needed because it can be overloaded. +template (T::operator new))>> +void set_operator_new(type_record *r) { r->operator_new = &T::operator new; } + +template void set_operator_new(...) 
{ } + +template struct has_operator_delete : std::false_type { }; +template struct has_operator_delete(T::operator delete))>> + : std::true_type { }; +template struct has_operator_delete_size : std::false_type { }; +template struct has_operator_delete_size(T::operator delete))>> + : std::true_type { }; +/// Call class-specific delete if it exists or global otherwise. Can also be an overload set. +template ::value, int> = 0> +void call_operator_delete(T *p, size_t, size_t) { T::operator delete(p); } +template ::value && has_operator_delete_size::value, int> = 0> +void call_operator_delete(T *p, size_t s, size_t) { T::operator delete(p, s); } + +inline void call_operator_delete(void *p, size_t s, size_t a) { + (void)s; (void)a; + #if defined(__cpp_aligned_new) && (!defined(_MSC_VER) || _MSC_VER >= 1912) + if (a > __STDCPP_DEFAULT_NEW_ALIGNMENT__) { + #ifdef __cpp_sized_deallocation + ::operator delete(p, s, std::align_val_t(a)); + #else + ::operator delete(p, std::align_val_t(a)); + #endif + return; + } + #endif + #ifdef __cpp_sized_deallocation + ::operator delete(p, s); + #else + ::operator delete(p); + #endif +} + +NAMESPACE_END(detail) + +/// Given a pointer to a member function, cast it to its `Derived` version. +/// Forward everything else unchanged. +template +auto method_adaptor(F &&f) -> decltype(std::forward(f)) { return std::forward(f); } + +template +auto method_adaptor(Return (Class::*pmf)(Args...)) -> Return (Derived::*)(Args...) { + static_assert(detail::is_accessible_base_of::value, + "Cannot bind an inaccessible base class method; use a lambda definition instead"); + return pmf; +} + +template +auto method_adaptor(Return (Class::*pmf)(Args...) const) -> Return (Derived::*)(Args...) 
const { + static_assert(detail::is_accessible_base_of::value, + "Cannot bind an inaccessible base class method; use a lambda definition instead"); + return pmf; +} + +template +class class_ : public detail::generic_type { + template using is_holder = detail::is_holder_type; + template using is_subtype = detail::is_strict_base_of; + template using is_base = detail::is_strict_base_of; + // struct instead of using here to help MSVC: + template struct is_valid_class_option : + detail::any_of, is_subtype, is_base> {}; + +public: + using type = type_; + using type_alias = detail::exactly_one_t; + constexpr static bool has_alias = !std::is_void::value; + using holder_type = detail::exactly_one_t, options...>; + + static_assert(detail::all_of...>::value, + "Unknown/invalid class_ template parameters provided"); + + static_assert(!has_alias || std::is_polymorphic::value, + "Cannot use an alias class with a non-polymorphic type"); + + PYBIND11_OBJECT(class_, generic_type, PyType_Check) + + template + class_(handle scope, const char *name, const Extra &... extra) { + using namespace detail; + + // MI can only be specified via class_ template options, not constructor parameters + static_assert( + none_of...>::value || // no base class arguments, or: + ( constexpr_sum(is_pyobject::value...) == 1 && // Exactly one base + constexpr_sum(is_base::value...) 
== 0 && // no template option bases + none_of...>::value), // no multiple_inheritance attr + "Error: multiple inheritance bases must be specified via class_ template options"); + + type_record record; + record.scope = scope; + record.name = name; + record.type = &typeid(type); + record.type_size = sizeof(conditional_t); + record.type_align = alignof(conditional_t&); + record.holder_size = sizeof(holder_type); + record.init_instance = init_instance; + record.dealloc = dealloc; + record.default_holder = detail::is_instantiation::value; + + set_operator_new(&record); + + /* Register base classes specified via template arguments to class_, if any */ + PYBIND11_EXPAND_SIDE_EFFECTS(add_base(record)); + + /* Process optional arguments, if any */ + process_attributes::init(extra..., &record); + + generic_type::initialize(record); + + if (has_alias) { + auto &instances = record.module_local ? registered_local_types_cpp() : get_internals().registered_types_cpp; + instances[std::type_index(typeid(type_alias))] = instances[std::type_index(typeid(type))]; + } + } + + template ::value, int> = 0> + static void add_base(detail::type_record &rec) { + rec.add_base(typeid(Base), [](void *src) -> void * { + return static_cast(reinterpret_cast(src)); + }); + } + + template ::value, int> = 0> + static void add_base(detail::type_record &) { } + + template + class_ &def(const char *name_, Func&& f, const Extra&... extra) { + cpp_function cf(method_adaptor(std::forward(f)), name(name_), is_method(*this), + sibling(getattr(*this, name_, none())), extra...); + attr(cf.name()) = cf; + return *this; + } + + template class_ & + def_static(const char *name_, Func &&f, const Extra&... extra) { + static_assert(!std::is_member_function_pointer::value, + "def_static(...) 
called with a non-static member function pointer"); + cpp_function cf(std::forward(f), name(name_), scope(*this), + sibling(getattr(*this, name_, none())), extra...); + attr(cf.name()) = staticmethod(cf); + return *this; + } + + template + class_ &def(const detail::op_ &op, const Extra&... extra) { + op.execute(*this, extra...); + return *this; + } + + template + class_ & def_cast(const detail::op_ &op, const Extra&... extra) { + op.execute_cast(*this, extra...); + return *this; + } + + template + class_ &def(const detail::initimpl::constructor &init, const Extra&... extra) { + init.execute(*this, extra...); + return *this; + } + + template + class_ &def(const detail::initimpl::alias_constructor &init, const Extra&... extra) { + init.execute(*this, extra...); + return *this; + } + + template + class_ &def(detail::initimpl::factory &&init, const Extra&... extra) { + std::move(init).execute(*this, extra...); + return *this; + } + + template + class_ &def(detail::initimpl::pickle_factory &&pf, const Extra &...extra) { + std::move(pf).execute(*this, extra...); + return *this; + } + + template class_& def_buffer(Func &&func) { + struct capture { Func func; }; + capture *ptr = new capture { std::forward(func) }; + install_buffer_funcs([](PyObject *obj, void *ptr) -> buffer_info* { + detail::make_caster caster; + if (!caster.load(obj, false)) + return nullptr; + return new buffer_info(((capture *) ptr)->func(caster)); + }, ptr); + return *this; + } + + template + class_ &def_buffer(Return (Class::*func)(Args...)) { + return def_buffer([func] (type &obj) { return (obj.*func)(); }); + } + + template + class_ &def_buffer(Return (Class::*func)(Args...) const) { + return def_buffer([func] (const type &obj) { return (obj.*func)(); }); + } + + template + class_ &def_readwrite(const char *name, D C::*pm, const Extra&... 
extra) { + static_assert(std::is_same::value || std::is_base_of::value, "def_readwrite() requires a class member (or base class member)"); + cpp_function fget([pm](const type &c) -> const D &{ return c.*pm; }, is_method(*this)), + fset([pm](type &c, const D &value) { c.*pm = value; }, is_method(*this)); + def_property(name, fget, fset, return_value_policy::reference_internal, extra...); + return *this; + } + + template + class_ &def_readonly(const char *name, const D C::*pm, const Extra& ...extra) { + static_assert(std::is_same::value || std::is_base_of::value, "def_readonly() requires a class member (or base class member)"); + cpp_function fget([pm](const type &c) -> const D &{ return c.*pm; }, is_method(*this)); + def_property_readonly(name, fget, return_value_policy::reference_internal, extra...); + return *this; + } + + template + class_ &def_readwrite_static(const char *name, D *pm, const Extra& ...extra) { + cpp_function fget([pm](object) -> const D &{ return *pm; }, scope(*this)), + fset([pm](object, const D &value) { *pm = value; }, scope(*this)); + def_property_static(name, fget, fset, return_value_policy::reference, extra...); + return *this; + } + + template + class_ &def_readonly_static(const char *name, const D *pm, const Extra& ...extra) { + cpp_function fget([pm](object) -> const D &{ return *pm; }, scope(*this)); + def_property_readonly_static(name, fget, return_value_policy::reference, extra...); + return *this; + } + + /// Uses return_value_policy::reference_internal by default + template + class_ &def_property_readonly(const char *name, const Getter &fget, const Extra& ...extra) { + return def_property_readonly(name, cpp_function(method_adaptor(fget)), + return_value_policy::reference_internal, extra...); + } + + /// Uses cpp_function's return_value_policy by default + template + class_ &def_property_readonly(const char *name, const cpp_function &fget, const Extra& ...extra) { + return def_property(name, fget, nullptr, extra...); + } + + /// Uses 
return_value_policy::reference by default + template + class_ &def_property_readonly_static(const char *name, const Getter &fget, const Extra& ...extra) { + return def_property_readonly_static(name, cpp_function(fget), return_value_policy::reference, extra...); + } + + /// Uses cpp_function's return_value_policy by default + template + class_ &def_property_readonly_static(const char *name, const cpp_function &fget, const Extra& ...extra) { + return def_property_static(name, fget, nullptr, extra...); + } + + /// Uses return_value_policy::reference_internal by default + template + class_ &def_property(const char *name, const Getter &fget, const Setter &fset, const Extra& ...extra) { + return def_property(name, fget, cpp_function(method_adaptor(fset)), extra...); + } + template + class_ &def_property(const char *name, const Getter &fget, const cpp_function &fset, const Extra& ...extra) { + return def_property(name, cpp_function(method_adaptor(fget)), fset, + return_value_policy::reference_internal, extra...); + } + + /// Uses cpp_function's return_value_policy by default + template + class_ &def_property(const char *name, const cpp_function &fget, const cpp_function &fset, const Extra& ...extra) { + return def_property_static(name, fget, fset, is_method(*this), extra...); + } + + /// Uses return_value_policy::reference by default + template + class_ &def_property_static(const char *name, const Getter &fget, const cpp_function &fset, const Extra& ...extra) { + return def_property_static(name, cpp_function(fget), fset, return_value_policy::reference, extra...); + } + + /// Uses cpp_function's return_value_policy by default + template + class_ &def_property_static(const char *name, const cpp_function &fget, const cpp_function &fset, const Extra& ...extra) { + static_assert( 0 == detail::constexpr_sum(std::is_base_of::value...), + "Argument annotations are not allowed for properties"); + auto rec_fget = get_function_record(fget), rec_fset = get_function_record(fset); + 
auto *rec_active = rec_fget; + if (rec_fget) { + char *doc_prev = rec_fget->doc; /* 'extra' field may include a property-specific documentation string */ + detail::process_attributes::init(extra..., rec_fget); + if (rec_fget->doc && rec_fget->doc != doc_prev) { + free(doc_prev); + rec_fget->doc = strdup(rec_fget->doc); + } + } + if (rec_fset) { + char *doc_prev = rec_fset->doc; + detail::process_attributes::init(extra..., rec_fset); + if (rec_fset->doc && rec_fset->doc != doc_prev) { + free(doc_prev); + rec_fset->doc = strdup(rec_fset->doc); + } + if (! rec_active) rec_active = rec_fset; + } + def_property_static_impl(name, fget, fset, rec_active); + return *this; + } + +private: + /// Initialize holder object, variant 1: object derives from enable_shared_from_this + template + static void init_holder(detail::instance *inst, detail::value_and_holder &v_h, + const holder_type * /* unused */, const std::enable_shared_from_this * /* dummy */) { + try { + auto sh = std::dynamic_pointer_cast( + v_h.value_ptr()->shared_from_this()); + if (sh) { + new (std::addressof(v_h.holder())) holder_type(std::move(sh)); + v_h.set_holder_constructed(); + } + } catch (const std::bad_weak_ptr &) {} + + if (!v_h.holder_constructed() && inst->owned) { + new (std::addressof(v_h.holder())) holder_type(v_h.value_ptr()); + v_h.set_holder_constructed(); + } + } + + static void init_holder_from_existing(const detail::value_and_holder &v_h, + const holder_type *holder_ptr, std::true_type /*is_copy_constructible*/) { + new (std::addressof(v_h.holder())) holder_type(*reinterpret_cast(holder_ptr)); + } + + static void init_holder_from_existing(const detail::value_and_holder &v_h, + const holder_type *holder_ptr, std::false_type /*is_copy_constructible*/) { + new (std::addressof(v_h.holder())) holder_type(std::move(*const_cast(holder_ptr))); + } + + /// Initialize holder object, variant 2: try to construct from existing holder object, if possible + static void init_holder(detail::instance *inst, 
detail::value_and_holder &v_h, + const holder_type *holder_ptr, const void * /* dummy -- not enable_shared_from_this) */) { + if (holder_ptr) { + init_holder_from_existing(v_h, holder_ptr, std::is_copy_constructible()); + v_h.set_holder_constructed(); + } else if (inst->owned || detail::always_construct_holder::value) { + new (std::addressof(v_h.holder())) holder_type(v_h.value_ptr()); + v_h.set_holder_constructed(); + } + } + + /// Performs instance initialization including constructing a holder and registering the known + /// instance. Should be called as soon as the `type` value_ptr is set for an instance. Takes an + /// optional pointer to an existing holder to use; if not specified and the instance is + /// `.owned`, a new holder will be constructed to manage the value pointer. + static void init_instance(detail::instance *inst, const void *holder_ptr) { + auto v_h = inst->get_value_and_holder(detail::get_type_info(typeid(type))); + if (!v_h.instance_registered()) { + register_instance(inst, v_h.value_ptr(), v_h.type); + v_h.set_instance_registered(); + } + init_holder(inst, v_h, (const holder_type *) holder_ptr, v_h.value_ptr()); + } + + /// Deallocates an instance; via holder, if constructed; otherwise via operator delete. + static void dealloc(detail::value_and_holder &v_h) { + if (v_h.holder_constructed()) { + v_h.holder().~holder_type(); + v_h.set_holder_constructed(false); + } + else { + detail::call_operator_delete(v_h.value_ptr(), + v_h.type->type_size, + v_h.type->type_align + ); + } + v_h.value_ptr() = nullptr; + } + + static detail::function_record *get_function_record(handle h) { + h = detail::get_function(h); + return h ? (detail::function_record *) reinterpret_borrow(PyCFunction_GET_SELF(h.ptr())) + : nullptr; + } +}; + +/// Binds an existing constructor taking arguments Args... 
+template detail::initimpl::constructor init() { return {}; } +/// Like `init()`, but the instance is always constructed through the alias class (even +/// when not inheriting on the Python side). +template detail::initimpl::alias_constructor init_alias() { return {}; } + +/// Binds a factory function as a constructor +template > +Ret init(Func &&f) { return {std::forward(f)}; } + +/// Dual-argument factory function: the first function is called when no alias is needed, the second +/// when an alias is needed (i.e. due to python-side inheritance). Arguments must be identical. +template > +Ret init(CFunc &&c, AFunc &&a) { + return {std::forward(c), std::forward(a)}; +} + +/// Binds pickling functions `__getstate__` and `__setstate__` and ensures that the type +/// returned by `__getstate__` is the same as the argument accepted by `__setstate__`. +template +detail::initimpl::pickle_factory pickle(GetState &&g, SetState &&s) { + return {std::forward(g), std::forward(s)}; +} + +NAMESPACE_BEGIN(detail) +struct enum_base { + enum_base(handle base, handle parent) : m_base(base), m_parent(parent) { } + + PYBIND11_NOINLINE void init(bool is_arithmetic, bool is_convertible) { + m_base.attr("__entries") = dict(); + auto property = handle((PyObject *) &PyProperty_Type); + auto static_property = handle((PyObject *) get_internals().static_property_type); + + m_base.attr("__repr__") = cpp_function( + [](handle arg) -> str { + handle type = arg.get_type(); + object type_name = type.attr("__name__"); + dict entries = type.attr("__entries"); + for (const auto &kv : entries) { + object other = kv.second[int_(0)]; + if (other.equal(arg)) + return pybind11::str("{}.{}").format(type_name, kv.first); + } + return pybind11::str("{}.???").format(type_name); + }, is_method(m_base) + ); + + m_base.attr("name") = property(cpp_function( + [](handle arg) -> str { + dict entries = arg.get_type().attr("__entries"); + for (const auto &kv : entries) { + if (handle(kv.second[int_(0)]).equal(arg)) + 
return pybind11::str(kv.first); + } + return "???"; + }, is_method(m_base) + )); + + m_base.attr("__doc__") = static_property(cpp_function( + [](handle arg) -> std::string { + std::string docstring; + dict entries = arg.attr("__entries"); + if (((PyTypeObject *) arg.ptr())->tp_doc) + docstring += std::string(((PyTypeObject *) arg.ptr())->tp_doc) + "\n\n"; + docstring += "Members:"; + for (const auto &kv : entries) { + auto key = std::string(pybind11::str(kv.first)); + auto comment = kv.second[int_(1)]; + docstring += "\n\n " + key; + if (!comment.is_none()) + docstring += " : " + (std::string) pybind11::str(comment); + } + return docstring; + } + ), none(), none(), ""); + + m_base.attr("__members__") = static_property(cpp_function( + [](handle arg) -> dict { + dict entries = arg.attr("__entries"), m; + for (const auto &kv : entries) + m[kv.first] = kv.second[int_(0)]; + return m; + }), none(), none(), "" + ); + + #define PYBIND11_ENUM_OP_STRICT(op, expr, strict_behavior) \ + m_base.attr(op) = cpp_function( \ + [](object a, object b) { \ + if (!a.get_type().is(b.get_type())) \ + strict_behavior; \ + return expr; \ + }, \ + is_method(m_base)) + + #define PYBIND11_ENUM_OP_CONV(op, expr) \ + m_base.attr(op) = cpp_function( \ + [](object a_, object b_) { \ + int_ a(a_), b(b_); \ + return expr; \ + }, \ + is_method(m_base)) + + #define PYBIND11_ENUM_OP_CONV_LHS(op, expr) \ + m_base.attr(op) = cpp_function( \ + [](object a_, object b) { \ + int_ a(a_); \ + return expr; \ + }, \ + is_method(m_base)) + + if (is_convertible) { + PYBIND11_ENUM_OP_CONV_LHS("__eq__", !b.is_none() && a.equal(b)); + PYBIND11_ENUM_OP_CONV_LHS("__ne__", b.is_none() || !a.equal(b)); + + if (is_arithmetic) { + PYBIND11_ENUM_OP_CONV("__lt__", a < b); + PYBIND11_ENUM_OP_CONV("__gt__", a > b); + PYBIND11_ENUM_OP_CONV("__le__", a <= b); + PYBIND11_ENUM_OP_CONV("__ge__", a >= b); + PYBIND11_ENUM_OP_CONV("__and__", a & b); + PYBIND11_ENUM_OP_CONV("__rand__", a & b); + PYBIND11_ENUM_OP_CONV("__or__", a | 
b); + PYBIND11_ENUM_OP_CONV("__ror__", a | b); + PYBIND11_ENUM_OP_CONV("__xor__", a ^ b); + PYBIND11_ENUM_OP_CONV("__rxor__", a ^ b); + m_base.attr("__invert__") = cpp_function( + [](object arg) { return ~(int_(arg)); }, is_method(m_base)); + } + } else { + PYBIND11_ENUM_OP_STRICT("__eq__", int_(a).equal(int_(b)), return false); + PYBIND11_ENUM_OP_STRICT("__ne__", !int_(a).equal(int_(b)), return true); + + if (is_arithmetic) { + #define PYBIND11_THROW throw type_error("Expected an enumeration of matching type!"); + PYBIND11_ENUM_OP_STRICT("__lt__", int_(a) < int_(b), PYBIND11_THROW); + PYBIND11_ENUM_OP_STRICT("__gt__", int_(a) > int_(b), PYBIND11_THROW); + PYBIND11_ENUM_OP_STRICT("__le__", int_(a) <= int_(b), PYBIND11_THROW); + PYBIND11_ENUM_OP_STRICT("__ge__", int_(a) >= int_(b), PYBIND11_THROW); + #undef PYBIND11_THROW + } + } + + #undef PYBIND11_ENUM_OP_CONV_LHS + #undef PYBIND11_ENUM_OP_CONV + #undef PYBIND11_ENUM_OP_STRICT + + object getstate = cpp_function( + [](object arg) { return int_(arg); }, is_method(m_base)); + + m_base.attr("__getstate__") = getstate; + m_base.attr("__hash__") = getstate; + } + + PYBIND11_NOINLINE void value(char const* name_, object value, const char *doc = nullptr) { + dict entries = m_base.attr("__entries"); + str name(name_); + if (entries.contains(name)) { + std::string type_name = (std::string) str(m_base.attr("__name__")); + throw value_error(type_name + ": element \"" + std::string(name_) + "\" already exists!"); + } + + entries[name] = std::make_pair(value, doc); + m_base.attr(name) = value; + } + + PYBIND11_NOINLINE void export_values() { + dict entries = m_base.attr("__entries"); + for (const auto &kv : entries) + m_parent.attr(kv.first) = kv.second[int_(0)]; + } + + handle m_base; + handle m_parent; +}; + +NAMESPACE_END(detail) + +/// Binds C++ enumerations and enumeration classes to Python +template class enum_ : public class_ { +public: + using Base = class_; + using Base::def; + using Base::attr; + using 
Base::def_property_readonly; + using Base::def_property_readonly_static; + using Scalar = typename std::underlying_type::type; + + template + enum_(const handle &scope, const char *name, const Extra&... extra) + : class_(scope, name, extra...), m_base(*this, scope) { + constexpr bool is_arithmetic = detail::any_of...>::value; + constexpr bool is_convertible = std::is_convertible::value; + m_base.init(is_arithmetic, is_convertible); + + def(init([](Scalar i) { return static_cast(i); })); + def("__int__", [](Type value) { return (Scalar) value; }); + #if PY_MAJOR_VERSION < 3 + def("__long__", [](Type value) { return (Scalar) value; }); + #endif + #if PY_MAJOR_VERSION > 3 || (PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION >= 8) + def("__index__", [](Type value) { return (Scalar) value; }); + #endif + + cpp_function setstate( + [](Type &value, Scalar arg) { value = static_cast(arg); }, + is_method(*this)); + attr("__setstate__") = setstate; + } + + /// Export enumeration entries into the parent scope + enum_& export_values() { + m_base.export_values(); + return *this; + } + + /// Add an enumeration entry + enum_& value(char const* name, Type value, const char *doc = nullptr) { + m_base.value(name, pybind11::cast(value, return_value_policy::copy), doc); + return *this; + } + +private: + detail::enum_base m_base; +}; + +NAMESPACE_BEGIN(detail) + + +inline void keep_alive_impl(handle nurse, handle patient) { + if (!nurse || !patient) + pybind11_fail("Could not activate keep_alive!"); + + if (patient.is_none() || nurse.is_none()) + return; /* Nothing to keep alive or nothing to be kept alive by */ + + auto tinfo = all_type_info(Py_TYPE(nurse.ptr())); + if (!tinfo.empty()) { + /* It's a pybind-registered type, so we can store the patient in the + * internal list. */ + add_patient(nurse.ptr(), patient.ptr()); + } + else { + /* Fall back to clever approach based on weak references taken from + * Boost.Python. 
This is not used for pybind-registered types because + * the objects can be destroyed out-of-order in a GC pass. */ + cpp_function disable_lifesupport( + [patient](handle weakref) { patient.dec_ref(); weakref.dec_ref(); }); + + weakref wr(nurse, disable_lifesupport); + + patient.inc_ref(); /* reference patient and leak the weak reference */ + (void) wr.release(); + } +} + +PYBIND11_NOINLINE inline void keep_alive_impl(size_t Nurse, size_t Patient, function_call &call, handle ret) { + auto get_arg = [&](size_t n) { + if (n == 0) + return ret; + else if (n == 1 && call.init_self) + return call.init_self; + else if (n <= call.args.size()) + return call.args[n - 1]; + return handle(); + }; + + keep_alive_impl(get_arg(Nurse), get_arg(Patient)); +} + +inline std::pair all_type_info_get_cache(PyTypeObject *type) { + auto res = get_internals().registered_types_py +#ifdef __cpp_lib_unordered_map_try_emplace + .try_emplace(type); +#else + .emplace(type, std::vector()); +#endif + if (res.second) { + // New cache entry created; set up a weak reference to automatically remove it if the type + // gets destroyed: + weakref((PyObject *) type, cpp_function([type](handle wr) { + get_internals().registered_types_py.erase(type); + wr.dec_ref(); + })).release(); + } + + return res; +} + +template +struct iterator_state { + Iterator it; + Sentinel end; + bool first_or_done; +}; + +NAMESPACE_END(detail) + +/// Makes a python iterator from a first and past-the-end C++ InputIterator. +template ()), + typename... Extra> +iterator make_iterator(Iterator first, Sentinel last, Extra &&... 
extra) { + typedef detail::iterator_state state; + + if (!detail::get_type_info(typeid(state), false)) { + class_(handle(), "iterator", pybind11::module_local()) + .def("__iter__", [](state &s) -> state& { return s; }) + .def("__next__", [](state &s) -> ValueType { + if (!s.first_or_done) + ++s.it; + else + s.first_or_done = false; + if (s.it == s.end) { + s.first_or_done = true; + throw stop_iteration(); + } + return *s.it; + }, std::forward(extra)..., Policy); + } + + return cast(state{first, last, true}); +} + +/// Makes an python iterator over the keys (`.first`) of a iterator over pairs from a +/// first and past-the-end InputIterator. +template ()).first), + typename... Extra> +iterator make_key_iterator(Iterator first, Sentinel last, Extra &&... extra) { + typedef detail::iterator_state state; + + if (!detail::get_type_info(typeid(state), false)) { + class_(handle(), "iterator", pybind11::module_local()) + .def("__iter__", [](state &s) -> state& { return s; }) + .def("__next__", [](state &s) -> KeyType { + if (!s.first_or_done) + ++s.it; + else + s.first_or_done = false; + if (s.it == s.end) { + s.first_or_done = true; + throw stop_iteration(); + } + return (*s.it).first; + }, std::forward(extra)..., Policy); + } + + return cast(state{first, last, true}); +} + +/// Makes an iterator over values of an stl container or other container supporting +/// `std::begin()`/`std::end()` +template iterator make_iterator(Type &value, Extra&&... extra) { + return make_iterator(std::begin(value), std::end(value), extra...); +} + +/// Makes an iterator over the keys (`.first`) of a stl map-like container supporting +/// `std::begin()`/`std::end()` +template iterator make_key_iterator(Type &value, Extra&&... 
extra) { + return make_key_iterator(std::begin(value), std::end(value), extra...); +} + +template void implicitly_convertible() { + struct set_flag { + bool &flag; + set_flag(bool &flag) : flag(flag) { flag = true; } + ~set_flag() { flag = false; } + }; + auto implicit_caster = [](PyObject *obj, PyTypeObject *type) -> PyObject * { + static bool currently_used = false; + if (currently_used) // implicit conversions are non-reentrant + return nullptr; + set_flag flag_helper(currently_used); + if (!detail::make_caster().load(obj, false)) + return nullptr; + tuple args(1); + args[0] = obj; + PyObject *result = PyObject_Call((PyObject *) type, args.ptr(), nullptr); + if (result == nullptr) + PyErr_Clear(); + return result; + }; + + if (auto tinfo = detail::get_type_info(typeid(OutputType))) + tinfo->implicit_conversions.push_back(implicit_caster); + else + pybind11_fail("implicitly_convertible: Unable to find type " + type_id()); +} + +template +void register_exception_translator(ExceptionTranslator&& translator) { + detail::get_internals().registered_exception_translators.push_front( + std::forward(translator)); +} + +/** + * Wrapper to generate a new Python exception type. + * + * This should only be used with PyErr_SetString for now. + * It is not (yet) possible to use as a py::base. + * Template type argument is reserved for future use. 
+ */ +template +class exception : public object { +public: + exception() = default; + exception(handle scope, const char *name, PyObject *base = PyExc_Exception) { + std::string full_name = scope.attr("__name__").cast() + + std::string(".") + name; + m_ptr = PyErr_NewException(const_cast(full_name.c_str()), base, NULL); + if (hasattr(scope, name)) + pybind11_fail("Error during initialization: multiple incompatible " + "definitions with name \"" + std::string(name) + "\""); + scope.attr(name) = *this; + } + + // Sets the current python exception to this exception object with the given message + void operator()(const char *message) { + PyErr_SetString(m_ptr, message); + } +}; + +NAMESPACE_BEGIN(detail) +// Returns a reference to a function-local static exception object used in the simple +// register_exception approach below. (It would be simpler to have the static local variable +// directly in register_exception, but that makes clang <3.5 segfault - issue #1349). +template +exception &get_exception_object() { static exception ex; return ex; } +NAMESPACE_END(detail) + +/** + * Registers a Python exception in `m` of the given `name` and installs an exception translator to + * translate the C++ exception to the created Python exception using the exceptions what() method. + * This is intended for simple exception translations; for more complex translation, register the + * exception object and translator directly. 
+ */ +template +exception ®ister_exception(handle scope, + const char *name, + PyObject *base = PyExc_Exception) { + auto &ex = detail::get_exception_object(); + if (!ex) ex = exception(scope, name, base); + + register_exception_translator([](std::exception_ptr p) { + if (!p) return; + try { + std::rethrow_exception(p); + } catch (const CppException &e) { + detail::get_exception_object()(e.what()); + } + }); + return ex; +} + +NAMESPACE_BEGIN(detail) +PYBIND11_NOINLINE inline void print(tuple args, dict kwargs) { + auto strings = tuple(args.size()); + for (size_t i = 0; i < args.size(); ++i) { + strings[i] = str(args[i]); + } + auto sep = kwargs.contains("sep") ? kwargs["sep"] : cast(" "); + auto line = sep.attr("join")(strings); + + object file; + if (kwargs.contains("file")) { + file = kwargs["file"].cast(); + } else { + try { + file = module::import("sys").attr("stdout"); + } catch (const error_already_set &) { + /* If print() is called from code that is executed as + part of garbage collection during interpreter shutdown, + importing 'sys' can fail. Give up rather than crashing the + interpreter in this case. */ + return; + } + } + + auto write = file.attr("write"); + write(line); + write(kwargs.contains("end") ? kwargs["end"] : cast("\n")); + + if (kwargs.contains("flush") && kwargs["flush"].cast()) + file.attr("flush")(); +} +NAMESPACE_END(detail) + +template +void print(Args &&...args) { + auto c = detail::collect_arguments(std::forward(args)...); + detail::print(c.args(), c.kwargs()); +} + +#if defined(WITH_THREAD) && !defined(PYPY_VERSION) + +/* The functions below essentially reproduce the PyGILState_* API using a RAII + * pattern, but there are a few important differences: + * + * 1. When acquiring the GIL from an non-main thread during the finalization + * phase, the GILState API blindly terminates the calling thread, which + * is often not what is wanted. This API does not do this. + * + * 2. 
The gil_scoped_release function can optionally cut the relationship + * of a PyThreadState and its associated thread, which allows moving it to + * another thread (this is a fairly rare/advanced use case). + * + * 3. The reference count of an acquired thread state can be controlled. This + * can be handy to prevent cases where callbacks issued from an external + * thread would otherwise constantly construct and destroy thread state data + * structures. + * + * See the Python bindings of NanoGUI (http://github.com/wjakob/nanogui) for an + * example which uses features 2 and 3 to migrate the Python thread of + * execution to another thread (to run the event loop on the original thread, + * in this case). + */ + +class gil_scoped_acquire { +public: + PYBIND11_NOINLINE gil_scoped_acquire() { + auto const &internals = detail::get_internals(); + tstate = (PyThreadState *) PYBIND11_TLS_GET_VALUE(internals.tstate); + + if (!tstate) { + /* Check if the GIL was acquired using the PyGILState_* API instead (e.g. if + calling from a Python thread). Since we use a different key, this ensures + we don't create a new thread state and deadlock in PyEval_AcquireThread + below. Note we don't save this state with internals.tstate, since we don't + create it we would fail to clear it (its reference count should be > 0). 
*/ + tstate = PyGILState_GetThisThreadState(); + } + + if (!tstate) { + tstate = PyThreadState_New(internals.istate); + #if !defined(NDEBUG) + if (!tstate) + pybind11_fail("scoped_acquire: could not create thread state!"); + #endif + tstate->gilstate_counter = 0; + PYBIND11_TLS_REPLACE_VALUE(internals.tstate, tstate); + } else { + release = detail::get_thread_state_unchecked() != tstate; + } + + if (release) { + /* Work around an annoying assertion in PyThreadState_Swap */ + #if defined(Py_DEBUG) + PyInterpreterState *interp = tstate->interp; + tstate->interp = nullptr; + #endif + PyEval_AcquireThread(tstate); + #if defined(Py_DEBUG) + tstate->interp = interp; + #endif + } + + inc_ref(); + } + + void inc_ref() { + ++tstate->gilstate_counter; + } + + PYBIND11_NOINLINE void dec_ref() { + --tstate->gilstate_counter; + #if !defined(NDEBUG) + if (detail::get_thread_state_unchecked() != tstate) + pybind11_fail("scoped_acquire::dec_ref(): thread state must be current!"); + if (tstate->gilstate_counter < 0) + pybind11_fail("scoped_acquire::dec_ref(): reference count underflow!"); + #endif + if (tstate->gilstate_counter == 0) { + #if !defined(NDEBUG) + if (!release) + pybind11_fail("scoped_acquire::dec_ref(): internal error!"); + #endif + PyThreadState_Clear(tstate); + PyThreadState_DeleteCurrent(); + PYBIND11_TLS_DELETE_VALUE(detail::get_internals().tstate); + release = false; + } + } + + PYBIND11_NOINLINE ~gil_scoped_acquire() { + dec_ref(); + if (release) + PyEval_SaveThread(); + } +private: + PyThreadState *tstate = nullptr; + bool release = true; +}; + +class gil_scoped_release { +public: + explicit gil_scoped_release(bool disassoc = false) : disassoc(disassoc) { + // `get_internals()` must be called here unconditionally in order to initialize + // `internals.tstate` for subsequent `gil_scoped_acquire` calls. Otherwise, an + // initialization race could occur as multiple threads try `gil_scoped_acquire`. 
+ const auto &internals = detail::get_internals(); + tstate = PyEval_SaveThread(); + if (disassoc) { + auto key = internals.tstate; + PYBIND11_TLS_DELETE_VALUE(key); + } + } + ~gil_scoped_release() { + if (!tstate) + return; + PyEval_RestoreThread(tstate); + if (disassoc) { + auto key = detail::get_internals().tstate; + PYBIND11_TLS_REPLACE_VALUE(key, tstate); + } + } +private: + PyThreadState *tstate; + bool disassoc; +}; +#elif defined(PYPY_VERSION) +class gil_scoped_acquire { + PyGILState_STATE state; +public: + gil_scoped_acquire() { state = PyGILState_Ensure(); } + ~gil_scoped_acquire() { PyGILState_Release(state); } +}; + +class gil_scoped_release { + PyThreadState *state; +public: + gil_scoped_release() { state = PyEval_SaveThread(); } + ~gil_scoped_release() { PyEval_RestoreThread(state); } +}; +#else +class gil_scoped_acquire { }; +class gil_scoped_release { }; +#endif + +error_already_set::~error_already_set() { + if (m_type) { + gil_scoped_acquire gil; + error_scope scope; + m_type.release().dec_ref(); + m_value.release().dec_ref(); + m_trace.release().dec_ref(); + } +} + +inline function get_type_overload(const void *this_ptr, const detail::type_info *this_type, const char *name) { + handle self = detail::get_object_handle(this_ptr, this_type); + if (!self) + return function(); + handle type = self.get_type(); + auto key = std::make_pair(type.ptr(), name); + + /* Cache functions that aren't overloaded in Python to avoid + many costly Python dictionary lookups below */ + auto &cache = detail::get_internals().inactive_overload_cache; + if (cache.find(key) != cache.end()) + return function(); + + function overload = getattr(self, name, function()); + if (overload.is_cpp_function()) { + cache.insert(key); + return function(); + } + + /* Don't call dispatch code if invoked from overridden function. + Unfortunately this doesn't work on PyPy. 
*/ +#if !defined(PYPY_VERSION) + PyFrameObject *frame = PyThreadState_Get()->frame; + if (frame && (std::string) str(frame->f_code->co_name) == name && + frame->f_code->co_argcount > 0) { + PyFrame_FastToLocals(frame); + PyObject *self_caller = PyDict_GetItem( + frame->f_locals, PyTuple_GET_ITEM(frame->f_code->co_varnames, 0)); + if (self_caller == self.ptr()) + return function(); + } +#else + /* PyPy currently doesn't provide a detailed cpyext emulation of + frame objects, so we have to emulate this using Python. This + is going to be slow..*/ + dict d; d["self"] = self; d["name"] = pybind11::str(name); + PyObject *result = PyRun_String( + "import inspect\n" + "frame = inspect.currentframe()\n" + "if frame is not None:\n" + " frame = frame.f_back\n" + " if frame is not None and str(frame.f_code.co_name) == name and " + "frame.f_code.co_argcount > 0:\n" + " self_caller = frame.f_locals[frame.f_code.co_varnames[0]]\n" + " if self_caller == self:\n" + " self = None\n", + Py_file_input, d.ptr(), d.ptr()); + if (result == nullptr) + throw error_already_set(); + if (d["self"].is_none()) + return function(); + Py_DECREF(result); +#endif + + return overload; +} + +/** \rst + Try to retrieve a python method by the provided name from the instance pointed to by the this_ptr. + + :this_ptr: The pointer to the object the overload should be retrieved for. This should be the first + non-trampoline class encountered in the inheritance chain. + :name: The name of the overloaded Python method to retrieve. + :return: The Python method by this name from the object or an empty function wrapper. + \endrst */ +template function get_overload(const T *this_ptr, const char *name) { + auto tinfo = detail::get_type_info(typeid(T)); + return tinfo ? get_type_overload(this_ptr, tinfo, name) : function(); +} + +#define PYBIND11_OVERLOAD_INT(ret_type, cname, name, ...) 
{ \ + pybind11::gil_scoped_acquire gil; \ + pybind11::function overload = pybind11::get_overload(static_cast(this), name); \ + if (overload) { \ + auto o = overload(__VA_ARGS__); \ + if (pybind11::detail::cast_is_temporary_value_reference::value) { \ + static pybind11::detail::overload_caster_t caster; \ + return pybind11::detail::cast_ref(std::move(o), caster); \ + } \ + else return pybind11::detail::cast_safe(std::move(o)); \ + } \ + } + +/** \rst + Macro to populate the virtual method in the trampoline class. This macro tries to look up a method named 'fn' + from the Python side, deals with the :ref:`gil` and necessary argument conversions to call this method and return + the appropriate type. See :ref:`overriding_virtuals` for more information. This macro should be used when the method + name in C is not the same as the method name in Python. For example with `__str__`. + + .. code-block:: cpp + + std::string toString() override { + PYBIND11_OVERLOAD_NAME( + std::string, // Return type (ret_type) + Animal, // Parent class (cname) + toString, // Name of function in C++ (name) + "__str__", // Name of method in Python (fn) + ); + } +\endrst */ +#define PYBIND11_OVERLOAD_NAME(ret_type, cname, name, fn, ...) \ + PYBIND11_OVERLOAD_INT(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), name, __VA_ARGS__) \ + return cname::fn(__VA_ARGS__) + +/** \rst + Macro for pure virtual functions, this function is identical to :c:macro:`PYBIND11_OVERLOAD_NAME`, except that it + throws if no overload can be found. +\endrst */ +#define PYBIND11_OVERLOAD_PURE_NAME(ret_type, cname, name, fn, ...) \ + PYBIND11_OVERLOAD_INT(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), name, __VA_ARGS__) \ + pybind11::pybind11_fail("Tried to call pure virtual function \"" PYBIND11_STRINGIFY(cname) "::" name "\""); + +/** \rst + Macro to populate the virtual method in the trampoline class. 
This macro tries to look up the method + from the Python side, deals with the :ref:`gil` and necessary argument conversions to call this method and return + the appropriate type. This macro should be used if the method name in C and in Python are identical. + See :ref:`overriding_virtuals` for more information. + + .. code-block:: cpp + + class PyAnimal : public Animal { + public: + // Inherit the constructors + using Animal::Animal; + + // Trampoline (need one for each virtual function) + std::string go(int n_times) override { + PYBIND11_OVERLOAD_PURE( + std::string, // Return type (ret_type) + Animal, // Parent class (cname) + go, // Name of function in C++ (must match Python name) (fn) + n_times // Argument(s) (...) + ); + } + }; +\endrst */ +#define PYBIND11_OVERLOAD(ret_type, cname, fn, ...) \ + PYBIND11_OVERLOAD_NAME(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), #fn, fn, __VA_ARGS__) + +/** \rst + Macro for pure virtual functions, this function is identical to :c:macro:`PYBIND11_OVERLOAD`, except that it throws + if no overload can be found. +\endrst */ +#define PYBIND11_OVERLOAD_PURE(ret_type, cname, fn, ...) \ + PYBIND11_OVERLOAD_PURE_NAME(PYBIND11_TYPE(ret_type), PYBIND11_TYPE(cname), #fn, fn, __VA_ARGS__) + +NAMESPACE_END(PYBIND11_NAMESPACE) + +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) +# pragma warning(pop) +#elif defined(__GNUG__) && !defined(__clang__) +# pragma GCC diagnostic pop +#endif diff --git a/thirdparty/pybind11/include/pybind11/pytypes.h b/thirdparty/pybind11/include/pybind11/pytypes.h new file mode 100644 index 000000000..4003d6918 --- /dev/null +++ b/thirdparty/pybind11/include/pybind11/pytypes.h @@ -0,0 +1,1484 @@ +/* + pybind11/pytypes.h: Convenience wrapper classes for basic Python types + + Copyright (c) 2016 Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. 
+*/ + +#pragma once + +#include "detail/common.h" +#include "buffer_info.h" +#include +#include + +NAMESPACE_BEGIN(PYBIND11_NAMESPACE) + +/* A few forward declarations */ +class handle; class object; +class str; class iterator; +struct arg; struct arg_v; + +NAMESPACE_BEGIN(detail) +class args_proxy; +inline bool isinstance_generic(handle obj, const std::type_info &tp); + +// Accessor forward declarations +template class accessor; +namespace accessor_policies { + struct obj_attr; + struct str_attr; + struct generic_item; + struct sequence_item; + struct list_item; + struct tuple_item; +} +using obj_attr_accessor = accessor; +using str_attr_accessor = accessor; +using item_accessor = accessor; +using sequence_accessor = accessor; +using list_accessor = accessor; +using tuple_accessor = accessor; + +/// Tag and check to identify a class which implements the Python object API +class pyobject_tag { }; +template using is_pyobject = std::is_base_of>; + +/** \rst + A mixin class which adds common functions to `handle`, `object` and various accessors. + The only requirement for `Derived` is to implement ``PyObject *Derived::ptr() const``. +\endrst */ +template +class object_api : public pyobject_tag { + const Derived &derived() const { return static_cast(*this); } + +public: + /** \rst + Return an iterator equivalent to calling ``iter()`` in Python. The object + must be a collection which supports the iteration protocol. + \endrst */ + iterator begin() const; + /// Return a sentinel which ends iteration. + iterator end() const; + + /** \rst + Return an internal functor to invoke the object's sequence protocol. Casting + the returned ``detail::item_accessor`` instance to a `handle` or `object` + subclass causes a corresponding call to ``__getitem__``. Assigning a `handle` + or `object` subclass causes a call to ``__setitem__``. 
+ \endrst */ + item_accessor operator[](handle key) const; + /// See above (the only difference is that they key is provided as a string literal) + item_accessor operator[](const char *key) const; + + /** \rst + Return an internal functor to access the object's attributes. Casting the + returned ``detail::obj_attr_accessor`` instance to a `handle` or `object` + subclass causes a corresponding call to ``getattr``. Assigning a `handle` + or `object` subclass causes a call to ``setattr``. + \endrst */ + obj_attr_accessor attr(handle key) const; + /// See above (the only difference is that they key is provided as a string literal) + str_attr_accessor attr(const char *key) const; + + /** \rst + Matches * unpacking in Python, e.g. to unpack arguments out of a ``tuple`` + or ``list`` for a function call. Applying another * to the result yields + ** unpacking, e.g. to unpack a dict as function keyword arguments. + See :ref:`calling_python_functions`. + \endrst */ + args_proxy operator*() const; + + /// Check if the given item is contained within this object, i.e. ``item in obj``. + template bool contains(T &&item) const; + + /** \rst + Assuming the Python object is a function or implements the ``__call__`` + protocol, ``operator()`` invokes the underlying function, passing an + arbitrary set of parameters. The result is returned as a `object` and + may need to be converted back into a Python object using `handle::cast()`. + + When some of the arguments cannot be converted to Python objects, the + function will throw a `cast_error` exception. When the Python function + call fails, a `error_already_set` exception is thrown. + \endrst */ + template + object operator()(Args &&...args) const; + template + PYBIND11_DEPRECATED("call(...) was deprecated in favor of operator()(...)") + object call(Args&&... args) const; + + /// Equivalent to ``obj is other`` in Python. 
+ bool is(object_api const& other) const { return derived().ptr() == other.derived().ptr(); } + /// Equivalent to ``obj is None`` in Python. + bool is_none() const { return derived().ptr() == Py_None; } + /// Equivalent to obj == other in Python + bool equal(object_api const &other) const { return rich_compare(other, Py_EQ); } + bool not_equal(object_api const &other) const { return rich_compare(other, Py_NE); } + bool operator<(object_api const &other) const { return rich_compare(other, Py_LT); } + bool operator<=(object_api const &other) const { return rich_compare(other, Py_LE); } + bool operator>(object_api const &other) const { return rich_compare(other, Py_GT); } + bool operator>=(object_api const &other) const { return rich_compare(other, Py_GE); } + + object operator-() const; + object operator~() const; + object operator+(object_api const &other) const; + object operator+=(object_api const &other) const; + object operator-(object_api const &other) const; + object operator-=(object_api const &other) const; + object operator*(object_api const &other) const; + object operator*=(object_api const &other) const; + object operator/(object_api const &other) const; + object operator/=(object_api const &other) const; + object operator|(object_api const &other) const; + object operator|=(object_api const &other) const; + object operator&(object_api const &other) const; + object operator&=(object_api const &other) const; + object operator^(object_api const &other) const; + object operator^=(object_api const &other) const; + object operator<<(object_api const &other) const; + object operator<<=(object_api const &other) const; + object operator>>(object_api const &other) const; + object operator>>=(object_api const &other) const; + + PYBIND11_DEPRECATED("Use py::str(obj) instead") + pybind11::str str() const; + + /// Get or set the object's docstring, i.e. ``obj.__doc__``. 
+ str_attr_accessor doc() const; + + /// Return the object's current reference count + int ref_count() const { return static_cast(Py_REFCNT(derived().ptr())); } + /// Return a handle to the Python type object underlying the instance + handle get_type() const; + +private: + bool rich_compare(object_api const &other, int value) const; +}; + +NAMESPACE_END(detail) + +/** \rst + Holds a reference to a Python object (no reference counting) + + The `handle` class is a thin wrapper around an arbitrary Python object (i.e. a + ``PyObject *`` in Python's C API). It does not perform any automatic reference + counting and merely provides a basic C++ interface to various Python API functions. + + .. seealso:: + The `object` class inherits from `handle` and adds automatic reference + counting features. +\endrst */ +class handle : public detail::object_api { +public: + /// The default constructor creates a handle with a ``nullptr``-valued pointer + handle() = default; + /// Creates a ``handle`` from the given raw Python object pointer + handle(PyObject *ptr) : m_ptr(ptr) { } // Allow implicit conversion from PyObject* + + /// Return the underlying ``PyObject *`` pointer + PyObject *ptr() const { return m_ptr; } + PyObject *&ptr() { return m_ptr; } + + /** \rst + Manually increase the reference count of the Python object. Usually, it is + preferable to use the `object` class which derives from `handle` and calls + this function automatically. Returns a reference to itself. + \endrst */ + const handle& inc_ref() const & { Py_XINCREF(m_ptr); return *this; } + + /** \rst + Manually decrease the reference count of the Python object. Usually, it is + preferable to use the `object` class which derives from `handle` and calls + this function automatically. Returns a reference to itself. + \endrst */ + const handle& dec_ref() const & { Py_XDECREF(m_ptr); return *this; } + + /** \rst + Attempt to cast the Python object into the given C++ type. A `cast_error` + will be throw upon failure. 
+ \endrst */ + template T cast() const; + /// Return ``true`` when the `handle` wraps a valid Python object + explicit operator bool() const { return m_ptr != nullptr; } + /** \rst + Deprecated: Check that the underlying pointers are the same. + Equivalent to ``obj1 is obj2`` in Python. + \endrst */ + PYBIND11_DEPRECATED("Use obj1.is(obj2) instead") + bool operator==(const handle &h) const { return m_ptr == h.m_ptr; } + PYBIND11_DEPRECATED("Use !obj1.is(obj2) instead") + bool operator!=(const handle &h) const { return m_ptr != h.m_ptr; } + PYBIND11_DEPRECATED("Use handle::operator bool() instead") + bool check() const { return m_ptr != nullptr; } +protected: + PyObject *m_ptr = nullptr; +}; + +/** \rst + Holds a reference to a Python object (with reference counting) + + Like `handle`, the `object` class is a thin wrapper around an arbitrary Python + object (i.e. a ``PyObject *`` in Python's C API). In contrast to `handle`, it + optionally increases the object's reference count upon construction, and it + *always* decreases the reference count when the `object` instance goes out of + scope and is destructed. When using `object` instances consistently, it is much + easier to get reference counting right at the first attempt. +\endrst */ +class object : public handle { +public: + object() = default; + PYBIND11_DEPRECATED("Use reinterpret_borrow() or reinterpret_steal()") + object(handle h, bool is_borrowed) : handle(h) { if (is_borrowed) inc_ref(); } + /// Copy constructor; always increases the reference count + object(const object &o) : handle(o) { inc_ref(); } + /// Move constructor; steals the object from ``other`` and preserves its reference count + object(object &&other) noexcept { m_ptr = other.m_ptr; other.m_ptr = nullptr; } + /// Destructor; automatically calls `handle::dec_ref()` + ~object() { dec_ref(); } + + /** \rst + Resets the internal pointer to ``nullptr`` without without decreasing the + object's reference count. 
The function returns a raw handle to the original + Python object. + \endrst */ + handle release() { + PyObject *tmp = m_ptr; + m_ptr = nullptr; + return handle(tmp); + } + + object& operator=(const object &other) { + other.inc_ref(); + dec_ref(); + m_ptr = other.m_ptr; + return *this; + } + + object& operator=(object &&other) noexcept { + if (this != &other) { + handle temp(m_ptr); + m_ptr = other.m_ptr; + other.m_ptr = nullptr; + temp.dec_ref(); + } + return *this; + } + + // Calling cast() on an object lvalue just copies (via handle::cast) + template T cast() const &; + // Calling on an object rvalue does a move, if needed and/or possible + template T cast() &&; + +protected: + // Tags for choosing constructors from raw PyObject * + struct borrowed_t { }; + struct stolen_t { }; + + template friend T reinterpret_borrow(handle); + template friend T reinterpret_steal(handle); + +public: + // Only accessible from derived classes and the reinterpret_* functions + object(handle h, borrowed_t) : handle(h) { inc_ref(); } + object(handle h, stolen_t) : handle(h) { } +}; + +/** \rst + Declare that a `handle` or ``PyObject *`` is a certain type and borrow the reference. + The target type ``T`` must be `object` or one of its derived classes. The function + doesn't do any conversions or checks. It's up to the user to make sure that the + target type is correct. + + .. code-block:: cpp + + PyObject *p = PyList_GetItem(obj, index); + py::object o = reinterpret_borrow(p); + // or + py::tuple t = reinterpret_borrow(p); // <-- `p` must be already be a `tuple` +\endrst */ +template T reinterpret_borrow(handle h) { return {h, object::borrowed_t{}}; } + +/** \rst + Like `reinterpret_borrow`, but steals the reference. + + .. 
code-block:: cpp + + PyObject *p = PyObject_Str(obj); + py::str s = reinterpret_steal(p); // <-- `p` must be already be a `str` +\endrst */ +template T reinterpret_steal(handle h) { return {h, object::stolen_t{}}; } + +NAMESPACE_BEGIN(detail) +inline std::string error_string(); +NAMESPACE_END(detail) + +/// Fetch and hold an error which was already set in Python. An instance of this is typically +/// thrown to propagate python-side errors back through C++ which can either be caught manually or +/// else falls back to the function dispatcher (which then raises the captured error back to +/// python). +class error_already_set : public std::runtime_error { +public: + /// Constructs a new exception from the current Python error indicator, if any. The current + /// Python error indicator will be cleared. + error_already_set() : std::runtime_error(detail::error_string()) { + PyErr_Fetch(&m_type.ptr(), &m_value.ptr(), &m_trace.ptr()); + } + + error_already_set(const error_already_set &) = default; + error_already_set(error_already_set &&) = default; + + inline ~error_already_set(); + + /// Give the currently-held error back to Python, if any. If there is currently a Python error + /// already set it is cleared first. After this call, the current object no longer stores the + /// error variables (but the `.what()` string is still available). + void restore() { PyErr_Restore(m_type.release().ptr(), m_value.release().ptr(), m_trace.release().ptr()); } + + // Does nothing; provided for backwards compatibility. + PYBIND11_DEPRECATED("Use of error_already_set.clear() is deprecated") + void clear() {} + + /// Check if the currently trapped error type matches the given Python exception class (or a + /// subclass thereof). May also be passed a tuple to search for any exception class matches in + /// the given tuple. 
+ bool matches(handle exc) const { return PyErr_GivenExceptionMatches(m_type.ptr(), exc.ptr()); } + + const object& type() const { return m_type; } + const object& value() const { return m_value; } + const object& trace() const { return m_trace; } + +private: + object m_type, m_value, m_trace; +}; + +/** \defgroup python_builtins _ + Unless stated otherwise, the following C++ functions behave the same + as their Python counterparts. + */ + +/** \ingroup python_builtins + \rst + Return true if ``obj`` is an instance of ``T``. Type ``T`` must be a subclass of + `object` or a class which was exposed to Python as ``py::class_``. +\endrst */ +template ::value, int> = 0> +bool isinstance(handle obj) { return T::check_(obj); } + +template ::value, int> = 0> +bool isinstance(handle obj) { return detail::isinstance_generic(obj, typeid(T)); } + +template <> inline bool isinstance(handle obj) = delete; +template <> inline bool isinstance(handle obj) { return obj.ptr() != nullptr; } + +/// \ingroup python_builtins +/// Return true if ``obj`` is an instance of the ``type``. 
+inline bool isinstance(handle obj, handle type) { + const auto result = PyObject_IsInstance(obj.ptr(), type.ptr()); + if (result == -1) + throw error_already_set(); + return result != 0; +} + +/// \addtogroup python_builtins +/// @{ +inline bool hasattr(handle obj, handle name) { + return PyObject_HasAttr(obj.ptr(), name.ptr()) == 1; +} + +inline bool hasattr(handle obj, const char *name) { + return PyObject_HasAttrString(obj.ptr(), name) == 1; +} + +inline void delattr(handle obj, handle name) { + if (PyObject_DelAttr(obj.ptr(), name.ptr()) != 0) { throw error_already_set(); } +} + +inline void delattr(handle obj, const char *name) { + if (PyObject_DelAttrString(obj.ptr(), name) != 0) { throw error_already_set(); } +} + +inline object getattr(handle obj, handle name) { + PyObject *result = PyObject_GetAttr(obj.ptr(), name.ptr()); + if (!result) { throw error_already_set(); } + return reinterpret_steal(result); +} + +inline object getattr(handle obj, const char *name) { + PyObject *result = PyObject_GetAttrString(obj.ptr(), name); + if (!result) { throw error_already_set(); } + return reinterpret_steal(result); +} + +inline object getattr(handle obj, handle name, handle default_) { + if (PyObject *result = PyObject_GetAttr(obj.ptr(), name.ptr())) { + return reinterpret_steal(result); + } else { + PyErr_Clear(); + return reinterpret_borrow(default_); + } +} + +inline object getattr(handle obj, const char *name, handle default_) { + if (PyObject *result = PyObject_GetAttrString(obj.ptr(), name)) { + return reinterpret_steal(result); + } else { + PyErr_Clear(); + return reinterpret_borrow(default_); + } +} + +inline void setattr(handle obj, handle name, handle value) { + if (PyObject_SetAttr(obj.ptr(), name.ptr(), value.ptr()) != 0) { throw error_already_set(); } +} + +inline void setattr(handle obj, const char *name, handle value) { + if (PyObject_SetAttrString(obj.ptr(), name, value.ptr()) != 0) { throw error_already_set(); } +} + +inline ssize_t hash(handle obj) { 
+ auto h = PyObject_Hash(obj.ptr()); + if (h == -1) { throw error_already_set(); } + return h; +} + +/// @} python_builtins + +NAMESPACE_BEGIN(detail) +inline handle get_function(handle value) { + if (value) { +#if PY_MAJOR_VERSION >= 3 + if (PyInstanceMethod_Check(value.ptr())) + value = PyInstanceMethod_GET_FUNCTION(value.ptr()); + else +#endif + if (PyMethod_Check(value.ptr())) + value = PyMethod_GET_FUNCTION(value.ptr()); + } + return value; +} + +// Helper aliases/functions to support implicit casting of values given to python accessors/methods. +// When given a pyobject, this simply returns the pyobject as-is; for other C++ type, the value goes +// through pybind11::cast(obj) to convert it to an `object`. +template ::value, int> = 0> +auto object_or_cast(T &&o) -> decltype(std::forward(o)) { return std::forward(o); } +// The following casting version is implemented in cast.h: +template ::value, int> = 0> +object object_or_cast(T &&o); +// Match a PyObject*, which we want to convert directly to handle via its converting constructor +inline handle object_or_cast(PyObject *ptr) { return ptr; } + +template +class accessor : public object_api> { + using key_type = typename Policy::key_type; + +public: + accessor(handle obj, key_type key) : obj(obj), key(std::move(key)) { } + accessor(const accessor &) = default; + accessor(accessor &&) = default; + + // accessor overload required to override default assignment operator (templates are not allowed + // to replace default compiler-generated assignments). + void operator=(const accessor &a) && { std::move(*this).operator=(handle(a)); } + void operator=(const accessor &a) & { operator=(handle(a)); } + + template void operator=(T &&value) && { + Policy::set(obj, key, object_or_cast(std::forward(value))); + } + template void operator=(T &&value) & { + get_cache() = reinterpret_borrow(object_or_cast(std::forward(value))); + } + + template + PYBIND11_DEPRECATED("Use of obj.attr(...) 
as bool is deprecated in favor of pybind11::hasattr(obj, ...)") + explicit operator enable_if_t::value || + std::is_same::value, bool>() const { + return hasattr(obj, key); + } + template + PYBIND11_DEPRECATED("Use of obj[key] as bool is deprecated in favor of obj.contains(key)") + explicit operator enable_if_t::value, bool>() const { + return obj.contains(key); + } + + operator object() const { return get_cache(); } + PyObject *ptr() const { return get_cache().ptr(); } + template T cast() const { return get_cache().template cast(); } + +private: + object &get_cache() const { + if (!cache) { cache = Policy::get(obj, key); } + return cache; + } + +private: + handle obj; + key_type key; + mutable object cache; +}; + +NAMESPACE_BEGIN(accessor_policies) +struct obj_attr { + using key_type = object; + static object get(handle obj, handle key) { return getattr(obj, key); } + static void set(handle obj, handle key, handle val) { setattr(obj, key, val); } +}; + +struct str_attr { + using key_type = const char *; + static object get(handle obj, const char *key) { return getattr(obj, key); } + static void set(handle obj, const char *key, handle val) { setattr(obj, key, val); } +}; + +struct generic_item { + using key_type = object; + + static object get(handle obj, handle key) { + PyObject *result = PyObject_GetItem(obj.ptr(), key.ptr()); + if (!result) { throw error_already_set(); } + return reinterpret_steal(result); + } + + static void set(handle obj, handle key, handle val) { + if (PyObject_SetItem(obj.ptr(), key.ptr(), val.ptr()) != 0) { throw error_already_set(); } + } +}; + +struct sequence_item { + using key_type = size_t; + + static object get(handle obj, size_t index) { + PyObject *result = PySequence_GetItem(obj.ptr(), static_cast(index)); + if (!result) { throw error_already_set(); } + return reinterpret_steal(result); + } + + static void set(handle obj, size_t index, handle val) { + // PySequence_SetItem does not steal a reference to 'val' + if 
(PySequence_SetItem(obj.ptr(), static_cast(index), val.ptr()) != 0) { + throw error_already_set(); + } + } +}; + +struct list_item { + using key_type = size_t; + + static object get(handle obj, size_t index) { + PyObject *result = PyList_GetItem(obj.ptr(), static_cast(index)); + if (!result) { throw error_already_set(); } + return reinterpret_borrow(result); + } + + static void set(handle obj, size_t index, handle val) { + // PyList_SetItem steals a reference to 'val' + if (PyList_SetItem(obj.ptr(), static_cast(index), val.inc_ref().ptr()) != 0) { + throw error_already_set(); + } + } +}; + +struct tuple_item { + using key_type = size_t; + + static object get(handle obj, size_t index) { + PyObject *result = PyTuple_GetItem(obj.ptr(), static_cast(index)); + if (!result) { throw error_already_set(); } + return reinterpret_borrow(result); + } + + static void set(handle obj, size_t index, handle val) { + // PyTuple_SetItem steals a reference to 'val' + if (PyTuple_SetItem(obj.ptr(), static_cast(index), val.inc_ref().ptr()) != 0) { + throw error_already_set(); + } + } +}; +NAMESPACE_END(accessor_policies) + +/// STL iterator template used for tuple, list, sequence and dict +template +class generic_iterator : public Policy { + using It = generic_iterator; + +public: + using difference_type = ssize_t; + using iterator_category = typename Policy::iterator_category; + using value_type = typename Policy::value_type; + using reference = typename Policy::reference; + using pointer = typename Policy::pointer; + + generic_iterator() = default; + generic_iterator(handle seq, ssize_t index) : Policy(seq, index) { } + + reference operator*() const { return Policy::dereference(); } + reference operator[](difference_type n) const { return *(*this + n); } + pointer operator->() const { return **this; } + + It &operator++() { Policy::increment(); return *this; } + It operator++(int) { auto copy = *this; Policy::increment(); return copy; } + It &operator--() { Policy::decrement(); return 
*this; } + It operator--(int) { auto copy = *this; Policy::decrement(); return copy; } + It &operator+=(difference_type n) { Policy::advance(n); return *this; } + It &operator-=(difference_type n) { Policy::advance(-n); return *this; } + + friend It operator+(const It &a, difference_type n) { auto copy = a; return copy += n; } + friend It operator+(difference_type n, const It &b) { return b + n; } + friend It operator-(const It &a, difference_type n) { auto copy = a; return copy -= n; } + friend difference_type operator-(const It &a, const It &b) { return a.distance_to(b); } + + friend bool operator==(const It &a, const It &b) { return a.equal(b); } + friend bool operator!=(const It &a, const It &b) { return !(a == b); } + friend bool operator< (const It &a, const It &b) { return b - a > 0; } + friend bool operator> (const It &a, const It &b) { return b < a; } + friend bool operator>=(const It &a, const It &b) { return !(a < b); } + friend bool operator<=(const It &a, const It &b) { return !(a > b); } +}; + +NAMESPACE_BEGIN(iterator_policies) +/// Quick proxy class needed to implement ``operator->`` for iterators which can't return pointers +template +struct arrow_proxy { + T value; + + arrow_proxy(T &&value) : value(std::move(value)) { } + T *operator->() const { return &value; } +}; + +/// Lightweight iterator policy using just a simple pointer: see ``PySequence_Fast_ITEMS`` +class sequence_fast_readonly { +protected: + using iterator_category = std::random_access_iterator_tag; + using value_type = handle; + using reference = const handle; + using pointer = arrow_proxy; + + sequence_fast_readonly(handle obj, ssize_t n) : ptr(PySequence_Fast_ITEMS(obj.ptr()) + n) { } + + reference dereference() const { return *ptr; } + void increment() { ++ptr; } + void decrement() { --ptr; } + void advance(ssize_t n) { ptr += n; } + bool equal(const sequence_fast_readonly &b) const { return ptr == b.ptr; } + ssize_t distance_to(const sequence_fast_readonly &b) const { return ptr 
- b.ptr; } + +private: + PyObject **ptr; +}; + +/// Full read and write access using the sequence protocol: see ``detail::sequence_accessor`` +class sequence_slow_readwrite { +protected: + using iterator_category = std::random_access_iterator_tag; + using value_type = object; + using reference = sequence_accessor; + using pointer = arrow_proxy; + + sequence_slow_readwrite(handle obj, ssize_t index) : obj(obj), index(index) { } + + reference dereference() const { return {obj, static_cast(index)}; } + void increment() { ++index; } + void decrement() { --index; } + void advance(ssize_t n) { index += n; } + bool equal(const sequence_slow_readwrite &b) const { return index == b.index; } + ssize_t distance_to(const sequence_slow_readwrite &b) const { return index - b.index; } + +private: + handle obj; + ssize_t index; +}; + +/// Python's dictionary protocol permits this to be a forward iterator +class dict_readonly { +protected: + using iterator_category = std::forward_iterator_tag; + using value_type = std::pair; + using reference = const value_type; + using pointer = arrow_proxy; + + dict_readonly() = default; + dict_readonly(handle obj, ssize_t pos) : obj(obj), pos(pos) { increment(); } + + reference dereference() const { return {key, value}; } + void increment() { if (!PyDict_Next(obj.ptr(), &pos, &key, &value)) { pos = -1; } } + bool equal(const dict_readonly &b) const { return pos == b.pos; } + +private: + handle obj; + PyObject *key = nullptr, *value = nullptr; + ssize_t pos = -1; +}; +NAMESPACE_END(iterator_policies) + +#if !defined(PYPY_VERSION) +using tuple_iterator = generic_iterator; +using list_iterator = generic_iterator; +#else +using tuple_iterator = generic_iterator; +using list_iterator = generic_iterator; +#endif + +using sequence_iterator = generic_iterator; +using dict_iterator = generic_iterator; + +inline bool PyIterable_Check(PyObject *obj) { + PyObject *iter = PyObject_GetIter(obj); + if (iter) { + Py_DECREF(iter); + return true; + } else { + 
PyErr_Clear(); + return false; + } +} + +inline bool PyNone_Check(PyObject *o) { return o == Py_None; } +#if PY_MAJOR_VERSION >= 3 +inline bool PyEllipsis_Check(PyObject *o) { return o == Py_Ellipsis; } +#endif + +inline bool PyUnicode_Check_Permissive(PyObject *o) { return PyUnicode_Check(o) || PYBIND11_BYTES_CHECK(o); } + +inline bool PyStaticMethod_Check(PyObject *o) { return o->ob_type == &PyStaticMethod_Type; } + +class kwargs_proxy : public handle { +public: + explicit kwargs_proxy(handle h) : handle(h) { } +}; + +class args_proxy : public handle { +public: + explicit args_proxy(handle h) : handle(h) { } + kwargs_proxy operator*() const { return kwargs_proxy(*this); } +}; + +/// Python argument categories (using PEP 448 terms) +template using is_keyword = std::is_base_of; +template using is_s_unpacking = std::is_same; // * unpacking +template using is_ds_unpacking = std::is_same; // ** unpacking +template using is_positional = satisfies_none_of; +template using is_keyword_or_ds = satisfies_any_of; + +// Call argument collector forward declarations +template +class simple_collector; +template +class unpacking_collector; + +NAMESPACE_END(detail) + +// TODO: After the deprecated constructors are removed, this macro can be simplified by +// inheriting ctors: `using Parent::Parent`. It's not an option right now because +// the `using` statement triggers the parent deprecation warning even if the ctor +// isn't even used. +#define PYBIND11_OBJECT_COMMON(Name, Parent, CheckFun) \ + public: \ + PYBIND11_DEPRECATED("Use reinterpret_borrow<"#Name">() or reinterpret_steal<"#Name">()") \ + Name(handle h, bool is_borrowed) : Parent(is_borrowed ? 
Parent(h, borrowed_t{}) : Parent(h, stolen_t{})) { } \ + Name(handle h, borrowed_t) : Parent(h, borrowed_t{}) { } \ + Name(handle h, stolen_t) : Parent(h, stolen_t{}) { } \ + PYBIND11_DEPRECATED("Use py::isinstance(obj) instead") \ + bool check() const { return m_ptr != nullptr && (bool) CheckFun(m_ptr); } \ + static bool check_(handle h) { return h.ptr() != nullptr && CheckFun(h.ptr()); } + +#define PYBIND11_OBJECT_CVT(Name, Parent, CheckFun, ConvertFun) \ + PYBIND11_OBJECT_COMMON(Name, Parent, CheckFun) \ + /* This is deliberately not 'explicit' to allow implicit conversion from object: */ \ + Name(const object &o) \ + : Parent(check_(o) ? o.inc_ref().ptr() : ConvertFun(o.ptr()), stolen_t{}) \ + { if (!m_ptr) throw error_already_set(); } \ + Name(object &&o) \ + : Parent(check_(o) ? o.release().ptr() : ConvertFun(o.ptr()), stolen_t{}) \ + { if (!m_ptr) throw error_already_set(); } \ + template \ + Name(const ::pybind11::detail::accessor &a) : Name(object(a)) { } + +#define PYBIND11_OBJECT(Name, Parent, CheckFun) \ + PYBIND11_OBJECT_COMMON(Name, Parent, CheckFun) \ + /* This is deliberately not 'explicit' to allow implicit conversion from object: */ \ + Name(const object &o) : Parent(o) { } \ + Name(object &&o) : Parent(std::move(o)) { } + +#define PYBIND11_OBJECT_DEFAULT(Name, Parent, CheckFun) \ + PYBIND11_OBJECT(Name, Parent, CheckFun) \ + Name() : Parent() { } + +/// \addtogroup pytypes +/// @{ + +/** \rst + Wraps a Python iterator so that it can also be used as a C++ input iterator + + Caveat: copying an iterator does not (and cannot) clone the internal + state of the Python iterable. This also applies to the post-increment + operator. This iterator should only be used to retrieve the current + value using ``operator*()``. 
+\endrst */ +class iterator : public object { +public: + using iterator_category = std::input_iterator_tag; + using difference_type = ssize_t; + using value_type = handle; + using reference = const handle; + using pointer = const handle *; + + PYBIND11_OBJECT_DEFAULT(iterator, object, PyIter_Check) + + iterator& operator++() { + advance(); + return *this; + } + + iterator operator++(int) { + auto rv = *this; + advance(); + return rv; + } + + reference operator*() const { + if (m_ptr && !value.ptr()) { + auto& self = const_cast(*this); + self.advance(); + } + return value; + } + + pointer operator->() const { operator*(); return &value; } + + /** \rst + The value which marks the end of the iteration. ``it == iterator::sentinel()`` + is equivalent to catching ``StopIteration`` in Python. + + .. code-block:: cpp + + void foo(py::iterator it) { + while (it != py::iterator::sentinel()) { + // use `*it` + ++it; + } + } + \endrst */ + static iterator sentinel() { return {}; } + + friend bool operator==(const iterator &a, const iterator &b) { return a->ptr() == b->ptr(); } + friend bool operator!=(const iterator &a, const iterator &b) { return a->ptr() != b->ptr(); } + +private: + void advance() { + value = reinterpret_steal(PyIter_Next(m_ptr)); + if (PyErr_Occurred()) { throw error_already_set(); } + } + +private: + object value = {}; +}; + +class iterable : public object { +public: + PYBIND11_OBJECT_DEFAULT(iterable, object, detail::PyIterable_Check) +}; + +class bytes; + +class str : public object { +public: + PYBIND11_OBJECT_CVT(str, object, detail::PyUnicode_Check_Permissive, raw_str) + + str(const char *c, size_t n) + : object(PyUnicode_FromStringAndSize(c, (ssize_t) n), stolen_t{}) { + if (!m_ptr) pybind11_fail("Could not allocate string object!"); + } + + // 'explicit' is explicitly omitted from the following constructors to allow implicit conversion to py::str from C++ string-like objects + str(const char *c = "") + : object(PyUnicode_FromString(c), stolen_t{}) { 
+ if (!m_ptr) pybind11_fail("Could not allocate string object!"); + } + + str(const std::string &s) : str(s.data(), s.size()) { } + + explicit str(const bytes &b); + + /** \rst + Return a string representation of the object. This is analogous to + the ``str()`` function in Python. + \endrst */ + explicit str(handle h) : object(raw_str(h.ptr()), stolen_t{}) { } + + operator std::string() const { + object temp = *this; + if (PyUnicode_Check(m_ptr)) { + temp = reinterpret_steal(PyUnicode_AsUTF8String(m_ptr)); + if (!temp) + pybind11_fail("Unable to extract string contents! (encoding issue)"); + } + char *buffer; + ssize_t length; + if (PYBIND11_BYTES_AS_STRING_AND_SIZE(temp.ptr(), &buffer, &length)) + pybind11_fail("Unable to extract string contents! (invalid type)"); + return std::string(buffer, (size_t) length); + } + + template + str format(Args &&...args) const { + return attr("format")(std::forward(args)...); + } + +private: + /// Return string representation -- always returns a new reference, even if already a str + static PyObject *raw_str(PyObject *op) { + PyObject *str_value = PyObject_Str(op); +#if PY_MAJOR_VERSION < 3 + if (!str_value) throw error_already_set(); + PyObject *unicode = PyUnicode_FromEncodedObject(str_value, "utf-8", nullptr); + Py_XDECREF(str_value); str_value = unicode; +#endif + return str_value; + } +}; +/// @} pytypes + +inline namespace literals { +/** \rst + String literal version of `str` + \endrst */ +inline str operator"" _s(const char *s, size_t size) { return {s, size}; } +} + +/// \addtogroup pytypes +/// @{ +class bytes : public object { +public: + PYBIND11_OBJECT(bytes, object, PYBIND11_BYTES_CHECK) + + // Allow implicit conversion: + bytes(const char *c = "") + : object(PYBIND11_BYTES_FROM_STRING(c), stolen_t{}) { + if (!m_ptr) pybind11_fail("Could not allocate bytes object!"); + } + + bytes(const char *c, size_t n) + : object(PYBIND11_BYTES_FROM_STRING_AND_SIZE(c, (ssize_t) n), stolen_t{}) { + if (!m_ptr) pybind11_fail("Could 
not allocate bytes object!"); + } + + // Allow implicit conversion: + bytes(const std::string &s) : bytes(s.data(), s.size()) { } + + explicit bytes(const pybind11::str &s); + + operator std::string() const { + char *buffer; + ssize_t length; + if (PYBIND11_BYTES_AS_STRING_AND_SIZE(m_ptr, &buffer, &length)) + pybind11_fail("Unable to extract bytes contents!"); + return std::string(buffer, (size_t) length); + } +}; + +inline bytes::bytes(const pybind11::str &s) { + object temp = s; + if (PyUnicode_Check(s.ptr())) { + temp = reinterpret_steal(PyUnicode_AsUTF8String(s.ptr())); + if (!temp) + pybind11_fail("Unable to extract string contents! (encoding issue)"); + } + char *buffer; + ssize_t length; + if (PYBIND11_BYTES_AS_STRING_AND_SIZE(temp.ptr(), &buffer, &length)) + pybind11_fail("Unable to extract string contents! (invalid type)"); + auto obj = reinterpret_steal(PYBIND11_BYTES_FROM_STRING_AND_SIZE(buffer, length)); + if (!obj) + pybind11_fail("Could not allocate bytes object!"); + m_ptr = obj.release().ptr(); +} + +inline str::str(const bytes& b) { + char *buffer; + ssize_t length; + if (PYBIND11_BYTES_AS_STRING_AND_SIZE(b.ptr(), &buffer, &length)) + pybind11_fail("Unable to extract bytes contents!"); + auto obj = reinterpret_steal(PyUnicode_FromStringAndSize(buffer, (ssize_t) length)); + if (!obj) + pybind11_fail("Could not allocate string object!"); + m_ptr = obj.release().ptr(); +} + +class none : public object { +public: + PYBIND11_OBJECT(none, object, detail::PyNone_Check) + none() : object(Py_None, borrowed_t{}) { } +}; + +#if PY_MAJOR_VERSION >= 3 +class ellipsis : public object { +public: + PYBIND11_OBJECT(ellipsis, object, detail::PyEllipsis_Check) + ellipsis() : object(Py_Ellipsis, borrowed_t{}) { } +}; +#endif + +class bool_ : public object { +public: + PYBIND11_OBJECT_CVT(bool_, object, PyBool_Check, raw_bool) + bool_() : object(Py_False, borrowed_t{}) { } + // Allow implicit conversion from and to `bool`: + bool_(bool value) : object(value ? 
Py_True : Py_False, borrowed_t{}) { } + operator bool() const { return m_ptr && PyLong_AsLong(m_ptr) != 0; } + +private: + /// Return the truth value of an object -- always returns a new reference + static PyObject *raw_bool(PyObject *op) { + const auto value = PyObject_IsTrue(op); + if (value == -1) return nullptr; + return handle(value ? Py_True : Py_False).inc_ref().ptr(); + } +}; + +NAMESPACE_BEGIN(detail) +// Converts a value to the given unsigned type. If an error occurs, you get back (Unsigned) -1; +// otherwise you get back the unsigned long or unsigned long long value cast to (Unsigned). +// (The distinction is critically important when casting a returned -1 error value to some other +// unsigned type: (A)-1 != (B)-1 when A and B are unsigned types of different sizes). +template +Unsigned as_unsigned(PyObject *o) { + if (sizeof(Unsigned) <= sizeof(unsigned long) +#if PY_VERSION_HEX < 0x03000000 + || PyInt_Check(o) +#endif + ) { + unsigned long v = PyLong_AsUnsignedLong(o); + return v == (unsigned long) -1 && PyErr_Occurred() ? (Unsigned) -1 : (Unsigned) v; + } + else { + unsigned long long v = PyLong_AsUnsignedLongLong(o); + return v == (unsigned long long) -1 && PyErr_Occurred() ? 
(Unsigned) -1 : (Unsigned) v; + } +} +NAMESPACE_END(detail) + +class int_ : public object { +public: + PYBIND11_OBJECT_CVT(int_, object, PYBIND11_LONG_CHECK, PyNumber_Long) + int_() : object(PyLong_FromLong(0), stolen_t{}) { } + // Allow implicit conversion from C++ integral types: + template ::value, int> = 0> + int_(T value) { + if (sizeof(T) <= sizeof(long)) { + if (std::is_signed::value) + m_ptr = PyLong_FromLong((long) value); + else + m_ptr = PyLong_FromUnsignedLong((unsigned long) value); + } else { + if (std::is_signed::value) + m_ptr = PyLong_FromLongLong((long long) value); + else + m_ptr = PyLong_FromUnsignedLongLong((unsigned long long) value); + } + if (!m_ptr) pybind11_fail("Could not allocate int object!"); + } + + template ::value, int> = 0> + operator T() const { + return std::is_unsigned::value + ? detail::as_unsigned(m_ptr) + : sizeof(T) <= sizeof(long) + ? (T) PyLong_AsLong(m_ptr) + : (T) PYBIND11_LONG_AS_LONGLONG(m_ptr); + } +}; + +class float_ : public object { +public: + PYBIND11_OBJECT_CVT(float_, object, PyFloat_Check, PyNumber_Float) + // Allow implicit conversion from float/double: + float_(float value) : object(PyFloat_FromDouble((double) value), stolen_t{}) { + if (!m_ptr) pybind11_fail("Could not allocate float object!"); + } + float_(double value = .0) : object(PyFloat_FromDouble((double) value), stolen_t{}) { + if (!m_ptr) pybind11_fail("Could not allocate float object!"); + } + operator float() const { return (float) PyFloat_AsDouble(m_ptr); } + operator double() const { return (double) PyFloat_AsDouble(m_ptr); } +}; + +class weakref : public object { +public: + PYBIND11_OBJECT_DEFAULT(weakref, object, PyWeakref_Check) + explicit weakref(handle obj, handle callback = {}) + : object(PyWeakref_NewRef(obj.ptr(), callback.ptr()), stolen_t{}) { + if (!m_ptr) pybind11_fail("Could not allocate weak reference!"); + } +}; + +class slice : public object { +public: + PYBIND11_OBJECT_DEFAULT(slice, object, PySlice_Check) + slice(ssize_t start_, 
ssize_t stop_, ssize_t step_) { + int_ start(start_), stop(stop_), step(step_); + m_ptr = PySlice_New(start.ptr(), stop.ptr(), step.ptr()); + if (!m_ptr) pybind11_fail("Could not allocate slice object!"); + } + bool compute(size_t length, size_t *start, size_t *stop, size_t *step, + size_t *slicelength) const { + return PySlice_GetIndicesEx((PYBIND11_SLICE_OBJECT *) m_ptr, + (ssize_t) length, (ssize_t *) start, + (ssize_t *) stop, (ssize_t *) step, + (ssize_t *) slicelength) == 0; + } + bool compute(ssize_t length, ssize_t *start, ssize_t *stop, ssize_t *step, + ssize_t *slicelength) const { + return PySlice_GetIndicesEx((PYBIND11_SLICE_OBJECT *) m_ptr, + length, start, + stop, step, + slicelength) == 0; + } +}; + +class capsule : public object { +public: + PYBIND11_OBJECT_DEFAULT(capsule, object, PyCapsule_CheckExact) + PYBIND11_DEPRECATED("Use reinterpret_borrow() or reinterpret_steal()") + capsule(PyObject *ptr, bool is_borrowed) : object(is_borrowed ? object(ptr, borrowed_t{}) : object(ptr, stolen_t{})) { } + + explicit capsule(const void *value, const char *name = nullptr, void (*destructor)(PyObject *) = nullptr) + : object(PyCapsule_New(const_cast(value), name, destructor), stolen_t{}) { + if (!m_ptr) + pybind11_fail("Could not allocate capsule object!"); + } + + PYBIND11_DEPRECATED("Please pass a destructor that takes a void pointer as input") + capsule(const void *value, void (*destruct)(PyObject *)) + : object(PyCapsule_New(const_cast(value), nullptr, destruct), stolen_t{}) { + if (!m_ptr) + pybind11_fail("Could not allocate capsule object!"); + } + + capsule(const void *value, void (*destructor)(void *)) { + m_ptr = PyCapsule_New(const_cast(value), nullptr, [](PyObject *o) { + auto destructor = reinterpret_cast(PyCapsule_GetContext(o)); + void *ptr = PyCapsule_GetPointer(o, nullptr); + destructor(ptr); + }); + + if (!m_ptr) + pybind11_fail("Could not allocate capsule object!"); + + if (PyCapsule_SetContext(m_ptr, (void *) destructor) != 0) + 
pybind11_fail("Could not set capsule context!"); + } + + capsule(void (*destructor)()) { + m_ptr = PyCapsule_New(reinterpret_cast(destructor), nullptr, [](PyObject *o) { + auto destructor = reinterpret_cast(PyCapsule_GetPointer(o, nullptr)); + destructor(); + }); + + if (!m_ptr) + pybind11_fail("Could not allocate capsule object!"); + } + + template operator T *() const { + auto name = this->name(); + T * result = static_cast(PyCapsule_GetPointer(m_ptr, name)); + if (!result) pybind11_fail("Unable to extract capsule contents!"); + return result; + } + + const char *name() const { return PyCapsule_GetName(m_ptr); } +}; + +class tuple : public object { +public: + PYBIND11_OBJECT_CVT(tuple, object, PyTuple_Check, PySequence_Tuple) + explicit tuple(size_t size = 0) : object(PyTuple_New((ssize_t) size), stolen_t{}) { + if (!m_ptr) pybind11_fail("Could not allocate tuple object!"); + } + size_t size() const { return (size_t) PyTuple_Size(m_ptr); } + bool empty() const { return size() == 0; } + detail::tuple_accessor operator[](size_t index) const { return {*this, index}; } + detail::item_accessor operator[](handle h) const { return object::operator[](h); } + detail::tuple_iterator begin() const { return {*this, 0}; } + detail::tuple_iterator end() const { return {*this, PyTuple_GET_SIZE(m_ptr)}; } +}; + +class dict : public object { +public: + PYBIND11_OBJECT_CVT(dict, object, PyDict_Check, raw_dict) + dict() : object(PyDict_New(), stolen_t{}) { + if (!m_ptr) pybind11_fail("Could not allocate dict object!"); + } + template ...>::value>, + // MSVC workaround: it can't compile an out-of-line definition, so defer the collector + typename collector = detail::deferred_t, Args...>> + explicit dict(Args &&...args) : dict(collector(std::forward(args)...).kwargs()) { } + + size_t size() const { return (size_t) PyDict_Size(m_ptr); } + bool empty() const { return size() == 0; } + detail::dict_iterator begin() const { return {*this, 0}; } + detail::dict_iterator end() const { return 
{}; } + void clear() const { PyDict_Clear(ptr()); } + template bool contains(T &&key) const { + return PyDict_Contains(m_ptr, detail::object_or_cast(std::forward(key)).ptr()) == 1; + } + +private: + /// Call the `dict` Python type -- always returns a new reference + static PyObject *raw_dict(PyObject *op) { + if (PyDict_Check(op)) + return handle(op).inc_ref().ptr(); + return PyObject_CallFunctionObjArgs((PyObject *) &PyDict_Type, op, nullptr); + } +}; + +class sequence : public object { +public: + PYBIND11_OBJECT_DEFAULT(sequence, object, PySequence_Check) + size_t size() const { return (size_t) PySequence_Size(m_ptr); } + bool empty() const { return size() == 0; } + detail::sequence_accessor operator[](size_t index) const { return {*this, index}; } + detail::item_accessor operator[](handle h) const { return object::operator[](h); } + detail::sequence_iterator begin() const { return {*this, 0}; } + detail::sequence_iterator end() const { return {*this, PySequence_Size(m_ptr)}; } +}; + +class list : public object { +public: + PYBIND11_OBJECT_CVT(list, object, PyList_Check, PySequence_List) + explicit list(size_t size = 0) : object(PyList_New((ssize_t) size), stolen_t{}) { + if (!m_ptr) pybind11_fail("Could not allocate list object!"); + } + size_t size() const { return (size_t) PyList_Size(m_ptr); } + bool empty() const { return size() == 0; } + detail::list_accessor operator[](size_t index) const { return {*this, index}; } + detail::item_accessor operator[](handle h) const { return object::operator[](h); } + detail::list_iterator begin() const { return {*this, 0}; } + detail::list_iterator end() const { return {*this, PyList_GET_SIZE(m_ptr)}; } + template void append(T &&val) const { + PyList_Append(m_ptr, detail::object_or_cast(std::forward(val)).ptr()); + } + template void insert(size_t index, T &&val) const { + PyList_Insert(m_ptr, static_cast(index), + detail::object_or_cast(std::forward(val)).ptr()); + } +}; + +class args : public tuple { 
PYBIND11_OBJECT_DEFAULT(args, tuple, PyTuple_Check) }; +class kwargs : public dict { PYBIND11_OBJECT_DEFAULT(kwargs, dict, PyDict_Check) }; + +class set : public object { +public: + PYBIND11_OBJECT_CVT(set, object, PySet_Check, PySet_New) + set() : object(PySet_New(nullptr), stolen_t{}) { + if (!m_ptr) pybind11_fail("Could not allocate set object!"); + } + size_t size() const { return (size_t) PySet_Size(m_ptr); } + bool empty() const { return size() == 0; } + template bool add(T &&val) const { + return PySet_Add(m_ptr, detail::object_or_cast(std::forward(val)).ptr()) == 0; + } + void clear() const { PySet_Clear(m_ptr); } + template bool contains(T &&val) const { + return PySet_Contains(m_ptr, detail::object_or_cast(std::forward(val)).ptr()) == 1; + } +}; + +class function : public object { +public: + PYBIND11_OBJECT_DEFAULT(function, object, PyCallable_Check) + handle cpp_function() const { + handle fun = detail::get_function(m_ptr); + if (fun && PyCFunction_Check(fun.ptr())) + return fun; + return handle(); + } + bool is_cpp_function() const { return (bool) cpp_function(); } +}; + +class staticmethod : public object { +public: + PYBIND11_OBJECT_CVT(staticmethod, object, detail::PyStaticMethod_Check, PyStaticMethod_New) +}; + +class buffer : public object { +public: + PYBIND11_OBJECT_DEFAULT(buffer, object, PyObject_CheckBuffer) + + buffer_info request(bool writable = false) const { + int flags = PyBUF_STRIDES | PyBUF_FORMAT; + if (writable) flags |= PyBUF_WRITABLE; + Py_buffer *view = new Py_buffer(); + if (PyObject_GetBuffer(m_ptr, view, flags) != 0) { + delete view; + throw error_already_set(); + } + return buffer_info(view); + } +}; + +class memoryview : public object { +public: + explicit memoryview(const buffer_info& info) { + static Py_buffer buf { }; + // Py_buffer uses signed sizes, strides and shape!.. 
+ static std::vector py_strides { }; + static std::vector py_shape { }; + buf.buf = info.ptr; + buf.itemsize = info.itemsize; + buf.format = const_cast(info.format.c_str()); + buf.ndim = (int) info.ndim; + buf.len = info.size; + py_strides.clear(); + py_shape.clear(); + for (size_t i = 0; i < (size_t) info.ndim; ++i) { + py_strides.push_back(info.strides[i]); + py_shape.push_back(info.shape[i]); + } + buf.strides = py_strides.data(); + buf.shape = py_shape.data(); + buf.suboffsets = nullptr; + buf.readonly = info.readonly; + buf.internal = nullptr; + + m_ptr = PyMemoryView_FromBuffer(&buf); + if (!m_ptr) + pybind11_fail("Unable to create memoryview from buffer descriptor"); + } + + PYBIND11_OBJECT_CVT(memoryview, object, PyMemoryView_Check, PyMemoryView_FromObject) +}; +/// @} pytypes + +/// \addtogroup python_builtins +/// @{ +inline size_t len(handle h) { + ssize_t result = PyObject_Length(h.ptr()); + if (result < 0) + pybind11_fail("Unable to compute length of object"); + return (size_t) result; +} + +inline size_t len_hint(handle h) { +#if PY_VERSION_HEX >= 0x03040000 + ssize_t result = PyObject_LengthHint(h.ptr(), 0); +#else + ssize_t result = PyObject_Length(h.ptr()); +#endif + if (result < 0) { + // Sometimes a length can't be determined at all (eg generators) + // In which case simply return 0 + PyErr_Clear(); + return 0; + } + return (size_t) result; +} + +inline str repr(handle h) { + PyObject *str_value = PyObject_Repr(h.ptr()); + if (!str_value) throw error_already_set(); +#if PY_MAJOR_VERSION < 3 + PyObject *unicode = PyUnicode_FromEncodedObject(str_value, "utf-8", nullptr); + Py_XDECREF(str_value); str_value = unicode; + if (!str_value) throw error_already_set(); +#endif + return reinterpret_steal(str_value); +} + +inline iterator iter(handle obj) { + PyObject *result = PyObject_GetIter(obj.ptr()); + if (!result) { throw error_already_set(); } + return reinterpret_steal(result); +} +/// @} python_builtins + +NAMESPACE_BEGIN(detail) +template iterator 
object_api::begin() const { return iter(derived()); } +template iterator object_api::end() const { return iterator::sentinel(); } +template item_accessor object_api::operator[](handle key) const { + return {derived(), reinterpret_borrow(key)}; +} +template item_accessor object_api::operator[](const char *key) const { + return {derived(), pybind11::str(key)}; +} +template obj_attr_accessor object_api::attr(handle key) const { + return {derived(), reinterpret_borrow(key)}; +} +template str_attr_accessor object_api::attr(const char *key) const { + return {derived(), key}; +} +template args_proxy object_api::operator*() const { + return args_proxy(derived().ptr()); +} +template template bool object_api::contains(T &&item) const { + return attr("__contains__")(std::forward(item)).template cast(); +} + +template +pybind11::str object_api::str() const { return pybind11::str(derived()); } + +template +str_attr_accessor object_api::doc() const { return attr("__doc__"); } + +template +handle object_api::get_type() const { return (PyObject *) Py_TYPE(derived().ptr()); } + +template +bool object_api::rich_compare(object_api const &other, int value) const { + int rv = PyObject_RichCompareBool(derived().ptr(), other.derived().ptr(), value); + if (rv == -1) + throw error_already_set(); + return rv == 1; +} + +#define PYBIND11_MATH_OPERATOR_UNARY(op, fn) \ + template object object_api::op() const { \ + object result = reinterpret_steal(fn(derived().ptr())); \ + if (!result.ptr()) \ + throw error_already_set(); \ + return result; \ + } + +#define PYBIND11_MATH_OPERATOR_BINARY(op, fn) \ + template \ + object object_api::op(object_api const &other) const { \ + object result = reinterpret_steal( \ + fn(derived().ptr(), other.derived().ptr())); \ + if (!result.ptr()) \ + throw error_already_set(); \ + return result; \ + } + +PYBIND11_MATH_OPERATOR_UNARY (operator~, PyNumber_Invert) +PYBIND11_MATH_OPERATOR_UNARY (operator-, PyNumber_Negative) +PYBIND11_MATH_OPERATOR_BINARY(operator+, 
PyNumber_Add) +PYBIND11_MATH_OPERATOR_BINARY(operator+=, PyNumber_InPlaceAdd) +PYBIND11_MATH_OPERATOR_BINARY(operator-, PyNumber_Subtract) +PYBIND11_MATH_OPERATOR_BINARY(operator-=, PyNumber_InPlaceSubtract) +PYBIND11_MATH_OPERATOR_BINARY(operator*, PyNumber_Multiply) +PYBIND11_MATH_OPERATOR_BINARY(operator*=, PyNumber_InPlaceMultiply) +PYBIND11_MATH_OPERATOR_BINARY(operator/, PyNumber_TrueDivide) +PYBIND11_MATH_OPERATOR_BINARY(operator/=, PyNumber_InPlaceTrueDivide) +PYBIND11_MATH_OPERATOR_BINARY(operator|, PyNumber_Or) +PYBIND11_MATH_OPERATOR_BINARY(operator|=, PyNumber_InPlaceOr) +PYBIND11_MATH_OPERATOR_BINARY(operator&, PyNumber_And) +PYBIND11_MATH_OPERATOR_BINARY(operator&=, PyNumber_InPlaceAnd) +PYBIND11_MATH_OPERATOR_BINARY(operator^, PyNumber_Xor) +PYBIND11_MATH_OPERATOR_BINARY(operator^=, PyNumber_InPlaceXor) +PYBIND11_MATH_OPERATOR_BINARY(operator<<, PyNumber_Lshift) +PYBIND11_MATH_OPERATOR_BINARY(operator<<=, PyNumber_InPlaceLshift) +PYBIND11_MATH_OPERATOR_BINARY(operator>>, PyNumber_Rshift) +PYBIND11_MATH_OPERATOR_BINARY(operator>>=, PyNumber_InPlaceRshift) + +#undef PYBIND11_MATH_OPERATOR_UNARY +#undef PYBIND11_MATH_OPERATOR_BINARY + +NAMESPACE_END(detail) +NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/thirdparty/pybind11/include/pybind11/stl.h b/thirdparty/pybind11/include/pybind11/stl.h new file mode 100644 index 000000000..32f8d294a --- /dev/null +++ b/thirdparty/pybind11/include/pybind11/stl.h @@ -0,0 +1,386 @@ +/* + pybind11/stl.h: Transparent conversion for STL data types + + Copyright (c) 2016 Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. 
+*/ + +#pragma once + +#include "pybind11.h" +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable: 4127) // warning C4127: Conditional expression is constant +#endif + +#ifdef __has_include +// std::optional (but including it in c++14 mode isn't allowed) +# if defined(PYBIND11_CPP17) && __has_include() +# include +# define PYBIND11_HAS_OPTIONAL 1 +# endif +// std::experimental::optional (but not allowed in c++11 mode) +# if defined(PYBIND11_CPP14) && (__has_include() && \ + !__has_include()) +# include +# define PYBIND11_HAS_EXP_OPTIONAL 1 +# endif +// std::variant +# if defined(PYBIND11_CPP17) && __has_include() +# include +# define PYBIND11_HAS_VARIANT 1 +# endif +#elif defined(_MSC_VER) && defined(PYBIND11_CPP17) +# include +# include +# define PYBIND11_HAS_OPTIONAL 1 +# define PYBIND11_HAS_VARIANT 1 +#endif + +NAMESPACE_BEGIN(PYBIND11_NAMESPACE) +NAMESPACE_BEGIN(detail) + +/// Extracts an const lvalue reference or rvalue reference for U based on the type of T (e.g. for +/// forwarding a container element). Typically used indirect via forwarded_type(), below. +template +using forwarded_type = conditional_t< + std::is_lvalue_reference::value, remove_reference_t &, remove_reference_t &&>; + +/// Forwards a value U as rvalue or lvalue according to whether T is rvalue or lvalue; typically +/// used for forwarding a container's elements. 
+template +forwarded_type forward_like(U &&u) { + return std::forward>(std::forward(u)); +} + +template struct set_caster { + using type = Type; + using key_conv = make_caster; + + bool load(handle src, bool convert) { + if (!isinstance(src)) + return false; + auto s = reinterpret_borrow(src); + value.clear(); + for (auto entry : s) { + key_conv conv; + if (!conv.load(entry, convert)) + return false; + value.insert(cast_op(std::move(conv))); + } + return true; + } + + template + static handle cast(T &&src, return_value_policy policy, handle parent) { + if (!std::is_lvalue_reference::value) + policy = return_value_policy_override::policy(policy); + pybind11::set s; + for (auto &&value : src) { + auto value_ = reinterpret_steal(key_conv::cast(forward_like(value), policy, parent)); + if (!value_ || !s.add(value_)) + return handle(); + } + return s.release(); + } + + PYBIND11_TYPE_CASTER(type, _("Set[") + key_conv::name + _("]")); +}; + +template struct map_caster { + using key_conv = make_caster; + using value_conv = make_caster; + + bool load(handle src, bool convert) { + if (!isinstance(src)) + return false; + auto d = reinterpret_borrow(src); + value.clear(); + for (auto it : d) { + key_conv kconv; + value_conv vconv; + if (!kconv.load(it.first.ptr(), convert) || + !vconv.load(it.second.ptr(), convert)) + return false; + value.emplace(cast_op(std::move(kconv)), cast_op(std::move(vconv))); + } + return true; + } + + template + static handle cast(T &&src, return_value_policy policy, handle parent) { + dict d; + return_value_policy policy_key = policy; + return_value_policy policy_value = policy; + if (!std::is_lvalue_reference::value) { + policy_key = return_value_policy_override::policy(policy_key); + policy_value = return_value_policy_override::policy(policy_value); + } + for (auto &&kv : src) { + auto key = reinterpret_steal(key_conv::cast(forward_like(kv.first), policy_key, parent)); + auto value = reinterpret_steal(value_conv::cast(forward_like(kv.second), 
policy_value, parent)); + if (!key || !value) + return handle(); + d[key] = value; + } + return d.release(); + } + + PYBIND11_TYPE_CASTER(Type, _("Dict[") + key_conv::name + _(", ") + value_conv::name + _("]")); +}; + +template struct list_caster { + using value_conv = make_caster; + + bool load(handle src, bool convert) { + if (!isinstance(src) || isinstance(src)) + return false; + auto s = reinterpret_borrow(src); + value.clear(); + reserve_maybe(s, &value); + for (auto it : s) { + value_conv conv; + if (!conv.load(it, convert)) + return false; + value.push_back(cast_op(std::move(conv))); + } + return true; + } + +private: + template ().reserve(0)), void>::value, int> = 0> + void reserve_maybe(sequence s, Type *) { value.reserve(s.size()); } + void reserve_maybe(sequence, void *) { } + +public: + template + static handle cast(T &&src, return_value_policy policy, handle parent) { + if (!std::is_lvalue_reference::value) + policy = return_value_policy_override::policy(policy); + list l(src.size()); + size_t index = 0; + for (auto &&value : src) { + auto value_ = reinterpret_steal(value_conv::cast(forward_like(value), policy, parent)); + if (!value_) + return handle(); + PyList_SET_ITEM(l.ptr(), (ssize_t) index++, value_.release().ptr()); // steals a reference + } + return l.release(); + } + + PYBIND11_TYPE_CASTER(Type, _("List[") + value_conv::name + _("]")); +}; + +template struct type_caster> + : list_caster, Type> { }; + +template struct type_caster> + : list_caster, Type> { }; + +template struct type_caster> + : list_caster, Type> { }; + +template struct array_caster { + using value_conv = make_caster; + +private: + template + bool require_size(enable_if_t size) { + if (value.size() != size) + value.resize(size); + return true; + } + template + bool require_size(enable_if_t size) { + return size == Size; + } + +public: + bool load(handle src, bool convert) { + if (!isinstance(src)) + return false; + auto l = reinterpret_borrow(src); + if 
(!require_size(l.size())) + return false; + size_t ctr = 0; + for (auto it : l) { + value_conv conv; + if (!conv.load(it, convert)) + return false; + value[ctr++] = cast_op(std::move(conv)); + } + return true; + } + + template + static handle cast(T &&src, return_value_policy policy, handle parent) { + list l(src.size()); + size_t index = 0; + for (auto &&value : src) { + auto value_ = reinterpret_steal(value_conv::cast(forward_like(value), policy, parent)); + if (!value_) + return handle(); + PyList_SET_ITEM(l.ptr(), (ssize_t) index++, value_.release().ptr()); // steals a reference + } + return l.release(); + } + + PYBIND11_TYPE_CASTER(ArrayType, _("List[") + value_conv::name + _(_(""), _("[") + _() + _("]")) + _("]")); +}; + +template struct type_caster> + : array_caster, Type, false, Size> { }; + +template struct type_caster> + : array_caster, Type, true> { }; + +template struct type_caster> + : set_caster, Key> { }; + +template struct type_caster> + : set_caster, Key> { }; + +template struct type_caster> + : map_caster, Key, Value> { }; + +template struct type_caster> + : map_caster, Key, Value> { }; + +// This type caster is intended to be used for std::optional and std::experimental::optional +template struct optional_caster { + using value_conv = make_caster; + + template + static handle cast(T_ &&src, return_value_policy policy, handle parent) { + if (!src) + return none().inc_ref(); + policy = return_value_policy_override::policy(policy); + return value_conv::cast(*std::forward(src), policy, parent); + } + + bool load(handle src, bool convert) { + if (!src) { + return false; + } else if (src.is_none()) { + return true; // default-constructed value is already empty + } + value_conv inner_caster; + if (!inner_caster.load(src, convert)) + return false; + + value.emplace(cast_op(std::move(inner_caster))); + return true; + } + + PYBIND11_TYPE_CASTER(T, _("Optional[") + value_conv::name + _("]")); +}; + +#if PYBIND11_HAS_OPTIONAL +template struct type_caster> + 
: public optional_caster> {}; + +template<> struct type_caster + : public void_caster {}; +#endif + +#if PYBIND11_HAS_EXP_OPTIONAL +template struct type_caster> + : public optional_caster> {}; + +template<> struct type_caster + : public void_caster {}; +#endif + +/// Visit a variant and cast any found type to Python +struct variant_caster_visitor { + return_value_policy policy; + handle parent; + + using result_type = handle; // required by boost::variant in C++11 + + template + result_type operator()(T &&src) const { + return make_caster::cast(std::forward(src), policy, parent); + } +}; + +/// Helper class which abstracts away variant's `visit` function. `std::variant` and similar +/// `namespace::variant` types which provide a `namespace::visit()` function are handled here +/// automatically using argument-dependent lookup. Users can provide specializations for other +/// variant-like classes, e.g. `boost::variant` and `boost::apply_visitor`. +template class Variant> +struct visit_helper { + template + static auto call(Args &&...args) -> decltype(visit(std::forward(args)...)) { + return visit(std::forward(args)...); + } +}; + +/// Generic variant caster +template struct variant_caster; + +template class V, typename... Ts> +struct variant_caster> { + static_assert(sizeof...(Ts) > 0, "Variant must consist of at least one alternative."); + + template + bool load_alternative(handle src, bool convert, type_list) { + auto caster = make_caster(); + if (caster.load(src, convert)) { + value = cast_op(caster); + return true; + } + return load_alternative(src, convert, type_list{}); + } + + bool load_alternative(handle, bool, type_list<>) { return false; } + + bool load(handle src, bool convert) { + // Do a first pass without conversions to improve constructor resolution. + // E.g. `py::int_(1).cast>()` needs to fill the `int` + // slot of the variant. Without two-pass loading `double` would be filled + // because it appears first and a conversion is possible. 
+ if (convert && load_alternative(src, false, type_list{})) + return true; + return load_alternative(src, convert, type_list{}); + } + + template + static handle cast(Variant &&src, return_value_policy policy, handle parent) { + return visit_helper::call(variant_caster_visitor{policy, parent}, + std::forward(src)); + } + + using Type = V; + PYBIND11_TYPE_CASTER(Type, _("Union[") + detail::concat(make_caster::name...) + _("]")); +}; + +#if PYBIND11_HAS_VARIANT +template +struct type_caster> : variant_caster> { }; +#endif + +NAMESPACE_END(detail) + +inline std::ostream &operator<<(std::ostream &os, const handle &obj) { + os << (std::string) str(obj); + return os; +} + +NAMESPACE_END(PYBIND11_NAMESPACE) + +#if defined(_MSC_VER) +#pragma warning(pop) +#endif diff --git a/thirdparty/pybind11/include/pybind11/stl_bind.h b/thirdparty/pybind11/include/pybind11/stl_bind.h new file mode 100644 index 000000000..da233eca9 --- /dev/null +++ b/thirdparty/pybind11/include/pybind11/stl_bind.h @@ -0,0 +1,656 @@ +/* + pybind11/std_bind.h: Binding generators for STL data types + + Copyright (c) 2016 Sergey Lyskov and Wenzel Jakob + + All rights reserved. Use of this source code is governed by a + BSD-style license that can be found in the LICENSE file. 
+*/ + +#pragma once + +#include "detail/common.h" +#include "operators.h" + +#include +#include + +NAMESPACE_BEGIN(PYBIND11_NAMESPACE) +NAMESPACE_BEGIN(detail) + +/* SFINAE helper class used by 'is_comparable */ +template struct container_traits { + template static std::true_type test_comparable(decltype(std::declval() == std::declval())*); + template static std::false_type test_comparable(...); + template static std::true_type test_value(typename T2::value_type *); + template static std::false_type test_value(...); + template static std::true_type test_pair(typename T2::first_type *, typename T2::second_type *); + template static std::false_type test_pair(...); + + static constexpr const bool is_comparable = std::is_same(nullptr))>::value; + static constexpr const bool is_pair = std::is_same(nullptr, nullptr))>::value; + static constexpr const bool is_vector = std::is_same(nullptr))>::value; + static constexpr const bool is_element = !is_pair && !is_vector; +}; + +/* Default: is_comparable -> std::false_type */ +template +struct is_comparable : std::false_type { }; + +/* For non-map data structures, check whether operator== can be instantiated */ +template +struct is_comparable< + T, enable_if_t::is_element && + container_traits::is_comparable>> + : std::true_type { }; + +/* For a vector/map data structure, recursively check the value type (which is std::pair for maps) */ +template +struct is_comparable::is_vector>> { + static constexpr const bool value = + is_comparable::value; +}; + +/* For pairs, recursively check the two data types */ +template +struct is_comparable::is_pair>> { + static constexpr const bool value = + is_comparable::value && + is_comparable::value; +}; + +/* Fallback functions */ +template void vector_if_copy_constructible(const Args &...) { } +template void vector_if_equal_operator(const Args &...) { } +template void vector_if_insertion_operator(const Args &...) { } +template void vector_modifiers(const Args &...) 
{ } + +template +void vector_if_copy_constructible(enable_if_t::value, Class_> &cl) { + cl.def(init(), "Copy constructor"); +} + +template +void vector_if_equal_operator(enable_if_t::value, Class_> &cl) { + using T = typename Vector::value_type; + + cl.def(self == self); + cl.def(self != self); + + cl.def("count", + [](const Vector &v, const T &x) { + return std::count(v.begin(), v.end(), x); + }, + arg("x"), + "Return the number of times ``x`` appears in the list" + ); + + cl.def("remove", [](Vector &v, const T &x) { + auto p = std::find(v.begin(), v.end(), x); + if (p != v.end()) + v.erase(p); + else + throw value_error(); + }, + arg("x"), + "Remove the first item from the list whose value is x. " + "It is an error if there is no such item." + ); + + cl.def("__contains__", + [](const Vector &v, const T &x) { + return std::find(v.begin(), v.end(), x) != v.end(); + }, + arg("x"), + "Return true the container contains ``x``" + ); +} + +// Vector modifiers -- requires a copyable vector_type: +// (Technically, some of these (pop and __delitem__) don't actually require copyability, but it seems +// silly to allow deletion but not insertion, so include them here too.) 
+template +void vector_modifiers(enable_if_t::value, Class_> &cl) { + using T = typename Vector::value_type; + using SizeType = typename Vector::size_type; + using DiffType = typename Vector::difference_type; + + auto wrap_i = [](DiffType i, SizeType n) { + if (i < 0) + i += n; + if (i < 0 || (SizeType)i >= n) + throw index_error(); + return i; + }; + + cl.def("append", + [](Vector &v, const T &value) { v.push_back(value); }, + arg("x"), + "Add an item to the end of the list"); + + cl.def(init([](iterable it) { + auto v = std::unique_ptr(new Vector()); + v->reserve(len_hint(it)); + for (handle h : it) + v->push_back(h.cast()); + return v.release(); + })); + + cl.def("clear", + [](Vector &v) { + v.clear(); + }, + "Clear the contents" + ); + + cl.def("extend", + [](Vector &v, const Vector &src) { + v.insert(v.end(), src.begin(), src.end()); + }, + arg("L"), + "Extend the list by appending all the items in the given list" + ); + + cl.def("extend", + [](Vector &v, iterable it) { + const size_t old_size = v.size(); + v.reserve(old_size + len_hint(it)); + try { + for (handle h : it) { + v.push_back(h.cast()); + } + } catch (const cast_error &) { + v.erase(v.begin() + static_cast(old_size), v.end()); + try { + v.shrink_to_fit(); + } catch (const std::exception &) { + // Do nothing + } + throw; + } + }, + arg("L"), + "Extend the list by appending all the items in the given list" + ); + + cl.def("insert", + [](Vector &v, DiffType i, const T &x) { + // Can't use wrap_i; i == v.size() is OK + if (i < 0) + i += v.size(); + if (i < 0 || (SizeType)i > v.size()) + throw index_error(); + v.insert(v.begin() + i, x); + }, + arg("i") , arg("x"), + "Insert an item at a given position." 
+ ); + + cl.def("pop", + [](Vector &v) { + if (v.empty()) + throw index_error(); + T t = v.back(); + v.pop_back(); + return t; + }, + "Remove and return the last item" + ); + + cl.def("pop", + [wrap_i](Vector &v, DiffType i) { + i = wrap_i(i, v.size()); + T t = v[(SizeType) i]; + v.erase(v.begin() + i); + return t; + }, + arg("i"), + "Remove and return the item at index ``i``" + ); + + cl.def("__setitem__", + [wrap_i](Vector &v, DiffType i, const T &t) { + i = wrap_i(i, v.size()); + v[(SizeType)i] = t; + } + ); + + /// Slicing protocol + cl.def("__getitem__", + [](const Vector &v, slice slice) -> Vector * { + size_t start, stop, step, slicelength; + + if (!slice.compute(v.size(), &start, &stop, &step, &slicelength)) + throw error_already_set(); + + Vector *seq = new Vector(); + seq->reserve((size_t) slicelength); + + for (size_t i=0; ipush_back(v[start]); + start += step; + } + return seq; + }, + arg("s"), + "Retrieve list elements using a slice object" + ); + + cl.def("__setitem__", + [](Vector &v, slice slice, const Vector &value) { + size_t start, stop, step, slicelength; + if (!slice.compute(v.size(), &start, &stop, &step, &slicelength)) + throw error_already_set(); + + if (slicelength != value.size()) + throw std::runtime_error("Left and right hand size of slice assignment have different sizes!"); + + for (size_t i=0; i), +// we have to access by copying; otherwise we return by reference. 
+template using vector_needs_copy = negation< + std::is_same()[typename Vector::size_type()]), typename Vector::value_type &>>; + +// The usual case: access and iterate by reference +template +void vector_accessor(enable_if_t::value, Class_> &cl) { + using T = typename Vector::value_type; + using SizeType = typename Vector::size_type; + using DiffType = typename Vector::difference_type; + using ItType = typename Vector::iterator; + + auto wrap_i = [](DiffType i, SizeType n) { + if (i < 0) + i += n; + if (i < 0 || (SizeType)i >= n) + throw index_error(); + return i; + }; + + cl.def("__getitem__", + [wrap_i](Vector &v, DiffType i) -> T & { + i = wrap_i(i, v.size()); + return v[(SizeType)i]; + }, + return_value_policy::reference_internal // ref + keepalive + ); + + cl.def("__iter__", + [](Vector &v) { + return make_iterator< + return_value_policy::reference_internal, ItType, ItType, T&>( + v.begin(), v.end()); + }, + keep_alive<0, 1>() /* Essential: keep list alive while iterator exists */ + ); +} + +// The case for special objects, like std::vector, that have to be returned-by-copy: +template +void vector_accessor(enable_if_t::value, Class_> &cl) { + using T = typename Vector::value_type; + using SizeType = typename Vector::size_type; + using DiffType = typename Vector::difference_type; + using ItType = typename Vector::iterator; + cl.def("__getitem__", + [](const Vector &v, DiffType i) -> T { + if (i < 0 && (i += v.size()) < 0) + throw index_error(); + if ((SizeType)i >= v.size()) + throw index_error(); + return v[(SizeType)i]; + } + ); + + cl.def("__iter__", + [](Vector &v) { + return make_iterator< + return_value_policy::copy, ItType, ItType, T>( + v.begin(), v.end()); + }, + keep_alive<0, 1>() /* Essential: keep list alive while iterator exists */ + ); +} + +template auto vector_if_insertion_operator(Class_ &cl, std::string const &name) + -> decltype(std::declval() << std::declval(), void()) { + using size_type = typename Vector::size_type; + + cl.def("__repr__", 
+ [name](Vector &v) { + std::ostringstream s; + s << name << '['; + for (size_type i=0; i < v.size(); ++i) { + s << v[i]; + if (i != v.size() - 1) + s << ", "; + } + s << ']'; + return s.str(); + }, + "Return the canonical string representation of this list." + ); +} + +// Provide the buffer interface for vectors if we have data() and we have a format for it +// GCC seems to have "void std::vector::data()" - doing SFINAE on the existence of data() is insufficient, we need to check it returns an appropriate pointer +template +struct vector_has_data_and_format : std::false_type {}; +template +struct vector_has_data_and_format::format(), std::declval().data()), typename Vector::value_type*>::value>> : std::true_type {}; + +// Add the buffer interface to a vector +template +enable_if_t...>::value> +vector_buffer(Class_& cl) { + using T = typename Vector::value_type; + + static_assert(vector_has_data_and_format::value, "There is not an appropriate format descriptor for this vector"); + + // numpy.h declares this for arbitrary types, but it may raise an exception and crash hard at runtime if PYBIND11_NUMPY_DTYPE hasn't been called, so check here + format_descriptor::format(); + + cl.def_buffer([](Vector& v) -> buffer_info { + return buffer_info(v.data(), static_cast(sizeof(T)), format_descriptor::format(), 1, {v.size()}, {sizeof(T)}); + }); + + cl.def(init([](buffer buf) { + auto info = buf.request(); + if (info.ndim != 1 || info.strides[0] % static_cast(sizeof(T))) + throw type_error("Only valid 1D buffers can be copied to a vector"); + if (!detail::compare_buffer_info::compare(info) || (ssize_t) sizeof(T) != info.itemsize) + throw type_error("Format mismatch (Python: " + info.format + " C++: " + format_descriptor::format() + ")"); + + auto vec = std::unique_ptr(new Vector()); + vec->reserve((size_t) info.shape[0]); + T *p = static_cast(info.ptr); + ssize_t step = info.strides[0] / static_cast(sizeof(T)); + T *end = p + info.shape[0] * step; + for (; p != end; p += 
step) + vec->push_back(*p); + return vec.release(); + })); + + return; +} + +template +enable_if_t...>::value> vector_buffer(Class_&) {} + +NAMESPACE_END(detail) + +// +// std::vector +// +template , typename... Args> +class_ bind_vector(handle scope, std::string const &name, Args&&... args) { + using Class_ = class_; + + // If the value_type is unregistered (e.g. a converting type) or is itself registered + // module-local then make the vector binding module-local as well: + using vtype = typename Vector::value_type; + auto vtype_info = detail::get_type_info(typeid(vtype)); + bool local = !vtype_info || vtype_info->module_local; + + Class_ cl(scope, name.c_str(), pybind11::module_local(local), std::forward(args)...); + + // Declare the buffer interface if a buffer_protocol() is passed in + detail::vector_buffer(cl); + + cl.def(init<>()); + + // Register copy constructor (if possible) + detail::vector_if_copy_constructible(cl); + + // Register comparison-related operators and functions (if possible) + detail::vector_if_equal_operator(cl); + + // Register stream insertion operator (if possible) + detail::vector_if_insertion_operator(cl, name); + + // Modifiers require copyable vector value type + detail::vector_modifiers(cl); + + // Accessor and iterator; return by value if copyable, otherwise we return by ref + keep-alive + detail::vector_accessor(cl); + + cl.def("__bool__", + [](const Vector &v) -> bool { + return !v.empty(); + }, + "Check whether the list is nonempty" + ); + + cl.def("__len__", &Vector::size); + + + + +#if 0 + // C++ style functions deprecated, leaving it here as an example + cl.def(init()); + + cl.def("resize", + (void (Vector::*) (size_type count)) & Vector::resize, + "changes the number of elements stored"); + + cl.def("erase", + [](Vector &v, SizeType i) { + if (i >= v.size()) + throw index_error(); + v.erase(v.begin() + i); + }, "erases element at index ``i``"); + + cl.def("empty", &Vector::empty, "checks whether the container is empty"); + 
cl.def("size", &Vector::size, "returns the number of elements"); + cl.def("push_back", (void (Vector::*)(const T&)) &Vector::push_back, "adds an element to the end"); + cl.def("pop_back", &Vector::pop_back, "removes the last element"); + + cl.def("max_size", &Vector::max_size, "returns the maximum possible number of elements"); + cl.def("reserve", &Vector::reserve, "reserves storage"); + cl.def("capacity", &Vector::capacity, "returns the number of elements that can be held in currently allocated storage"); + cl.def("shrink_to_fit", &Vector::shrink_to_fit, "reduces memory usage by freeing unused memory"); + + cl.def("clear", &Vector::clear, "clears the contents"); + cl.def("swap", &Vector::swap, "swaps the contents"); + + cl.def("front", [](Vector &v) { + if (v.size()) return v.front(); + else throw index_error(); + }, "access the first element"); + + cl.def("back", [](Vector &v) { + if (v.size()) return v.back(); + else throw index_error(); + }, "access the last element "); + +#endif + + return cl; +} + + + +// +// std::map, std::unordered_map +// + +NAMESPACE_BEGIN(detail) + +/* Fallback functions */ +template void map_if_insertion_operator(const Args &...) { } +template void map_assignment(const Args &...) 
{ } + +// Map assignment when copy-assignable: just copy the value +template +void map_assignment(enable_if_t::value, Class_> &cl) { + using KeyType = typename Map::key_type; + using MappedType = typename Map::mapped_type; + + cl.def("__setitem__", + [](Map &m, const KeyType &k, const MappedType &v) { + auto it = m.find(k); + if (it != m.end()) it->second = v; + else m.emplace(k, v); + } + ); +} + +// Not copy-assignable, but still copy-constructible: we can update the value by erasing and reinserting +template +void map_assignment(enable_if_t< + !is_copy_assignable::value && + is_copy_constructible::value, + Class_> &cl) { + using KeyType = typename Map::key_type; + using MappedType = typename Map::mapped_type; + + cl.def("__setitem__", + [](Map &m, const KeyType &k, const MappedType &v) { + // We can't use m[k] = v; because value type might not be default constructable + auto r = m.emplace(k, v); + if (!r.second) { + // value type is not copy assignable so the only way to insert it is to erase it first... + m.erase(r.first); + m.emplace(k, v); + } + } + ); +} + + +template auto map_if_insertion_operator(Class_ &cl, std::string const &name) +-> decltype(std::declval() << std::declval() << std::declval(), void()) { + + cl.def("__repr__", + [name](Map &m) { + std::ostringstream s; + s << name << '{'; + bool f = false; + for (auto const &kv : m) { + if (f) + s << ", "; + s << kv.first << ": " << kv.second; + f = true; + } + s << '}'; + return s.str(); + }, + "Return the canonical string representation of this map." + ); +} + + +NAMESPACE_END(detail) + +template , typename... Args> +class_ bind_map(handle scope, const std::string &name, Args&&... args) { + using KeyType = typename Map::key_type; + using MappedType = typename Map::mapped_type; + using Class_ = class_; + + // If either type is a non-module-local bound type then make the map binding non-local as well; + // otherwise (e.g. 
both types are either module-local or converting) the map will be + // module-local. + auto tinfo = detail::get_type_info(typeid(MappedType)); + bool local = !tinfo || tinfo->module_local; + if (local) { + tinfo = detail::get_type_info(typeid(KeyType)); + local = !tinfo || tinfo->module_local; + } + + Class_ cl(scope, name.c_str(), pybind11::module_local(local), std::forward(args)...); + + cl.def(init<>()); + + // Register stream insertion operator (if possible) + detail::map_if_insertion_operator(cl, name); + + cl.def("__bool__", + [](const Map &m) -> bool { return !m.empty(); }, + "Check whether the map is nonempty" + ); + + cl.def("__iter__", + [](Map &m) { return make_key_iterator(m.begin(), m.end()); }, + keep_alive<0, 1>() /* Essential: keep list alive while iterator exists */ + ); + + cl.def("items", + [](Map &m) { return make_iterator(m.begin(), m.end()); }, + keep_alive<0, 1>() /* Essential: keep list alive while iterator exists */ + ); + + cl.def("__getitem__", + [](Map &m, const KeyType &k) -> MappedType & { + auto it = m.find(k); + if (it == m.end()) + throw key_error(); + return it->second; + }, + return_value_policy::reference_internal // ref + keepalive + ); + + cl.def("__contains__", + [](Map &m, const KeyType &k) -> bool { + auto it = m.find(k); + if (it == m.end()) + return false; + return true; + } + ); + + // Assignment provided only if the type is copyable + detail::map_assignment(cl); + + cl.def("__delitem__", + [](Map &m, const KeyType &k) { + auto it = m.find(k); + if (it == m.end()) + throw key_error(); + m.erase(it); + } + ); + + cl.def("__len__", &Map::size); + + return cl; +} + +NAMESPACE_END(PYBIND11_NAMESPACE) diff --git a/thirdparty/pybind11/pybind11/__init__.py b/thirdparty/pybind11/pybind11/__init__.py new file mode 100644 index 000000000..4b1de3efa --- /dev/null +++ b/thirdparty/pybind11/pybind11/__init__.py @@ -0,0 +1,12 @@ +from ._version import version_info, __version__ # noqa: F401 imported but unused + + +def 
get_include(user=False): + import os + d = os.path.dirname(__file__) + if os.path.exists(os.path.join(d, "include")): + # Package is installed + return os.path.join(d, "include") + else: + # Package is from a source directory + return os.path.join(os.path.dirname(d), "include") diff --git a/thirdparty/pybind11/pybind11/__main__.py b/thirdparty/pybind11/pybind11/__main__.py new file mode 100644 index 000000000..89b263a8a --- /dev/null +++ b/thirdparty/pybind11/pybind11/__main__.py @@ -0,0 +1,36 @@ +from __future__ import print_function + +import argparse +import sys +import sysconfig + +from . import get_include + + +def print_includes(): + dirs = [sysconfig.get_path('include'), + sysconfig.get_path('platinclude'), + get_include()] + + # Make unique but preserve order + unique_dirs = [] + for d in dirs: + if d not in unique_dirs: + unique_dirs.append(d) + + print(' '.join('-I' + d for d in unique_dirs)) + + +def main(): + parser = argparse.ArgumentParser(prog='python -m pybind11') + parser.add_argument('--includes', action='store_true', + help='Include flags for both pybind11 and Python headers.') + args = parser.parse_args() + if not sys.argv[1:]: + parser.print_help() + if args.includes: + print_includes() + + +if __name__ == '__main__': + main() diff --git a/thirdparty/pybind11/pybind11/_version.py b/thirdparty/pybind11/pybind11/_version.py new file mode 100644 index 000000000..8d5aa5c76 --- /dev/null +++ b/thirdparty/pybind11/pybind11/_version.py @@ -0,0 +1,2 @@ +version_info = (2, 5, 0) +__version__ = '.'.join(map(str, version_info)) diff --git a/thirdparty/pybind11/setup.cfg b/thirdparty/pybind11/setup.cfg new file mode 100644 index 000000000..002f38d10 --- /dev/null +++ b/thirdparty/pybind11/setup.cfg @@ -0,0 +1,12 @@ +[bdist_wheel] +universal=1 + +[flake8] +max-line-length = 99 +show_source = True +exclude = .git, __pycache__, build, dist, docs, tools, venv +ignore = + # required for pretty matrix formatting: multiple spaces after `,` and `[` + E201, E241, 
W504, + # camelcase 'cPickle' imported as lowercase 'pickle' + N813 diff --git a/thirdparty/pybind11/setup.py b/thirdparty/pybind11/setup.py new file mode 100644 index 000000000..473ea1ee0 --- /dev/null +++ b/thirdparty/pybind11/setup.py @@ -0,0 +1,122 @@ +#!/usr/bin/env python + +# Setup script for PyPI; use CMakeFile.txt to build extension modules + +from setuptools import setup +from distutils.command.install_headers import install_headers +from distutils.command.build_py import build_py +from pybind11 import __version__ +import os + +package_data = [ + 'include/pybind11/detail/class.h', + 'include/pybind11/detail/common.h', + 'include/pybind11/detail/descr.h', + 'include/pybind11/detail/init.h', + 'include/pybind11/detail/internals.h', + 'include/pybind11/detail/typeid.h', + 'include/pybind11/attr.h', + 'include/pybind11/buffer_info.h', + 'include/pybind11/cast.h', + 'include/pybind11/chrono.h', + 'include/pybind11/common.h', + 'include/pybind11/complex.h', + 'include/pybind11/eigen.h', + 'include/pybind11/embed.h', + 'include/pybind11/eval.h', + 'include/pybind11/functional.h', + 'include/pybind11/iostream.h', + 'include/pybind11/numpy.h', + 'include/pybind11/operators.h', + 'include/pybind11/options.h', + 'include/pybind11/pybind11.h', + 'include/pybind11/pytypes.h', + 'include/pybind11/stl.h', + 'include/pybind11/stl_bind.h', +] + +# Prevent installation of pybind11 headers by setting +# PYBIND11_USE_CMAKE. 
+if os.environ.get('PYBIND11_USE_CMAKE'): + headers = [] +else: + headers = package_data + + +class InstallHeaders(install_headers): + """Use custom header installer because the default one flattens subdirectories""" + def run(self): + if not self.distribution.headers: + return + + for header in self.distribution.headers: + subdir = os.path.dirname(os.path.relpath(header, 'include/pybind11')) + install_dir = os.path.join(self.install_dir, subdir) + self.mkpath(install_dir) + + (out, _) = self.copy_file(header, install_dir) + self.outfiles.append(out) + + +# Install the headers inside the package as well +class BuildPy(build_py): + def build_package_data(self): + build_py.build_package_data(self) + for header in package_data: + target = os.path.join(self.build_lib, 'pybind11', header) + self.mkpath(os.path.dirname(target)) + self.copy_file(header, target, preserve_mode=False) + + +setup( + name='pybind11', + version=__version__, + description='Seamless operability between C++11 and Python', + author='Wenzel Jakob', + author_email='wenzel.jakob@epfl.ch', + url='https://github.com/pybind/pybind11', + download_url='https://github.com/pybind/pybind11/tarball/v' + __version__, + packages=['pybind11'], + license='BSD', + headers=headers, + zip_safe=False, + cmdclass=dict(install_headers=InstallHeaders, build_py=BuildPy), + classifiers=[ + 'Development Status :: 5 - Production/Stable', + 'Intended Audience :: Developers', + 'Topic :: Software Development :: Libraries :: Python Modules', + 'Topic :: Utilities', + 'Programming Language :: C++', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.2', + 'Programming Language :: Python :: 3.3', + 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', + 'License :: OSI Approved :: BSD License' + ], + keywords='C++11, Python bindings', + long_description="""pybind11 is a lightweight 
header-only library that +exposes C++ types in Python and vice versa, mainly to create Python bindings of +existing C++ code. Its goals and syntax are similar to the excellent +Boost.Python by David Abrahams: to minimize boilerplate code in traditional +extension modules by inferring type information using compile-time +introspection. + +The main issue with Boost.Python-and the reason for creating such a similar +project-is Boost. Boost is an enormously large and complex suite of utility +libraries that works with almost every C++ compiler in existence. This +compatibility has its cost: arcane template tricks and workarounds are +necessary to support the oldest and buggiest of compiler specimens. Now that +C++11-compatible compilers are widely available, this heavy machinery has +become an excessively large and unnecessary dependency. + +Think of this library as a tiny self-contained version of Boost.Python with +everything stripped away that isn't relevant for binding generation. Without +comments, the core header files only require ~4K lines of code and depend on +Python (2.7 or 3.x, or PyPy2.7 >= 5.7) and the C++ standard library. This +compact implementation was possible thanks to some of the new C++11 language +features (specifically: tuples, lambda functions and variadic templates). Since +its creation, this library has grown beyond Boost.Python in many ways, leading +to dramatically simpler binding code in many common situations.""") diff --git a/thirdparty/pybind11/tools/FindCatch.cmake b/thirdparty/pybind11/tools/FindCatch.cmake new file mode 100644 index 000000000..9d490c5aa --- /dev/null +++ b/thirdparty/pybind11/tools/FindCatch.cmake @@ -0,0 +1,57 @@ +# - Find the Catch test framework or download it (single header) +# +# This is a quick module for internal use. It assumes that Catch is +# REQUIRED and that a minimum version is provided (not EXACT). 
If +# a suitable version isn't found locally, the single header file +# will be downloaded and placed in the build dir: PROJECT_BINARY_DIR. +# +# This code sets the following variables: +# CATCH_INCLUDE_DIR - path to catch.hpp +# CATCH_VERSION - version number + +if(NOT Catch_FIND_VERSION) + message(FATAL_ERROR "A version number must be specified.") +elseif(Catch_FIND_REQUIRED) + message(FATAL_ERROR "This module assumes Catch is not required.") +elseif(Catch_FIND_VERSION_EXACT) + message(FATAL_ERROR "Exact version numbers are not supported, only minimum.") +endif() + +# Extract the version number from catch.hpp +function(_get_catch_version) + file(STRINGS "${CATCH_INCLUDE_DIR}/catch.hpp" version_line REGEX "Catch v.*" LIMIT_COUNT 1) + if(version_line MATCHES "Catch v([0-9]+)\\.([0-9]+)\\.([0-9]+)") + set(CATCH_VERSION "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}.${CMAKE_MATCH_3}" PARENT_SCOPE) + endif() +endfunction() + +# Download the single-header version of Catch +function(_download_catch version destination_dir) + message(STATUS "Downloading catch v${version}...") + set(url https://github.com/philsquared/Catch/releases/download/v${version}/catch.hpp) + file(DOWNLOAD ${url} "${destination_dir}/catch.hpp" STATUS status) + list(GET status 0 error) + if(error) + message(FATAL_ERROR "Could not download ${url}") + endif() + set(CATCH_INCLUDE_DIR "${destination_dir}" CACHE INTERNAL "") +endfunction() + +# Look for catch locally +find_path(CATCH_INCLUDE_DIR NAMES catch.hpp PATH_SUFFIXES catch) +if(CATCH_INCLUDE_DIR) + _get_catch_version() +endif() + +# Download the header if it wasn't found or if it's outdated +if(NOT CATCH_VERSION OR CATCH_VERSION VERSION_LESS ${Catch_FIND_VERSION}) + if(DOWNLOAD_CATCH) + _download_catch(${Catch_FIND_VERSION} "${PROJECT_BINARY_DIR}/catch/") + _get_catch_version() + else() + set(CATCH_FOUND FALSE) + return() + endif() +endif() + +set(CATCH_FOUND TRUE) diff --git a/thirdparty/pybind11/tools/FindEigen3.cmake 
b/thirdparty/pybind11/tools/FindEigen3.cmake new file mode 100644 index 000000000..9c546a05d --- /dev/null +++ b/thirdparty/pybind11/tools/FindEigen3.cmake @@ -0,0 +1,81 @@ +# - Try to find Eigen3 lib +# +# This module supports requiring a minimum version, e.g. you can do +# find_package(Eigen3 3.1.2) +# to require version 3.1.2 or newer of Eigen3. +# +# Once done this will define +# +# EIGEN3_FOUND - system has eigen lib with correct version +# EIGEN3_INCLUDE_DIR - the eigen include directory +# EIGEN3_VERSION - eigen version + +# Copyright (c) 2006, 2007 Montel Laurent, +# Copyright (c) 2008, 2009 Gael Guennebaud, +# Copyright (c) 2009 Benoit Jacob +# Redistribution and use is allowed according to the terms of the 2-clause BSD license. + +if(NOT Eigen3_FIND_VERSION) + if(NOT Eigen3_FIND_VERSION_MAJOR) + set(Eigen3_FIND_VERSION_MAJOR 2) + endif(NOT Eigen3_FIND_VERSION_MAJOR) + if(NOT Eigen3_FIND_VERSION_MINOR) + set(Eigen3_FIND_VERSION_MINOR 91) + endif(NOT Eigen3_FIND_VERSION_MINOR) + if(NOT Eigen3_FIND_VERSION_PATCH) + set(Eigen3_FIND_VERSION_PATCH 0) + endif(NOT Eigen3_FIND_VERSION_PATCH) + + set(Eigen3_FIND_VERSION "${Eigen3_FIND_VERSION_MAJOR}.${Eigen3_FIND_VERSION_MINOR}.${Eigen3_FIND_VERSION_PATCH}") +endif(NOT Eigen3_FIND_VERSION) + +macro(_eigen3_check_version) + file(READ "${EIGEN3_INCLUDE_DIR}/Eigen/src/Core/util/Macros.h" _eigen3_version_header) + + string(REGEX MATCH "define[ \t]+EIGEN_WORLD_VERSION[ \t]+([0-9]+)" _eigen3_world_version_match "${_eigen3_version_header}") + set(EIGEN3_WORLD_VERSION "${CMAKE_MATCH_1}") + string(REGEX MATCH "define[ \t]+EIGEN_MAJOR_VERSION[ \t]+([0-9]+)" _eigen3_major_version_match "${_eigen3_version_header}") + set(EIGEN3_MAJOR_VERSION "${CMAKE_MATCH_1}") + string(REGEX MATCH "define[ \t]+EIGEN_MINOR_VERSION[ \t]+([0-9]+)" _eigen3_minor_version_match "${_eigen3_version_header}") + set(EIGEN3_MINOR_VERSION "${CMAKE_MATCH_1}") + + set(EIGEN3_VERSION ${EIGEN3_WORLD_VERSION}.${EIGEN3_MAJOR_VERSION}.${EIGEN3_MINOR_VERSION}) + 
if(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION}) + set(EIGEN3_VERSION_OK FALSE) + else(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION}) + set(EIGEN3_VERSION_OK TRUE) + endif(${EIGEN3_VERSION} VERSION_LESS ${Eigen3_FIND_VERSION}) + + if(NOT EIGEN3_VERSION_OK) + + message(STATUS "Eigen3 version ${EIGEN3_VERSION} found in ${EIGEN3_INCLUDE_DIR}, " + "but at least version ${Eigen3_FIND_VERSION} is required") + endif(NOT EIGEN3_VERSION_OK) +endmacro(_eigen3_check_version) + +if (EIGEN3_INCLUDE_DIR) + + # in cache already + _eigen3_check_version() + set(EIGEN3_FOUND ${EIGEN3_VERSION_OK}) + +else (EIGEN3_INCLUDE_DIR) + + find_path(EIGEN3_INCLUDE_DIR NAMES signature_of_eigen3_matrix_library + PATHS + ${CMAKE_INSTALL_PREFIX}/include + ${KDE4_INCLUDE_DIR} + PATH_SUFFIXES eigen3 eigen + ) + + if(EIGEN3_INCLUDE_DIR) + _eigen3_check_version() + endif(EIGEN3_INCLUDE_DIR) + + include(FindPackageHandleStandardArgs) + find_package_handle_standard_args(Eigen3 DEFAULT_MSG EIGEN3_INCLUDE_DIR EIGEN3_VERSION_OK) + + mark_as_advanced(EIGEN3_INCLUDE_DIR) + +endif(EIGEN3_INCLUDE_DIR) + diff --git a/thirdparty/pybind11/tools/FindPythonLibsNew.cmake b/thirdparty/pybind11/tools/FindPythonLibsNew.cmake new file mode 100644 index 000000000..9ea6036e3 --- /dev/null +++ b/thirdparty/pybind11/tools/FindPythonLibsNew.cmake @@ -0,0 +1,202 @@ +# - Find python libraries +# This module finds the libraries corresponding to the Python interpreter +# FindPythonInterp provides. +# This code sets the following variables: +# +# PYTHONLIBS_FOUND - have the Python libs been found +# PYTHON_PREFIX - path to the Python installation +# PYTHON_LIBRARIES - path to the python library +# PYTHON_INCLUDE_DIRS - path to where Python.h is found +# PYTHON_MODULE_EXTENSION - lib extension, e.g. 
'.so' or '.pyd' +# PYTHON_MODULE_PREFIX - lib name prefix: usually an empty string +# PYTHON_SITE_PACKAGES - path to installation site-packages +# PYTHON_IS_DEBUG - whether the Python interpreter is a debug build +# +# Thanks to talljimbo for the patch adding the 'LDVERSION' config +# variable usage. + +#============================================================================= +# Copyright 2001-2009 Kitware, Inc. +# Copyright 2012 Continuum Analytics, Inc. +# +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# * Neither the names of Kitware, Inc., the Insight Software Consortium, +# nor the names of their contributors may be used to endorse or promote +# products derived from this software without specific prior written +# permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#============================================================================= + +# Checking for the extension makes sure that `LibsNew` was found and not just `Libs`. +if(PYTHONLIBS_FOUND AND PYTHON_MODULE_EXTENSION) + return() +endif() + +# Use the Python interpreter to find the libs. +if(PythonLibsNew_FIND_REQUIRED) + find_package(PythonInterp ${PythonLibsNew_FIND_VERSION} REQUIRED) +else() + find_package(PythonInterp ${PythonLibsNew_FIND_VERSION}) +endif() + +if(NOT PYTHONINTERP_FOUND) + set(PYTHONLIBS_FOUND FALSE) + set(PythonLibsNew_FOUND FALSE) + return() +endif() + +# According to http://stackoverflow.com/questions/646518/python-how-to-detect-debug-interpreter +# testing whether sys has the gettotalrefcount function is a reliable, cross-platform +# way to detect a CPython debug interpreter. +# +# The library suffix is from the config var LDVERSION sometimes, otherwise +# VERSION. VERSION will typically be like "2.7" on unix, and "27" on windows. 
+execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c" + "from distutils import sysconfig as s;import sys;import struct; +print('.'.join(str(v) for v in sys.version_info)); +print(sys.prefix); +print(s.get_python_inc(plat_specific=True)); +print(s.get_python_lib(plat_specific=True)); +print(s.get_config_var('SO')); +print(hasattr(sys, 'gettotalrefcount')+0); +print(struct.calcsize('@P')); +print(s.get_config_var('LDVERSION') or s.get_config_var('VERSION')); +print(s.get_config_var('LIBDIR') or ''); +print(s.get_config_var('MULTIARCH') or ''); +" + RESULT_VARIABLE _PYTHON_SUCCESS + OUTPUT_VARIABLE _PYTHON_VALUES + ERROR_VARIABLE _PYTHON_ERROR_VALUE) + +if(NOT _PYTHON_SUCCESS MATCHES 0) + if(PythonLibsNew_FIND_REQUIRED) + message(FATAL_ERROR + "Python config failure:\n${_PYTHON_ERROR_VALUE}") + endif() + set(PYTHONLIBS_FOUND FALSE) + set(PythonLibsNew_FOUND FALSE) + return() +endif() + +# Convert the process output into a list +if(WIN32) + string(REGEX REPLACE "\\\\" "/" _PYTHON_VALUES ${_PYTHON_VALUES}) +endif() +string(REGEX REPLACE ";" "\\\\;" _PYTHON_VALUES ${_PYTHON_VALUES}) +string(REGEX REPLACE "\n" ";" _PYTHON_VALUES ${_PYTHON_VALUES}) +list(GET _PYTHON_VALUES 0 _PYTHON_VERSION_LIST) +list(GET _PYTHON_VALUES 1 PYTHON_PREFIX) +list(GET _PYTHON_VALUES 2 PYTHON_INCLUDE_DIR) +list(GET _PYTHON_VALUES 3 PYTHON_SITE_PACKAGES) +list(GET _PYTHON_VALUES 4 PYTHON_MODULE_EXTENSION) +list(GET _PYTHON_VALUES 5 PYTHON_IS_DEBUG) +list(GET _PYTHON_VALUES 6 PYTHON_SIZEOF_VOID_P) +list(GET _PYTHON_VALUES 7 PYTHON_LIBRARY_SUFFIX) +list(GET _PYTHON_VALUES 8 PYTHON_LIBDIR) +list(GET _PYTHON_VALUES 9 PYTHON_MULTIARCH) + +# Make sure the Python has the same pointer-size as the chosen compiler +# Skip if CMAKE_SIZEOF_VOID_P is not defined +if(CMAKE_SIZEOF_VOID_P AND (NOT "${PYTHON_SIZEOF_VOID_P}" STREQUAL "${CMAKE_SIZEOF_VOID_P}")) + if(PythonLibsNew_FIND_REQUIRED) + math(EXPR _PYTHON_BITS "${PYTHON_SIZEOF_VOID_P} * 8") + math(EXPR _CMAKE_BITS "${CMAKE_SIZEOF_VOID_P} * 8") + 
message(FATAL_ERROR + "Python config failure: Python is ${_PYTHON_BITS}-bit, " + "chosen compiler is ${_CMAKE_BITS}-bit") + endif() + set(PYTHONLIBS_FOUND FALSE) + set(PythonLibsNew_FOUND FALSE) + return() +endif() + +# The built-in FindPython didn't always give the version numbers +string(REGEX REPLACE "\\." ";" _PYTHON_VERSION_LIST ${_PYTHON_VERSION_LIST}) +list(GET _PYTHON_VERSION_LIST 0 PYTHON_VERSION_MAJOR) +list(GET _PYTHON_VERSION_LIST 1 PYTHON_VERSION_MINOR) +list(GET _PYTHON_VERSION_LIST 2 PYTHON_VERSION_PATCH) + +# Make sure all directory separators are '/' +string(REGEX REPLACE "\\\\" "/" PYTHON_PREFIX "${PYTHON_PREFIX}") +string(REGEX REPLACE "\\\\" "/" PYTHON_INCLUDE_DIR "${PYTHON_INCLUDE_DIR}") +string(REGEX REPLACE "\\\\" "/" PYTHON_SITE_PACKAGES "${PYTHON_SITE_PACKAGES}") + +if(CMAKE_HOST_WIN32 AND NOT (MINGW AND DEFINED ENV{MSYSTEM})) + set(PYTHON_LIBRARY + "${PYTHON_PREFIX}/libs/Python${PYTHON_LIBRARY_SUFFIX}.lib") + + # when run in a venv, PYTHON_PREFIX points to it. But the libraries remain in the + # original python installation. They may be found relative to PYTHON_INCLUDE_DIR. + if(NOT EXISTS "${PYTHON_LIBRARY}") + get_filename_component(_PYTHON_ROOT ${PYTHON_INCLUDE_DIR} DIRECTORY) + set(PYTHON_LIBRARY + "${_PYTHON_ROOT}/libs/Python${PYTHON_LIBRARY_SUFFIX}.lib") + endif() + + # raise an error if the python libs are still not found. + if(NOT EXISTS "${PYTHON_LIBRARY}") + message(FATAL_ERROR "Python libraries not found") + endif() + +else() + if(PYTHON_MULTIARCH) + set(_PYTHON_LIBS_SEARCH "${PYTHON_LIBDIR}/${PYTHON_MULTIARCH}" "${PYTHON_LIBDIR}") + else() + set(_PYTHON_LIBS_SEARCH "${PYTHON_LIBDIR}") + endif() + #message(STATUS "Searching for Python libs in ${_PYTHON_LIBS_SEARCH}") + # Probably this needs to be more involved. It would be nice if the config + # information the python interpreter itself gave us were more complete. 
+ find_library(PYTHON_LIBRARY + NAMES "python${PYTHON_LIBRARY_SUFFIX}" + PATHS ${_PYTHON_LIBS_SEARCH} + NO_DEFAULT_PATH) + + # If all else fails, just set the name/version and let the linker figure out the path. + if(NOT PYTHON_LIBRARY) + set(PYTHON_LIBRARY python${PYTHON_LIBRARY_SUFFIX}) + endif() +endif() + +MARK_AS_ADVANCED( + PYTHON_LIBRARY + PYTHON_INCLUDE_DIR +) + +# We use PYTHON_INCLUDE_DIR, PYTHON_LIBRARY and PYTHON_DEBUG_LIBRARY for the +# cache entries because they are meant to specify the location of a single +# library. We now set the variables listed by the documentation for this +# module. +SET(PYTHON_INCLUDE_DIRS "${PYTHON_INCLUDE_DIR}") +SET(PYTHON_LIBRARIES "${PYTHON_LIBRARY}") +SET(PYTHON_DEBUG_LIBRARIES "${PYTHON_DEBUG_LIBRARY}") + +find_package_message(PYTHON + "Found PythonLibs: ${PYTHON_LIBRARY}" + "${PYTHON_EXECUTABLE}${PYTHON_VERSION}") + +set(PYTHONLIBS_FOUND TRUE) +set(PythonLibsNew_FOUND TRUE) diff --git a/thirdparty/pybind11/tools/check-style.sh b/thirdparty/pybind11/tools/check-style.sh new file mode 100644 index 000000000..0a9f7d24f --- /dev/null +++ b/thirdparty/pybind11/tools/check-style.sh @@ -0,0 +1,70 @@ +#!/bin/bash +# +# Script to check include/test code for common pybind11 code style errors. +# +# This script currently checks for +# +# 1. use of tabs instead of spaces +# 2. MSDOS-style CRLF endings +# 3. trailing spaces +# 4. missing space between keyword and parenthesis, e.g.: for(, if(, while( +# 5. Missing space between right parenthesis and brace, e.g. 'for (...){' +# 6. opening brace on its own line. It should always be on the same line as the +# if/while/for/do statement. 
+# +# Invoke as: tools/check-style.sh +# + +check_style_errors=0 +IFS=$'\n' + +found="$( GREP_COLORS='mt=41' GREP_COLOR='41' grep $'\t' include tests/*.{cpp,py,h} docs/*.rst -rn --color=always )" +if [ -n "$found" ]; then + # The mt=41 sets a red background for matched tabs: + echo -e '\033[31;01mError: found tab characters in the following files:\033[0m' + check_style_errors=1 + echo "$found" | sed -e 's/^/ /' +fi + + +found="$( grep -IUlr $'\r' include tests/*.{cpp,py,h} docs/*.rst --color=always )" +if [ -n "$found" ]; then + echo -e '\033[31;01mError: found CRLF characters in the following files:\033[0m' + check_style_errors=1 + echo "$found" | sed -e 's/^/ /' +fi + +found="$(GREP_COLORS='mt=41' GREP_COLOR='41' grep '[[:blank:]]\+$' include tests/*.{cpp,py,h} docs/*.rst -rn --color=always )" +if [ -n "$found" ]; then + # The mt=41 sets a red background for matched trailing spaces + echo -e '\033[31;01mError: found trailing spaces in the following files:\033[0m' + check_style_errors=1 + echo "$found" | sed -e 's/^/ /' +fi + +found="$(grep '\<\(if\|for\|while\|catch\)(\|){' include tests/*.{cpp,h} -rn --color=always)" +if [ -n "$found" ]; then + echo -e '\033[31;01mError: found the following coding style problems:\033[0m' + check_style_errors=1 + echo "$found" | sed -e 's/^/ /' +fi + +found="$(awk ' +function prefix(filename, lineno) { + return " \033[35m" filename "\033[36m:\033[32m" lineno "\033[36m:\033[0m" +} +function mark(pattern, string) { sub(pattern, "\033[01;31m&\033[0m", string); return string } +last && /^\s*{/ { + print prefix(FILENAME, FNR-1) mark("\\)\\s*$", last) + print prefix(FILENAME, FNR) mark("^\\s*{", $0) + last="" +} +{ last = /(if|for|while|catch|switch)\s*\(.*\)\s*$/ ? $0 : "" } +' $(find include -type f) tests/*.{cpp,h} docs/*.rst)" +if [ -n "$found" ]; then + check_style_errors=1 + echo -e '\033[31;01mError: braces should occur on the same line as the if/while/.. statement. 
Found issues in the following files:\033[0m' + echo "$found" +fi + +exit $check_style_errors diff --git a/thirdparty/pybind11/tools/clang/.gitignore b/thirdparty/pybind11/tools/clang/.gitignore new file mode 100644 index 000000000..8819bdaf3 --- /dev/null +++ b/thirdparty/pybind11/tools/clang/.gitignore @@ -0,0 +1,4 @@ +*.swp +*.swo +*.pyc +__pycache__ diff --git a/thirdparty/pybind11/tools/clang/LICENSE.TXT b/thirdparty/pybind11/tools/clang/LICENSE.TXT new file mode 100644 index 000000000..6c224f84c --- /dev/null +++ b/thirdparty/pybind11/tools/clang/LICENSE.TXT @@ -0,0 +1,63 @@ +============================================================================== +LLVM Release License +============================================================================== +University of Illinois/NCSA +Open Source License + +Copyright (c) 2007-2012 University of Illinois at Urbana-Champaign. +All rights reserved. + +Developed by: + + LLVM Team + + University of Illinois at Urbana-Champaign + + http://llvm.org + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal with +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimers. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimers in the + documentation and/or other materials provided with the distribution. 
+ + * Neither the names of the LLVM Team, University of Illinois at + Urbana-Champaign, nor the names of its contributors may be used to + endorse or promote products derived from this Software without specific + prior written permission. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE +SOFTWARE. + +============================================================================== +The LLVM software contains code written by third parties. Such software will +have its own individual LICENSE.TXT file in the directory in which it appears. +This file will describe the copyrights, license, and restrictions which apply +to that code. + +The disclaimer of warranty in the University of Illinois Open Source License +applies to all code in the LLVM Distribution, and nothing in any of the +other licenses gives permission to use the names of the LLVM Team or the +University of Illinois to endorse or promote products derived from this +Software. + +The following pieces of software have additional or alternate copyrights, +licenses, and/or restrictions: + +Program Directory +------- --------- + + diff --git a/thirdparty/pybind11/tools/clang/README.md b/thirdparty/pybind11/tools/clang/README.md new file mode 100644 index 000000000..efb892166 --- /dev/null +++ b/thirdparty/pybind11/tools/clang/README.md @@ -0,0 +1,2 @@ +This is simply clang's Python bindings (clang.cindex) ported to Python 3. Please see http://llvm.org/svn/llvm-project/cfe/trunk/bindings/python/ for the original project. 
+ diff --git a/thirdparty/pybind11/tools/clang/__init__.py b/thirdparty/pybind11/tools/clang/__init__.py new file mode 100644 index 000000000..88f308123 --- /dev/null +++ b/thirdparty/pybind11/tools/clang/__init__.py @@ -0,0 +1,24 @@ +#===- __init__.py - Clang Python Bindings --------------------*- python -*--===# +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +#===------------------------------------------------------------------------===# + +r""" +Clang Library Bindings +====================== + +This package provides access to the Clang compiler and libraries. + +The available modules are: + + cindex + + Bindings for the Clang indexing library. +""" + +__all__ = ['cindex'] + diff --git a/thirdparty/pybind11/tools/clang/cindex.py b/thirdparty/pybind11/tools/clang/cindex.py new file mode 100644 index 000000000..3a083de0d --- /dev/null +++ b/thirdparty/pybind11/tools/clang/cindex.py @@ -0,0 +1,3884 @@ +#===- cindex.py - Python Indexing Library Bindings -----------*- python -*--===# +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +#===------------------------------------------------------------------------===# + +r""" +Clang Indexing Library Bindings +=============================== + +This module provides an interface to the Clang indexing library. It is a +low-level interface to the indexing library which attempts to match the Clang +API directly while also being "pythonic". Notable differences from the C API +are: + + * string results are returned as Python strings, not CXString objects. + + * null cursors are translated to None. + + * access to child cursors is done via iteration, not visitation. + +The major indexing objects are: + + Index + + The top-level object which manages some global library state. 
+ + TranslationUnit + + High-level object encapsulating the AST for a single translation unit. These + can be loaded from .ast files or parsed on the fly. + + Cursor + + Generic object for representing a node in the AST. + + SourceRange, SourceLocation, and File + + Objects representing information about the input source. + +Most object information is exposed using properties, when the underlying API +call is efficient. +""" + +# TODO +# ==== +# +# o API support for invalid translation units. Currently we can't even get the +# diagnostics on failure because they refer to locations in an object that +# will have been invalidated. +# +# o fix memory management issues (currently client must hold on to index and +# translation unit, or risk crashes). +# +# o expose code completion APIs. +# +# o cleanup ctypes wrapping, would be nice to separate the ctypes details more +# clearly, and hide from the external interface (i.e., help(cindex)). +# +# o implement additional SourceLocation, SourceRange, and File methods. + +from ctypes import * +import collections + +import clang.enumerations + +# ctypes doesn't implicitly convert c_void_p to the appropriate wrapper +# object. This is a problem, because it means that from_parameter will see an +# integer and pass the wrong value on platforms where int != void*. Work around +# this by marshalling object arguments as void**. +c_object_p = POINTER(c_void_p) + +callbacks = {} + +### Exception Classes ### + +class TranslationUnitLoadError(Exception): + """Represents an error that occurred when loading a TranslationUnit. + + This is raised in the case where a TranslationUnit could not be + instantiated due to failure in the libclang library. + + FIXME: Make libclang expose additional error information in this scenario. + """ + pass + +class TranslationUnitSaveError(Exception): + """Represents an error that occurred when saving a TranslationUnit. 
+ + Each error has associated with it an enumerated value, accessible under + e.save_error. Consumers can compare the value with one of the ERROR_ + constants in this class. + """ + + # Indicates that an unknown error occurred. This typically indicates that + # I/O failed during save. + ERROR_UNKNOWN = 1 + + # Indicates that errors during translation prevented saving. The errors + # should be available via the TranslationUnit's diagnostics. + ERROR_TRANSLATION_ERRORS = 2 + + # Indicates that the translation unit was somehow invalid. + ERROR_INVALID_TU = 3 + + def __init__(self, enumeration, message): + assert isinstance(enumeration, int) + + if enumeration < 1 or enumeration > 3: + raise Exception("Encountered undefined TranslationUnit save error " + "constant: %d. Please file a bug to have this " + "value supported." % enumeration) + + self.save_error = enumeration + Exception.__init__(self, 'Error %d: %s' % (enumeration, message)) + +### Structures and Utility Classes ### + +class CachedProperty(object): + """Decorator that lazy-loads the value of a property. + + The first time the property is accessed, the original property function is + executed. The value it returns is set as the new value of that instance's + property, replacing the original method. 
+ """ + + def __init__(self, wrapped): + self.wrapped = wrapped + try: + self.__doc__ = wrapped.__doc__ + except: + pass + + def __get__(self, instance, instance_type=None): + if instance is None: + return self + + value = self.wrapped(instance) + setattr(instance, self.wrapped.__name__, value) + + return value + + +class _CXString(Structure): + """Helper for transforming CXString results.""" + + _fields_ = [("spelling", c_char_p), ("free", c_int)] + + def __del__(self): + conf.lib.clang_disposeString(self) + + @staticmethod + def from_result(res, fn, args): + assert isinstance(res, _CXString) + return conf.lib.clang_getCString(res) + +class SourceLocation(Structure): + """ + A SourceLocation represents a particular location within a source file. + """ + _fields_ = [("ptr_data", c_void_p * 2), ("int_data", c_uint)] + _data = None + + def _get_instantiation(self): + if self._data is None: + f, l, c, o = c_object_p(), c_uint(), c_uint(), c_uint() + conf.lib.clang_getInstantiationLocation(self, byref(f), byref(l), + byref(c), byref(o)) + if f: + f = File(f) + else: + f = None + self._data = (f, int(l.value), int(c.value), int(o.value)) + return self._data + + @staticmethod + def from_position(tu, file, line, column): + """ + Retrieve the source location associated with a given file/line/column in + a particular translation unit. + """ + return conf.lib.clang_getLocation(tu, file, line, column) + + @staticmethod + def from_offset(tu, file, offset): + """Retrieve a SourceLocation from a given character offset. 
+
+        tu -- TranslationUnit file belongs to
+        file -- File instance to obtain offset from
+        offset -- Integer character offset within file
+        """
+        return conf.lib.clang_getLocationForOffset(tu, file, offset)
+
+    @property
+    def file(self):
+        """Get the file represented by this source location."""
+        return self._get_instantiation()[0]
+
+    @property
+    def line(self):
+        """Get the line represented by this source location."""
+        return self._get_instantiation()[1]
+
+    @property
+    def column(self):
+        """Get the column represented by this source location."""
+        return self._get_instantiation()[2]
+
+    @property
+    def offset(self):
+        """Get the file offset represented by this source location."""
+        return self._get_instantiation()[3]
+
+    def __eq__(self, other):
+        return conf.lib.clang_equalLocations(self, other)
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+    def __repr__(self):
+        if self.file:
+            filename = self.file.name
+        else:
+            filename = None
+        return "<SourceLocation file %r, line %r, column %r>" % (
+            filename, self.line, self.column)
+
+class SourceRange(Structure):
+    """
+    A SourceRange describes a range of source locations within the source
+    code.
+    """
+    _fields_ = [
+        ("ptr_data", c_void_p * 2),
+        ("begin_int_data", c_uint),
+        ("end_int_data", c_uint)]
+
+    # FIXME: Eliminate this and make normal constructor? Requires hiding ctypes
+    # object.
+    @staticmethod
+    def from_locations(start, end):
+        return conf.lib.clang_getRange(start, end)
+
+    @property
+    def start(self):
+        """
+        Return a SourceLocation representing the first character within a
+        source range.
+        """
+        return conf.lib.clang_getRangeStart(self)
+
+    @property
+    def end(self):
+        """
+        Return a SourceLocation representing the last character within a
+        source range.
+ """ + return conf.lib.clang_getRangeEnd(self) + + def __eq__(self, other): + return conf.lib.clang_equalRanges(self, other) + + def __ne__(self, other): + return not self.__eq__(other) + + def __contains__(self, other): + """Useful to detect the Token/Lexer bug""" + if not isinstance(other, SourceLocation): + return False + if other.file is None and self.start.file is None: + pass + elif ( self.start.file.name != other.file.name or + other.file.name != self.end.file.name): + # same file name + return False + # same file, in between lines + if self.start.line < other.line < self.end.line: + return True + elif self.start.line == other.line: + # same file first line + if self.start.column <= other.column: + return True + elif other.line == self.end.line: + # same file last line + if other.column <= self.end.column: + return True + return False + + def __repr__(self): + return "" % (self.start, self.end) + +class Diagnostic(object): + """ + A Diagnostic is a single instance of a Clang diagnostic. It includes the + diagnostic severity, the message, the location the diagnostic occurred, as + well as additional source ranges and associated fix-it hints. 
+ """ + + Ignored = 0 + Note = 1 + Warning = 2 + Error = 3 + Fatal = 4 + + def __init__(self, ptr): + self.ptr = ptr + + def __del__(self): + conf.lib.clang_disposeDiagnostic(self) + + @property + def severity(self): + return conf.lib.clang_getDiagnosticSeverity(self) + + @property + def location(self): + return conf.lib.clang_getDiagnosticLocation(self) + + @property + def spelling(self): + return conf.lib.clang_getDiagnosticSpelling(self) + + @property + def ranges(self): + class RangeIterator: + def __init__(self, diag): + self.diag = diag + + def __len__(self): + return int(conf.lib.clang_getDiagnosticNumRanges(self.diag)) + + def __getitem__(self, key): + if (key >= len(self)): + raise IndexError + return conf.lib.clang_getDiagnosticRange(self.diag, key) + + return RangeIterator(self) + + @property + def fixits(self): + class FixItIterator: + def __init__(self, diag): + self.diag = diag + + def __len__(self): + return int(conf.lib.clang_getDiagnosticNumFixIts(self.diag)) + + def __getitem__(self, key): + range = SourceRange() + value = conf.lib.clang_getDiagnosticFixIt(self.diag, key, + byref(range)) + if len(value) == 0: + raise IndexError + + return FixIt(range, value) + + return FixItIterator(self) + + @property + def children(self): + class ChildDiagnosticsIterator: + def __init__(self, diag): + self.diag_set = conf.lib.clang_getChildDiagnostics(diag) + + def __len__(self): + return int(conf.lib.clang_getNumDiagnosticsInSet(self.diag_set)) + + def __getitem__(self, key): + diag = conf.lib.clang_getDiagnosticInSet(self.diag_set, key) + if not diag: + raise IndexError + return Diagnostic(diag) + + return ChildDiagnosticsIterator(self) + + @property + def category_number(self): + """The category number for this diagnostic or 0 if unavailable.""" + return conf.lib.clang_getDiagnosticCategory(self) + + @property + def category_name(self): + """The string name of the category for this diagnostic.""" + return conf.lib.clang_getDiagnosticCategoryText(self) + + 
@property
+    def option(self):
+        """The command-line option that enables this diagnostic."""
+        return conf.lib.clang_getDiagnosticOption(self, None)
+
+    @property
+    def disable_option(self):
+        """The command-line option that disables this diagnostic."""
+        disable = _CXString()
+        conf.lib.clang_getDiagnosticOption(self, byref(disable))
+
+        return conf.lib.clang_getCString(disable)
+
+    def __repr__(self):
+        return "<Diagnostic severity %r, location %r, spelling %r>" % (
+            self.severity, self.location, self.spelling)
+
+    def from_param(self):
+      return self.ptr
+
+class FixIt(object):
+    """
+    A FixIt represents a transformation to be applied to the source to
+    "fix-it". The fix-it should be applied by replacing the given source range
+    with the given value.
+    """
+
+    def __init__(self, range, value):
+        self.range = range
+        self.value = value
+
+    def __repr__(self):
+        return "<FixIt range %r, value %r>" % (self.range, self.value)
+
+class TokenGroup(object):
+    """Helper class to facilitate token management.
+
+    Tokens are allocated from libclang in chunks. They must be disposed of as a
+    collective group.
+
+    One purpose of this class is for instances to represent groups of allocated
+    tokens. Each token in a group contains a reference back to an instance of
+    this class. When all tokens from a group are garbage collected, it allows
+    this class to be garbage collected. When this class is garbage collected,
+    it calls the libclang destructor which invalidates all tokens in the group.
+
+    You should not instantiate this class outside of this module.
+    """
+    def __init__(self, tu, memory, count):
+        self._tu = tu
+        self._memory = memory
+        self._count = count
+
+    def __del__(self):
+        conf.lib.clang_disposeTokens(self._tu, self._memory, self._count)
+
+    @staticmethod
+    def get_tokens(tu, extent):
+        """Helper method to return all tokens in an extent.
+
+        This functionality is needed multiple places in this module. We define
+        it here because it seems like a logical place.
+ """ + tokens_memory = POINTER(Token)() + tokens_count = c_uint() + + conf.lib.clang_tokenize(tu, extent, byref(tokens_memory), + byref(tokens_count)) + + count = int(tokens_count.value) + + # If we get no tokens, no memory was allocated. Be sure not to return + # anything and potentially call a destructor on nothing. + if count < 1: + return + + tokens_array = cast(tokens_memory, POINTER(Token * count)).contents + + token_group = TokenGroup(tu, tokens_memory, tokens_count) + + for i in range(0, count): + token = Token() + token.int_data = tokens_array[i].int_data + token.ptr_data = tokens_array[i].ptr_data + token._tu = tu + token._group = token_group + + yield token + +class TokenKind(object): + """Describes a specific type of a Token.""" + + _value_map = {} # int -> TokenKind + + def __init__(self, value, name): + """Create a new TokenKind instance from a numeric value and a name.""" + self.value = value + self.name = name + + def __repr__(self): + return 'TokenKind.%s' % (self.name,) + + @staticmethod + def from_value(value): + """Obtain a registered TokenKind instance from its value.""" + result = TokenKind._value_map.get(value, None) + + if result is None: + raise ValueError('Unknown TokenKind: %d' % value) + + return result + + @staticmethod + def register(value, name): + """Register a new TokenKind enumeration. + + This should only be called at module load time by code within this + package. + """ + if value in TokenKind._value_map: + raise ValueError('TokenKind already registered: %d' % value) + + kind = TokenKind(value, name) + TokenKind._value_map[value] = kind + setattr(TokenKind, name, kind) + +### Cursor Kinds ### +class BaseEnumeration(object): + """ + Common base class for named enumerations held in sync with Index.h values. + + Subclasses must define their own _kinds and _name_map members, as: + _kinds = [] + _name_map = None + These values hold the per-subclass instances and value-to-name mappings, + respectively. 
+ + """ + + def __init__(self, value): + if value >= len(self.__class__._kinds): + self.__class__._kinds += [None] * (value - len(self.__class__._kinds) + 1) + if self.__class__._kinds[value] is not None: + raise ValueError('{0} value {1} already loaded'.format( + str(self.__class__), value)) + self.value = value + self.__class__._kinds[value] = self + self.__class__._name_map = None + + + def from_param(self): + return self.value + + @property + def name(self): + """Get the enumeration name of this cursor kind.""" + if self._name_map is None: + self._name_map = {} + for key, value in list(self.__class__.__dict__.items()): + if isinstance(value, self.__class__): + self._name_map[value] = key + return self._name_map[self] + + @classmethod + def from_id(cls, id): + if id >= len(cls._kinds) or cls._kinds[id] is None: + raise ValueError('Unknown template argument kind %d' % id) + return cls._kinds[id] + + def __repr__(self): + return '%s.%s' % (self.__class__, self.name,) + + +class CursorKind(BaseEnumeration): + """ + A CursorKind describes the kind of entity that a cursor points to. + """ + + # The required BaseEnumeration declarations. 
+ _kinds = [] + _name_map = None + + @staticmethod + def get_all_kinds(): + """Return all CursorKind enumeration instances.""" + return [_f for _f in CursorKind._kinds if _f] + + def is_declaration(self): + """Test if this is a declaration kind.""" + return conf.lib.clang_isDeclaration(self) + + def is_reference(self): + """Test if this is a reference kind.""" + return conf.lib.clang_isReference(self) + + def is_expression(self): + """Test if this is an expression kind.""" + return conf.lib.clang_isExpression(self) + + def is_statement(self): + """Test if this is a statement kind.""" + return conf.lib.clang_isStatement(self) + + def is_attribute(self): + """Test if this is an attribute kind.""" + return conf.lib.clang_isAttribute(self) + + def is_invalid(self): + """Test if this is an invalid kind.""" + return conf.lib.clang_isInvalid(self) + + def is_translation_unit(self): + """Test if this is a translation unit kind.""" + return conf.lib.clang_isTranslationUnit(self) + + def is_preprocessing(self): + """Test if this is a preprocessing kind.""" + return conf.lib.clang_isPreprocessing(self) + + def is_unexposed(self): + """Test if this is an unexposed kind.""" + return conf.lib.clang_isUnexposed(self) + + def __repr__(self): + return 'CursorKind.%s' % (self.name,) + +### +# Declaration Kinds + +# A declaration whose specific kind is not exposed via this interface. +# +# Unexposed declarations have the same operations as any other kind of +# declaration; one can extract their location information, spelling, find their +# definitions, etc. However, the specific kind of the declaration is not +# reported. +CursorKind.UNEXPOSED_DECL = CursorKind(1) + +# A C or C++ struct. +CursorKind.STRUCT_DECL = CursorKind(2) + +# A C or C++ union. +CursorKind.UNION_DECL = CursorKind(3) + +# A C++ class. +CursorKind.CLASS_DECL = CursorKind(4) + +# An enumeration. 
+CursorKind.ENUM_DECL = CursorKind(5) + +# A field (in C) or non-static data member (in C++) in a struct, union, or C++ +# class. +CursorKind.FIELD_DECL = CursorKind(6) + +# An enumerator constant. +CursorKind.ENUM_CONSTANT_DECL = CursorKind(7) + +# A function. +CursorKind.FUNCTION_DECL = CursorKind(8) + +# A variable. +CursorKind.VAR_DECL = CursorKind(9) + +# A function or method parameter. +CursorKind.PARM_DECL = CursorKind(10) + +# An Objective-C @interface. +CursorKind.OBJC_INTERFACE_DECL = CursorKind(11) + +# An Objective-C @interface for a category. +CursorKind.OBJC_CATEGORY_DECL = CursorKind(12) + +# An Objective-C @protocol declaration. +CursorKind.OBJC_PROTOCOL_DECL = CursorKind(13) + +# An Objective-C @property declaration. +CursorKind.OBJC_PROPERTY_DECL = CursorKind(14) + +# An Objective-C instance variable. +CursorKind.OBJC_IVAR_DECL = CursorKind(15) + +# An Objective-C instance method. +CursorKind.OBJC_INSTANCE_METHOD_DECL = CursorKind(16) + +# An Objective-C class method. +CursorKind.OBJC_CLASS_METHOD_DECL = CursorKind(17) + +# An Objective-C @implementation. +CursorKind.OBJC_IMPLEMENTATION_DECL = CursorKind(18) + +# An Objective-C @implementation for a category. +CursorKind.OBJC_CATEGORY_IMPL_DECL = CursorKind(19) + +# A typedef. +CursorKind.TYPEDEF_DECL = CursorKind(20) + +# A C++ class method. +CursorKind.CXX_METHOD = CursorKind(21) + +# A C++ namespace. +CursorKind.NAMESPACE = CursorKind(22) + +# A linkage specification, e.g. 'extern "C"'. +CursorKind.LINKAGE_SPEC = CursorKind(23) + +# A C++ constructor. +CursorKind.CONSTRUCTOR = CursorKind(24) + +# A C++ destructor. +CursorKind.DESTRUCTOR = CursorKind(25) + +# A C++ conversion function. +CursorKind.CONVERSION_FUNCTION = CursorKind(26) + +# A C++ template type parameter +CursorKind.TEMPLATE_TYPE_PARAMETER = CursorKind(27) + +# A C++ non-type template paramater. +CursorKind.TEMPLATE_NON_TYPE_PARAMETER = CursorKind(28) + +# A C++ template template parameter. 
+CursorKind.TEMPLATE_TEMPLATE_PARAMETER = CursorKind(29) + +# A C++ function template. +CursorKind.FUNCTION_TEMPLATE = CursorKind(30) + +# A C++ class template. +CursorKind.CLASS_TEMPLATE = CursorKind(31) + +# A C++ class template partial specialization. +CursorKind.CLASS_TEMPLATE_PARTIAL_SPECIALIZATION = CursorKind(32) + +# A C++ namespace alias declaration. +CursorKind.NAMESPACE_ALIAS = CursorKind(33) + +# A C++ using directive +CursorKind.USING_DIRECTIVE = CursorKind(34) + +# A C++ using declaration +CursorKind.USING_DECLARATION = CursorKind(35) + +# A Type alias decl. +CursorKind.TYPE_ALIAS_DECL = CursorKind(36) + +# A Objective-C synthesize decl +CursorKind.OBJC_SYNTHESIZE_DECL = CursorKind(37) + +# A Objective-C dynamic decl +CursorKind.OBJC_DYNAMIC_DECL = CursorKind(38) + +# A C++ access specifier decl. +CursorKind.CXX_ACCESS_SPEC_DECL = CursorKind(39) + + +### +# Reference Kinds + +CursorKind.OBJC_SUPER_CLASS_REF = CursorKind(40) +CursorKind.OBJC_PROTOCOL_REF = CursorKind(41) +CursorKind.OBJC_CLASS_REF = CursorKind(42) + +# A reference to a type declaration. +# +# A type reference occurs anywhere where a type is named but not +# declared. For example, given: +# typedef unsigned size_type; +# size_type size; +# +# The typedef is a declaration of size_type (CXCursor_TypedefDecl), +# while the type of the variable "size" is referenced. The cursor +# referenced by the type of size is the typedef for size_type. +CursorKind.TYPE_REF = CursorKind(43) +CursorKind.CXX_BASE_SPECIFIER = CursorKind(44) + +# A reference to a class template, function template, template +# template parameter, or class template partial specialization. +CursorKind.TEMPLATE_REF = CursorKind(45) + +# A reference to a namespace or namepsace alias. +CursorKind.NAMESPACE_REF = CursorKind(46) + +# A reference to a member of a struct, union, or class that occurs in +# some non-expression context, e.g., a designated initializer. 
+CursorKind.MEMBER_REF = CursorKind(47) + +# A reference to a labeled statement. +CursorKind.LABEL_REF = CursorKind(48) + +# A reference to a set of overloaded functions or function templates +# that has not yet been resolved to a specific function or function template. +CursorKind.OVERLOADED_DECL_REF = CursorKind(49) + +# A reference to a variable that occurs in some non-expression +# context, e.g., a C++ lambda capture list. +CursorKind.VARIABLE_REF = CursorKind(50) + +### +# Invalid/Error Kinds + +CursorKind.INVALID_FILE = CursorKind(70) +CursorKind.NO_DECL_FOUND = CursorKind(71) +CursorKind.NOT_IMPLEMENTED = CursorKind(72) +CursorKind.INVALID_CODE = CursorKind(73) + +### +# Expression Kinds + +# An expression whose specific kind is not exposed via this interface. +# +# Unexposed expressions have the same operations as any other kind of +# expression; one can extract their location information, spelling, children, +# etc. However, the specific kind of the expression is not reported. +CursorKind.UNEXPOSED_EXPR = CursorKind(100) + +# An expression that refers to some value declaration, such as a function, +# varible, or enumerator. +CursorKind.DECL_REF_EXPR = CursorKind(101) + +# An expression that refers to a member of a struct, union, class, Objective-C +# class, etc. +CursorKind.MEMBER_REF_EXPR = CursorKind(102) + +# An expression that calls a function. +CursorKind.CALL_EXPR = CursorKind(103) + +# An expression that sends a message to an Objective-C object or class. +CursorKind.OBJC_MESSAGE_EXPR = CursorKind(104) + +# An expression that represents a block literal. +CursorKind.BLOCK_EXPR = CursorKind(105) + +# An integer literal. +CursorKind.INTEGER_LITERAL = CursorKind(106) + +# A floating point number literal. +CursorKind.FLOATING_LITERAL = CursorKind(107) + +# An imaginary number literal. +CursorKind.IMAGINARY_LITERAL = CursorKind(108) + +# A string literal. +CursorKind.STRING_LITERAL = CursorKind(109) + +# A character literal. 
+CursorKind.CHARACTER_LITERAL = CursorKind(110) + +# A parenthesized expression, e.g. "(1)". +# +# This AST node is only formed if full location information is requested. +CursorKind.PAREN_EXPR = CursorKind(111) + +# This represents the unary-expression's (except sizeof and +# alignof). +CursorKind.UNARY_OPERATOR = CursorKind(112) + +# [C99 6.5.2.1] Array Subscripting. +CursorKind.ARRAY_SUBSCRIPT_EXPR = CursorKind(113) + +# A builtin binary operation expression such as "x + y" or +# "x <= y". +CursorKind.BINARY_OPERATOR = CursorKind(114) + +# Compound assignment such as "+=". +CursorKind.COMPOUND_ASSIGNMENT_OPERATOR = CursorKind(115) + +# The ?: ternary operator. +CursorKind.CONDITIONAL_OPERATOR = CursorKind(116) + +# An explicit cast in C (C99 6.5.4) or a C-style cast in C++ +# (C++ [expr.cast]), which uses the syntax (Type)expr. +# +# For example: (int)f. +CursorKind.CSTYLE_CAST_EXPR = CursorKind(117) + +# [C99 6.5.2.5] +CursorKind.COMPOUND_LITERAL_EXPR = CursorKind(118) + +# Describes an C or C++ initializer list. +CursorKind.INIT_LIST_EXPR = CursorKind(119) + +# The GNU address of label extension, representing &&label. +CursorKind.ADDR_LABEL_EXPR = CursorKind(120) + +# This is the GNU Statement Expression extension: ({int X=4; X;}) +CursorKind.StmtExpr = CursorKind(121) + +# Represents a C11 generic selection. +CursorKind.GENERIC_SELECTION_EXPR = CursorKind(122) + +# Implements the GNU __null extension, which is a name for a null +# pointer constant that has integral type (e.g., int or long) and is the same +# size and alignment as a pointer. +# +# The __null extension is typically only used by system headers, which define +# NULL as __null in C++ rather than using 0 (which is an integer that may not +# match the size of a pointer). +CursorKind.GNU_NULL_EXPR = CursorKind(123) + +# C++'s static_cast<> expression. +CursorKind.CXX_STATIC_CAST_EXPR = CursorKind(124) + +# C++'s dynamic_cast<> expression. 
+CursorKind.CXX_DYNAMIC_CAST_EXPR = CursorKind(125) + +# C++'s reinterpret_cast<> expression. +CursorKind.CXX_REINTERPRET_CAST_EXPR = CursorKind(126) + +# C++'s const_cast<> expression. +CursorKind.CXX_CONST_CAST_EXPR = CursorKind(127) + +# Represents an explicit C++ type conversion that uses "functional" +# notion (C++ [expr.type.conv]). +# +# Example: +# \code +# x = int(0.5); +# \endcode +CursorKind.CXX_FUNCTIONAL_CAST_EXPR = CursorKind(128) + +# A C++ typeid expression (C++ [expr.typeid]). +CursorKind.CXX_TYPEID_EXPR = CursorKind(129) + +# [C++ 2.13.5] C++ Boolean Literal. +CursorKind.CXX_BOOL_LITERAL_EXPR = CursorKind(130) + +# [C++0x 2.14.7] C++ Pointer Literal. +CursorKind.CXX_NULL_PTR_LITERAL_EXPR = CursorKind(131) + +# Represents the "this" expression in C++ +CursorKind.CXX_THIS_EXPR = CursorKind(132) + +# [C++ 15] C++ Throw Expression. +# +# This handles 'throw' and 'throw' assignment-expression. When +# assignment-expression isn't present, Op will be null. +CursorKind.CXX_THROW_EXPR = CursorKind(133) + +# A new expression for memory allocation and constructor calls, e.g: +# "new CXXNewExpr(foo)". +CursorKind.CXX_NEW_EXPR = CursorKind(134) + +# A delete expression for memory deallocation and destructor calls, +# e.g. "delete[] pArray". +CursorKind.CXX_DELETE_EXPR = CursorKind(135) + +# Represents a unary expression. +CursorKind.CXX_UNARY_EXPR = CursorKind(136) + +# ObjCStringLiteral, used for Objective-C string literals i.e. "foo". +CursorKind.OBJC_STRING_LITERAL = CursorKind(137) + +# ObjCEncodeExpr, used for in Objective-C. +CursorKind.OBJC_ENCODE_EXPR = CursorKind(138) + +# ObjCSelectorExpr used for in Objective-C. +CursorKind.OBJC_SELECTOR_EXPR = CursorKind(139) + +# Objective-C's protocol expression. +CursorKind.OBJC_PROTOCOL_EXPR = CursorKind(140) + +# An Objective-C "bridged" cast expression, which casts between +# Objective-C pointers and C pointers, transferring ownership in the process. 
+# +# \code +# NSString *str = (__bridge_transfer NSString *)CFCreateString(); +# \endcode +CursorKind.OBJC_BRIDGE_CAST_EXPR = CursorKind(141) + +# Represents a C++0x pack expansion that produces a sequence of +# expressions. +# +# A pack expansion expression contains a pattern (which itself is an +# expression) followed by an ellipsis. For example: +CursorKind.PACK_EXPANSION_EXPR = CursorKind(142) + +# Represents an expression that computes the length of a parameter +# pack. +CursorKind.SIZE_OF_PACK_EXPR = CursorKind(143) + +# Represents a C++ lambda expression that produces a local function +# object. +# +# \code +# void abssort(float *x, unsigned N) { +# std::sort(x, x + N, +# [](float a, float b) { +# return std::abs(a) < std::abs(b); +# }); +# } +# \endcode +CursorKind.LAMBDA_EXPR = CursorKind(144) + +# Objective-c Boolean Literal. +CursorKind.OBJ_BOOL_LITERAL_EXPR = CursorKind(145) + +# Represents the "self" expression in a ObjC method. +CursorKind.OBJ_SELF_EXPR = CursorKind(146) + + +# A statement whose specific kind is not exposed via this interface. +# +# Unexposed statements have the same operations as any other kind of statement; +# one can extract their location information, spelling, children, etc. However, +# the specific kind of the statement is not reported. +CursorKind.UNEXPOSED_STMT = CursorKind(200) + +# A labelled statement in a function. +CursorKind.LABEL_STMT = CursorKind(201) + +# A compound statement +CursorKind.COMPOUND_STMT = CursorKind(202) + +# A case statement. +CursorKind.CASE_STMT = CursorKind(203) + +# A default statement. +CursorKind.DEFAULT_STMT = CursorKind(204) + +# An if statement. +CursorKind.IF_STMT = CursorKind(205) + +# A switch statement. +CursorKind.SWITCH_STMT = CursorKind(206) + +# A while statement. +CursorKind.WHILE_STMT = CursorKind(207) + +# A do statement. +CursorKind.DO_STMT = CursorKind(208) + +# A for statement. +CursorKind.FOR_STMT = CursorKind(209) + +# A goto statement. 
+CursorKind.GOTO_STMT = CursorKind(210) + +# An indirect goto statement. +CursorKind.INDIRECT_GOTO_STMT = CursorKind(211) + +# A continue statement. +CursorKind.CONTINUE_STMT = CursorKind(212) + +# A break statement. +CursorKind.BREAK_STMT = CursorKind(213) + +# A return statement. +CursorKind.RETURN_STMT = CursorKind(214) + +# A GNU-style inline assembler statement. +CursorKind.ASM_STMT = CursorKind(215) + +# Objective-C's overall @try-@catch-@finally statement. +CursorKind.OBJC_AT_TRY_STMT = CursorKind(216) + +# Objective-C's @catch statement. +CursorKind.OBJC_AT_CATCH_STMT = CursorKind(217) + +# Objective-C's @finally statement. +CursorKind.OBJC_AT_FINALLY_STMT = CursorKind(218) + +# Objective-C's @throw statement. +CursorKind.OBJC_AT_THROW_STMT = CursorKind(219) + +# Objective-C's @synchronized statement. +CursorKind.OBJC_AT_SYNCHRONIZED_STMT = CursorKind(220) + +# Objective-C's autorealease pool statement. +CursorKind.OBJC_AUTORELEASE_POOL_STMT = CursorKind(221) + +# Objective-C's for collection statement. +CursorKind.OBJC_FOR_COLLECTION_STMT = CursorKind(222) + +# C++'s catch statement. +CursorKind.CXX_CATCH_STMT = CursorKind(223) + +# C++'s try statement. +CursorKind.CXX_TRY_STMT = CursorKind(224) + +# C++'s for (* : *) statement. +CursorKind.CXX_FOR_RANGE_STMT = CursorKind(225) + +# Windows Structured Exception Handling's try statement. +CursorKind.SEH_TRY_STMT = CursorKind(226) + +# Windows Structured Exception Handling's except statement. +CursorKind.SEH_EXCEPT_STMT = CursorKind(227) + +# Windows Structured Exception Handling's finally statement. +CursorKind.SEH_FINALLY_STMT = CursorKind(228) + +# A MS inline assembly statement extension. +CursorKind.MS_ASM_STMT = CursorKind(229) + +# The null statement. +CursorKind.NULL_STMT = CursorKind(230) + +# Adaptor class for mixing declarations with statements and expressions. +CursorKind.DECL_STMT = CursorKind(231) + +# OpenMP parallel directive. 
+CursorKind.OMP_PARALLEL_DIRECTIVE = CursorKind(232) + +# OpenMP SIMD directive. +CursorKind.OMP_SIMD_DIRECTIVE = CursorKind(233) + +# OpenMP for directive. +CursorKind.OMP_FOR_DIRECTIVE = CursorKind(234) + +# OpenMP sections directive. +CursorKind.OMP_SECTIONS_DIRECTIVE = CursorKind(235) + +# OpenMP section directive. +CursorKind.OMP_SECTION_DIRECTIVE = CursorKind(236) + +# OpenMP single directive. +CursorKind.OMP_SINGLE_DIRECTIVE = CursorKind(237) + +# OpenMP parallel for directive. +CursorKind.OMP_PARALLEL_FOR_DIRECTIVE = CursorKind(238) + +# OpenMP parallel sections directive. +CursorKind.OMP_PARALLEL_SECTIONS_DIRECTIVE = CursorKind(239) + +# OpenMP task directive. +CursorKind.OMP_TASK_DIRECTIVE = CursorKind(240) + +# OpenMP master directive. +CursorKind.OMP_MASTER_DIRECTIVE = CursorKind(241) + +# OpenMP critical directive. +CursorKind.OMP_CRITICAL_DIRECTIVE = CursorKind(242) + +# OpenMP taskyield directive. +CursorKind.OMP_TASKYIELD_DIRECTIVE = CursorKind(243) + +# OpenMP barrier directive. +CursorKind.OMP_BARRIER_DIRECTIVE = CursorKind(244) + +# OpenMP taskwait directive. +CursorKind.OMP_TASKWAIT_DIRECTIVE = CursorKind(245) + +# OpenMP flush directive. +CursorKind.OMP_FLUSH_DIRECTIVE = CursorKind(246) + +# Windows Structured Exception Handling's leave statement. +CursorKind.SEH_LEAVE_STMT = CursorKind(247) + +# OpenMP ordered directive. +CursorKind.OMP_ORDERED_DIRECTIVE = CursorKind(248) + +# OpenMP atomic directive. +CursorKind.OMP_ATOMIC_DIRECTIVE = CursorKind(249) + +# OpenMP for SIMD directive. +CursorKind.OMP_FOR_SIMD_DIRECTIVE = CursorKind(250) + +# OpenMP parallel for SIMD directive. +CursorKind.OMP_PARALLELFORSIMD_DIRECTIVE = CursorKind(251) + +# OpenMP target directive. +CursorKind.OMP_TARGET_DIRECTIVE = CursorKind(252) + +# OpenMP teams directive. +CursorKind.OMP_TEAMS_DIRECTIVE = CursorKind(253) + +# OpenMP taskgroup directive. +CursorKind.OMP_TASKGROUP_DIRECTIVE = CursorKind(254) + +# OpenMP cancellation point directive. 
+CursorKind.OMP_CANCELLATION_POINT_DIRECTIVE = CursorKind(255) + +# OpenMP cancel directive. +CursorKind.OMP_CANCEL_DIRECTIVE = CursorKind(256) + +# OpenMP target data directive. +CursorKind.OMP_TARGET_DATA_DIRECTIVE = CursorKind(257) + +# OpenMP taskloop directive. +CursorKind.OMP_TASK_LOOP_DIRECTIVE = CursorKind(258) + +# OpenMP taskloop simd directive. +CursorKind.OMP_TASK_LOOP_SIMD_DIRECTIVE = CursorKind(259) + +# OpenMP distribute directive. +CursorKind.OMP_DISTRIBUTE_DIRECTIVE = CursorKind(260) + +# OpenMP target enter data directive. +CursorKind.OMP_TARGET_ENTER_DATA_DIRECTIVE = CursorKind(261) + +# OpenMP target exit data directive. +CursorKind.OMP_TARGET_EXIT_DATA_DIRECTIVE = CursorKind(262) + +# OpenMP target parallel directive. +CursorKind.OMP_TARGET_PARALLEL_DIRECTIVE = CursorKind(263) + +# OpenMP target parallel for directive. +CursorKind.OMP_TARGET_PARALLELFOR_DIRECTIVE = CursorKind(264) + +# OpenMP target update directive. +CursorKind.OMP_TARGET_UPDATE_DIRECTIVE = CursorKind(265) + +# OpenMP distribute parallel for directive. +CursorKind.OMP_DISTRIBUTE_PARALLELFOR_DIRECTIVE = CursorKind(266) + +# OpenMP distribute parallel for simd directive. +CursorKind.OMP_DISTRIBUTE_PARALLEL_FOR_SIMD_DIRECTIVE = CursorKind(267) + +# OpenMP distribute simd directive. +CursorKind.OMP_DISTRIBUTE_SIMD_DIRECTIVE = CursorKind(268) + +# OpenMP target parallel for simd directive. +CursorKind.OMP_TARGET_PARALLEL_FOR_SIMD_DIRECTIVE = CursorKind(269) + +# OpenMP target simd directive. +CursorKind.OMP_TARGET_SIMD_DIRECTIVE = CursorKind(270) + +# OpenMP teams distribute directive. +CursorKind.OMP_TEAMS_DISTRIBUTE_DIRECTIVE = CursorKind(271) + +### +# Other Kinds + +# Cursor that represents the translation unit itself. +# +# The translation unit cursor exists primarily to act as the root cursor for +# traversing the contents of a translation unit. 
CursorKind.TRANSLATION_UNIT = CursorKind(300)

###
# Attributes

# An attribute whose specific kind is not exposed via this interface.
CursorKind.UNEXPOSED_ATTR = CursorKind(400)

CursorKind.IB_ACTION_ATTR = CursorKind(401)
CursorKind.IB_OUTLET_ATTR = CursorKind(402)
CursorKind.IB_OUTLET_COLLECTION_ATTR = CursorKind(403)

CursorKind.CXX_FINAL_ATTR = CursorKind(404)
CursorKind.CXX_OVERRIDE_ATTR = CursorKind(405)
CursorKind.ANNOTATE_ATTR = CursorKind(406)
CursorKind.ASM_LABEL_ATTR = CursorKind(407)
CursorKind.PACKED_ATTR = CursorKind(408)
CursorKind.PURE_ATTR = CursorKind(409)
CursorKind.CONST_ATTR = CursorKind(410)
CursorKind.NODUPLICATE_ATTR = CursorKind(411)
CursorKind.CUDACONSTANT_ATTR = CursorKind(412)
CursorKind.CUDADEVICE_ATTR = CursorKind(413)
CursorKind.CUDAGLOBAL_ATTR = CursorKind(414)
CursorKind.CUDAHOST_ATTR = CursorKind(415)
CursorKind.CUDASHARED_ATTR = CursorKind(416)

CursorKind.VISIBILITY_ATTR = CursorKind(417)

CursorKind.DLLEXPORT_ATTR = CursorKind(418)
CursorKind.DLLIMPORT_ATTR = CursorKind(419)

###
# Preprocessing
CursorKind.PREPROCESSING_DIRECTIVE = CursorKind(500)
CursorKind.MACRO_DEFINITION = CursorKind(501)
CursorKind.MACRO_INSTANTIATION = CursorKind(502)
CursorKind.INCLUSION_DIRECTIVE = CursorKind(503)

###
# Extra declaration

# A module import declaration.
CursorKind.MODULE_IMPORT_DECL = CursorKind(600)
# A type alias template declaration.
CursorKind.TYPE_ALIAS_TEMPLATE_DECL = CursorKind(601)
# A static_assert or _Static_assert node.
CursorKind.STATIC_ASSERT = CursorKind(602)
# A friend declaration.
CursorKind.FRIEND_DECL = CursorKind(603)

# A code completion overload candidate.
CursorKind.OVERLOAD_CANDIDATE = CursorKind(700)

### Template Argument Kinds ###
class TemplateArgumentKind(BaseEnumeration):
    """
    A TemplateArgumentKind describes the kind of entity that a template argument
    represents.
    """

    # The required BaseEnumeration declarations.
    _kinds = []
    _name_map = None

TemplateArgumentKind.NULL = TemplateArgumentKind(0)
TemplateArgumentKind.TYPE = TemplateArgumentKind(1)
TemplateArgumentKind.DECLARATION = TemplateArgumentKind(2)
TemplateArgumentKind.NULLPTR = TemplateArgumentKind(3)
TemplateArgumentKind.INTEGRAL = TemplateArgumentKind(4)

### Cursors ###

class Cursor(Structure):
    """
    The Cursor class represents a reference to an element within the AST. It
    acts as a kind of iterator.
    """

    # Mirrors the in-memory layout of the libclang CXCursor struct so that
    # instances can be passed to / returned from libclang by value.
    _fields_ = [("_kind_id", c_int), ("xdata", c_int), ("data", c_void_p * 3)]

    @staticmethod
    def from_location(tu, location):
        """Obtain the Cursor at a SourceLocation within a TranslationUnit."""
        # We store a reference to the TU in the instance so the TU won't get
        # collected before the cursor.
        cursor = conf.lib.clang_getCursor(tu, location)
        cursor._tu = tu

        return cursor

    def __eq__(self, other):
        return conf.lib.clang_equalCursors(self, other)

    def __ne__(self, other):
        return not self.__eq__(other)

    def is_definition(self):
        """
        Returns true if the declaration pointed at by the cursor is also a
        definition of that entity.
        """
        return conf.lib.clang_isCursorDefinition(self)

    def is_const_method(self):
        """Returns True if the cursor refers to a C++ member function or member
        function template that is declared 'const'.
        """
        return conf.lib.clang_CXXMethod_isConst(self)

    def is_converting_constructor(self):
        """Returns True if the cursor refers to a C++ converting constructor.
        """
        return conf.lib.clang_CXXConstructor_isConvertingConstructor(self)

    def is_copy_constructor(self):
        """Returns True if the cursor refers to a C++ copy constructor.
        """
        return conf.lib.clang_CXXConstructor_isCopyConstructor(self)

    def is_default_constructor(self):
        """Returns True if the cursor refers to a C++ default constructor.
        """
        return conf.lib.clang_CXXConstructor_isDefaultConstructor(self)

    def is_move_constructor(self):
        """Returns True if the cursor refers to a C++ move constructor.
        """
        return conf.lib.clang_CXXConstructor_isMoveConstructor(self)

    def is_default_method(self):
        """Returns True if the cursor refers to a C++ member function or member
        function template that is declared '= default'.
        """
        return conf.lib.clang_CXXMethod_isDefaulted(self)

    def is_mutable_field(self):
        """Returns True if the cursor refers to a C++ field that is declared
        'mutable'.
        """
        return conf.lib.clang_CXXField_isMutable(self)

    def is_pure_virtual_method(self):
        """Returns True if the cursor refers to a C++ member function or member
        function template that is declared pure virtual.
        """
        return conf.lib.clang_CXXMethod_isPureVirtual(self)

    def is_static_method(self):
        """Returns True if the cursor refers to a C++ member function or member
        function template that is declared 'static'.
        """
        return conf.lib.clang_CXXMethod_isStatic(self)

    def is_virtual_method(self):
        """Returns True if the cursor refers to a C++ member function or member
        function template that is declared 'virtual'.
        """
        return conf.lib.clang_CXXMethod_isVirtual(self)

    def get_definition(self):
        """
        If the cursor is a reference to a declaration or a declaration of
        some entity, return a cursor that points to the definition of that
        entity.
        """
        # TODO: Should probably check that this is either a reference or
        # declaration prior to issuing the lookup.
        return conf.lib.clang_getCursorDefinition(self)

    def get_usr(self):
        """Return the Unified Symbol Resolution (USR) for the entity referenced
        by the given cursor (or None).

        A Unified Symbol Resolution (USR) is a string that identifies a
        particular entity (function, class, variable, etc.) within a
        program. USRs can be compared across translation units to determine,
        e.g., when references in one translation refer to an entity defined in
        another translation unit."""
        return conf.lib.clang_getCursorUSR(self)

    @property
    def kind(self):
        """Return the kind of this cursor."""
        return CursorKind.from_id(self._kind_id)

    @property
    def spelling(self):
        """Return the spelling of the entity pointed at by the cursor."""
        # Memoized: the spelling of an entity never changes for a given TU.
        if not hasattr(self, '_spelling'):
            self._spelling = conf.lib.clang_getCursorSpelling(self)

        return self._spelling

    @property
    def displayname(self):
        """
        Return the display name for the entity referenced by this cursor.

        The display name contains extra information that helps identify the
        cursor, such as the parameters of a function or template or the
        arguments of a class template specialization.
        """
        if not hasattr(self, '_displayname'):
            self._displayname = conf.lib.clang_getCursorDisplayName(self)

        return self._displayname

    @property
    def mangled_name(self):
        """Return the mangled name for the entity referenced by this cursor."""
        if not hasattr(self, '_mangled_name'):
            self._mangled_name = conf.lib.clang_Cursor_getMangling(self)

        return self._mangled_name

    @property
    def location(self):
        """
        Return the source location (the starting character) of the entity
        pointed at by the cursor.
        """
        if not hasattr(self, '_loc'):
            self._loc = conf.lib.clang_getCursorLocation(self)

        return self._loc

    @property
    def extent(self):
        """
        Return the source range (the range of text) occupied by the entity
        pointed at by the cursor.
        """
        if not hasattr(self, '_extent'):
            self._extent = conf.lib.clang_getCursorExtent(self)

        return self._extent

    @property
    def storage_class(self):
        """
        Retrieves the storage class (if any) of the entity pointed at by the
        cursor.
        """
        if not hasattr(self, '_storage_class'):
            self._storage_class = conf.lib.clang_Cursor_getStorageClass(self)

        return StorageClass.from_id(self._storage_class)

    @property
    def access_specifier(self):
        """
        Retrieves the access specifier (if any) of the entity pointed at by the
        cursor.
        """
        if not hasattr(self, '_access_specifier'):
            self._access_specifier = conf.lib.clang_getCXXAccessSpecifier(self)

        return AccessSpecifier.from_id(self._access_specifier)

    @property
    def type(self):
        """
        Retrieve the Type (if any) of the entity pointed at by the cursor.
        """
        if not hasattr(self, '_type'):
            self._type = conf.lib.clang_getCursorType(self)

        return self._type

    @property
    def canonical(self):
        """Return the canonical Cursor corresponding to this Cursor.

        The canonical cursor is the cursor which is representative for the
        underlying entity. For example, if you have multiple forward
        declarations for the same class, the canonical cursor for the forward
        declarations will be identical.
        """
        if not hasattr(self, '_canonical'):
            self._canonical = conf.lib.clang_getCanonicalCursor(self)

        return self._canonical

    @property
    def result_type(self):
        """Retrieve the Type of the result for this Cursor."""
        if not hasattr(self, '_result_type'):
            self._result_type = conf.lib.clang_getResultType(self.type)

        return self._result_type

    @property
    def underlying_typedef_type(self):
        """Return the underlying type of a typedef declaration.

        Returns a Type for the typedef this cursor is a declaration for. If
        the current cursor is not a typedef, this raises.
        """
        if not hasattr(self, '_underlying_type'):
            assert self.kind.is_declaration()
            self._underlying_type = \
                conf.lib.clang_getTypedefDeclUnderlyingType(self)

        return self._underlying_type

    @property
    def enum_type(self):
        """Return the integer type of an enum declaration.

        Returns a Type corresponding to an integer. If the cursor is not for an
        enum, this raises.
        """
        if not hasattr(self, '_enum_type'):
            assert self.kind == CursorKind.ENUM_DECL
            self._enum_type = conf.lib.clang_getEnumDeclIntegerType(self)

        return self._enum_type

    @property
    def enum_value(self):
        """Return the value of an enum constant."""
        if not hasattr(self, '_enum_value'):
            assert self.kind == CursorKind.ENUM_CONSTANT_DECL
            # Figure out the underlying type of the enum to know if it
            # is a signed or unsigned quantity.
            underlying_type = self.type
            if underlying_type.kind == TypeKind.ENUM:
                underlying_type = underlying_type.get_declaration().enum_type
            if underlying_type.kind in (TypeKind.CHAR_U,
                                        TypeKind.UCHAR,
                                        TypeKind.CHAR16,
                                        TypeKind.CHAR32,
                                        TypeKind.USHORT,
                                        TypeKind.UINT,
                                        TypeKind.ULONG,
                                        TypeKind.ULONGLONG,
                                        TypeKind.UINT128):
                # Unsigned underlying type: use the unsigned accessor so large
                # values are not misinterpreted as negative.
                self._enum_value = \
                    conf.lib.clang_getEnumConstantDeclUnsignedValue(self)
            else:
                self._enum_value = conf.lib.clang_getEnumConstantDeclValue(self)
        return self._enum_value

    @property
    def objc_type_encoding(self):
        """Return the Objective-C type encoding as a str."""
        if not hasattr(self, '_objc_type_encoding'):
            self._objc_type_encoding = \
                conf.lib.clang_getDeclObjCTypeEncoding(self)

        return self._objc_type_encoding

    @property
    def hash(self):
        """Returns a hash of the cursor as an int."""
        if not hasattr(self, '_hash'):
            self._hash = conf.lib.clang_hashCursor(self)

        return self._hash

    @property
    def semantic_parent(self):
        """Return the semantic parent for this cursor."""
        if not hasattr(self, '_semantic_parent'):
            self._semantic_parent = conf.lib.clang_getCursorSemanticParent(self)

        return self._semantic_parent

    @property
    def lexical_parent(self):
        """Return the lexical parent for this cursor."""
        if not hasattr(self, '_lexical_parent'):
            self._lexical_parent = conf.lib.clang_getCursorLexicalParent(self)

        return self._lexical_parent

    @property
    def translation_unit(self):
        """Returns the TranslationUnit to which this Cursor belongs."""
        # If this triggers an AttributeError, the instance was not properly
        # created.
        return self._tu

    @property
    def referenced(self):
        """
        For a cursor that is a reference, returns a cursor
        representing the entity that it references.
        """
        if not hasattr(self, '_referenced'):
            self._referenced = conf.lib.clang_getCursorReferenced(self)

        return self._referenced

    @property
    def brief_comment(self):
        """Returns the brief comment text associated with that Cursor"""
        return conf.lib.clang_Cursor_getBriefCommentText(self)

    @property
    def raw_comment(self):
        """Returns the raw comment text associated with that Cursor"""
        return conf.lib.clang_Cursor_getRawCommentText(self)

    def get_arguments(self):
        """Return an iterator for accessing the arguments of this cursor."""
        num_args = conf.lib.clang_Cursor_getNumArguments(self)
        for i in range(0, num_args):
            yield conf.lib.clang_Cursor_getArgument(self, i)

    def get_num_template_arguments(self):
        """Returns the number of template args associated with this cursor."""
        return conf.lib.clang_Cursor_getNumTemplateArguments(self)

    def get_template_argument_kind(self, num):
        """Returns the TemplateArgumentKind for the indicated template
        argument."""
        return conf.lib.clang_Cursor_getTemplateArgumentKind(self, num)

    def get_template_argument_type(self, num):
        """Returns the CXType for the indicated template argument."""
        return conf.lib.clang_Cursor_getTemplateArgumentType(self, num)

    def get_template_argument_value(self, num):
        """Returns the value of the indicated arg as a signed 64b integer."""
        return conf.lib.clang_Cursor_getTemplateArgumentValue(self, num)

    def get_template_argument_unsigned_value(self, num):
        """Returns the value of the indicated arg as an unsigned 64b integer."""
        return conf.lib.clang_Cursor_getTemplateArgumentUnsignedValue(self, num)

    def get_children(self):
        """Return an iterator for accessing the children of this cursor."""

        # FIXME: Expose iteration from CIndex, PR6125.
        def visitor(child, parent, children):
            # FIXME: Document this assertion in API.
            # FIXME: There should just be an isNull method.
            assert child != conf.lib.clang_getNullCursor()

            # Create reference to TU so it isn't GC'd before Cursor.
            child._tu = self._tu
            children.append(child)
            return 1 # continue
        children = []
        conf.lib.clang_visitChildren(self, callbacks['cursor_visit'](visitor),
            children)
        return iter(children)

    def walk_preorder(self):
        """Depth-first preorder walk over the cursor and its descendants.

        Yields cursors.
        """
        yield self
        for child in self.get_children():
            for descendant in child.walk_preorder():
                yield descendant

    def get_tokens(self):
        """Obtain Token instances formulating that compose this Cursor.

        This is a generator for Token instances. It returns all tokens which
        occupy the extent this cursor occupies.
        """
        return TokenGroup.get_tokens(self._tu, self.extent)

    def get_field_offsetof(self):
        """Returns the offsetof the FIELD_DECL pointed by this Cursor."""
        return conf.lib.clang_Cursor_getOffsetOfField(self)

    def is_anonymous(self):
        """
        Check if the record is anonymous.
        """
        # For a field, anonymity is a property of the field's record type.
        if self.kind == CursorKind.FIELD_DECL:
            return self.type.get_declaration().is_anonymous()
        return conf.lib.clang_Cursor_isAnonymous(self)

    def is_bitfield(self):
        """
        Check if the field is a bitfield.
        """
        return conf.lib.clang_Cursor_isBitField(self)

    def get_bitfield_width(self):
        """
        Retrieve the width of a bitfield.
        """
        return conf.lib.clang_getFieldDeclBitWidth(self)

    @staticmethod
    def from_result(res, fn, args):
        """ctypes errcheck: attach the owning TranslationUnit to a returned
        Cursor, mapping the null cursor to None."""
        assert isinstance(res, Cursor)
        # FIXME: There should just be an isNull method.
        if res == conf.lib.clang_getNullCursor():
            return None

        # Store a reference to the TU in the Python object so it won't get GC'd
        # before the Cursor.
        tu = None
        for arg in args:
            if isinstance(arg, TranslationUnit):
                tu = arg
                break

            if hasattr(arg, 'translation_unit'):
                tu = arg.translation_unit
                break

        assert tu is not None

        res._tu = tu
        return res

    @staticmethod
    def from_cursor_result(res, fn, args):
        """ctypes errcheck: propagate the TU from the first (Cursor) argument
        to the returned Cursor, mapping the null cursor to None."""
        assert isinstance(res, Cursor)
        if res == conf.lib.clang_getNullCursor():
            return None

        res._tu = args[0]._tu
        return res

class StorageClass(object):
    """
    Describes the storage class of a declaration
    """

    # The unique kind objects, index by id.
    _kinds = []
    _name_map = None

    def __init__(self, value):
        # Grow the kind table on demand so ids may be registered out of order.
        if value >= len(StorageClass._kinds):
            StorageClass._kinds += [None] * (value - len(StorageClass._kinds) + 1)
        if StorageClass._kinds[value] is not None:
            raise ValueError('StorageClass already loaded')
        self.value = value
        StorageClass._kinds[value] = self
        StorageClass._name_map = None

    def from_param(self):
        return self.value

    @property
    def name(self):
        """Get the enumeration name of this storage class."""
        # NOTE(review): this assigns an instance attribute, shadowing the
        # class-level cache, so the map is rebuilt per instance on first use.
        if self._name_map is None:
            self._name_map = {}
            for key,value in list(StorageClass.__dict__.items()):
                if isinstance(value,StorageClass):
                    self._name_map[value] = key
        return self._name_map[self]

    @staticmethod
    def from_id(id):
        if id >= len(StorageClass._kinds) or not StorageClass._kinds[id]:
            raise ValueError('Unknown storage class %d' % id)
        return StorageClass._kinds[id]

    def __repr__(self):
        return 'StorageClass.%s' % (self.name,)

StorageClass.INVALID = StorageClass(0)
StorageClass.NONE = StorageClass(1)
StorageClass.EXTERN = StorageClass(2)
StorageClass.STATIC = StorageClass(3)
StorageClass.PRIVATEEXTERN = StorageClass(4)
StorageClass.OPENCLWORKGROUPLOCAL = StorageClass(5)
StorageClass.AUTO = StorageClass(6)
StorageClass.REGISTER = StorageClass(7)


### C++ access specifiers ###

class AccessSpecifier(BaseEnumeration):
    """
    Describes the access of a C++ class member
    """

    # The unique kind objects, index by id.
    _kinds = []
    _name_map = None

    def from_param(self):
        return self.value

    def __repr__(self):
        return 'AccessSpecifier.%s' % (self.name,)

AccessSpecifier.INVALID = AccessSpecifier(0)
AccessSpecifier.PUBLIC = AccessSpecifier(1)
AccessSpecifier.PROTECTED = AccessSpecifier(2)
AccessSpecifier.PRIVATE = AccessSpecifier(3)
AccessSpecifier.NONE = AccessSpecifier(4)

### Type Kinds ###

class TypeKind(BaseEnumeration):
    """
    Describes the kind of type.
    """

    # The unique kind objects, indexed by id.
    _kinds = []
    _name_map = None

    @property
    def spelling(self):
        """Retrieve the spelling of this TypeKind."""
        return conf.lib.clang_getTypeKindSpelling(self.value)

    def __repr__(self):
        return 'TypeKind.%s' % (self.name,)

TypeKind.INVALID = TypeKind(0)
TypeKind.UNEXPOSED = TypeKind(1)
TypeKind.VOID = TypeKind(2)
TypeKind.BOOL = TypeKind(3)
TypeKind.CHAR_U = TypeKind(4)
TypeKind.UCHAR = TypeKind(5)
TypeKind.CHAR16 = TypeKind(6)
TypeKind.CHAR32 = TypeKind(7)
TypeKind.USHORT = TypeKind(8)
TypeKind.UINT = TypeKind(9)
TypeKind.ULONG = TypeKind(10)
TypeKind.ULONGLONG = TypeKind(11)
TypeKind.UINT128 = TypeKind(12)
TypeKind.CHAR_S = TypeKind(13)
TypeKind.SCHAR = TypeKind(14)
TypeKind.WCHAR = TypeKind(15)
TypeKind.SHORT = TypeKind(16)
TypeKind.INT = TypeKind(17)
TypeKind.LONG = TypeKind(18)
TypeKind.LONGLONG = TypeKind(19)
TypeKind.INT128 = TypeKind(20)
TypeKind.FLOAT = TypeKind(21)
TypeKind.DOUBLE = TypeKind(22)
TypeKind.LONGDOUBLE = TypeKind(23)
TypeKind.NULLPTR = TypeKind(24)
TypeKind.OVERLOAD = TypeKind(25)
TypeKind.DEPENDENT = TypeKind(26)
TypeKind.OBJCID = TypeKind(27)
TypeKind.OBJCCLASS = TypeKind(28)
TypeKind.OBJCSEL = TypeKind(29)
TypeKind.FLOAT128 = TypeKind(30)
TypeKind.HALF = TypeKind(31)
TypeKind.COMPLEX = TypeKind(100)
TypeKind.POINTER = TypeKind(101)
TypeKind.BLOCKPOINTER = TypeKind(102)
TypeKind.LVALUEREFERENCE = TypeKind(103)
TypeKind.RVALUEREFERENCE = TypeKind(104)
TypeKind.RECORD = TypeKind(105)
TypeKind.ENUM = TypeKind(106)
TypeKind.TYPEDEF = TypeKind(107)
TypeKind.OBJCINTERFACE = TypeKind(108)
TypeKind.OBJCOBJECTPOINTER = TypeKind(109)
TypeKind.FUNCTIONNOPROTO = TypeKind(110)
TypeKind.FUNCTIONPROTO = TypeKind(111)
TypeKind.CONSTANTARRAY = TypeKind(112)
TypeKind.VECTOR = TypeKind(113)
TypeKind.INCOMPLETEARRAY = TypeKind(114)
TypeKind.VARIABLEARRAY = TypeKind(115)
TypeKind.DEPENDENTSIZEDARRAY = TypeKind(116)
TypeKind.MEMBERPOINTER = TypeKind(117)
TypeKind.AUTO = TypeKind(118)
TypeKind.ELABORATED = TypeKind(119)

class RefQualifierKind(BaseEnumeration):
    """Describes a specific ref-qualifier of a type."""

    # The unique kind objects, indexed by id.
    _kinds = []
    _name_map = None

    def from_param(self):
        return self.value

    def __repr__(self):
        return 'RefQualifierKind.%s' % (self.name,)

RefQualifierKind.NONE = RefQualifierKind(0)
RefQualifierKind.LVALUE = RefQualifierKind(1)
RefQualifierKind.RVALUE = RefQualifierKind(2)

class Type(Structure):
    """
    The type of an element in the abstract syntax tree.
    """
    # Mirrors the layout of the libclang CXType struct.
    _fields_ = [("_kind_id", c_int), ("data", c_void_p * 2)]

    @property
    def kind(self):
        """Return the kind of this type."""
        return TypeKind.from_id(self._kind_id)

    def argument_types(self):
        """Retrieve a container for the non-variadic arguments for this type.

        The returned object is iterable and indexable. Each item in the
        container is a Type instance.
        """
        # NOTE(review): collections.Sequence is removed in Python 3.10+;
        # modern code would use collections.abc.Sequence.
        class ArgumentsIterator(collections.Sequence):
            def __init__(self, parent):
                self.parent = parent
                self.length = None

            def __len__(self):
                if self.length is None:
                    self.length = conf.lib.clang_getNumArgTypes(self.parent)

                return self.length

            def __getitem__(self, key):
                # FIXME Support slice objects.
                if not isinstance(key, int):
                    raise TypeError("Must supply a non-negative int.")

                if key < 0:
                    raise IndexError("Only non-negative indexes are accepted.")

                if key >= len(self):
                    raise IndexError("Index greater than container length: "
                                     "%d > %d" % ( key, len(self) ))

                result = conf.lib.clang_getArgType(self.parent, key)
                if result.kind == TypeKind.INVALID:
                    raise IndexError("Argument could not be retrieved.")

                return result

        assert self.kind == TypeKind.FUNCTIONPROTO
        return ArgumentsIterator(self)

    @property
    def element_type(self):
        """Retrieve the Type of elements within this Type.

        If accessed on a type that is not an array, complex, or vector type, an
        exception will be raised.
        """
        result = conf.lib.clang_getElementType(self)
        if result.kind == TypeKind.INVALID:
            raise Exception('Element type not available on this type.')

        return result

    @property
    def element_count(self):
        """Retrieve the number of elements in this type.

        Returns an int.

        If the Type is not an array or vector, this raises.
        """
        result = conf.lib.clang_getNumElements(self)
        if result < 0:
            raise Exception('Type does not have elements.')

        return result

    @property
    def translation_unit(self):
        """The TranslationUnit to which this Type is associated."""
        # If this triggers an AttributeError, the instance was not properly
        # instantiated.
        return self._tu

    @staticmethod
    def from_result(res, fn, args):
        """ctypes errcheck: attach the owning TranslationUnit (taken from any
        argument that exposes one) to a returned Type."""
        assert isinstance(res, Type)

        tu = None
        for arg in args:
            if hasattr(arg, 'translation_unit'):
                tu = arg.translation_unit
                break

        assert tu is not None
        res._tu = tu

        return res

    def get_canonical(self):
        """
        Return the canonical type for a Type.

        Clang's type system explicitly models typedefs and all the
        ways a specific type can be represented. The canonical type
        is the underlying type with all the "sugar" removed. For
        example, if 'T' is a typedef for 'int', the canonical type for
        'T' would be 'int'.
        """
        return conf.lib.clang_getCanonicalType(self)

    def is_const_qualified(self):
        """Determine whether a Type has the "const" qualifier set.

        This does not look through typedefs that may have added "const"
        at a different level.
        """
        return conf.lib.clang_isConstQualifiedType(self)

    def is_volatile_qualified(self):
        """Determine whether a Type has the "volatile" qualifier set.

        This does not look through typedefs that may have added "volatile"
        at a different level.
        """
        return conf.lib.clang_isVolatileQualifiedType(self)

    def is_restrict_qualified(self):
        """Determine whether a Type has the "restrict" qualifier set.

        This does not look through typedefs that may have added "restrict" at
        a different level.
        """
        return conf.lib.clang_isRestrictQualifiedType(self)

    def is_function_variadic(self):
        """Determine whether this function Type is a variadic function type."""
        assert self.kind == TypeKind.FUNCTIONPROTO

        return conf.lib.clang_isFunctionTypeVariadic(self)

    def is_pod(self):
        """Determine whether this Type represents plain old data (POD)."""
        return conf.lib.clang_isPODType(self)

    def get_pointee(self):
        """
        For pointer types, returns the type of the pointee.
        """
        return conf.lib.clang_getPointeeType(self)

    def get_declaration(self):
        """
        Return the cursor for the declaration of the given type.
        """
        return conf.lib.clang_getTypeDeclaration(self)

    def get_result(self):
        """
        Retrieve the result type associated with a function type.
        """
        return conf.lib.clang_getResultType(self)

    def get_array_element_type(self):
        """
        Retrieve the type of the elements of the array type.
        """
        return conf.lib.clang_getArrayElementType(self)

    def get_array_size(self):
        """
        Retrieve the size of the constant array.
        """
        return conf.lib.clang_getArraySize(self)

    def get_class_type(self):
        """
        Retrieve the class type of the member pointer type.
        """
        return conf.lib.clang_Type_getClassType(self)

    def get_named_type(self):
        """
        Retrieve the type named by the qualified-id.
        """
        return conf.lib.clang_Type_getNamedType(self)

    def get_align(self):
        """
        Retrieve the alignment of the record.
        """
        return conf.lib.clang_Type_getAlignOf(self)

    def get_size(self):
        """
        Retrieve the size of the record.
        """
        return conf.lib.clang_Type_getSizeOf(self)

    def get_offset(self, fieldname):
        """
        Retrieve the offset of a field in the record.
        """
        # NOTE(review): fieldname must be a bytes object under Python 3 for
        # c_char_p; confirm against the callers' conventions.
        return conf.lib.clang_Type_getOffsetOf(self, c_char_p(fieldname))

    def get_ref_qualifier(self):
        """
        Retrieve the ref-qualifier of the type.
        """
        return RefQualifierKind.from_id(
                conf.lib.clang_Type_getCXXRefQualifier(self))

    def get_fields(self):
        """Return an iterator for accessing the fields of this type."""

        def visitor(field, children):
            assert field != conf.lib.clang_getNullCursor()

            # Create reference to TU so it isn't GC'd before Cursor.
            field._tu = self._tu
            fields.append(field)
            return 1 # continue
        fields = []
        conf.lib.clang_Type_visitFields(self,
                            callbacks['fields_visit'](visitor), fields)
        return iter(fields)

    @property
    def spelling(self):
        """Retrieve the spelling of this Type."""
        return conf.lib.clang_getTypeSpelling(self)

    def __eq__(self, other):
        if type(other) != type(self):
            return False

        return conf.lib.clang_equalTypes(self, other)

    def __ne__(self, other):
        return not self.__eq__(other)

## CIndex Objects ##

# CIndex objects (derived from ClangObject) are essentially lightweight
# wrappers attached to some underlying object, which is exposed via CIndex as
# a void*.

class ClangObject(object):
    """
    A helper for Clang objects. This class helps act as an intermediary for
    the ctypes library and the Clang CIndex library.
    """
    def __init__(self, obj):
        assert isinstance(obj, c_object_p) and obj
        self.obj = self._as_parameter_ = obj

    def from_param(self):
        return self._as_parameter_


class _CXUnsavedFile(Structure):
    """Helper for passing unsaved file arguments."""
    _fields_ = [("name", c_char_p), ("contents", c_char_p), ('length', c_ulong)]

# Functions calls through the python interface are rather slow. Fortunately,
# for most symbols, we do not need to perform a function call. Their spelling
# never changes and is consequently provided by this spelling cache.
SpellingCache = {
            # 0: CompletionChunk.Kind("Optional"),
            # 1: CompletionChunk.Kind("TypedText"),
            # 2: CompletionChunk.Kind("Text"),
            # 3: CompletionChunk.Kind("Placeholder"),
            # 4: CompletionChunk.Kind("Informative"),
            # 5 : CompletionChunk.Kind("CurrentParameter"),
            6: '(',   # CompletionChunk.Kind("LeftParen"),
            7: ')',   # CompletionChunk.Kind("RightParen"),
            8: '[',   # CompletionChunk.Kind("LeftBracket"),
            9: ']',   # CompletionChunk.Kind("RightBracket"),
            10: '{',  # CompletionChunk.Kind("LeftBrace"),
            11: '}',  # CompletionChunk.Kind("RightBrace"),
            12: '<',  # CompletionChunk.Kind("LeftAngle"),
            13: '>',  # CompletionChunk.Kind("RightAngle"),
            14: ', ', # CompletionChunk.Kind("Comma"),
            # 15: CompletionChunk.Kind("ResultType"),
            16: ':',  # CompletionChunk.Kind("Colon"),
            17: ';',  # CompletionChunk.Kind("SemiColon"),
            18: '=',  # CompletionChunk.Kind("Equal"),
            19: ' ',  # CompletionChunk.Kind("HorizontalSpace"),
            # 20: CompletionChunk.Kind("VerticalSpace")
}

class CompletionChunk:
    class Kind:
        def __init__(self, name):
            self.name = name

        def __str__(self):
            return self.name

        def __repr__(self):
            # NOTE(review): the repr template appears to have been lost in
            # transit (upstream uses "<ChunkKind: %s>"); as written this
            # always yields the empty string. Confirm against upstream.
            return "" % self

    def __init__(self, completionString, key):
        self.cs = completionString
        self.key = key
        self.__kindNumberCache = -1

    def __repr__(self):
        return "{'" + self.spelling + "', " + str(self.kind) + "}"

    @CachedProperty
    def spelling(self):
        # Fast path: punctuation chunks have a fixed spelling, so skip the
        # libclang call for those.
        if self.__kindNumber in SpellingCache:
            return SpellingCache[self.__kindNumber]
        return conf.lib.clang_getCompletionChunkText(self.cs, self.key).spelling

    # We do not use @CachedProperty here, as the manual implementation is
    # apparently still significantly faster. Please profile carefully if you
    # would like to add CachedProperty back.
    @property
    def __kindNumber(self):
        if self.__kindNumberCache == -1:
            self.__kindNumberCache = \
                conf.lib.clang_getCompletionChunkKind(self.cs, self.key)
        return self.__kindNumberCache

    @CachedProperty
    def kind(self):
        return completionChunkKindMap[self.__kindNumber]

    @CachedProperty
    def string(self):
        res = conf.lib.clang_getCompletionChunkCompletionString(self.cs,
                                                                self.key)

        if (res):
          return CompletionString(res)
        else:
          # NOTE(review): bare 'None' is a no-op; the method falls through and
          # implicitly returns None either way.
          None

    def isKindOptional(self):
        return self.__kindNumber == 0

    def isKindTypedText(self):
        return self.__kindNumber == 1

    def isKindPlaceHolder(self):
        return self.__kindNumber == 3

    def isKindInformative(self):
        return self.__kindNumber == 4

    def isKindResultType(self):
        return self.__kindNumber == 15

completionChunkKindMap = {
            0: CompletionChunk.Kind("Optional"),
            1: CompletionChunk.Kind("TypedText"),
            2: CompletionChunk.Kind("Text"),
            3: CompletionChunk.Kind("Placeholder"),
            4: CompletionChunk.Kind("Informative"),
            5: CompletionChunk.Kind("CurrentParameter"),
            6: CompletionChunk.Kind("LeftParen"),
            7: CompletionChunk.Kind("RightParen"),
            8: CompletionChunk.Kind("LeftBracket"),
            9: CompletionChunk.Kind("RightBracket"),
            10: CompletionChunk.Kind("LeftBrace"),
            11: CompletionChunk.Kind("RightBrace"),
            12: CompletionChunk.Kind("LeftAngle"),
            13: CompletionChunk.Kind("RightAngle"),
            14: CompletionChunk.Kind("Comma"),
            15: CompletionChunk.Kind("ResultType"),
            16: CompletionChunk.Kind("Colon"),
            17: CompletionChunk.Kind("SemiColon"),
            18: CompletionChunk.Kind("Equal"),
            19: CompletionChunk.Kind("HorizontalSpace"),
            20: CompletionChunk.Kind("VerticalSpace")}

class CompletionString(ClangObject):
    class Availability:
        def __init__(self, name):
            self.name = name

        def __str__(self):
            return self.name

        def __repr__(self):
            # NOTE(review): repr template apparently lost (upstream uses
            # "<Availability: %s>"); as written this yields the empty string.
            return "" % self

    def __len__(self):
        return self.num_chunks

    @CachedProperty
    def num_chunks(self):
        return conf.lib.clang_getNumCompletionChunks(self.obj)

    def __getitem__(self, key):
        if self.num_chunks <= key:
            raise IndexError
        return CompletionChunk(self.obj, key)

    @property
    def priority(self):
        return conf.lib.clang_getCompletionPriority(self.obj)

    @property
    def availability(self):
        res = conf.lib.clang_getCompletionAvailability(self.obj)
        return availabilityKinds[res]

    @property
    def briefComment(self):
        if conf.function_exists("clang_getCompletionBriefComment"):
            return conf.lib.clang_getCompletionBriefComment(self.obj)
        # Older libclang without the entry point: return an empty string.
        return _CXString()

    def __repr__(self):
        return " | ".join([str(a) for a in self]) \
               + " || Priority: " + str(self.priority) \
               + " || Availability: " + str(self.availability) \
               + " || Brief comment: " + str(self.briefComment.spelling)

availabilityKinds = {
            0: CompletionChunk.Kind("Available"),
            1: CompletionChunk.Kind("Deprecated"),
            2: CompletionChunk.Kind("NotAvailable"),
            3: CompletionChunk.Kind("NotAccessible")}

class CodeCompletionResult(Structure):
    _fields_ = [('cursorKind', c_int), ('completionString', c_object_p)]

    def __repr__(self):
        return str(CompletionString(self.completionString))

    @property
    def kind(self):
        return CursorKind.from_id(self.cursorKind)

    @property
    def string(self):
        return CompletionString(self.completionString)

class CCRStructure(Structure):
    _fields_ = [('results', POINTER(CodeCompletionResult)),
                ('numResults', c_int)]

    def __len__(self):
        return self.numResults

    def __getitem__(self, key):
        if len(self) <= key:
            raise IndexError

        return self.results[key]

class CodeCompletionResults(ClangObject):
    def __init__(self, ptr):
        assert isinstance(ptr, POINTER(CCRStructure)) and ptr
        self.ptr = self._as_parameter_ = ptr

    def from_param(self):
        return self._as_parameter_

    def __del__(self):
        # Release the libclang-owned results when the wrapper dies.
        conf.lib.clang_disposeCodeCompleteResults(self)

    @property
    def results(self):
        return self.ptr.contents

    @property
    def diagnostics(self):
        class DiagnosticsItr:
            def __init__(self, ccr):
                self.ccr= ccr

            def __len__(self):
                return int(\
                  conf.lib.clang_codeCompleteGetNumDiagnostics(self.ccr))

            def __getitem__(self, key):
                return conf.lib.clang_codeCompleteGetDiagnostic(self.ccr, key)

        return DiagnosticsItr(self)


class Index(ClangObject):
    """
    The Index type provides the primary interface to the Clang CIndex library,
    primarily by providing an interface for reading and parsing translation
    units.
    """

    @staticmethod
    def create(excludeDecls=False):
        """
        Create a new Index.
        Parameters:
        excludeDecls -- Exclude local declarations from translation units.
        """
        return Index(conf.lib.clang_createIndex(excludeDecls, 0))

    def __del__(self):
        conf.lib.clang_disposeIndex(self)

    def read(self, path):
        """Load a TranslationUnit from the given AST file."""
        return TranslationUnit.from_ast_file(path, self)

    def parse(self, path, args=None, unsaved_files=None, options = 0):
        """Load the translation unit from the given source code file by running
        clang and generating the AST before loading. Additional command line
        parameters can be passed to clang via the args parameter.

        In-memory contents for files can be provided by passing a list of pairs
        to as unsaved_files, the first item should be the filenames to be mapped
        and the second should be the contents to be substituted for the
        file. The contents may be passed as strings or file objects.

        If an error was encountered during parsing, a TranslationUnitLoadError
        will be raised.
        """
        return TranslationUnit.from_source(path, args, unsaved_files, options,
                                           self)

class TranslationUnit(ClangObject):
    """Represents a source code translation unit.

    This is one of the main types in the API. Any time you wish to interact
    with Clang's representation of a source file, you typically start with a
    translation unit.
    """

    # Default parsing mode.
    PARSE_NONE = 0

    # Instruct the parser to create a detailed processing record containing
    # metadata not normally retained.
    PARSE_DETAILED_PROCESSING_RECORD = 1

    # Indicates that the translation unit is incomplete. This is typically used
    # when parsing headers.
    PARSE_INCOMPLETE = 2

    # Instruct the parser to create a pre-compiled preamble for the translation
    # unit. This caches the preamble (included files at top of source file).
    # This is useful if the translation unit will be reparsed and you don't
    # want to incur the overhead of reparsing the preamble.
    PARSE_PRECOMPILED_PREAMBLE = 4

    # Cache code completion information on parse. This adds time to parsing but
    # speeds up code completion.
    PARSE_CACHE_COMPLETION_RESULTS = 8

    # Flags with values 16 and 32 are deprecated and intentionally omitted.

    # Do not parse function bodies. This is useful if you only care about
    # searching for declarations/definitions.
    PARSE_SKIP_FUNCTION_BODIES = 64

    # Used to indicate that brief documentation comments should be included
    # into the set of code completions returned from this translation unit.
    PARSE_INCLUDE_BRIEF_COMMENTS_IN_CODE_COMPLETION = 128

    @classmethod
    def from_source(cls, filename, args=None, unsaved_files=None, options=0,
                    index=None):
        """Create a TranslationUnit by parsing source.

        This is capable of processing source code both from files on the
        filesystem as well as in-memory contents.

        Command-line arguments that would be passed to clang are specified as
        a list via args. These can be used to specify include paths, warnings,
        etc. e.g. ["-Wall", "-I/path/to/include"].

        In-memory file content can be provided via unsaved_files. This is an
        iterable of 2-tuples. The first element is the str filename. The
        second element defines the content. Content can be provided as str
        source code or as file objects (anything with a read() method). If
        a file object is being used, content will be read until EOF and the
        read cursor will not be reset to its original position.

        options is a bitwise or of TranslationUnit.PARSE_XXX flags which will
        control parsing behavior.

        index is an Index instance to utilize. If not provided, a new Index
        will be created for this TranslationUnit.

        To parse source from the filesystem, the filename of the file to parse
        is specified by the filename argument. Or, filename could be None and
        the args list would contain the filename(s) to parse.

        To parse source from an in-memory buffer, set filename to the virtual
        filename you wish to associate with this source (e.g. "test.c"). The
        contents of that file are then provided in unsaved_files.

        If an error occurs, a TranslationUnitLoadError is raised.

        Please note that a TranslationUnit with parser errors may be returned.
        It is the caller's responsibility to check tu.diagnostics for errors.

        Also note that Clang infers the source language from the extension of
        the input filename. If you pass in source code containing a C++ class
        declaration with the filename "test.c" parsing will fail.
+ """ + if args is None: + args = [] + + if unsaved_files is None: + unsaved_files = [] + + if index is None: + index = Index.create() + + if isinstance(filename, str): + filename = filename.encode('utf8') + + args_length = len(args) + if args_length > 0: + args = (arg.encode('utf8') if isinstance(arg, str) else arg + for arg in args) + args_array = (c_char_p * args_length)(* args) + + unsaved_array = None + if len(unsaved_files) > 0: + unsaved_array = (_CXUnsavedFile * len(unsaved_files))() + for i, (name, contents) in enumerate(unsaved_files): + if hasattr(contents, "read"): + contents = contents.read() + + unsaved_array[i].name = name + unsaved_array[i].contents = contents + unsaved_array[i].length = len(contents) + + ptr = conf.lib.clang_parseTranslationUnit(index, filename, args_array, + args_length, unsaved_array, + len(unsaved_files), options) + + if not ptr: + raise TranslationUnitLoadError("Error parsing translation unit.") + + return cls(ptr, index=index) + + @classmethod + def from_ast_file(cls, filename, index=None): + """Create a TranslationUnit instance from a saved AST file. + + A previously-saved AST file (provided with -emit-ast or + TranslationUnit.save()) is loaded from the filename specified. + + If the file cannot be loaded, a TranslationUnitLoadError will be + raised. + + index is optional and is the Index instance to use. If not provided, + a default Index will be created. + """ + if index is None: + index = Index.create() + + ptr = conf.lib.clang_createTranslationUnit(index, filename) + if not ptr: + raise TranslationUnitLoadError(filename) + + return cls(ptr=ptr, index=index) + + def __init__(self, ptr, index): + """Create a TranslationUnit instance. + + TranslationUnits should be created using one of the from_* @classmethod + functions above. __init__ is only called internally. 
+ """ + assert isinstance(index, Index) + self.index = index + ClangObject.__init__(self, ptr) + + def __del__(self): + conf.lib.clang_disposeTranslationUnit(self) + + @property + def cursor(self): + """Retrieve the cursor that represents the given translation unit.""" + return conf.lib.clang_getTranslationUnitCursor(self) + + @property + def spelling(self): + """Get the original translation unit source file name.""" + return conf.lib.clang_getTranslationUnitSpelling(self) + + def get_includes(self): + """ + Return an iterable sequence of FileInclusion objects that describe the + sequence of inclusions in a translation unit. The first object in + this sequence is always the input file. Note that this method will not + recursively iterate over header files included through precompiled + headers. + """ + def visitor(fobj, lptr, depth, includes): + if depth > 0: + loc = lptr.contents + includes.append(FileInclusion(loc.file, File(fobj), loc, depth)) + + # Automatically adapt CIndex/ctype pointers to python objects + includes = [] + conf.lib.clang_getInclusions(self, + callbacks['translation_unit_includes'](visitor), includes) + + return iter(includes) + + def get_file(self, filename): + """Obtain a File from this translation unit.""" + + return File.from_name(self, filename) + + def get_location(self, filename, position): + """Obtain a SourceLocation for a file in this translation unit. + + The position can be specified by passing: + + - Integer file offset. Initial file offset is 0. + - 2-tuple of (line number, column number). Initial file position is + (0, 0) + """ + f = self.get_file(filename) + + if isinstance(position, int): + return SourceLocation.from_offset(self, f, position) + + return SourceLocation.from_position(self, f, position[0], position[1]) + + def get_extent(self, filename, locations): + """Obtain a SourceRange from this translation unit. + + The bounds of the SourceRange must ultimately be defined by a start and + end SourceLocation. 
For the locations argument, you can pass: + + - 2 SourceLocation instances in a 2-tuple or list. + - 2 int file offsets via a 2-tuple or list. + - 2 2-tuple or lists of (line, column) pairs in a 2-tuple or list. + + e.g. + + get_extent('foo.c', (5, 10)) + get_extent('foo.c', ((1, 1), (1, 15))) + """ + f = self.get_file(filename) + + if len(locations) < 2: + raise Exception('Must pass object with at least 2 elements') + + start_location, end_location = locations + + if hasattr(start_location, '__len__'): + start_location = SourceLocation.from_position(self, f, + start_location[0], start_location[1]) + elif isinstance(start_location, int): + start_location = SourceLocation.from_offset(self, f, + start_location) + + if hasattr(end_location, '__len__'): + end_location = SourceLocation.from_position(self, f, + end_location[0], end_location[1]) + elif isinstance(end_location, int): + end_location = SourceLocation.from_offset(self, f, end_location) + + assert isinstance(start_location, SourceLocation) + assert isinstance(end_location, SourceLocation) + + return SourceRange.from_locations(start_location, end_location) + + @property + def diagnostics(self): + """ + Return an iterable (and indexable) object containing the diagnostics. + """ + class DiagIterator: + def __init__(self, tu): + self.tu = tu + + def __len__(self): + return int(conf.lib.clang_getNumDiagnostics(self.tu)) + + def __getitem__(self, key): + diag = conf.lib.clang_getDiagnostic(self.tu, key) + if not diag: + raise IndexError + return Diagnostic(diag) + + return DiagIterator(self) + + def reparse(self, unsaved_files=None, options=0): + """ + Reparse an already parsed translation unit. + + In-memory contents for files can be provided by passing a list of pairs + as unsaved_files, the first items should be the filenames to be mapped + and the second should be the contents to be substituted for the + file. The contents may be passed as strings or file objects. 
+ """ + if unsaved_files is None: + unsaved_files = [] + + unsaved_files_array = 0 + if len(unsaved_files): + unsaved_files_array = (_CXUnsavedFile * len(unsaved_files))() + for i,(name,value) in enumerate(unsaved_files): + if not isinstance(value, str): + # FIXME: It would be great to support an efficient version + # of this, one day. + value = value.read() + print(value) + if not isinstance(value, str): + raise TypeError('Unexpected unsaved file contents.') + unsaved_files_array[i].name = name + unsaved_files_array[i].contents = value + unsaved_files_array[i].length = len(value) + ptr = conf.lib.clang_reparseTranslationUnit(self, len(unsaved_files), + unsaved_files_array, options) + + def save(self, filename): + """Saves the TranslationUnit to a file. + + This is equivalent to passing -emit-ast to the clang frontend. The + saved file can be loaded back into a TranslationUnit. Or, if it + corresponds to a header, it can be used as a pre-compiled header file. + + If an error occurs while saving, a TranslationUnitSaveError is raised. + If the error was TranslationUnitSaveError.ERROR_INVALID_TU, this means + the constructed TranslationUnit was not valid at time of save. In this + case, the reason(s) why should be available via + TranslationUnit.diagnostics(). + + filename -- The path to save the translation unit to. + """ + options = conf.lib.clang_defaultSaveOptions(self) + result = int(conf.lib.clang_saveTranslationUnit(self, filename, + options)) + if result != 0: + raise TranslationUnitSaveError(result, + 'Error saving TranslationUnit.') + + def codeComplete(self, path, line, column, unsaved_files=None, + include_macros=False, include_code_patterns=False, + include_brief_comments=False): + """ + Code complete in this translation unit. + + In-memory contents for files can be provided by passing a list of pairs + as unsaved_files, the first items should be the filenames to be mapped + and the second should be the contents to be substituted for the + file. 
The contents may be passed as strings or file objects. + """ + options = 0 + + if include_macros: + options += 1 + + if include_code_patterns: + options += 2 + + if include_brief_comments: + options += 4 + + if unsaved_files is None: + unsaved_files = [] + + unsaved_files_array = 0 + if len(unsaved_files): + unsaved_files_array = (_CXUnsavedFile * len(unsaved_files))() + for i,(name,value) in enumerate(unsaved_files): + if not isinstance(value, str): + # FIXME: It would be great to support an efficient version + # of this, one day. + value = value.read() + print(value) + if not isinstance(value, str): + raise TypeError('Unexpected unsaved file contents.') + unsaved_files_array[i].name = name + unsaved_files_array[i].contents = value + unsaved_files_array[i].length = len(value) + ptr = conf.lib.clang_codeCompleteAt(self, path, line, column, + unsaved_files_array, len(unsaved_files), options) + if ptr: + return CodeCompletionResults(ptr) + return None + + def get_tokens(self, locations=None, extent=None): + """Obtain tokens in this translation unit. + + This is a generator for Token instances. The caller specifies a range + of source code to obtain tokens for. The range can be specified as a + 2-tuple of SourceLocation or as a SourceRange. If both are defined, + behavior is undefined. + """ + if locations is not None: + extent = SourceRange(start=locations[0], end=locations[1]) + + return TokenGroup.get_tokens(self, extent) + +class File(ClangObject): + """ + The File class represents a particular source file that is part of a + translation unit. 
+ """ + + @staticmethod + def from_name(translation_unit, file_name): + """Retrieve a file handle within the given translation unit.""" + return File(conf.lib.clang_getFile(translation_unit, file_name)) + + @property + def name(self): + """Return the complete file and path name of the file.""" + return conf.lib.clang_getCString(conf.lib.clang_getFileName(self)) + + @property + def time(self): + """Return the last modification time of the file.""" + return conf.lib.clang_getFileTime(self) + + def __bytes__(self): + return self.name + + def __repr__(self): + return "" % (self.name) + + @staticmethod + def from_cursor_result(res, fn, args): + assert isinstance(res, File) + + # Copy a reference to the TranslationUnit to prevent premature GC. + res._tu = args[0]._tu + return res + +class FileInclusion(object): + """ + The FileInclusion class represents the inclusion of one source file by + another via a '#include' directive or as the input file for the translation + unit. This class provides information about the included file, the including + file, the location of the '#include' directive and the depth of the included + file in the stack. Note that the input file has depth 0. + """ + + def __init__(self, src, tgt, loc, depth): + self.source = src + self.include = tgt + self.location = loc + self.depth = depth + + @property + def is_input_file(self): + """True if the included file is the input file.""" + return self.depth == 0 + +class CompilationDatabaseError(Exception): + """Represents an error that occurred when working with a CompilationDatabase + + Each error is associated to an enumerated value, accessible under + e.cdb_error. Consumers can compare the value with one of the ERROR_ + constants in this class. 
+ """ + + # An unknown error occurred + ERROR_UNKNOWN = 0 + + # The database could not be loaded + ERROR_CANNOTLOADDATABASE = 1 + + def __init__(self, enumeration, message): + assert isinstance(enumeration, int) + + if enumeration > 1: + raise Exception("Encountered undefined CompilationDatabase error " + "constant: %d. Please file a bug to have this " + "value supported." % enumeration) + + self.cdb_error = enumeration + Exception.__init__(self, 'Error %d: %s' % (enumeration, message)) + +class CompileCommand(object): + """Represents the compile command used to build a file""" + def __init__(self, cmd, ccmds): + self.cmd = cmd + # Keep a reference to the originating CompileCommands + # to prevent garbage collection + self.ccmds = ccmds + + @property + def directory(self): + """Get the working directory for this CompileCommand""" + return conf.lib.clang_CompileCommand_getDirectory(self.cmd) + + @property + def filename(self): + """Get the working filename for this CompileCommand""" + return conf.lib.clang_CompileCommand_getFilename(self.cmd) + + @property + def arguments(self): + """ + Get an iterable object providing each argument in the + command line for the compiler invocation as a _CXString. + + Invariant : the first argument is the compiler executable + """ + length = conf.lib.clang_CompileCommand_getNumArgs(self.cmd) + for i in range(length): + yield conf.lib.clang_CompileCommand_getArg(self.cmd, i) + +class CompileCommands(object): + """ + CompileCommands is an iterable object containing all CompileCommand + that can be used for building a specific file. 
+ """ + def __init__(self, ccmds): + self.ccmds = ccmds + + def __del__(self): + conf.lib.clang_CompileCommands_dispose(self.ccmds) + + def __len__(self): + return int(conf.lib.clang_CompileCommands_getSize(self.ccmds)) + + def __getitem__(self, i): + cc = conf.lib.clang_CompileCommands_getCommand(self.ccmds, i) + if not cc: + raise IndexError + return CompileCommand(cc, self) + + @staticmethod + def from_result(res, fn, args): + if not res: + return None + return CompileCommands(res) + +class CompilationDatabase(ClangObject): + """ + The CompilationDatabase is a wrapper class around + clang::tooling::CompilationDatabase + + It enables querying how a specific source file can be built. + """ + + def __del__(self): + conf.lib.clang_CompilationDatabase_dispose(self) + + @staticmethod + def from_result(res, fn, args): + if not res: + raise CompilationDatabaseError(0, + "CompilationDatabase loading failed") + return CompilationDatabase(res) + + @staticmethod + def fromDirectory(buildDir): + """Builds a CompilationDatabase from the database found in buildDir""" + errorCode = c_uint() + try: + cdb = conf.lib.clang_CompilationDatabase_fromDirectory(buildDir, + byref(errorCode)) + except CompilationDatabaseError as e: + raise CompilationDatabaseError(int(errorCode.value), + "CompilationDatabase loading failed") + return cdb + + def getCompileCommands(self, filename): + """ + Get an iterable object providing all the CompileCommands available to + build filename. Returns None if filename is not found in the database. + """ + return conf.lib.clang_CompilationDatabase_getCompileCommands(self, + filename) + + def getAllCompileCommands(self): + """ + Get an iterable object providing all the CompileCommands available from + the database. + """ + return conf.lib.clang_CompilationDatabase_getAllCompileCommands(self) + + +class Token(Structure): + """Represents a single token from the preprocessor. + + Tokens are effectively segments of source code. 
Source code is first parsed + into tokens before being converted into the AST and Cursors. + + Tokens are obtained from parsed TranslationUnit instances. You currently + can't create tokens manually. + """ + _fields_ = [ + ('int_data', c_uint * 4), + ('ptr_data', c_void_p) + ] + + @property + def spelling(self): + """The spelling of this token. + + This is the textual representation of the token in source. + """ + return conf.lib.clang_getTokenSpelling(self._tu, self) + + @property + def kind(self): + """Obtain the TokenKind of the current token.""" + return TokenKind.from_value(conf.lib.clang_getTokenKind(self)) + + @property + def location(self): + """The SourceLocation this Token occurs at.""" + return conf.lib.clang_getTokenLocation(self._tu, self) + + @property + def extent(self): + """The SourceRange this Token occupies.""" + return conf.lib.clang_getTokenExtent(self._tu, self) + + @property + def cursor(self): + """The Cursor this Token corresponds to.""" + cursor = Cursor() + + conf.lib.clang_annotateTokens(self._tu, byref(self), 1, byref(cursor)) + + return cursor + +# Now comes the plumbing to hook up the C library. + +# Register callback types in common container. +callbacks['translation_unit_includes'] = CFUNCTYPE(None, c_object_p, + POINTER(SourceLocation), c_uint, py_object) +callbacks['cursor_visit'] = CFUNCTYPE(c_int, Cursor, Cursor, py_object) +callbacks['fields_visit'] = CFUNCTYPE(c_int, Cursor, py_object) + +# Functions strictly alphabetical order. 
+functionList = [ + ("clang_annotateTokens", + [TranslationUnit, POINTER(Token), c_uint, POINTER(Cursor)]), + + ("clang_CompilationDatabase_dispose", + [c_object_p]), + + ("clang_CompilationDatabase_fromDirectory", + [c_char_p, POINTER(c_uint)], + c_object_p, + CompilationDatabase.from_result), + + ("clang_CompilationDatabase_getAllCompileCommands", + [c_object_p], + c_object_p, + CompileCommands.from_result), + + ("clang_CompilationDatabase_getCompileCommands", + [c_object_p, c_char_p], + c_object_p, + CompileCommands.from_result), + + ("clang_CompileCommands_dispose", + [c_object_p]), + + ("clang_CompileCommands_getCommand", + [c_object_p, c_uint], + c_object_p), + + ("clang_CompileCommands_getSize", + [c_object_p], + c_uint), + + ("clang_CompileCommand_getArg", + [c_object_p, c_uint], + _CXString, + _CXString.from_result), + + ("clang_CompileCommand_getDirectory", + [c_object_p], + _CXString, + _CXString.from_result), + + ("clang_CompileCommand_getFilename", + [c_object_p], + _CXString, + _CXString.from_result), + + ("clang_CompileCommand_getNumArgs", + [c_object_p], + c_uint), + + ("clang_codeCompleteAt", + [TranslationUnit, c_char_p, c_int, c_int, c_void_p, c_int, c_int], + POINTER(CCRStructure)), + + ("clang_codeCompleteGetDiagnostic", + [CodeCompletionResults, c_int], + Diagnostic), + + ("clang_codeCompleteGetNumDiagnostics", + [CodeCompletionResults], + c_int), + + ("clang_createIndex", + [c_int, c_int], + c_object_p), + + ("clang_createTranslationUnit", + [Index, c_char_p], + c_object_p), + + ("clang_CXXConstructor_isConvertingConstructor", + [Cursor], + bool), + + ("clang_CXXConstructor_isCopyConstructor", + [Cursor], + bool), + + ("clang_CXXConstructor_isDefaultConstructor", + [Cursor], + bool), + + ("clang_CXXConstructor_isMoveConstructor", + [Cursor], + bool), + + ("clang_CXXField_isMutable", + [Cursor], + bool), + + ("clang_CXXMethod_isConst", + [Cursor], + bool), + + ("clang_CXXMethod_isDefaulted", + [Cursor], + bool), + + 
("clang_CXXMethod_isPureVirtual", + [Cursor], + bool), + + ("clang_CXXMethod_isStatic", + [Cursor], + bool), + + ("clang_CXXMethod_isVirtual", + [Cursor], + bool), + + ("clang_defaultDiagnosticDisplayOptions", + [], + c_uint), + + ("clang_defaultSaveOptions", + [TranslationUnit], + c_uint), + + ("clang_disposeCodeCompleteResults", + [CodeCompletionResults]), + +# ("clang_disposeCXTUResourceUsage", +# [CXTUResourceUsage]), + + ("clang_disposeDiagnostic", + [Diagnostic]), + + ("clang_disposeIndex", + [Index]), + + ("clang_disposeString", + [_CXString]), + + ("clang_disposeTokens", + [TranslationUnit, POINTER(Token), c_uint]), + + ("clang_disposeTranslationUnit", + [TranslationUnit]), + + ("clang_equalCursors", + [Cursor, Cursor], + bool), + + ("clang_equalLocations", + [SourceLocation, SourceLocation], + bool), + + ("clang_equalRanges", + [SourceRange, SourceRange], + bool), + + ("clang_equalTypes", + [Type, Type], + bool), + + ("clang_formatDiagnostic", + [Diagnostic, c_uint], + _CXString), + + ("clang_getArgType", + [Type, c_uint], + Type, + Type.from_result), + + ("clang_getArrayElementType", + [Type], + Type, + Type.from_result), + + ("clang_getArraySize", + [Type], + c_longlong), + + ("clang_getFieldDeclBitWidth", + [Cursor], + c_int), + + ("clang_getCanonicalCursor", + [Cursor], + Cursor, + Cursor.from_cursor_result), + + ("clang_getCanonicalType", + [Type], + Type, + Type.from_result), + + ("clang_getChildDiagnostics", + [Diagnostic], + c_object_p), + + ("clang_getCompletionAvailability", + [c_void_p], + c_int), + + ("clang_getCompletionBriefComment", + [c_void_p], + _CXString), + + ("clang_getCompletionChunkCompletionString", + [c_void_p, c_int], + c_object_p), + + ("clang_getCompletionChunkKind", + [c_void_p, c_int], + c_int), + + ("clang_getCompletionChunkText", + [c_void_p, c_int], + _CXString), + + ("clang_getCompletionPriority", + [c_void_p], + c_int), + + ("clang_getCString", + [_CXString], + c_char_p), + + ("clang_getCursor", + [TranslationUnit, 
SourceLocation], + Cursor), + + ("clang_getCursorDefinition", + [Cursor], + Cursor, + Cursor.from_result), + + ("clang_getCursorDisplayName", + [Cursor], + _CXString, + _CXString.from_result), + + ("clang_getCursorExtent", + [Cursor], + SourceRange), + + ("clang_getCursorLexicalParent", + [Cursor], + Cursor, + Cursor.from_cursor_result), + + ("clang_getCursorLocation", + [Cursor], + SourceLocation), + + ("clang_getCursorReferenced", + [Cursor], + Cursor, + Cursor.from_result), + + ("clang_getCursorReferenceNameRange", + [Cursor, c_uint, c_uint], + SourceRange), + + ("clang_getCursorSemanticParent", + [Cursor], + Cursor, + Cursor.from_cursor_result), + + ("clang_getCursorSpelling", + [Cursor], + _CXString, + _CXString.from_result), + + ("clang_getCursorType", + [Cursor], + Type, + Type.from_result), + + ("clang_getCursorUSR", + [Cursor], + _CXString, + _CXString.from_result), + + ("clang_Cursor_getMangling", + [Cursor], + _CXString, + _CXString.from_result), + +# ("clang_getCXTUResourceUsage", +# [TranslationUnit], +# CXTUResourceUsage), + + ("clang_getCXXAccessSpecifier", + [Cursor], + c_uint), + + ("clang_getDeclObjCTypeEncoding", + [Cursor], + _CXString, + _CXString.from_result), + + ("clang_getDiagnostic", + [c_object_p, c_uint], + c_object_p), + + ("clang_getDiagnosticCategory", + [Diagnostic], + c_uint), + + ("clang_getDiagnosticCategoryText", + [Diagnostic], + _CXString, + _CXString.from_result), + + ("clang_getDiagnosticFixIt", + [Diagnostic, c_uint, POINTER(SourceRange)], + _CXString, + _CXString.from_result), + + ("clang_getDiagnosticInSet", + [c_object_p, c_uint], + c_object_p), + + ("clang_getDiagnosticLocation", + [Diagnostic], + SourceLocation), + + ("clang_getDiagnosticNumFixIts", + [Diagnostic], + c_uint), + + ("clang_getDiagnosticNumRanges", + [Diagnostic], + c_uint), + + ("clang_getDiagnosticOption", + [Diagnostic, POINTER(_CXString)], + _CXString, + _CXString.from_result), + + ("clang_getDiagnosticRange", + [Diagnostic, c_uint], + SourceRange), + 
+ ("clang_getDiagnosticSeverity", + [Diagnostic], + c_int), + + ("clang_getDiagnosticSpelling", + [Diagnostic], + _CXString, + _CXString.from_result), + + ("clang_getElementType", + [Type], + Type, + Type.from_result), + + ("clang_getEnumConstantDeclUnsignedValue", + [Cursor], + c_ulonglong), + + ("clang_getEnumConstantDeclValue", + [Cursor], + c_longlong), + + ("clang_getEnumDeclIntegerType", + [Cursor], + Type, + Type.from_result), + + ("clang_getFile", + [TranslationUnit, c_char_p], + c_object_p), + + ("clang_getFileName", + [File], + _CXString), # TODO go through _CXString.from_result? + + ("clang_getFileTime", + [File], + c_uint), + + ("clang_getIBOutletCollectionType", + [Cursor], + Type, + Type.from_result), + + ("clang_getIncludedFile", + [Cursor], + File, + File.from_cursor_result), + + ("clang_getInclusions", + [TranslationUnit, callbacks['translation_unit_includes'], py_object]), + + ("clang_getInstantiationLocation", + [SourceLocation, POINTER(c_object_p), POINTER(c_uint), POINTER(c_uint), + POINTER(c_uint)]), + + ("clang_getLocation", + [TranslationUnit, File, c_uint, c_uint], + SourceLocation), + + ("clang_getLocationForOffset", + [TranslationUnit, File, c_uint], + SourceLocation), + + ("clang_getNullCursor", + None, + Cursor), + + ("clang_getNumArgTypes", + [Type], + c_uint), + + ("clang_getNumCompletionChunks", + [c_void_p], + c_int), + + ("clang_getNumDiagnostics", + [c_object_p], + c_uint), + + ("clang_getNumDiagnosticsInSet", + [c_object_p], + c_uint), + + ("clang_getNumElements", + [Type], + c_longlong), + + ("clang_getNumOverloadedDecls", + [Cursor], + c_uint), + + ("clang_getOverloadedDecl", + [Cursor, c_uint], + Cursor, + Cursor.from_cursor_result), + + ("clang_getPointeeType", + [Type], + Type, + Type.from_result), + + ("clang_getRange", + [SourceLocation, SourceLocation], + SourceRange), + + ("clang_getRangeEnd", + [SourceRange], + SourceLocation), + + ("clang_getRangeStart", + [SourceRange], + SourceLocation), + + ("clang_getResultType", + 
[Type], + Type, + Type.from_result), + + ("clang_getSpecializedCursorTemplate", + [Cursor], + Cursor, + Cursor.from_cursor_result), + + ("clang_getTemplateCursorKind", + [Cursor], + c_uint), + + ("clang_getTokenExtent", + [TranslationUnit, Token], + SourceRange), + + ("clang_getTokenKind", + [Token], + c_uint), + + ("clang_getTokenLocation", + [TranslationUnit, Token], + SourceLocation), + + ("clang_getTokenSpelling", + [TranslationUnit, Token], + _CXString, + _CXString.from_result), + + ("clang_getTranslationUnitCursor", + [TranslationUnit], + Cursor, + Cursor.from_result), + + ("clang_getTranslationUnitSpelling", + [TranslationUnit], + _CXString, + _CXString.from_result), + + ("clang_getTUResourceUsageName", + [c_uint], + c_char_p), + + ("clang_getTypeDeclaration", + [Type], + Cursor, + Cursor.from_result), + + ("clang_getTypedefDeclUnderlyingType", + [Cursor], + Type, + Type.from_result), + + ("clang_getTypeKindSpelling", + [c_uint], + _CXString, + _CXString.from_result), + + ("clang_getTypeSpelling", + [Type], + _CXString, + _CXString.from_result), + + ("clang_hashCursor", + [Cursor], + c_uint), + + ("clang_isAttribute", + [CursorKind], + bool), + + ("clang_isConstQualifiedType", + [Type], + bool), + + ("clang_isCursorDefinition", + [Cursor], + bool), + + ("clang_isDeclaration", + [CursorKind], + bool), + + ("clang_isExpression", + [CursorKind], + bool), + + ("clang_isFileMultipleIncludeGuarded", + [TranslationUnit, File], + bool), + + ("clang_isFunctionTypeVariadic", + [Type], + bool), + + ("clang_isInvalid", + [CursorKind], + bool), + + ("clang_isPODType", + [Type], + bool), + + ("clang_isPreprocessing", + [CursorKind], + bool), + + ("clang_isReference", + [CursorKind], + bool), + + ("clang_isRestrictQualifiedType", + [Type], + bool), + + ("clang_isStatement", + [CursorKind], + bool), + + ("clang_isTranslationUnit", + [CursorKind], + bool), + + ("clang_isUnexposed", + [CursorKind], + bool), + + ("clang_isVirtualBase", + [Cursor], + bool), + + 
("clang_isVolatileQualifiedType", + [Type], + bool), + + ("clang_parseTranslationUnit", + [Index, c_char_p, c_void_p, c_int, c_void_p, c_int, c_int], + c_object_p), + + ("clang_reparseTranslationUnit", + [TranslationUnit, c_int, c_void_p, c_int], + c_int), + + ("clang_saveTranslationUnit", + [TranslationUnit, c_char_p, c_uint], + c_int), + + ("clang_tokenize", + [TranslationUnit, SourceRange, POINTER(POINTER(Token)), POINTER(c_uint)]), + + ("clang_visitChildren", + [Cursor, callbacks['cursor_visit'], py_object], + c_uint), + + ("clang_Cursor_getNumArguments", + [Cursor], + c_int), + + ("clang_Cursor_getArgument", + [Cursor, c_uint], + Cursor, + Cursor.from_result), + + ("clang_Cursor_getNumTemplateArguments", + [Cursor], + c_int), + + ("clang_Cursor_getTemplateArgumentKind", + [Cursor, c_uint], + TemplateArgumentKind.from_id), + + ("clang_Cursor_getTemplateArgumentType", + [Cursor, c_uint], + Type, + Type.from_result), + + ("clang_Cursor_getTemplateArgumentValue", + [Cursor, c_uint], + c_longlong), + + ("clang_Cursor_getTemplateArgumentUnsignedValue", + [Cursor, c_uint], + c_ulonglong), + + ("clang_Cursor_isAnonymous", + [Cursor], + bool), + + ("clang_Cursor_isBitField", + [Cursor], + bool), + + ("clang_Cursor_getBriefCommentText", + [Cursor], + _CXString, + _CXString.from_result), + + ("clang_Cursor_getRawCommentText", + [Cursor], + _CXString, + _CXString.from_result), + + ("clang_Cursor_getOffsetOfField", + [Cursor], + c_longlong), + + ("clang_Type_getAlignOf", + [Type], + c_longlong), + + ("clang_Type_getClassType", + [Type], + Type, + Type.from_result), + + ("clang_Type_getOffsetOf", + [Type, c_char_p], + c_longlong), + + ("clang_Type_getSizeOf", + [Type], + c_longlong), + + ("clang_Type_getCXXRefQualifier", + [Type], + c_uint), + + ("clang_Type_getNamedType", + [Type], + Type, + Type.from_result), + + ("clang_Type_visitFields", + [Type, callbacks['fields_visit'], py_object], + c_uint), +] + +class LibclangError(Exception): + def __init__(self, message): + 
self.m = message + + def __str__(self): + return self.m + +def register_function(lib, item, ignore_errors): + # A function may not exist, if these bindings are used with an older or + # incompatible version of libclang.so. + try: + func = getattr(lib, item[0]) + except AttributeError as e: + msg = str(e) + ". Please ensure that your python bindings are "\ + "compatible with your libclang.so version." + if ignore_errors: + return + raise LibclangError(msg) + + if len(item) >= 2: + func.argtypes = item[1] + + if len(item) >= 3: + func.restype = item[2] + + if len(item) == 4: + func.errcheck = item[3] + +def register_functions(lib, ignore_errors): + """Register function prototypes with a libclang library instance. + + This must be called as part of library instantiation so Python knows how + to call out to the shared library. + """ + + def register(item): + return register_function(lib, item, ignore_errors) + + for f in functionList: + register(f) + +class Config: + library_path = None + library_file = None + compatibility_check = False + loaded = False + + @staticmethod + def set_library_path(path): + """Set the path in which to search for libclang""" + if Config.loaded: + raise Exception("library path must be set before before using " \ + "any other functionalities in libclang.") + + Config.library_path = path + + @staticmethod + def set_library_file(filename): + """Set the exact location of libclang""" + if Config.loaded: + raise Exception("library file must be set before before using " \ + "any other functionalities in libclang.") + + Config.library_file = filename + + @staticmethod + def set_compatibility_check(check_status): + """ Perform compatibility check when loading libclang + + The python bindings are only tested and evaluated with the version of + libclang they are provided with. To ensure correct behavior a (limited) + compatibility check is performed when loading the bindings. This check + will throw an exception, as soon as it fails. 
+ + In case these bindings are used with an older version of libclang, parts + that have been stable between releases may still work. Users of the + python bindings can disable the compatibility check. This will cause + the python bindings to load, even though they are written for a newer + version of libclang. Failures now arise if unsupported or incompatible + features are accessed. The user is required to test themselves if the + features they are using are available and compatible between different + libclang versions. + """ + if Config.loaded: + raise Exception("compatibility_check must be set before before " \ + "using any other functionalities in libclang.") + + Config.compatibility_check = check_status + + @CachedProperty + def lib(self): + lib = self.get_cindex_library() + register_functions(lib, not Config.compatibility_check) + Config.loaded = True + return lib + + def get_filename(self): + if Config.library_file: + return Config.library_file + + import platform + name = platform.system() + + if name == 'Darwin': + file = 'libclang.dylib' + elif name == 'Windows': + file = 'libclang.dll' + else: + file = 'libclang.so' + + if Config.library_path: + file = Config.library_path + '/' + file + + return file + + def get_cindex_library(self): + try: + library = cdll.LoadLibrary(self.get_filename()) + except OSError as e: + msg = str(e) + ". To provide a path to libclang use " \ + "Config.set_library_path() or " \ + "Config.set_library_file()." 
+ raise LibclangError(msg) + + return library + + def function_exists(self, name): + try: + getattr(self.lib, name) + except AttributeError: + return False + + return True + +def register_enumerations(): + for name, value in clang.enumerations.TokenKinds: + TokenKind.register(value, name) + +conf = Config() +register_enumerations() + +__all__ = [ + 'Config', + 'CodeCompletionResults', + 'CompilationDatabase', + 'CompileCommands', + 'CompileCommand', + 'CursorKind', + 'Cursor', + 'Diagnostic', + 'File', + 'FixIt', + 'Index', + 'SourceLocation', + 'SourceRange', + 'TokenKind', + 'Token', + 'TranslationUnitLoadError', + 'TranslationUnit', + 'TypeKind', + 'Type', +] diff --git a/thirdparty/pybind11/tools/clang/enumerations.py b/thirdparty/pybind11/tools/clang/enumerations.py new file mode 100644 index 000000000..a86a48ade --- /dev/null +++ b/thirdparty/pybind11/tools/clang/enumerations.py @@ -0,0 +1,34 @@ +#===- enumerations.py - Python Enumerations ------------------*- python -*--===# +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +#===------------------------------------------------------------------------===# + +""" +Clang Enumerations +================== + +This module provides static definitions of enumerations that exist in libclang. + +Enumerations are typically defined as a list of tuples. The exported values are +typically munged into other types or classes at module load time. + +All enumerations are centrally defined in this file so they are all grouped +together and easier to audit. And, maybe even one day this file will be +automatically generated by scanning the libclang headers! +""" + +# Maps to CXTokenKind. Note that libclang maintains a separate set of token +# enumerations from the C++ API. 
+TokenKinds = [ + ('PUNCTUATION', 0), + ('KEYWORD', 1), + ('IDENTIFIER', 2), + ('LITERAL', 3), + ('COMMENT', 4), +] + +__all__ = ['TokenKinds'] diff --git a/thirdparty/pybind11/tools/libsize.py b/thirdparty/pybind11/tools/libsize.py new file mode 100644 index 000000000..5dcb8b0d0 --- /dev/null +++ b/thirdparty/pybind11/tools/libsize.py @@ -0,0 +1,38 @@ +from __future__ import print_function, division +import os +import sys + +# Internal build script for generating debugging test .so size. +# Usage: +# python libsize.py file.so save.txt -- displays the size of file.so and, if save.txt exists, compares it to the +# size in it, then overwrites save.txt with the new size for future runs. + +if len(sys.argv) != 3: + sys.exit("Invalid arguments: usage: python libsize.py file.so save.txt") + +lib = sys.argv[1] +save = sys.argv[2] + +if not os.path.exists(lib): + sys.exit("Error: requested file ({}) does not exist".format(lib)) + +libsize = os.path.getsize(lib) + +print("------", os.path.basename(lib), "file size:", libsize, end='') + +if os.path.exists(save): + with open(save) as sf: + oldsize = int(sf.readline()) + + if oldsize > 0: + change = libsize - oldsize + if change == 0: + print(" (no change)") + else: + print(" (change of {:+} bytes = {:+.2%})".format(change, change / oldsize)) +else: + print() + +with open(save, 'w') as sf: + sf.write(str(libsize)) + diff --git a/thirdparty/pybind11/tools/mkdoc.py b/thirdparty/pybind11/tools/mkdoc.py new file mode 100644 index 000000000..44164af3d --- /dev/null +++ b/thirdparty/pybind11/tools/mkdoc.py @@ -0,0 +1,379 @@ +#!/usr/bin/env python3 +# +# Syntax: mkdoc.py [-I ..] [.. a list of header files ..] 
+# +# Extract documentation from C++ header files to use it in Python bindings +# + +import os +import sys +import platform +import re +import textwrap + +from clang import cindex +from clang.cindex import CursorKind +from collections import OrderedDict +from glob import glob +from threading import Thread, Semaphore +from multiprocessing import cpu_count + +RECURSE_LIST = [ + CursorKind.TRANSLATION_UNIT, + CursorKind.NAMESPACE, + CursorKind.CLASS_DECL, + CursorKind.STRUCT_DECL, + CursorKind.ENUM_DECL, + CursorKind.CLASS_TEMPLATE +] + +PRINT_LIST = [ + CursorKind.CLASS_DECL, + CursorKind.STRUCT_DECL, + CursorKind.ENUM_DECL, + CursorKind.ENUM_CONSTANT_DECL, + CursorKind.CLASS_TEMPLATE, + CursorKind.FUNCTION_DECL, + CursorKind.FUNCTION_TEMPLATE, + CursorKind.CONVERSION_FUNCTION, + CursorKind.CXX_METHOD, + CursorKind.CONSTRUCTOR, + CursorKind.FIELD_DECL +] + +PREFIX_BLACKLIST = [ + CursorKind.TRANSLATION_UNIT +] + +CPP_OPERATORS = { + '<=': 'le', '>=': 'ge', '==': 'eq', '!=': 'ne', '[]': 'array', + '+=': 'iadd', '-=': 'isub', '*=': 'imul', '/=': 'idiv', '%=': + 'imod', '&=': 'iand', '|=': 'ior', '^=': 'ixor', '<<=': 'ilshift', + '>>=': 'irshift', '++': 'inc', '--': 'dec', '<<': 'lshift', '>>': + 'rshift', '&&': 'land', '||': 'lor', '!': 'lnot', '~': 'bnot', + '&': 'band', '|': 'bor', '+': 'add', '-': 'sub', '*': 'mul', '/': + 'div', '%': 'mod', '<': 'lt', '>': 'gt', '=': 'assign', '()': 'call' +} + +CPP_OPERATORS = OrderedDict( + sorted(CPP_OPERATORS.items(), key=lambda t: -len(t[0]))) + +job_count = cpu_count() +job_semaphore = Semaphore(job_count) + + +class NoFilenamesError(ValueError): + pass + + +def d(s): + return s if isinstance(s, str) else s.decode('utf8') + + +def sanitize_name(name): + name = re.sub(r'type-parameter-0-([0-9]+)', r'T\1', name) + for k, v in CPP_OPERATORS.items(): + name = name.replace('operator%s' % k, 'operator_%s' % v) + name = re.sub('<.*>', '', name) + name = ''.join([ch if ch.isalnum() else '_' for ch in name]) + name = re.sub('_$', '', 
re.sub('_+', '_', name)) + return '__doc_' + name + + +def process_comment(comment): + result = '' + + # Remove C++ comment syntax + leading_spaces = float('inf') + for s in comment.expandtabs(tabsize=4).splitlines(): + s = s.strip() + if s.startswith('/*'): + s = s[2:].lstrip('*') + elif s.endswith('*/'): + s = s[:-2].rstrip('*') + elif s.startswith('///'): + s = s[3:] + if s.startswith('*'): + s = s[1:] + if len(s) > 0: + leading_spaces = min(leading_spaces, len(s) - len(s.lstrip())) + result += s + '\n' + + if leading_spaces != float('inf'): + result2 = "" + for s in result.splitlines(): + result2 += s[leading_spaces:] + '\n' + result = result2 + + # Doxygen tags + cpp_group = '([\w:]+)' + param_group = '([\[\w:\]]+)' + + s = result + s = re.sub(r'\\c\s+%s' % cpp_group, r'``\1``', s) + s = re.sub(r'\\a\s+%s' % cpp_group, r'*\1*', s) + s = re.sub(r'\\e\s+%s' % cpp_group, r'*\1*', s) + s = re.sub(r'\\em\s+%s' % cpp_group, r'*\1*', s) + s = re.sub(r'\\b\s+%s' % cpp_group, r'**\1**', s) + s = re.sub(r'\\ingroup\s+%s' % cpp_group, r'', s) + s = re.sub(r'\\param%s?\s+%s' % (param_group, cpp_group), + r'\n\n$Parameter ``\2``:\n\n', s) + s = re.sub(r'\\tparam%s?\s+%s' % (param_group, cpp_group), + r'\n\n$Template parameter ``\2``:\n\n', s) + + for in_, out_ in { + 'return': 'Returns', + 'author': 'Author', + 'authors': 'Authors', + 'copyright': 'Copyright', + 'date': 'Date', + 'remark': 'Remark', + 'sa': 'See also', + 'see': 'See also', + 'extends': 'Extends', + 'throw': 'Throws', + 'throws': 'Throws' + }.items(): + s = re.sub(r'\\%s\s*' % in_, r'\n\n$%s:\n\n' % out_, s) + + s = re.sub(r'\\details\s*', r'\n\n', s) + s = re.sub(r'\\brief\s*', r'', s) + s = re.sub(r'\\short\s*', r'', s) + s = re.sub(r'\\ref\s*', r'', s) + + s = re.sub(r'\\code\s?(.*?)\s?\\endcode', + r"```\n\1\n```\n", s, flags=re.DOTALL) + + # HTML/TeX tags + s = re.sub(r'(.*?)', r'``\1``', s, flags=re.DOTALL) + s = re.sub(r'
(.*?)
', r"```\n\1\n```\n", s, flags=re.DOTALL) + s = re.sub(r'(.*?)', r'*\1*', s, flags=re.DOTALL) + s = re.sub(r'(.*?)', r'**\1**', s, flags=re.DOTALL) + s = re.sub(r'\\f\$(.*?)\\f\$', r'$\1$', s, flags=re.DOTALL) + s = re.sub(r'
  • ', r'\n\n* ', s) + s = re.sub(r'', r'', s) + s = re.sub(r'
  • ', r'\n\n', s) + + s = s.replace('``true``', '``True``') + s = s.replace('``false``', '``False``') + + # Re-flow text + wrapper = textwrap.TextWrapper() + wrapper.expand_tabs = True + wrapper.replace_whitespace = True + wrapper.drop_whitespace = True + wrapper.width = 70 + wrapper.initial_indent = wrapper.subsequent_indent = '' + + result = '' + in_code_segment = False + for x in re.split(r'(```)', s): + if x == '```': + if not in_code_segment: + result += '```\n' + else: + result += '\n```\n\n' + in_code_segment = not in_code_segment + elif in_code_segment: + result += x.strip() + else: + for y in re.split(r'(?: *\n *){2,}', x): + wrapped = wrapper.fill(re.sub(r'\s+', ' ', y).strip()) + if len(wrapped) > 0 and wrapped[0] == '$': + result += wrapped[1:] + '\n' + wrapper.initial_indent = \ + wrapper.subsequent_indent = ' ' * 4 + else: + if len(wrapped) > 0: + result += wrapped + '\n\n' + wrapper.initial_indent = wrapper.subsequent_indent = '' + return result.rstrip().lstrip('\n') + + +def extract(filename, node, prefix, output): + if not (node.location.file is None or + os.path.samefile(d(node.location.file.name), filename)): + return 0 + if node.kind in RECURSE_LIST: + sub_prefix = prefix + if node.kind not in PREFIX_BLACKLIST: + if len(sub_prefix) > 0: + sub_prefix += '_' + sub_prefix += d(node.spelling) + for i in node.get_children(): + extract(filename, i, sub_prefix, output) + if node.kind in PRINT_LIST: + comment = d(node.raw_comment) if node.raw_comment is not None else '' + comment = process_comment(comment) + sub_prefix = prefix + if len(sub_prefix) > 0: + sub_prefix += '_' + if len(node.spelling) > 0: + name = sanitize_name(sub_prefix + d(node.spelling)) + output.append((name, filename, comment)) + + +class ExtractionThread(Thread): + def __init__(self, filename, parameters, output): + Thread.__init__(self) + self.filename = filename + self.parameters = parameters + self.output = output + job_semaphore.acquire() + + def run(self): + print('Processing 
"%s" ..' % self.filename, file=sys.stderr) + try: + index = cindex.Index( + cindex.conf.lib.clang_createIndex(False, True)) + tu = index.parse(self.filename, self.parameters) + extract(self.filename, tu.cursor, '', self.output) + finally: + job_semaphore.release() + + +def read_args(args): + parameters = [] + filenames = [] + if "-x" not in args: + parameters.extend(['-x', 'c++']) + if not any(it.startswith("-std=") for it in args): + parameters.append('-std=c++11') + + if platform.system() == 'Darwin': + dev_path = '/Applications/Xcode.app/Contents/Developer/' + lib_dir = dev_path + 'Toolchains/XcodeDefault.xctoolchain/usr/lib/' + sdk_dir = dev_path + 'Platforms/MacOSX.platform/Developer/SDKs' + libclang = lib_dir + 'libclang.dylib' + + if os.path.exists(libclang): + cindex.Config.set_library_path(os.path.dirname(libclang)) + + if os.path.exists(sdk_dir): + sysroot_dir = os.path.join(sdk_dir, next(os.walk(sdk_dir))[1][0]) + parameters.append('-isysroot') + parameters.append(sysroot_dir) + elif platform.system() == 'Linux': + # clang doesn't find its own base includes by default on Linux, + # but different distros install them in different paths. + # Try to autodetect, preferring the highest numbered version. + def clang_folder_version(d): + return [int(ver) for ver in re.findall(r'(?:${PYBIND11_CPP_STANDARD}>) + endif() + + get_property(_iid TARGET ${PN}::pybind11 PROPERTY INTERFACE_INCLUDE_DIRECTORIES) + get_property(_ill TARGET ${PN}::module PROPERTY INTERFACE_LINK_LIBRARIES) + set(${PN}_INCLUDE_DIRS ${_iid}) + set(${PN}_LIBRARIES ${_ico} ${_ill}) +endif() +endif() diff --git a/thirdparty/pybind11/tools/pybind11Tools.cmake b/thirdparty/pybind11/tools/pybind11Tools.cmake new file mode 100644 index 000000000..508e47429 --- /dev/null +++ b/thirdparty/pybind11/tools/pybind11Tools.cmake @@ -0,0 +1,227 @@ +# tools/pybind11Tools.cmake -- Build system for the pybind11 modules +# +# Copyright (c) 2015 Wenzel Jakob +# +# All rights reserved. 
Use of this source code is governed by a +# BSD-style license that can be found in the LICENSE file. + +cmake_minimum_required(VERSION 2.8.12) + +# Add a CMake parameter for choosing a desired Python version +if(NOT PYBIND11_PYTHON_VERSION) + set(PYBIND11_PYTHON_VERSION "" CACHE STRING "Python version to use for compiling modules") +endif() + +set(Python_ADDITIONAL_VERSIONS 3.9 3.8 3.7 3.6 3.5 3.4) +find_package(PythonLibsNew ${PYBIND11_PYTHON_VERSION} REQUIRED) + +include(CheckCXXCompilerFlag) +include(CMakeParseArguments) + +if(NOT PYBIND11_CPP_STANDARD AND NOT CMAKE_CXX_STANDARD) + if(NOT MSVC) + check_cxx_compiler_flag("-std=c++14" HAS_CPP14_FLAG) + + if (HAS_CPP14_FLAG) + set(PYBIND11_CPP_STANDARD -std=c++14) + else() + check_cxx_compiler_flag("-std=c++11" HAS_CPP11_FLAG) + if (HAS_CPP11_FLAG) + set(PYBIND11_CPP_STANDARD -std=c++11) + else() + message(FATAL_ERROR "Unsupported compiler -- pybind11 requires C++11 support!") + endif() + endif() + elseif(MSVC) + set(PYBIND11_CPP_STANDARD /std:c++14) + endif() + + set(PYBIND11_CPP_STANDARD ${PYBIND11_CPP_STANDARD} CACHE STRING + "C++ standard flag, e.g. -std=c++11, -std=c++14, /std:c++14. Defaults to C++14 mode." FORCE) +endif() + +# Checks whether the given CXX/linker flags can compile and link a cxx file. cxxflags and +# linkerflags are lists of flags to use. The result variable is a unique variable name for each set +# of flags: the compilation result will be cached base on the result variable. If the flags work, +# sets them in cxxflags_out/linkerflags_out internal cache variables (in addition to ${result}). 
+function(_pybind11_return_if_cxx_and_linker_flags_work result cxxflags linkerflags cxxflags_out linkerflags_out) + set(CMAKE_REQUIRED_LIBRARIES ${linkerflags}) + check_cxx_compiler_flag("${cxxflags}" ${result}) + if (${result}) + set(${cxxflags_out} "${cxxflags}" CACHE INTERNAL "" FORCE) + set(${linkerflags_out} "${linkerflags}" CACHE INTERNAL "" FORCE) + endif() +endfunction() + +# Internal: find the appropriate link time optimization flags for this compiler +function(_pybind11_add_lto_flags target_name prefer_thin_lto) + if (NOT DEFINED PYBIND11_LTO_CXX_FLAGS) + set(PYBIND11_LTO_CXX_FLAGS "" CACHE INTERNAL "") + set(PYBIND11_LTO_LINKER_FLAGS "" CACHE INTERNAL "") + + if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") + set(cxx_append "") + set(linker_append "") + if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND NOT APPLE) + # Clang Gold plugin does not support -Os; append -O3 to MinSizeRel builds to override it + set(linker_append ";$<$:-O3>") + elseif(CMAKE_CXX_COMPILER_ID MATCHES "GNU") + set(cxx_append ";-fno-fat-lto-objects") + endif() + + if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND prefer_thin_lto) + _pybind11_return_if_cxx_and_linker_flags_work(HAS_FLTO_THIN + "-flto=thin${cxx_append}" "-flto=thin${linker_append}" + PYBIND11_LTO_CXX_FLAGS PYBIND11_LTO_LINKER_FLAGS) + endif() + + if (NOT HAS_FLTO_THIN) + _pybind11_return_if_cxx_and_linker_flags_work(HAS_FLTO + "-flto${cxx_append}" "-flto${linker_append}" + PYBIND11_LTO_CXX_FLAGS PYBIND11_LTO_LINKER_FLAGS) + endif() + elseif (CMAKE_CXX_COMPILER_ID MATCHES "Intel") + # Intel equivalent to LTO is called IPO + _pybind11_return_if_cxx_and_linker_flags_work(HAS_INTEL_IPO + "-ipo" "-ipo" PYBIND11_LTO_CXX_FLAGS PYBIND11_LTO_LINKER_FLAGS) + elseif(MSVC) + # cmake only interprets libraries as linker flags when they start with a - (otherwise it + # converts /LTCG to \LTCG as if it was a Windows path). 
Luckily MSVC supports passing flags + # with - instead of /, even if it is a bit non-standard: + _pybind11_return_if_cxx_and_linker_flags_work(HAS_MSVC_GL_LTCG + "/GL" "-LTCG" PYBIND11_LTO_CXX_FLAGS PYBIND11_LTO_LINKER_FLAGS) + endif() + + if (PYBIND11_LTO_CXX_FLAGS) + message(STATUS "LTO enabled") + else() + message(STATUS "LTO disabled (not supported by the compiler and/or linker)") + endif() + endif() + + # Enable LTO flags if found, except for Debug builds + if (PYBIND11_LTO_CXX_FLAGS) + target_compile_options(${target_name} PRIVATE "$<$>:${PYBIND11_LTO_CXX_FLAGS}>") + endif() + if (PYBIND11_LTO_LINKER_FLAGS) + target_link_libraries(${target_name} PRIVATE "$<$>:${PYBIND11_LTO_LINKER_FLAGS}>") + endif() +endfunction() + +# Build a Python extension module: +# pybind11_add_module( [MODULE | SHARED] [EXCLUDE_FROM_ALL] +# [NO_EXTRAS] [SYSTEM] [THIN_LTO] source1 [source2 ...]) +# +function(pybind11_add_module target_name) + set(options MODULE SHARED EXCLUDE_FROM_ALL NO_EXTRAS SYSTEM THIN_LTO) + cmake_parse_arguments(ARG "${options}" "" "" ${ARGN}) + + if(ARG_MODULE AND ARG_SHARED) + message(FATAL_ERROR "Can't be both MODULE and SHARED") + elseif(ARG_SHARED) + set(lib_type SHARED) + else() + set(lib_type MODULE) + endif() + + if(ARG_EXCLUDE_FROM_ALL) + set(exclude_from_all EXCLUDE_FROM_ALL) + endif() + + add_library(${target_name} ${lib_type} ${exclude_from_all} ${ARG_UNPARSED_ARGUMENTS}) + + if(ARG_SYSTEM) + set(inc_isystem SYSTEM) + endif() + + target_include_directories(${target_name} ${inc_isystem} + PRIVATE ${PYBIND11_INCLUDE_DIR} # from project CMakeLists.txt + PRIVATE ${pybind11_INCLUDE_DIR} # from pybind11Config + PRIVATE ${PYTHON_INCLUDE_DIRS}) + + # Python debug libraries expose slightly different objects + # https://docs.python.org/3.6/c-api/intro.html#debugging-builds + # https://stackoverflow.com/questions/39161202/how-to-work-around-missing-pymodule-create2-in-amd64-win-python35-d-lib + if(PYTHON_IS_DEBUG) + target_compile_definitions(${target_name} 
PRIVATE Py_DEBUG) + endif() + + # The prefix and extension are provided by FindPythonLibsNew.cmake + set_target_properties(${target_name} PROPERTIES PREFIX "${PYTHON_MODULE_PREFIX}") + set_target_properties(${target_name} PROPERTIES SUFFIX "${PYTHON_MODULE_EXTENSION}") + + # -fvisibility=hidden is required to allow multiple modules compiled against + # different pybind versions to work properly, and for some features (e.g. + # py::module_local). We force it on everything inside the `pybind11` + # namespace; also turning it on for a pybind module compilation here avoids + # potential warnings or issues from having mixed hidden/non-hidden types. + set_target_properties(${target_name} PROPERTIES CXX_VISIBILITY_PRESET "hidden") + set_target_properties(${target_name} PROPERTIES CUDA_VISIBILITY_PRESET "hidden") + + if(WIN32 OR CYGWIN) + # Link against the Python shared library on Windows + target_link_libraries(${target_name} PRIVATE ${PYTHON_LIBRARIES}) + elseif(APPLE) + # It's quite common to have multiple copies of the same Python version + # installed on one's system. E.g.: one copy from the OS and another copy + # that's statically linked into an application like Blender or Maya. + # If we link our plugin library against the OS Python here and import it + # into Blender or Maya later on, this will cause segfaults when multiple + # conflicting Python instances are active at the same time (even when they + # are of the same version). + + # Windows is not affected by this issue since it handles DLL imports + # differently. The solution for Linux and Mac OS is simple: we just don't + # link against the Python library. The resulting shared library will have + # missing symbols, but that's perfectly fine -- they will be resolved at + # import time. 
+ + target_link_libraries(${target_name} PRIVATE "-undefined dynamic_lookup") + + if(ARG_SHARED) + # Suppress CMake >= 3.0 warning for shared libraries + set_target_properties(${target_name} PROPERTIES MACOSX_RPATH ON) + endif() + endif() + + # Make sure C++11/14 are enabled + if(CMAKE_VERSION VERSION_LESS 3.3) + target_compile_options(${target_name} PUBLIC ${PYBIND11_CPP_STANDARD}) + else() + target_compile_options(${target_name} PUBLIC $<$:${PYBIND11_CPP_STANDARD}>) + endif() + + if(ARG_NO_EXTRAS) + return() + endif() + + _pybind11_add_lto_flags(${target_name} ${ARG_THIN_LTO}) + + if (NOT MSVC AND NOT ${CMAKE_BUILD_TYPE} MATCHES Debug|RelWithDebInfo) + # Strip unnecessary sections of the binary on Linux/Mac OS + if(CMAKE_STRIP) + if(APPLE) + add_custom_command(TARGET ${target_name} POST_BUILD + COMMAND ${CMAKE_STRIP} -x $) + else() + add_custom_command(TARGET ${target_name} POST_BUILD + COMMAND ${CMAKE_STRIP} $) + endif() + endif() + endif() + + if(MSVC) + # /MP enables multithreaded builds (relevant when there are many files), /bigobj is + # needed for bigger binding projects due to the limit to 64k addressable sections + target_compile_options(${target_name} PRIVATE /bigobj) + if(CMAKE_VERSION VERSION_LESS 3.11) + target_compile_options(${target_name} PRIVATE $<$>:/MP>) + else() + # Only set these options for C++ files. This is important so that, for + # instance, projects that include other types of source files like CUDA + # .cu files don't get these options propagated to nvcc since that would + # cause the build to fail. 
+ target_compile_options(${target_name} PRIVATE $<$>:$<$:/MP>>) + endif() + endif() +endfunction() diff --git a/thirdparty/ruclip/CMakeLists.txt b/thirdparty/ruclip/CMakeLists.txt new file mode 100644 index 000000000..74f991368 --- /dev/null +++ b/thirdparty/ruclip/CMakeLists.txt @@ -0,0 +1,48 @@ +cmake_minimum_required(VERSION 3.18 FATAL_ERROR) +project(ruclip) + +find_package(Torch REQUIRED) + +include_directories(${CMAKE_SOURCE_DIR}/youtokentome) +include_directories(${CMAKE_SOURCE_DIR}/youtokentome/third_party) + +set(RUCLIP_SOURCE_FILES RuCLIP.cpp + RuCLIPProcessor.cpp + youtokentome/utf8.cpp + youtokentome/utils.cpp + youtokentome/bpe.cpp + ClipAPI.cpp) +set(RUCLIP_HEADER_FILES TorchHeader.h + RuCLIP.h + json.hpp + json_fwd.hpp + youtokentome/utf8.h + youtokentome/utils.h + youtokentome/bpe.h + RuCLIPProcessor.h + ClipAPI.h) + +add_library(${PROJECT_NAME} SHARED ${RUCLIP_SOURCE_FILES} ${RUCLIP_HEADER_FILES}) + +if (MSVC) + get_filename_component(full_path_nvtools_lib "C:/Program Files/NVIDIA Corporation/NvToolsExt/lib/x64/nvToolsExt64_1.lib" ABSOLUTE) + message("${full_path_nvtools_lib}") + list (REMOVE_ITEM TORCH_LIBRARIES "${full_path_nvtools_lib}") +endif(MSVC) +message("Torch libs: ${TORCH_LIBRARIES}") + +set(RUCLIP_LIBS + ${OpenCV_LIBS} + ${TORCH_LIBRARIES} +) + +target_link_libraries(${PROJECT_NAME} ${RUCLIP_LIBS}) + +install(TARGETS ${PROJECT_NAME} + EXPORT MTTrackingExports + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib + RUNTIME DESTINATION bin + PUBLIC_HEADER DESTINATION include/${PROJECT_NAME}) + +set_target_properties(${PROJECT_NAME} PROPERTIES FOLDER "libs") diff --git a/thirdparty/ruclip/ClipAPI.cpp b/thirdparty/ruclip/ClipAPI.cpp new file mode 100644 index 000000000..663230ae4 --- /dev/null +++ b/thirdparty/ruclip/ClipAPI.cpp @@ -0,0 +1,158 @@ +#include "ClipAPI.h" + +#include "TorchHeader.h" +#include "RuCLIP.h" +#include "RuCLIPProcessor.h" + +#include "../../src/mtracking/defines.h" + +/// +class 
ClassificationCLIP::ClassificationCLIPImpl +{ +public: + ClassificationCLIPImpl() = default; + ~ClassificationCLIPImpl() = default; + + /// + bool Init(const std::string& pathToClip, const std::string& pathToBPE, int inputImgSize, int indGPU, const std::vector& labels) + { + bool res = true; + + m_pathToClip = pathToClip; + m_indGPU = indGPU; + m_labels = labels; + + //torch::manual_seed(24); + + std::cout << "Set Torch device (" << m_indGPU << "): " << ((m_indGPU < 0) ? "CPU" : "GPU") << std::endl; + if (m_indGPU >= 0 && torch::cuda::is_available()) + { + std::cout << "CUDA is available! Running on GPU." << std::endl; + m_device = torch::Device(torch::kCUDA, m_indGPU); + } + else + { + std::cout << "CUDA is not available! Running on CPU." << std::endl; + } + + std::cout << "Load clip from: " << pathToClip << std::endl; + m_clip = FromPretrained(pathToClip); + m_clip->to(m_device); + + std::cout << "Load processor from: " << pathToBPE << std::endl; + m_processor = RuCLIPProcessor::FromPretrained(m_pathToClip); + + m_processor.CacheText(m_labels); + + return res; + } + + /// + bool ProcessFrame(const cv::Mat& frame, const std::vector& rois, std::vector& result) + { + bool res = false; + + if (rois.empty()) + return res; + + result.resize(rois.size()); + + std::map img2roi; + + std::cout << "Resizing..." << std::endl; + std::vector images; + images.reserve(rois.size()); + for (size_t i = 0; i < rois.size(); ++i) + { + cv::Rect r = Clamp(rois[i], frame.size()); + if (r.width > m_processor.GetImageSize() / 10 && r.height > m_processor.GetImageSize() / 10) + { + img2roi[images.size()] = i; + images.emplace_back(cv::Mat(frame, r)); + } + } + if (images.empty()) + { + std::cout << "CLIP::ProcessFrame: empty images" << std::endl; + return res; + } + + std::cout << "Running on " << images.size() << "..." 
<< std::endl; + auto dummy_input = m_processor.operator()(images); + try + { + torch::Tensor logits_per_image = m_clip->forward(dummy_input.first.to(m_device), dummy_input.second.to(m_device)); + torch::Tensor logits_per_text = logits_per_image.t(); + auto probs = logits_per_image.softmax(/*dim = */-1).detach().cpu(); + //std::cout << "probs per image: " << probs << std::endl; + + const float* tensorData = reinterpret_cast(probs.data_ptr()); + for (size_t imgInd = 0; imgInd < images.size(); ++imgInd) + { + float bestConf = 0.; + size_t bestInd = 0; + for (size_t labelInd = 0; labelInd < m_labels.size(); ++labelInd) + { + if (bestConf < tensorData[labelInd]) + { + bestConf = tensorData[labelInd]; + bestInd = labelInd; + } + } + result[img2roi[imgInd]] = CLIPResult(m_labels[bestInd], bestConf); + std::cout << "Object: " << m_labels[bestInd] << " - " << bestConf << std::endl; + tensorData += m_labels.size(); + } + res = true; + } + catch (std::exception& e) + { + res = false; + std::cout << "ClassificationCLIP::ProcessFrame: " << e.what() << std::endl; + } + + return res; + } + + +private: + std::string m_pathToClip = ""; + int m_indGPU = -1; // -1 - use CPU + + torch::Device m_device{ torch::kCPU }; + CLIP m_clip = nullptr; + RuCLIPProcessor m_processor; + + std::vector m_labels{ "human", "pedestrian", "car", "vehicle", "truck", "bus" }; +}; + +/// +ClassificationCLIP::ClassificationCLIP() noexcept +{ +} + +/// +ClassificationCLIP::~ClassificationCLIP() +{ + if (m_pImpl) + delete m_pImpl; +} + +/// +bool ClassificationCLIP::Init(const std::string& pathToClip, const std::string& pathToBPE, int inputImgSize, int indGPU, const std::vector& labels) +{ + if (m_pImpl) + delete m_pImpl; + + m_pImpl = new ClassificationCLIPImpl(); + + bool res = m_pImpl->Init(pathToClip, pathToBPE, inputImgSize, indGPU, labels); + assert(res); + return res; +} + +/// +bool ClassificationCLIP::ProcessFrame(const cv::Mat& frame, const std::vector& rois, std::vector& result) +{ + return 
m_pImpl->ProcessFrame(frame, rois, result); +} diff --git a/thirdparty/ruclip/ClipAPI.h b/thirdparty/ruclip/ClipAPI.h new file mode 100644 index 000000000..80168a2ed --- /dev/null +++ b/thirdparty/ruclip/ClipAPI.h @@ -0,0 +1,40 @@ +#pragma once + +#include + + +#if defined(_MSC_VER) +#define LIB_API __declspec(dllexport) +#else +#define LIB_API __attribute__((visibility("default"))) +#endif + +/// +struct CLIPResult +{ + CLIPResult() = default; + CLIPResult(const std::string& label, float conf) noexcept + : m_label(label), m_conf(conf) + { + } + + std::string m_label; + float m_conf = 0.f; +}; + +/// +class LIB_API ClassificationCLIP +{ +public: + ClassificationCLIP() noexcept; + ~ClassificationCLIP() noexcept; + + bool Init(const std::string& pathToClip, const std::string& pathToBPE, int inputImgSize, int indGPU, const std::vector& labels); + + bool ProcessFrame(const cv::Mat& frame, const std::vector& rois, std::vector& result); + + class ClassificationCLIPImpl; + +private: + ClassificationCLIPImpl* m_pImpl = nullptr; +}; diff --git a/thirdparty/ruclip/RuCLIP.cpp b/thirdparty/ruclip/RuCLIP.cpp new file mode 100644 index 000000000..c64b840b1 --- /dev/null +++ b/thirdparty/ruclip/RuCLIP.cpp @@ -0,0 +1,227 @@ +#include "RuCLIP.h" + +ResidualAttentionBlockImpl :: ResidualAttentionBlockImpl(const std::string &module_name, const int d_model, const int n_head, const torch::Tensor &attn_mask) + : torch::nn::Module(module_name) +{ + Attn = torch::nn::MultiheadAttention(d_model, n_head); + Ln1 = RCLayerNorm(std::vector() = { (int64_t)d_model }); + Mlp = torch::nn::Sequential({ + {"c_fc", torch::nn::Linear(d_model, d_model * 4)}, + {"gelu", QuickGELU()}, + {"c_proj", torch::nn::Linear(d_model * 4, d_model)} + }); + Ln2 = RCLayerNorm(std::vector() = { (int64_t)d_model }); + AttnMask = attn_mask; + + register_module("attn", Attn); + register_module("ln_1", Ln1); + register_module("mlp", Mlp); + register_module("ln_2", Ln2); + //register_buffer("attn_mask", AttnMask); +} + 
+torch::Tensor ResidualAttentionBlockImpl :: Attention(const torch::Tensor &x) +{ + if (AttnMask.defined() && (AttnMask.numel() != 0)) + AttnMask = AttnMask.to(x.dtype()).to(x.device()); + /*return Attn(x, x, x, weights = False, attn_mask = self.attn_mask)[0];*/ + //std::tuple forward(const Tensor & query, const Tensor & key, const Tensor & value, const Tensor & key_padding_mask = {}, bool need_weights = true, const Tensor & attn_mask = {}, bool average_attn_weights = true) + return std::get<0>(Attn->forward(x, x, x, {}, false, AttnMask)); +} + +torch::Tensor ResidualAttentionBlockImpl :: forward(const torch::Tensor &x) +{ + auto result = x + Attention(Ln1(x)); + result = result + Mlp->forward(Ln2(result)); + return result; +} + + + +TransformerImpl :: TransformerImpl(const std::string &module_name, const int width, const int layers, const int heads, const torch::Tensor &attn_mask /*= torch::Tensor()*/) + : torch::nn::Module(module_name), Width(width), Layers(layers), Heads(heads)/*, AttnMask(attn_mask)*/ +{ + for (int i = 0; i < layers; i++) + Resblocks->push_back(ResidualAttentionBlock(module_name + "_" + std::to_string(i), width, heads, attn_mask)); + + register_module("resblocks", Resblocks); //??? 
+ //for (int i = 0; i < Resblocks->size(); i++) + // register_module(module_name + "_res_attn_block_" + std::to_string(i), Resblocks[i]); +} + +torch::Tensor TransformerImpl :: forward(const torch::Tensor& x) +{ + //!!!Сделать проверку и преобразование if (x.type() != ) + return Resblocks->forward(x); +} + +void TransformerImpl :: InitializeParameters() +{ + float proj_std = powf((float)Width, -0.5f) * powf(2.f * Layers, -0.5f); + float attn_std = powf((float)Width, -0.5f); + float fc_std = powf(2.f * Width, -0.5f); + + for (int i = 0; i < Resblocks->size(); i++) + { + auto block = Resblocks[i]->as(); + torch::nn::init::normal_(block->GetAttn()->in_proj_weight, 0., attn_std); + torch::nn::init::normal_(block->GetAttn()->out_proj->weight, 0., proj_std); + auto mlp = block->GetMlp(); + for (int j = 0; j < mlp->size(); j++) + { + if (mlp[j]->name() == "c_fc") + torch::nn::init::normal_(mlp[j]->as()->weight, 0., fc_std); + if (mlp[j]->name() == "c_proj") + torch::nn::init::normal_(mlp[j]->as()->weight, 0., proj_std); + } + } +} + + + +VisionTransformerImpl :: VisionTransformerImpl( + const std::string &module_name, + const int input_resolution, + const int patch_size, + const int width, + const int layers, + const int heads, + const int output_dim +) : torch::nn::Module(module_name), InputResolution(input_resolution), OutputDim(output_dim) +{ + Conv1 = torch::nn::Conv2d(torch::nn::Conv2dOptions(3, width, patch_size).stride(patch_size).bias(false)); + float scale = powf((float)width, -0.5); + ClassEmbedding = scale * torch::randn(width); + PositionalEmbedding = scale * torch::randn({ (int)pow(input_resolution / patch_size/*деление нацело*/, 2) + 1, width }); + LnPre = RCLayerNorm(std::vector() = { (int64_t)width }); + VTTransformer = Transformer("visual", width, layers, heads); + LnPost = RCLayerNorm(std::vector() = { (int64_t)width }); + Proj = scale * torch::randn({ width, output_dim }); + + register_buffer("class_embedding", ClassEmbedding); + 
register_buffer("positional_embedding", PositionalEmbedding); + register_buffer("proj", Proj); + register_module("conv1", Conv1); + register_module("ln_pre", LnPre); + register_module("ln_post", LnPost); + register_module("transformer", VTTransformer); +} + +torch::Tensor VisionTransformerImpl :: forward(const torch::Tensor &x) +{ + //!!!Сделать проверку и преобразование if (x.type() != ) + auto res = Conv1(x); //shape = [*, width, grid, grid] + res = res.reshape({ res.sizes()[0], res.sizes()[1], -1 }); //shape = [*, width, grid **2] + res = res.permute({ 0, 2, 1 }); //shape = [*, grid **2, width] + res = torch::cat({ + ClassEmbedding.to(res.dtype()) + torch::zeros({res.sizes()[0], 1, res.sizes().back()}, res.dtype()).to(x.device()), + res + }, 1); //shape = [*, grid **2 + 1, width] + res = res + PositionalEmbedding.to(res.dtype()); + res = LnPre(res); + res = res.permute({ 1, 0, 2 }); // NLD->LND + res = VTTransformer(res); + res = res.permute({ 1, 0, 2 }); // LND->NLD + res = LnPost(res.index({ torch::indexing::Slice(), 0, torch::indexing::Slice() })); + if (Proj.defined() && Proj.numel() != 0) + res = torch::mm(res, Proj); + return res; +} + + + +CLIPImpl :: CLIPImpl( + const std::string &module_name, + const int embed_dim, + const int image_resolution, + const int vision_layers, + const int vision_width, + const int vision_patch_size, + const int context_length, + const int vocab_size, + const int transformer_width, + const int transformer_heads, + const int transformer_layers, + const int eos_id /*= 3*/ +) : torch::nn::Module(module_name), EosId(eos_id), ContextLength(context_length), VocabSize(vocab_size), TransformerWidth(transformer_width), TransformerLayers(transformer_layers) +{ + int vision_heads = vision_width / 64; + Visual = VisionTransformer("visual", image_resolution, vision_patch_size, vision_width, vision_layers, vision_heads, embed_dim); + NVTransformer = Transformer("transformer", transformer_width, transformer_layers, transformer_heads, 
BuildAttentionMask()); + + TokenEmbedding = torch::nn::Embedding(vocab_size, transformer_width); + PositionalEmbedding = torch::empty({ context_length, transformer_width }); //!!!type, device + + std::cout << "transformer_width: " << transformer_width<< std::endl; + + LnFinal = RCLayerNorm(std::vector() = { (int64_t)transformer_width }); + TextProjection = torch::empty({ transformer_width, embed_dim }); //!!!type, device + LogitScale = torch::ones({}) * logf(1.f / 0.07f); + + register_module("visual", Visual); + register_module("transformer", NVTransformer); + register_module("token_embedding", TokenEmbedding); + register_module("ln_final", LnFinal); + register_buffer("positional_embedding", PositionalEmbedding); + register_buffer("text_projection", TextProjection); + register_buffer("logit_scale", LogitScale); + + InitializeParameters(); +} + +void CLIPImpl :: InitializeParameters() +{ + torch::nn::init::normal_(TokenEmbedding->weight, 0., 0.02); + torch::nn::init::normal_(PositionalEmbedding, 0., 0.01); + NVTransformer->InitializeParameters(); + if (TextProjection.defined() && TextProjection.numel() != 0) + torch::nn::init::normal_(TextProjection, 0., pow(TransformerWidth, -0.5)); +} + +torch::Tensor CLIPImpl :: BuildAttentionMask() +{ + auto mask = torch::empty({ ContextLength, ContextLength }); + mask.fill_(-std::numeric_limits::infinity()); + mask.triu_(1); + return mask; +} + +///pixel_values : torch::Tensor Processed images from RuCLIPProcessor class, out: image_latents : torch::Tensor Image embeddings +torch::Tensor CLIPImpl :: EncodeImage(torch::Tensor pixel_values) +{ + return Visual(pixel_values); +} + +///input_ids : torch::Tensor Tokenized texts from RuCLIPProcessor class, out: text_latents : torch::Tensor Text embeddings +torch::Tensor CLIPImpl :: EncodeText(torch::Tensor input_ids) +{ + auto x = TokenEmbedding(input_ids); //.type(dtype()) // [batch_size, n_ctx, d_model] + x = x + PositionalEmbedding; //.type(dtype()) + x = x.permute({ 1, 0, 2 }); 
//NLD->LND + x = NVTransformer(x); + x = x.permute({ 1, 0, 2 }); //LND->NLD + x = LnFinal(x); // type(self.dtype) //x.shape = [batch_size, n_ctx, transformer.width] + x = torch::mm(x.index({ torch::arange(x.sizes()[0]), torch::where(input_ids == EosId)[1] }), TextProjection); + return x; +} + +torch::Tensor CLIPImpl :: forward(torch::Tensor input_ids, torch::Tensor pixel_values) +{ + //std::cout << "pixel_values: " << pixel_values.sizes() << ", input_ids: " << input_ids.sizes() << std::endl; + + auto image_features = EncodeImage(pixel_values); + auto text_features = EncodeText(input_ids); + + //std::cout << "image_features: " << image_features.sizes() << ", text_features: " << text_features.sizes() << std::endl; + + //normalize features + image_features = image_features / image_features.norm(2/*L2*/, -1, true); + text_features = text_features / text_features.norm(2/*L2*/, -1, true); + + //cosine similarity as logits + auto scale = LogitScale.exp(); + auto logits_per_image = scale * torch::mm(image_features, text_features.t()); + auto logits_per_text = logits_per_image.t(); + + return logits_per_image; +} diff --git a/thirdparty/ruclip/RuCLIP.h b/thirdparty/ruclip/RuCLIP.h new file mode 100644 index 000000000..e0996183b --- /dev/null +++ b/thirdparty/ruclip/RuCLIP.h @@ -0,0 +1,194 @@ +#pragma once + +#include "json.hpp" +#include +#include + +#include "TorchHeader.h" + +///to handle fp16 +class RCLayerNormImpl : public torch::nn::LayerNormImpl { +protected: +public: + RCLayerNormImpl(std::vector normalized_shape) : LayerNormImpl(normalized_shape) {} + virtual ~RCLayerNormImpl() {} + + torch::Tensor forward(const torch::Tensor &x) ///!override + { + auto orig_type = x.dtype(); + auto result = torch::nn::LayerNormImpl::forward(x.to(torch::kFloat32)); + return result.to(orig_type); + } +}; +TORCH_MODULE(RCLayerNorm); + + +class QuickGELUImpl : public torch::nn::Module { +protected: +public: + QuickGELUImpl() : torch::nn::Module() {} + virtual ~QuickGELUImpl() {} + + 
torch::Tensor forward(const torch::Tensor &x) + { + return x * torch::sigmoid(1.702f * x); + } +}; +TORCH_MODULE(QuickGELU); + + +class ResidualAttentionBlockImpl : public torch::nn::Module { +protected: + torch::nn::MultiheadAttention Attn{ nullptr }; + RCLayerNorm Ln1{ nullptr }; + torch::nn::Sequential Mlp{ nullptr }; + RCLayerNorm Ln2{ nullptr }; + torch::Tensor AttnMask; +public: + ResidualAttentionBlockImpl(const std::string& module_name, const int d_model, const int n_head, const torch::Tensor& attn_mask); + virtual ~ResidualAttentionBlockImpl() {} + torch::Tensor Attention(const torch::Tensor &x); + torch::Tensor forward(const torch::Tensor &x); + torch::nn::MultiheadAttention GetAttn() { return Attn; } + torch::nn::Sequential GetMlp() { return Mlp; } +}; +TORCH_MODULE(ResidualAttentionBlock); + + +class TransformerImpl : public torch::nn::Module { +protected: + int Width, + Layers, + Heads; + //torch::Tensor AttnMask; + torch::nn::Sequential Resblocks; +public: + TransformerImpl(const std::string &module_name, const int width, const int layers, const int heads, const torch::Tensor &attn_mask = torch::Tensor()); + virtual ~TransformerImpl() {} + torch::Tensor forward(const torch::Tensor &x); + void InitializeParameters(); +}; +TORCH_MODULE(Transformer); + + +class VisionTransformerImpl : public torch::nn::Module { +protected: + int InputResolution, + OutputDim; + torch::nn::Conv2d Conv1{ nullptr }; + torch::Tensor ClassEmbedding, + PositionalEmbedding, + Proj; + RCLayerNorm LnPre{ nullptr }, + LnPost{ nullptr }; + Transformer VTTransformer{ nullptr }; +public: + VisionTransformerImpl( + const std::string& module_name, + const int input_resolution, + const int patch_size, + const int width, + const int layers, + const int heads, + const int output_dim + ); + virtual ~VisionTransformerImpl() {} + torch::Tensor forward(const torch::Tensor& x); +}; +TORCH_MODULE(VisionTransformer); + + +class CLIPImpl : public torch::nn::Module { +protected: + int EosId, + 
//EmbedDim, + //ImageResolution, + //VisionLayers, + //VisionWidth, + //VisionPatchSize, + ContextLength, + VocabSize, + TransformerWidth, + /*TransformerHeads,*/ + TransformerLayers; + VisionTransformer Visual{ nullptr }; + Transformer NVTransformer{ nullptr }; + torch::nn::Embedding TokenEmbedding{ nullptr }; + torch::Tensor PositionalEmbedding; + RCLayerNorm LnFinal{ nullptr }; + torch::Tensor TextProjection, + LogitScale; +public: + CLIPImpl( + const std::string &module_name, + const int embed_dim, + const int image_resolution, + const int vision_layers, + const int vision_width, + const int vision_patch_size, + const int context_length, + const int vocab_size, + const int transformer_width, + const int transformer_heads, + const int transformer_layers, + const int eos_id = 3 + ); + virtual ~CLIPImpl() {} + + void InitializeParameters(); + torch::Tensor BuildAttentionMask(); + + //auto dtype() + //{ + // return Visual.conv1.weight.dtype(); + //} + + ///pixel_values : torch::Tensor Processed images from RuCLIPProcessor class, out: image_latents : torch::Tensor Image embeddings + torch::Tensor EncodeImage(torch::Tensor pixel_values); + + ///input_ids : torch::Tensor Tokenized texts from RuCLIPProcessor class, out: text_latents : torch::Tensor Text embeddings + torch::Tensor EncodeText(torch::Tensor input_ids); + + torch::Tensor forward(torch::Tensor input_ids, torch::Tensor pixel_values); + + torch::Tensor GetLogitScale() { return LogitScale; } +}; +TORCH_MODULE(CLIP); + +inline CLIP FromPretrained(const std::filesystem::path &folder) +{ + using json = nlohmann::json; + std::filesystem::path path = folder / "config.json"; + std::cout << path << std::endl; + std::ifstream f(path); + json config = json::parse(f); + + // Создание модели + auto clip = CLIP("ruclip", + int(config["embed_dim"]), + int(config["image_resolution"]), + int(config["vision_layers"]), + int(config["vision_width"]), + int(config["vision_patch_size"]), + int(config["context_length"]), + 
int(config["vocab_size"]), + int(config["transformer_width"]), + int(config["transformer_heads"]), + int(config["transformer_layers"])); + + //for (auto &k : clip->named_parameters()) + // std::cout << k.key() << std::endl; + //std::cout << "Model params count: " << Trainable::ParamsCount(clip) << std::endl; + + // Загрузка состояния модели из файла + try + { + torch::load(clip, (folder / "jit_model.zip").string()); + } + catch (std::exception& e) + { + std::cout << e.what() << std::endl; + } + + return clip; +} diff --git a/thirdparty/ruclip/RuCLIPProcessor.cpp b/thirdparty/ruclip/RuCLIPProcessor.cpp new file mode 100644 index 000000000..1167df800 --- /dev/null +++ b/thirdparty/ruclip/RuCLIPProcessor.cpp @@ -0,0 +1,175 @@ +#include "RuCLIPProcessor.h" + +/// +inline torch::Tensor CVMatToTorchTensor(const cv::Mat img, const bool perm = true) +{ + auto tensor_image = torch::from_blob(img.data, { img.rows, img.cols, img.channels() }, at::kByte); + if (perm) + tensor_image = tensor_image.permute({ 2,0,1 }); + tensor_image.unsqueeze_(0); + tensor_image = tensor_image.toType(c10::kFloat).div(255); + return tensor_image; //tensor_image.clone(); +} + +/// +inline cv::Mat TorchTensorToCVMat(const torch::Tensor tensor_image, const bool perm = true) +{ + auto t = tensor_image.detach().squeeze().cpu(); + if (perm) + t = t.permute({ 1, 2, 0 }); + t = t.mul(255).clamp(0, 255).to(torch::kU8); + t = t.contiguous(); + cv::Mat result_img; + cv::Mat(static_cast(t.size(0)), static_cast(t.size(1)), CV_MAKETYPE(CV_8U, t.sizes().size() >= 3 ? 
static_cast(t.size(2)) : 1), t.data_ptr()).copyTo(result_img); + return result_img; +} + +/// +RuCLIPProcessor :: RuCLIPProcessor( + const std::string& tokenizer_path, + const int image_size /*= 224*/, + const int text_seq_length /*= 77*/, + const std::vector norm_mean /*= { 0.48145466, 0.4578275, 0.40821073 }*/, + const std::vector norm_std /*= { 0.26862954, 0.26130258, 0.27577711 }*/ +) : ImageSize(image_size), TextSeqLength(text_seq_length), NormMean(norm_mean), NormStd(norm_std) +{ + vkcom::Status status; + Tokenizer = std::make_unique(tokenizer_path, -1, &status); +} + +///!!!Локали-юникоды +torch::Tensor RuCLIPProcessor :: EncodeText(const/*std::vector<*/std::string &text) const +{ + std::vector> ret_ids; + vkcom::Status status; + ////for (auto &it : text) + //// it = lowercase(it); + //text = lowercase(text); + //output_type = vkcom::OutputType::ID, bos = false, eos = false, reverse = false, dropout_prob = 0.0 + std::vector texts{ text }; + status = Tokenizer->encode_as_ids(texts, &ret_ids); + if (status.code != 0) + throw std::runtime_error("RuCLIPProcessor::EncodeText error : " + status.message); + auto it = ret_ids[0]; + //for (auto &it : ret_ids) + //{ + if (it.size() > TextSeqLength - 2) + it.resize(TextSeqLength - 2); + it.insert(it.begin(), bos_id); //vector сдвинет при вставке + it.push_back(eos_id); + //} + return PrepareTokens(it); +} + +/// +cv::Mat RuCLIPProcessor::ResizeToInput(const cv::Mat& img, bool saveAspectRatio) const +{ + cv::Mat newImg(cv::Size(ImageSize, ImageSize), img.type(), cv::Scalar(0, 0, 0)); + + if (saveAspectRatio) + { + // resize the image with aspect ratio + float r = std::min(static_cast(ImageSize) / static_cast(img.rows), static_cast(ImageSize) / static_cast(img.cols)); + int newHeight = cvRound(img.rows * r); + int newWidth = cvRound(img.cols * r); + + // Additional checks for images with non even dims + if ((ImageSize - newWidth) % 2) + newWidth--; + if ((ImageSize - newHeight) % 2) + newHeight--; + assert((ImageSize - 
newWidth) % 2 == 0); + assert((ImageSize - newHeight) % 2 == 0); + + int xOffset = (ImageSize - newWidth) / 2; + int yOffset = (ImageSize - newHeight) / 2; + + assert(2 * xOffset + newWidth == ImageSize); + assert(2 * yOffset + newHeight == ImageSize); + + cv::resize(img, newImg(cv::Rect(xOffset, yOffset, newWidth, newHeight)), cv::Size(newWidth, newHeight), 0, 0, cv::INTER_CUBIC); + } + else + { + cv::resize(img, newImg, newImg.size(), 0, 0, cv::INTER_CUBIC); + } + return newImg; +} + +/// +torch::Tensor RuCLIPProcessor::EncodeImage(const cv::Mat& img) const +{ + torch::Tensor img_tensor = CVMatToTorchTensor(ResizeToInput(img), true); + img_tensor = torch::data::transforms::Normalize<>(NormMean, NormStd)(img_tensor); + return img_tensor; +} + +/// +torch::Tensor RuCLIPProcessor::PrepareTokens(/*std::vector<*/std::vector tokens) const //Передаю по значению чтобы внутри иметь дело с копией +{ + torch::Tensor result; + if (tokens.size() > TextSeqLength) + { + int32_t back = tokens.back(); + tokens.resize(TextSeqLength); + tokens.back() = back; + } + int empty_positions = TextSeqLength - static_cast(tokens.size()); + if (empty_positions > 0) + result = torch::cat({ torch::tensor(tokens, torch::kLong), torch::zeros(empty_positions, torch::kLong) }); //position tokens after text + return result; +} + +/// +void RuCLIPProcessor::CacheText(const std::vector & texts) +{ + m_textsTensors.clear(); + for (auto& it : texts) + { + std::string s = it; + torch::Tensor text_tensor = EncodeText(s); + m_textsTensors.push_back(text_tensor); + } +} + +/// +const std::vector& RuCLIPProcessor::GetTextTensors() const +{ + return m_textsTensors; +} + +/// +std::pair RuCLIPProcessor::operator()(const std::vector &texts, const std::vector &images) const +{ + std::vector texts_tensors; + for (auto& it : texts) + { + std::string s = it; + torch::Tensor text_tensor = EncodeText(s); + texts_tensors.push_back(text_tensor); + } + + std::vector images_tensors; + for (auto &it : images) + { + 
torch::Tensor img_tensor = CVMatToTorchTensor(ResizeToInput(it), true); + img_tensor = torch::data::transforms::Normalize<>(NormMean, NormStd)(img_tensor); + //img_tensor.clone(); + images_tensors.push_back(img_tensor); + } + return std::make_pair(!texts_tensors.empty()?/*torch::pad_sequence*/torch::stack(texts_tensors):torch::Tensor(), torch::pad_sequence(images_tensors).squeeze(0)); +} + +/// +std::pair RuCLIPProcessor::operator()(const std::vector & images) const +{ + std::vector images_tensors; + for (auto& it : images) + { + torch::Tensor img_tensor = CVMatToTorchTensor(ResizeToInput(it), true); + img_tensor = torch::data::transforms::Normalize<>(NormMean, NormStd)(img_tensor); + //img_tensor.clone(); + images_tensors.push_back(img_tensor); + } + return std::make_pair(torch::stack(m_textsTensors), torch::pad_sequence(images_tensors).squeeze(0)); +} diff --git a/thirdparty/ruclip/RuCLIPProcessor.h b/thirdparty/ruclip/RuCLIPProcessor.h new file mode 100644 index 000000000..780231166 --- /dev/null +++ b/thirdparty/ruclip/RuCLIPProcessor.h @@ -0,0 +1,149 @@ +#pragma once + +#include "json.hpp" +#include "youtokentome/bpe.h" +#include "TorchHeader.h" + +#include +#include +#include + +/// +class RuCLIPProcessor +{ +public: + RuCLIPProcessor() = default; + + RuCLIPProcessor(const std::string& tokenizer_path, + const int image_size = 224, + const int text_seq_length = 77, + const std::vector norm_mean = { 0.48145466, 0.4578275, 0.40821073 }, + const std::vector norm_std = { 0.26862954, 0.26130258, 0.27577711 }); + ~RuCLIPProcessor() = default; + + /// + RuCLIPProcessor& operator=(RuCLIPProcessor&& processor) noexcept + { + eos_id = processor.eos_id; + bos_id = processor.bos_id; + unk_id = processor.unk_id; + pad_id = processor.pad_id; + ImageSize = processor.ImageSize; + TextSeqLength = processor.TextSeqLength; + NormMean = processor.NormMean; + NormStd = processor.NormStd; + + Tokenizer = std::move(processor.Tokenizer); + + m_textsTensors = processor.m_textsTensors; 
+ + return *this; + } + + ///!!!Локали-юникоды + torch::Tensor EncodeText(const /*std::vector<*/std::string &text) const; + torch::Tensor PrepareTokens(/*std::vector<*/std::vector tokens) const; //Передаю по значению чтобы внутри иметь дело с копией + torch::Tensor EncodeImage(const cv::Mat& img) const; + std::pair operator()(const std::vector & texts, const std::vector & images) const; + std::pair operator()(const std::vector & images) const; + + void CacheText(const std::vector & texts); + + /// + int GetImageSize() const noexcept + { + return ImageSize; + } + + /// + static RuCLIPProcessor FromPretrained(const std::filesystem::path &folder) + { + std::filesystem::path tokenizer_path = folder / "bpe.model"; + using json = nlohmann::json; + std::cout << tokenizer_path << std::endl; + std::ifstream f(folder / "config.json"); + json config = json::parse(f); + + auto mean = config["mean"].template get>(); + auto std = config["std"].template get>(); + + return RuCLIPProcessor(tokenizer_path.string(), + int(config["image_resolution"]), + int(config["context_length"]), + mean, std); + } + + const std::vector& GetTextTensors() const; + +private: + uint32_t eos_id = 3; + uint32_t bos_id = 2; + uint32_t unk_id = 1; + uint32_t pad_id = 0; + int ImageSize{ 224 }; + int TextSeqLength{ 77 }; + std::vector NormMean; + std::vector NormStd; + std::unique_ptr Tokenizer; + + std::vector m_textsTensors; + + /// + cv::Mat ResizeToInput(const cv::Mat& img, bool saveAspectRatio = true) const; +}; + +///relevancy for batch size == 1 at this moment, float lv = result.index({0,0}).item(); +/// +///std::vector canon_texts_tensors; +///canon_texts_tensors.push_back(ClipProcessor->EncodeText(std::string("объект"))); +///canon_texts_tensors.push_back(ClipProcessor->EncodeText(std::string("вещи"))); +///canon_texts_tensors.push_back(ClipProcessor->EncodeText(std::string("текстура"))); +///int negatives_len = (int)canon_texts_tensors.size(); +///auto canon_features = 
Clip->EncodeText(torch::stack(canon_texts_tensors).to(Device)).to(torch::kCPU); ///[3, 768] +///canon_features = canon_features / canon_features.norm(2/*L2*/, -1, true); +///auto input = ClipProcessor->EncodeText(std::string("малый барабан")); +///auto text_features = Clip->EncodeText(input.unsqueeze(0).to(Device)).to(torch::kCPU); ///[1, 768] +///text_features = text_features / text_features.norm(2/*L2*/, -1, true); +///torch::Tensor image_features = PyramidClipEmbedding.GetPixelValue(i,j,0.5f,img_id,pyramid_embedder_properties,cv::Size(data.W, data.H)).to(torch::kCPU); +///image_features = image_features / image_features.norm(2/*L2*/, -1, true); +///torch::Tensor rel = Relevancy(image_features, text_features, canon_features); +///float lv = rel.index({0,0}).item(); + +inline torch::Tensor Relevancy(torch::Tensor embeds, torch::Tensor positives, torch::Tensor negatives) +{ +#if 0 + std::cout << "Relevancy: 0" << std::endl; + auto embeds2 = torch::cat({ positives, negatives }); + std::cout << "Relevancy: 1" << std::endl; + auto logits = /*scale * */torch::mm(embeds, embeds2.t()); //[batch_size x phrases] + std::cout << "Relevancy: 2" << std::endl; + auto positive_vals = logits.index({ "...", torch::indexing::Slice(0, positives.sizes()[0]) }); // [batch_size x 1] + std::cout << "Relevancy: 3" << std::endl; + auto negative_vals = logits.index({ "...", torch::indexing::Slice(positives.sizes()[0], torch::indexing::None) }); // [batch_size x negative_phrase_n] + std::cout << "Relevancy: 4" << std::endl; + auto repeated_pos = positive_vals.repeat({ 1, negatives.sizes()[0] }); //[batch_size x negative_phrase_n] + std::cout << "Relevancy: 5: repeated_pos: " << repeated_pos.sizes() << ", negative_vals: " << negative_vals.sizes() << std::endl; + auto sims = torch::stack({ repeated_pos, negative_vals }, -1); //[batch_size x negative_phrase_n x 2] + std::cout << "Relevancy: 6" << std::endl; + auto smx = torch::softmax(10 * sims, -1); // [batch_size x negative_phrase_n x 2] + 
std::cout << "Relevancy: 7" << std::endl; + auto best_id = smx.index({ "...", 0 }).argmin(1); // [batch_size x 2] + std::cout << "Relevancy: 8" << std::endl; + auto result = torch::gather(smx, 1, best_id.index({ "...", torch::indexing::None, torch::indexing::None }).expand({ best_id.sizes()[0], negatives.sizes()[0], 2 }) + ).index({ torch::indexing::Slice(), 0, torch::indexing::Slice() });// [batch_size x 2] + return result; +#else + auto embeds2 = torch::cat({ positives, negatives }, 0); + auto logits = torch::mm(embeds, embeds2.t()); // [batch_size, 1 + negatives_len] + auto positive_vals = logits.index({ "...", torch::indexing::Slice(0, 1) }); // [batch_size, 1] + auto negative_vals = logits.index({ "...", torch::indexing::Slice(1, torch::indexing::None) }); // [batch_size, negatives_len] + auto repeated_pos = positive_vals.repeat({ 1, negatives.sizes()[0] }); // [batch_size, negatives_len] + auto sims = torch::stack({ repeated_pos, negative_vals }, -1); // [batch_size, negatives_len, 2] + auto smx = torch::softmax(10 * sims, -1); // [batch_size, negatives_len, 2] + //Находим индекс самого сложного негатива (с минимальной вероятностью позитивного класса) + auto best_id = smx.index({ "...", 0 }).argmin(1, /*keepdim=*/true); // [batch_size, 1] + //Собираем результаты для выбранных негативов + auto result = torch::gather(smx, 1, best_id.unsqueeze(-1).expand({ -1, -1, 2 })); + return result.squeeze(1); // [batch_size, 2] +#endif +} diff --git a/thirdparty/ruclip/TorchHeader.h b/thirdparty/ruclip/TorchHeader.h new file mode 100644 index 000000000..dce611ab0 --- /dev/null +++ b/thirdparty/ruclip/TorchHeader.h @@ -0,0 +1,27 @@ +#pragma once + +#if defined(_MSC_VER) +#define DISABLE_WARNING_PUSH __pragma(warning( push )) +#define DISABLE_WARNING_POP __pragma(warning( pop )) +#define DISABLE_WARNING(warningNumber) __pragma(warning( disable : warningNumber )) +#elif defined(__GNUC__) || defined(__clang__) +#define DO_PRAGMA(X) _Pragma(#X) +#define DISABLE_WARNING_PUSH 
DO_PRAGMA(GCC diagnostic push) +#define DISABLE_WARNING_POP DO_PRAGMA(GCC diagnostic pop) +#define DISABLE_WARNING(warningName) DO_PRAGMA(GCC diagnostic ignored #warningName) +#else +#define DISABLE_WARNING_PUSH +#define DISABLE_WARNING_POP +#define DISABLE_WARNING_UNREFERENCED_FORMAL_PARAMETER +#define DISABLE_WARNING_UNREFERENCED_FUNCTION +#endif + +DISABLE_WARNING_PUSH + +#if defined(_MSC_VER) +DISABLE_WARNING(4624) +#endif + +#include + +DISABLE_WARNING_POP diff --git a/thirdparty/ruclip/json.hpp b/thirdparty/ruclip/json.hpp new file mode 100644 index 000000000..2448bf22d --- /dev/null +++ b/thirdparty/ruclip/json.hpp @@ -0,0 +1,24640 @@ +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + +/****************************************************************************\ + * Note on documentation: The source files contain links to the online * + * documentation of the public API at https://json.nlohmann.me. This URL * + * contains the most recent documentation and should also be applicable to * + * previous versions; documentation for deprecated functions is not * + * removed, but marked deprecated. See "Generate documentation" section in * + * file docs/README.md. 
* +\****************************************************************************/ + +#ifndef INCLUDE_NLOHMANN_JSON_HPP_ +#define INCLUDE_NLOHMANN_JSON_HPP_ + +#include // all_of, find, for_each +#include // nullptr_t, ptrdiff_t, size_t +#include // hash, less +#include // initializer_list +#ifndef JSON_NO_IO + #include // istream, ostream +#endif // JSON_NO_IO +#include // random_access_iterator_tag +#include // unique_ptr +#include // string, stoi, to_string +#include // declval, forward, move, pair, swap +#include // vector + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +// This file contains all macro definitions affecting or depending on the ABI + +#ifndef JSON_SKIP_LIBRARY_VERSION_CHECK + #if defined(NLOHMANN_JSON_VERSION_MAJOR) && defined(NLOHMANN_JSON_VERSION_MINOR) && defined(NLOHMANN_JSON_VERSION_PATCH) + #if NLOHMANN_JSON_VERSION_MAJOR != 3 || NLOHMANN_JSON_VERSION_MINOR != 11 || NLOHMANN_JSON_VERSION_PATCH != 2 + #warning "Already included a different version of the library!" 
+ #endif + #endif +#endif + +#define NLOHMANN_JSON_VERSION_MAJOR 3 // NOLINT(modernize-macro-to-enum) +#define NLOHMANN_JSON_VERSION_MINOR 11 // NOLINT(modernize-macro-to-enum) +#define NLOHMANN_JSON_VERSION_PATCH 2 // NOLINT(modernize-macro-to-enum) + +#ifndef JSON_DIAGNOSTICS + #define JSON_DIAGNOSTICS 0 +#endif + +#ifndef JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON + #define JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON 0 +#endif + +#if JSON_DIAGNOSTICS + #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS _diag +#else + #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS +#endif + +#if JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON + #define NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON _ldvcmp +#else + #define NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON +#endif + +#ifndef NLOHMANN_JSON_NAMESPACE_NO_VERSION + #define NLOHMANN_JSON_NAMESPACE_NO_VERSION 0 +#endif + +// Construct the namespace ABI tags component +#define NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b) json_abi ## a ## b +#define NLOHMANN_JSON_ABI_TAGS_CONCAT(a, b) \ + NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b) + +#define NLOHMANN_JSON_ABI_TAGS \ + NLOHMANN_JSON_ABI_TAGS_CONCAT( \ + NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS, \ + NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON) + +// Construct the namespace version component +#define NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT_EX(major, minor, patch) \ + _v ## major ## _ ## minor ## _ ## patch +#define NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT(major, minor, patch) \ + NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT_EX(major, minor, patch) + +#if NLOHMANN_JSON_NAMESPACE_NO_VERSION +#define NLOHMANN_JSON_NAMESPACE_VERSION +#else +#define NLOHMANN_JSON_NAMESPACE_VERSION \ + NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT(NLOHMANN_JSON_VERSION_MAJOR, \ + NLOHMANN_JSON_VERSION_MINOR, \ + NLOHMANN_JSON_VERSION_PATCH) +#endif + +// Combine namespace components +#define NLOHMANN_JSON_NAMESPACE_CONCAT_EX(a, b) a ## b +#define NLOHMANN_JSON_NAMESPACE_CONCAT(a, b) \ + NLOHMANN_JSON_NAMESPACE_CONCAT_EX(a, 
b) + +#ifndef NLOHMANN_JSON_NAMESPACE +#define NLOHMANN_JSON_NAMESPACE \ + nlohmann::NLOHMANN_JSON_NAMESPACE_CONCAT( \ + NLOHMANN_JSON_ABI_TAGS, \ + NLOHMANN_JSON_NAMESPACE_VERSION) +#endif + +#ifndef NLOHMANN_JSON_NAMESPACE_BEGIN +#define NLOHMANN_JSON_NAMESPACE_BEGIN \ + namespace nlohmann \ + { \ + inline namespace NLOHMANN_JSON_NAMESPACE_CONCAT( \ + NLOHMANN_JSON_ABI_TAGS, \ + NLOHMANN_JSON_NAMESPACE_VERSION) \ + { +#endif + +#ifndef NLOHMANN_JSON_NAMESPACE_END +#define NLOHMANN_JSON_NAMESPACE_END \ + } /* namespace (inline namespace) NOLINT(readability/namespace) */ \ + } // namespace nlohmann +#endif + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include // transform +#include // array +#include // forward_list +#include // inserter, front_inserter, end +#include // map +#include // string +#include // tuple, make_tuple +#include // is_arithmetic, is_same, is_enum, underlying_type, is_convertible +#include // unordered_map +#include // pair, declval +#include // valarray + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include // nullptr_t +#include // exception +#if JSON_DIAGNOSTICS + #include // accumulate +#endif +#include // runtime_error +#include // to_string +#include // vector + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include // array +#include // size_t +#include 
// uint8_t +#include // string + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include // declval, pair +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +// #include + + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +template struct make_void +{ + using type = void; +}; +template using void_t = typename make_void::type; + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END + + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +// https://en.cppreference.com/w/cpp/experimental/is_detected +struct nonesuch +{ + nonesuch() = delete; + ~nonesuch() = delete; + nonesuch(nonesuch const&) = delete; + nonesuch(nonesuch const&&) = delete; + void operator=(nonesuch const&) = delete; + void operator=(nonesuch&&) = delete; +}; + +template class Op, + class... Args> +struct detector +{ + using value_t = std::false_type; + using type = Default; +}; + +template class Op, class... Args> +struct detector>, Op, Args...> +{ + using value_t = std::true_type; + using type = Op; +}; + +template class Op, class... Args> +using is_detected = typename detector::value_t; + +template class Op, class... Args> +struct is_detected_lazy : is_detected { }; + +template class Op, class... 
Args> +using detected_t = typename detector::type; + +template class Op, class... Args> +using detected_or = detector; + +template class Op, class... Args> +using detected_or_t = typename detected_or::type; + +template class Op, class... Args> +using is_detected_exact = std::is_same>; + +template class Op, class... Args> +using is_detected_convertible = + std::is_convertible, To>; + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END + +// #include + + +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-FileCopyrightText: 2016-2021 Evan Nemerson +// SPDX-License-Identifier: MIT + +/* Hedley - https://nemequ.github.io/hedley + * Created by Evan Nemerson + */ + +#if !defined(JSON_HEDLEY_VERSION) || (JSON_HEDLEY_VERSION < 15) +#if defined(JSON_HEDLEY_VERSION) + #undef JSON_HEDLEY_VERSION +#endif +#define JSON_HEDLEY_VERSION 15 + +#if defined(JSON_HEDLEY_STRINGIFY_EX) + #undef JSON_HEDLEY_STRINGIFY_EX +#endif +#define JSON_HEDLEY_STRINGIFY_EX(x) #x + +#if defined(JSON_HEDLEY_STRINGIFY) + #undef JSON_HEDLEY_STRINGIFY +#endif +#define JSON_HEDLEY_STRINGIFY(x) JSON_HEDLEY_STRINGIFY_EX(x) + +#if defined(JSON_HEDLEY_CONCAT_EX) + #undef JSON_HEDLEY_CONCAT_EX +#endif +#define JSON_HEDLEY_CONCAT_EX(a,b) a##b + +#if defined(JSON_HEDLEY_CONCAT) + #undef JSON_HEDLEY_CONCAT +#endif +#define JSON_HEDLEY_CONCAT(a,b) JSON_HEDLEY_CONCAT_EX(a,b) + +#if defined(JSON_HEDLEY_CONCAT3_EX) + #undef JSON_HEDLEY_CONCAT3_EX +#endif +#define JSON_HEDLEY_CONCAT3_EX(a,b,c) a##b##c + +#if defined(JSON_HEDLEY_CONCAT3) + #undef JSON_HEDLEY_CONCAT3 +#endif +#define JSON_HEDLEY_CONCAT3(a,b,c) JSON_HEDLEY_CONCAT3_EX(a,b,c) + +#if defined(JSON_HEDLEY_VERSION_ENCODE) + #undef JSON_HEDLEY_VERSION_ENCODE +#endif +#define JSON_HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) + +#if 
defined(JSON_HEDLEY_VERSION_DECODE_MAJOR) + #undef JSON_HEDLEY_VERSION_DECODE_MAJOR +#endif +#define JSON_HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) + +#if defined(JSON_HEDLEY_VERSION_DECODE_MINOR) + #undef JSON_HEDLEY_VERSION_DECODE_MINOR +#endif +#define JSON_HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) + +#if defined(JSON_HEDLEY_VERSION_DECODE_REVISION) + #undef JSON_HEDLEY_VERSION_DECODE_REVISION +#endif +#define JSON_HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) + +#if defined(JSON_HEDLEY_GNUC_VERSION) + #undef JSON_HEDLEY_GNUC_VERSION +#endif +#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) + #define JSON_HEDLEY_GNUC_VERSION JSON_HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) +#elif defined(__GNUC__) + #define JSON_HEDLEY_GNUC_VERSION JSON_HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) +#endif + +#if defined(JSON_HEDLEY_GNUC_VERSION_CHECK) + #undef JSON_HEDLEY_GNUC_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_GNUC_VERSION) + #define JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_GNUC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_MSVC_VERSION) + #undef JSON_HEDLEY_MSVC_VERSION +#endif +#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) + #define JSON_HEDLEY_MSVC_VERSION JSON_HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) +#elif defined(_MSC_FULL_VER) && !defined(__ICL) + #define JSON_HEDLEY_MSVC_VERSION JSON_HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) +#elif defined(_MSC_VER) && !defined(__ICL) + #define JSON_HEDLEY_MSVC_VERSION JSON_HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) +#endif + +#if defined(JSON_HEDLEY_MSVC_VERSION_CHECK) + #undef 
JSON_HEDLEY_MSVC_VERSION_CHECK +#endif +#if !defined(JSON_HEDLEY_MSVC_VERSION) + #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) +#elif defined(_MSC_VER) && (_MSC_VER >= 1400) + #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) +#elif defined(_MSC_VER) && (_MSC_VER >= 1200) + #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) +#else + #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) +#endif + +#if defined(JSON_HEDLEY_INTEL_VERSION) + #undef JSON_HEDLEY_INTEL_VERSION +#endif +#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) + #define JSON_HEDLEY_INTEL_VERSION JSON_HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) +#elif defined(__INTEL_COMPILER) && !defined(__ICL) + #define JSON_HEDLEY_INTEL_VERSION JSON_HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) +#endif + +#if defined(JSON_HEDLEY_INTEL_VERSION_CHECK) + #undef JSON_HEDLEY_INTEL_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_INTEL_VERSION) + #define JSON_HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_INTEL_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_INTEL_CL_VERSION) + #undef JSON_HEDLEY_INTEL_CL_VERSION +#endif +#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) + #define JSON_HEDLEY_INTEL_CL_VERSION JSON_HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) +#endif + +#if defined(JSON_HEDLEY_INTEL_CL_VERSION_CHECK) + #undef JSON_HEDLEY_INTEL_CL_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_INTEL_CL_VERSION) + #define JSON_HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_INTEL_CL_VERSION >= 
JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_PGI_VERSION) + #undef JSON_HEDLEY_PGI_VERSION +#endif +#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) + #define JSON_HEDLEY_PGI_VERSION JSON_HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) +#endif + +#if defined(JSON_HEDLEY_PGI_VERSION_CHECK) + #undef JSON_HEDLEY_PGI_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_PGI_VERSION) + #define JSON_HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_PGI_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_SUNPRO_VERSION) + #undef JSON_HEDLEY_SUNPRO_VERSION +#endif +#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) + #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) +#elif defined(__SUNPRO_C) + #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) +#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) + #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) +#elif defined(__SUNPRO_CC) + #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) +#endif + +#if defined(JSON_HEDLEY_SUNPRO_VERSION_CHECK) + #undef JSON_HEDLEY_SUNPRO_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_SUNPRO_VERSION) + #define JSON_HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_SUNPRO_VERSION >= 
JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_EMSCRIPTEN_VERSION) + #undef JSON_HEDLEY_EMSCRIPTEN_VERSION +#endif +#if defined(__EMSCRIPTEN__) + #define JSON_HEDLEY_EMSCRIPTEN_VERSION JSON_HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) +#endif + +#if defined(JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK) + #undef JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_EMSCRIPTEN_VERSION) + #define JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_EMSCRIPTEN_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_ARM_VERSION) + #undef JSON_HEDLEY_ARM_VERSION +#endif +#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) + #define JSON_HEDLEY_ARM_VERSION JSON_HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) +#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) + #define JSON_HEDLEY_ARM_VERSION JSON_HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) +#endif + +#if defined(JSON_HEDLEY_ARM_VERSION_CHECK) + #undef JSON_HEDLEY_ARM_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_ARM_VERSION) + #define JSON_HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_ARM_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_IBM_VERSION) + #undef JSON_HEDLEY_IBM_VERSION +#endif +#if defined(__ibmxl__) + #define JSON_HEDLEY_IBM_VERSION JSON_HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) +#elif defined(__xlC__) && defined(__xlC_ver__) + #define JSON_HEDLEY_IBM_VERSION 
JSON_HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) +#elif defined(__xlC__) + #define JSON_HEDLEY_IBM_VERSION JSON_HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) +#endif + +#if defined(JSON_HEDLEY_IBM_VERSION_CHECK) + #undef JSON_HEDLEY_IBM_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_IBM_VERSION) + #define JSON_HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_IBM_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_VERSION) + #undef JSON_HEDLEY_TI_VERSION +#endif +#if \ + defined(__TI_COMPILER_VERSION__) && \ + ( \ + defined(__TMS470__) || defined(__TI_ARM__) || \ + defined(__MSP430__) || \ + defined(__TMS320C2000__) \ + ) +#if (__TI_COMPILER_VERSION__ >= 16000000) + #define JSON_HEDLEY_TI_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif +#endif + +#if defined(JSON_HEDLEY_TI_VERSION_CHECK) + #undef JSON_HEDLEY_TI_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_VERSION) + #define JSON_HEDLEY_TI_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_CL2000_VERSION) + #undef JSON_HEDLEY_TI_CL2000_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) + #define JSON_HEDLEY_TI_CL2000_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(JSON_HEDLEY_TI_CL2000_VERSION_CHECK) + #undef JSON_HEDLEY_TI_CL2000_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_CL2000_VERSION) + #define JSON_HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CL2000_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, 
minor, patch)) +#else + #define JSON_HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_CL430_VERSION) + #undef JSON_HEDLEY_TI_CL430_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) + #define JSON_HEDLEY_TI_CL430_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(JSON_HEDLEY_TI_CL430_VERSION_CHECK) + #undef JSON_HEDLEY_TI_CL430_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_CL430_VERSION) + #define JSON_HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CL430_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_ARMCL_VERSION) + #undef JSON_HEDLEY_TI_ARMCL_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) + #define JSON_HEDLEY_TI_ARMCL_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(JSON_HEDLEY_TI_ARMCL_VERSION_CHECK) + #undef JSON_HEDLEY_TI_ARMCL_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_ARMCL_VERSION) + #define JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_ARMCL_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_CL6X_VERSION) + #undef JSON_HEDLEY_TI_CL6X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) + #define JSON_HEDLEY_TI_CL6X_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(JSON_HEDLEY_TI_CL6X_VERSION_CHECK) + #undef JSON_HEDLEY_TI_CL6X_VERSION_CHECK +#endif +#if 
defined(JSON_HEDLEY_TI_CL6X_VERSION) + #define JSON_HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CL6X_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_CL7X_VERSION) + #undef JSON_HEDLEY_TI_CL7X_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) + #define JSON_HEDLEY_TI_CL7X_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(JSON_HEDLEY_TI_CL7X_VERSION_CHECK) + #undef JSON_HEDLEY_TI_CL7X_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_CL7X_VERSION) + #define JSON_HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CL7X_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TI_CLPRU_VERSION) + #undef JSON_HEDLEY_TI_CLPRU_VERSION +#endif +#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) + #define JSON_HEDLEY_TI_CLPRU_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) +#endif + +#if defined(JSON_HEDLEY_TI_CLPRU_VERSION_CHECK) + #undef JSON_HEDLEY_TI_CLPRU_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TI_CLPRU_VERSION) + #define JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CLPRU_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_CRAY_VERSION) + #undef JSON_HEDLEY_CRAY_VERSION +#endif +#if defined(_CRAYC) + #if defined(_RELEASE_PATCHLEVEL) + #define JSON_HEDLEY_CRAY_VERSION JSON_HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) + #else + #define JSON_HEDLEY_CRAY_VERSION 
JSON_HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) + #endif +#endif + +#if defined(JSON_HEDLEY_CRAY_VERSION_CHECK) + #undef JSON_HEDLEY_CRAY_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_CRAY_VERSION) + #define JSON_HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_CRAY_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_IAR_VERSION) + #undef JSON_HEDLEY_IAR_VERSION +#endif +#if defined(__IAR_SYSTEMS_ICC__) + #if __VER__ > 1000 + #define JSON_HEDLEY_IAR_VERSION JSON_HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) + #else + #define JSON_HEDLEY_IAR_VERSION JSON_HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) + #endif +#endif + +#if defined(JSON_HEDLEY_IAR_VERSION_CHECK) + #undef JSON_HEDLEY_IAR_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_IAR_VERSION) + #define JSON_HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_IAR_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_TINYC_VERSION) + #undef JSON_HEDLEY_TINYC_VERSION +#endif +#if defined(__TINYC__) + #define JSON_HEDLEY_TINYC_VERSION JSON_HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) +#endif + +#if defined(JSON_HEDLEY_TINYC_VERSION_CHECK) + #undef JSON_HEDLEY_TINYC_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_TINYC_VERSION) + #define JSON_HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TINYC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_DMC_VERSION) + #undef JSON_HEDLEY_DMC_VERSION +#endif +#if defined(__DMC__) + #define JSON_HEDLEY_DMC_VERSION JSON_HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) +#endif + +#if 
defined(JSON_HEDLEY_DMC_VERSION_CHECK) + #undef JSON_HEDLEY_DMC_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_DMC_VERSION) + #define JSON_HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_DMC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_COMPCERT_VERSION) + #undef JSON_HEDLEY_COMPCERT_VERSION +#endif +#if defined(__COMPCERT_VERSION__) + #define JSON_HEDLEY_COMPCERT_VERSION JSON_HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) +#endif + +#if defined(JSON_HEDLEY_COMPCERT_VERSION_CHECK) + #undef JSON_HEDLEY_COMPCERT_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_COMPCERT_VERSION) + #define JSON_HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_COMPCERT_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_PELLES_VERSION) + #undef JSON_HEDLEY_PELLES_VERSION +#endif +#if defined(__POCC__) + #define JSON_HEDLEY_PELLES_VERSION JSON_HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) +#endif + +#if defined(JSON_HEDLEY_PELLES_VERSION_CHECK) + #undef JSON_HEDLEY_PELLES_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_PELLES_VERSION) + #define JSON_HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_PELLES_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_MCST_LCC_VERSION) + #undef JSON_HEDLEY_MCST_LCC_VERSION +#endif +#if defined(__LCC__) && defined(__LCC_MINOR__) + #define JSON_HEDLEY_MCST_LCC_VERSION JSON_HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) +#endif + +#if defined(JSON_HEDLEY_MCST_LCC_VERSION_CHECK) + #undef JSON_HEDLEY_MCST_LCC_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_MCST_LCC_VERSION) + 
#define JSON_HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_MCST_LCC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_GCC_VERSION) + #undef JSON_HEDLEY_GCC_VERSION +#endif +#if \ + defined(JSON_HEDLEY_GNUC_VERSION) && \ + !defined(__clang__) && \ + !defined(JSON_HEDLEY_INTEL_VERSION) && \ + !defined(JSON_HEDLEY_PGI_VERSION) && \ + !defined(JSON_HEDLEY_ARM_VERSION) && \ + !defined(JSON_HEDLEY_CRAY_VERSION) && \ + !defined(JSON_HEDLEY_TI_VERSION) && \ + !defined(JSON_HEDLEY_TI_ARMCL_VERSION) && \ + !defined(JSON_HEDLEY_TI_CL430_VERSION) && \ + !defined(JSON_HEDLEY_TI_CL2000_VERSION) && \ + !defined(JSON_HEDLEY_TI_CL6X_VERSION) && \ + !defined(JSON_HEDLEY_TI_CL7X_VERSION) && \ + !defined(JSON_HEDLEY_TI_CLPRU_VERSION) && \ + !defined(__COMPCERT__) && \ + !defined(JSON_HEDLEY_MCST_LCC_VERSION) + #define JSON_HEDLEY_GCC_VERSION JSON_HEDLEY_GNUC_VERSION +#endif + +#if defined(JSON_HEDLEY_GCC_VERSION_CHECK) + #undef JSON_HEDLEY_GCC_VERSION_CHECK +#endif +#if defined(JSON_HEDLEY_GCC_VERSION) + #define JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_GCC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) +#else + #define JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) +#endif + +#if defined(JSON_HEDLEY_HAS_ATTRIBUTE) + #undef JSON_HEDLEY_HAS_ATTRIBUTE +#endif +#if \ + defined(__has_attribute) && \ + ( \ + (!defined(JSON_HEDLEY_IAR_VERSION) || JSON_HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ + ) +# define JSON_HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) +#else +# define JSON_HEDLEY_HAS_ATTRIBUTE(attribute) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_ATTRIBUTE) + #undef JSON_HEDLEY_GNUC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) + #define JSON_HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_HAS_ATTRIBUTE(attribute) +#else + #define 
JSON_HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_ATTRIBUTE) + #undef JSON_HEDLEY_GCC_HAS_ATTRIBUTE +#endif +#if defined(__has_attribute) + #define JSON_HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_HAS_ATTRIBUTE(attribute) +#else + #define JSON_HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_CPP_ATTRIBUTE) + #undef JSON_HEDLEY_HAS_CPP_ATTRIBUTE +#endif +#if \ + defined(__has_cpp_attribute) && \ + defined(__cplusplus) && \ + (!defined(JSON_HEDLEY_SUNPRO_VERSION) || JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) + #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) +#else + #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) +#endif + +#if defined(JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS) + #undef JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS +#endif +#if !defined(__cplusplus) || !defined(__has_cpp_attribute) + #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#elif \ + !defined(JSON_HEDLEY_PGI_VERSION) && \ + !defined(JSON_HEDLEY_IAR_VERSION) && \ + (!defined(JSON_HEDLEY_SUNPRO_VERSION) || JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ + (!defined(JSON_HEDLEY_MSVC_VERSION) || JSON_HEDLEY_MSVC_VERSION_CHECK(19,20,0)) + #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) JSON_HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) +#else + #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) + #undef JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) + #define JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else + #define JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE) + 
#undef JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE +#endif +#if defined(__has_cpp_attribute) && defined(__cplusplus) + #define JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) +#else + #define JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_BUILTIN) + #undef JSON_HEDLEY_HAS_BUILTIN +#endif +#if defined(__has_builtin) + #define JSON_HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) +#else + #define JSON_HEDLEY_HAS_BUILTIN(builtin) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_BUILTIN) + #undef JSON_HEDLEY_GNUC_HAS_BUILTIN +#endif +#if defined(__has_builtin) + #define JSON_HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else + #define JSON_HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_BUILTIN) + #undef JSON_HEDLEY_GCC_HAS_BUILTIN +#endif +#if defined(__has_builtin) + #define JSON_HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) +#else + #define JSON_HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_FEATURE) + #undef JSON_HEDLEY_HAS_FEATURE +#endif +#if defined(__has_feature) + #define JSON_HEDLEY_HAS_FEATURE(feature) __has_feature(feature) +#else + #define JSON_HEDLEY_HAS_FEATURE(feature) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_FEATURE) + #undef JSON_HEDLEY_GNUC_HAS_FEATURE +#endif +#if defined(__has_feature) + #define JSON_HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else + #define JSON_HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_FEATURE) + #undef JSON_HEDLEY_GCC_HAS_FEATURE +#endif +#if defined(__has_feature) + #define 
JSON_HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) +#else + #define JSON_HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_EXTENSION) + #undef JSON_HEDLEY_HAS_EXTENSION +#endif +#if defined(__has_extension) + #define JSON_HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) +#else + #define JSON_HEDLEY_HAS_EXTENSION(extension) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_EXTENSION) + #undef JSON_HEDLEY_GNUC_HAS_EXTENSION +#endif +#if defined(__has_extension) + #define JSON_HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else + #define JSON_HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_EXTENSION) + #undef JSON_HEDLEY_GCC_HAS_EXTENSION +#endif +#if defined(__has_extension) + #define JSON_HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) +#else + #define JSON_HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE) + #undef JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) + #define JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) +#else + #define JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) + #undef JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE +#endif +#if defined(__has_declspec_attribute) + #define JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else + #define JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) + #undef JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE 
+#endif +#if defined(__has_declspec_attribute) + #define JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) +#else + #define JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_HAS_WARNING) + #undef JSON_HEDLEY_HAS_WARNING +#endif +#if defined(__has_warning) + #define JSON_HEDLEY_HAS_WARNING(warning) __has_warning(warning) +#else + #define JSON_HEDLEY_HAS_WARNING(warning) (0) +#endif + +#if defined(JSON_HEDLEY_GNUC_HAS_WARNING) + #undef JSON_HEDLEY_GNUC_HAS_WARNING +#endif +#if defined(__has_warning) + #define JSON_HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else + #define JSON_HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_GCC_HAS_WARNING) + #undef JSON_HEDLEY_GCC_HAS_WARNING +#endif +#if defined(__has_warning) + #define JSON_HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) +#else + #define JSON_HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + defined(__clang__) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ + JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ + 
JSON_HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ + (JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) + #define JSON_HEDLEY_PRAGMA(value) _Pragma(#value) +#elif JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) + #define JSON_HEDLEY_PRAGMA(value) __pragma(value) +#else + #define JSON_HEDLEY_PRAGMA(value) +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_PUSH) + #undef JSON_HEDLEY_DIAGNOSTIC_PUSH +#endif +#if defined(JSON_HEDLEY_DIAGNOSTIC_POP) + #undef JSON_HEDLEY_DIAGNOSTIC_POP +#endif +#if defined(__clang__) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") + #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") +#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") + #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,6,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") + #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) + #define JSON_HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) +#elif JSON_HEDLEY_ARM_VERSION_CHECK(5,6,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") + #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("pop") +#elif \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") + #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(2,90,0) + #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") + #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") +#else + #define 
JSON_HEDLEY_DIAGNOSTIC_PUSH + #define JSON_HEDLEY_DIAGNOSTIC_POP +#endif + +/* JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) + #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ +#endif +#if defined(__cplusplus) +# if JSON_HEDLEY_HAS_WARNING("-Wc++98-compat") +# if JSON_HEDLEY_HAS_WARNING("-Wc++17-extensions") +# if JSON_HEDLEY_HAS_WARNING("-Wc++1z-extensions") +# define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ + xpr \ + JSON_HEDLEY_DIAGNOSTIC_POP +# else +# define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ + xpr \ + JSON_HEDLEY_DIAGNOSTIC_POP +# endif +# else +# define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ + xpr \ + JSON_HEDLEY_DIAGNOSTIC_POP +# endif +# endif +#endif +#if !defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x +#endif + +#if defined(JSON_HEDLEY_CONST_CAST) + #undef JSON_HEDLEY_CONST_CAST +#endif +#if defined(__cplusplus) +# define JSON_HEDLEY_CONST_CAST(T, expr) (const_cast(expr)) +#elif \ + JSON_HEDLEY_HAS_WARNING("-Wcast-qual") || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define JSON_HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ + ((T) (expr)); \ + JSON_HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define 
JSON_HEDLEY_CONST_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(JSON_HEDLEY_REINTERPRET_CAST) + #undef JSON_HEDLEY_REINTERPRET_CAST +#endif +#if defined(__cplusplus) + #define JSON_HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast(expr)) +#else + #define JSON_HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(JSON_HEDLEY_STATIC_CAST) + #undef JSON_HEDLEY_STATIC_CAST +#endif +#if defined(__cplusplus) + #define JSON_HEDLEY_STATIC_CAST(T, expr) (static_cast(expr)) +#else + #define JSON_HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) +#endif + +#if defined(JSON_HEDLEY_CPP_CAST) + #undef JSON_HEDLEY_CPP_CAST +#endif +#if defined(__cplusplus) +# if JSON_HEDLEY_HAS_WARNING("-Wold-style-cast") +# define JSON_HEDLEY_CPP_CAST(T, expr) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ + ((T) (expr)) \ + JSON_HEDLEY_DIAGNOSTIC_POP +# elif JSON_HEDLEY_IAR_VERSION_CHECK(8,3,0) +# define JSON_HEDLEY_CPP_CAST(T, expr) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("diag_suppress=Pe137") \ + JSON_HEDLEY_DIAGNOSTIC_POP +# else +# define JSON_HEDLEY_CPP_CAST(T, expr) ((T) (expr)) +# endif +#else +# define JSON_HEDLEY_CPP_CAST(T, expr) (expr) +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) + #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wdeprecated-declarations") + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") +#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") +#elif JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786)) +#elif JSON_HEDLEY_PGI_VERSION_CHECK(20,7,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") +#elif JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) + #define 
JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) +#elif JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") +#elif \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(2,90,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") +#else + #define 
JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) + #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wunknown-pragmas") + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") +#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") +#elif JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) +#elif JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,3,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") +#elif JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) +#elif \ + JSON_HEDLEY_TI_VERSION_CHECK(16,9,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") +#elif JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 161") +#else + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) + #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wunknown-attributes") + 
#define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,6,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") +#elif JSON_HEDLEY_INTEL_VERSION_CHECK(17,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") +#elif JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) +#elif JSON_HEDLEY_MSVC_VERSION_CHECK(19,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) +#elif JSON_HEDLEY_PGI_VERSION_CHECK(20,7,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") +#elif JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") +#elif \ + JSON_HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") +#elif JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") +#else + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) + #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wcast-qual") + #define 
JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") +#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(3,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") +#else + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL +#endif + +#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) + #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wunused-function") + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") +#elif JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("GCC diagnostic ignored \"-Wunused-function\"") +#elif JSON_HEDLEY_MSVC_VERSION_CHECK(1,0,0) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) +#elif JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") +#else + #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION +#endif + +#if defined(JSON_HEDLEY_DEPRECATED) + #undef JSON_HEDLEY_DEPRECATED +#endif +#if defined(JSON_HEDLEY_DEPRECATED_FOR) + #undef JSON_HEDLEY_DEPRECATED_FOR +#endif +#if \ + JSON_HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) +#elif \ + (JSON_HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(JSON_HEDLEY_IAR_VERSION)) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ + 
JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(18,1,0) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) +#elif defined(__cplusplus) && (__cplusplus >= 201402L) + #define JSON_HEDLEY_DEPRECATED(since) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) +#elif \ + JSON_HEDLEY_HAS_ATTRIBUTE(deprecated) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + JSON_HEDLEY_IAR_VERSION_CHECK(8,10,0) + #define JSON_HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) +#elif \ + 
JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + JSON_HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_DEPRECATED(since) __declspec(deprecated) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_DEPRECATED(since) _Pragma("deprecated") + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") +#else + #define JSON_HEDLEY_DEPRECATED(since) + #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) +#endif + +#if defined(JSON_HEDLEY_UNAVAILABLE) + #undef JSON_HEDLEY_UNAVAILABLE +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(warning) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) +#else + #define JSON_HEDLEY_UNAVAILABLE(available_since) +#endif + +#if defined(JSON_HEDLEY_WARN_UNUSED_RESULT) + #undef JSON_HEDLEY_WARN_UNUSED_RESULT +#endif +#if defined(JSON_HEDLEY_WARN_UNUSED_RESULT_MSG) + #undef JSON_HEDLEY_WARN_UNUSED_RESULT_MSG +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + 
JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) + #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) +#elif (JSON_HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) + #define JSON_HEDLEY_WARN_UNUSED_RESULT JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) + #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) +#elif JSON_HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) + #define JSON_HEDLEY_WARN_UNUSED_RESULT JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) + #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) +#elif defined(_Check_return_) /* SAL */ + #define JSON_HEDLEY_WARN_UNUSED_RESULT _Check_return_ + #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ +#else + #define JSON_HEDLEY_WARN_UNUSED_RESULT + #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) +#endif + +#if defined(JSON_HEDLEY_SENTINEL) + #undef JSON_HEDLEY_SENTINEL +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(sentinel) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position))) +#else + #define JSON_HEDLEY_SENTINEL(position) +#endif + +#if defined(JSON_HEDLEY_NO_RETURN) + #undef JSON_HEDLEY_NO_RETURN +#endif +#if JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_NO_RETURN __noreturn +#elif \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_NO_RETURN 
__attribute__((__noreturn__)) +#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L + #define JSON_HEDLEY_NO_RETURN _Noreturn +#elif defined(__cplusplus) && (__cplusplus >= 201103L) + #define JSON_HEDLEY_NO_RETURN JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) +#elif \ + JSON_HEDLEY_HAS_ATTRIBUTE(noreturn) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_IAR_VERSION_CHECK(8,10,0) + #define JSON_HEDLEY_NO_RETURN __attribute__((__noreturn__)) +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) + #define JSON_HEDLEY_NO_RETURN _Pragma("does_not_return") +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_NO_RETURN __declspec(noreturn) +#elif JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) + #define JSON_HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") +#elif JSON_HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) + #define JSON_HEDLEY_NO_RETURN __attribute((noreturn)) +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(9,0,0) + #define JSON_HEDLEY_NO_RETURN __declspec(noreturn) +#else + #define JSON_HEDLEY_NO_RETURN +#endif + +#if 
defined(JSON_HEDLEY_NO_ESCAPE) + #undef JSON_HEDLEY_NO_ESCAPE +#endif +#if JSON_HEDLEY_HAS_ATTRIBUTE(noescape) + #define JSON_HEDLEY_NO_ESCAPE __attribute__((__noescape__)) +#else + #define JSON_HEDLEY_NO_ESCAPE +#endif + +#if defined(JSON_HEDLEY_UNREACHABLE) + #undef JSON_HEDLEY_UNREACHABLE +#endif +#if defined(JSON_HEDLEY_UNREACHABLE_RETURN) + #undef JSON_HEDLEY_UNREACHABLE_RETURN +#endif +#if defined(JSON_HEDLEY_ASSUME) + #undef JSON_HEDLEY_ASSUME +#endif +#if \ + JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_ASSUME(expr) __assume(expr) +#elif JSON_HEDLEY_HAS_BUILTIN(__builtin_assume) + #define JSON_HEDLEY_ASSUME(expr) __builtin_assume(expr) +#elif \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) + #if defined(__cplusplus) + #define JSON_HEDLEY_ASSUME(expr) std::_nassert(expr) + #else + #define JSON_HEDLEY_ASSUME(expr) _nassert(expr) + #endif +#endif +#if \ + (JSON_HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(JSON_HEDLEY_ARM_VERSION))) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(13,1,5) || \ + JSON_HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_UNREACHABLE() __builtin_unreachable() +#elif defined(JSON_HEDLEY_ASSUME) + #define JSON_HEDLEY_UNREACHABLE() JSON_HEDLEY_ASSUME(0) +#endif +#if !defined(JSON_HEDLEY_ASSUME) + #if defined(JSON_HEDLEY_UNREACHABLE) + #define JSON_HEDLEY_ASSUME(expr) JSON_HEDLEY_STATIC_CAST(void, ((expr) ? 
1 : (JSON_HEDLEY_UNREACHABLE(), 1))) + #else + #define JSON_HEDLEY_ASSUME(expr) JSON_HEDLEY_STATIC_CAST(void, expr) + #endif +#endif +#if defined(JSON_HEDLEY_UNREACHABLE) + #if \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) + #define JSON_HEDLEY_UNREACHABLE_RETURN(value) return (JSON_HEDLEY_STATIC_CAST(void, JSON_HEDLEY_ASSUME(0)), (value)) + #else + #define JSON_HEDLEY_UNREACHABLE_RETURN(value) JSON_HEDLEY_UNREACHABLE() + #endif +#else + #define JSON_HEDLEY_UNREACHABLE_RETURN(value) return (value) +#endif +#if !defined(JSON_HEDLEY_UNREACHABLE) + #define JSON_HEDLEY_UNREACHABLE() JSON_HEDLEY_ASSUME(0) +#endif + +JSON_HEDLEY_DIAGNOSTIC_PUSH +#if JSON_HEDLEY_HAS_WARNING("-Wpedantic") + #pragma clang diagnostic ignored "-Wpedantic" +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) + #pragma clang diagnostic ignored "-Wc++98-compat-pedantic" +#endif +#if JSON_HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) + #if defined(__clang__) + #pragma clang diagnostic ignored "-Wvariadic-macros" + #elif defined(JSON_HEDLEY_GCC_VERSION) + #pragma GCC diagnostic ignored "-Wvariadic-macros" + #endif +#endif +#if defined(JSON_HEDLEY_NON_NULL) + #undef JSON_HEDLEY_NON_NULL +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(nonnull) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) + #define JSON_HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) +#else + #define JSON_HEDLEY_NON_NULL(...) 
+#endif +JSON_HEDLEY_DIAGNOSTIC_POP + +#if defined(JSON_HEDLEY_PRINTF_FORMAT) + #undef JSON_HEDLEY_PRINTF_FORMAT +#endif +#if defined(__MINGW32__) && JSON_HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) + #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) +#elif defined(__MINGW32__) && JSON_HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) + #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) +#elif \ + JSON_HEDLEY_HAS_ATTRIBUTE(format) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(6,0,0) + #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check)) +#else + #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) +#endif + +#if defined(JSON_HEDLEY_CONSTEXPR) + 
#undef JSON_HEDLEY_CONSTEXPR +#endif +#if defined(__cplusplus) + #if __cplusplus >= 201103L + #define JSON_HEDLEY_CONSTEXPR JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) + #endif +#endif +#if !defined(JSON_HEDLEY_CONSTEXPR) + #define JSON_HEDLEY_CONSTEXPR +#endif + +#if defined(JSON_HEDLEY_PREDICT) + #undef JSON_HEDLEY_PREDICT +#endif +#if defined(JSON_HEDLEY_LIKELY) + #undef JSON_HEDLEY_LIKELY +#endif +#if defined(JSON_HEDLEY_UNLIKELY) + #undef JSON_HEDLEY_UNLIKELY +#endif +#if defined(JSON_HEDLEY_UNPREDICTABLE) + #undef JSON_HEDLEY_UNPREDICTABLE +#endif +#if JSON_HEDLEY_HAS_BUILTIN(__builtin_unpredictable) + #define JSON_HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) +#endif +#if \ + (JSON_HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(JSON_HEDLEY_PGI_VERSION)) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(9,0,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define JSON_HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) +# define JSON_HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) +# define JSON_HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) +# define JSON_HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) +# define JSON_HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) +#elif \ + (JSON_HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(JSON_HEDLEY_INTEL_CL_VERSION)) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ + 
JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \ + JSON_HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define JSON_HEDLEY_PREDICT(expr, expected, probability) \ + (((probability) >= 0.9) ? __builtin_expect((expr), (expected)) : (JSON_HEDLEY_STATIC_CAST(void, expected), (expr))) +# define JSON_HEDLEY_PREDICT_TRUE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ + })) +# define JSON_HEDLEY_PREDICT_FALSE(expr, probability) \ + (__extension__ ({ \ + double hedley_probability_ = (probability); \ + ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 1) : !!(expr))); \ + })) +# define JSON_HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) +# define JSON_HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) +#else +# define JSON_HEDLEY_PREDICT(expr, expected, probability) (JSON_HEDLEY_STATIC_CAST(void, expected), (expr)) +# define JSON_HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) +# define JSON_HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) +# define JSON_HEDLEY_LIKELY(expr) (!!(expr)) +# define JSON_HEDLEY_UNLIKELY(expr) (!!(expr)) +#endif +#if !defined(JSON_HEDLEY_UNPREDICTABLE) + #define JSON_HEDLEY_UNPREDICTABLE(expr) JSON_HEDLEY_PREDICT(expr, 1, 0.5) +#endif + +#if defined(JSON_HEDLEY_MALLOC) + #undef JSON_HEDLEY_MALLOC +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(malloc) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + 
JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_MALLOC __attribute__((__malloc__)) +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) + #define JSON_HEDLEY_MALLOC _Pragma("returns_new_memory") +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_MALLOC __declspec(restrict) +#else + #define JSON_HEDLEY_MALLOC +#endif + +#if defined(JSON_HEDLEY_PURE) + #undef JSON_HEDLEY_PURE +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(pure) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && 
defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define JSON_HEDLEY_PURE __attribute__((__pure__)) +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) +# define JSON_HEDLEY_PURE _Pragma("does_not_write_global_data") +#elif defined(__cplusplus) && \ + ( \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \ + ) +# define JSON_HEDLEY_PURE _Pragma("FUNC_IS_PURE;") +#else +# define JSON_HEDLEY_PURE +#endif + +#if defined(JSON_HEDLEY_CONST) + #undef JSON_HEDLEY_CONST +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(const) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_CONST __attribute__((__const__)) +#elif \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) + #define 
JSON_HEDLEY_CONST _Pragma("no_side_effect") +#else + #define JSON_HEDLEY_CONST JSON_HEDLEY_PURE +#endif + +#if defined(JSON_HEDLEY_RESTRICT) + #undef JSON_HEDLEY_RESTRICT +#endif +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) + #define JSON_HEDLEY_RESTRICT restrict +#elif \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ + JSON_HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ + JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ + defined(__clang__) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_RESTRICT __restrict +#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) + #define JSON_HEDLEY_RESTRICT _Restrict +#else + #define JSON_HEDLEY_RESTRICT +#endif + +#if defined(JSON_HEDLEY_INLINE) + #undef JSON_HEDLEY_INLINE +#endif +#if \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ + (defined(__cplusplus) && (__cplusplus >= 199711L)) + #define JSON_HEDLEY_INLINE inline +#elif \ + defined(JSON_HEDLEY_GCC_VERSION) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(6,2,0) + #define JSON_HEDLEY_INLINE __inline__ +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + 
JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_INLINE __inline +#else + #define JSON_HEDLEY_INLINE +#endif + +#if defined(JSON_HEDLEY_ALWAYS_INLINE) + #undef JSON_HEDLEY_ALWAYS_INLINE +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(always_inline) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + JSON_HEDLEY_IAR_VERSION_CHECK(8,10,0) +# define JSON_HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) JSON_HEDLEY_INLINE +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define JSON_HEDLEY_ALWAYS_INLINE __forceinline +#elif defined(__cplusplus) && \ + ( \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ + ) +# define JSON_HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") +#elif 
JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define JSON_HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") +#else +# define JSON_HEDLEY_ALWAYS_INLINE JSON_HEDLEY_INLINE +#endif + +#if defined(JSON_HEDLEY_NEVER_INLINE) + #undef JSON_HEDLEY_NEVER_INLINE +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(noinline) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ + JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ + (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ + (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ + (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ + JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ + JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ + JSON_HEDLEY_IAR_VERSION_CHECK(8,10,0) + #define JSON_HEDLEY_NEVER_INLINE __attribute__((__noinline__)) +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_NEVER_INLINE __declspec(noinline) +#elif JSON_HEDLEY_PGI_VERSION_CHECK(10,2,0) + #define JSON_HEDLEY_NEVER_INLINE _Pragma("noinline") +#elif JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) + #define JSON_HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) + #define JSON_HEDLEY_NEVER_INLINE _Pragma("inline=never") +#elif JSON_HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) + #define JSON_HEDLEY_NEVER_INLINE __attribute((noinline)) 
+#elif JSON_HEDLEY_PELLES_VERSION_CHECK(9,0,0) + #define JSON_HEDLEY_NEVER_INLINE __declspec(noinline) +#else + #define JSON_HEDLEY_NEVER_INLINE +#endif + +#if defined(JSON_HEDLEY_PRIVATE) + #undef JSON_HEDLEY_PRIVATE +#endif +#if defined(JSON_HEDLEY_PUBLIC) + #undef JSON_HEDLEY_PUBLIC +#endif +#if defined(JSON_HEDLEY_IMPORT) + #undef JSON_HEDLEY_IMPORT +#endif +#if defined(_WIN32) || defined(__CYGWIN__) +# define JSON_HEDLEY_PRIVATE +# define JSON_HEDLEY_PUBLIC __declspec(dllexport) +# define JSON_HEDLEY_IMPORT __declspec(dllimport) +#else +# if \ + JSON_HEDLEY_HAS_ATTRIBUTE(visibility) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + ( \ + defined(__TI_EABI__) && \ + ( \ + (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ + ) \ + ) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) +# define JSON_HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) +# define JSON_HEDLEY_PUBLIC __attribute__((__visibility__("default"))) +# else +# define JSON_HEDLEY_PRIVATE +# define JSON_HEDLEY_PUBLIC +# endif +# define JSON_HEDLEY_IMPORT extern +#endif + +#if defined(JSON_HEDLEY_NO_THROW) + #undef JSON_HEDLEY_NO_THROW +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(nothrow) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_NO_THROW __attribute__((__nothrow__)) +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) + #define JSON_HEDLEY_NO_THROW __declspec(nothrow) +#else + #define JSON_HEDLEY_NO_THROW +#endif + +#if defined(JSON_HEDLEY_FALL_THROUGH) + #undef JSON_HEDLEY_FALL_THROUGH +#endif +#if \ + 
JSON_HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) +#elif JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) + #define JSON_HEDLEY_FALL_THROUGH JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) +#elif JSON_HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) + #define JSON_HEDLEY_FALL_THROUGH JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) +#elif defined(__fallthrough) /* SAL */ + #define JSON_HEDLEY_FALL_THROUGH __fallthrough +#else + #define JSON_HEDLEY_FALL_THROUGH +#endif + +#if defined(JSON_HEDLEY_RETURNS_NON_NULL) + #undef JSON_HEDLEY_RETURNS_NON_NULL +#endif +#if \ + JSON_HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) +#elif defined(_Ret_notnull_) /* SAL */ + #define JSON_HEDLEY_RETURNS_NON_NULL _Ret_notnull_ +#else + #define JSON_HEDLEY_RETURNS_NON_NULL +#endif + +#if defined(JSON_HEDLEY_ARRAY_PARAM) + #undef JSON_HEDLEY_ARRAY_PARAM +#endif +#if \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ + !defined(__STDC_NO_VLA__) && \ + !defined(__cplusplus) && \ + !defined(JSON_HEDLEY_PGI_VERSION) && \ + !defined(JSON_HEDLEY_TINYC_VERSION) + #define JSON_HEDLEY_ARRAY_PARAM(name) (name) +#else + #define JSON_HEDLEY_ARRAY_PARAM(name) +#endif + +#if defined(JSON_HEDLEY_IS_CONSTANT) + #undef JSON_HEDLEY_IS_CONSTANT +#endif +#if defined(JSON_HEDLEY_REQUIRE_CONSTEXPR) + #undef JSON_HEDLEY_REQUIRE_CONSTEXPR +#endif +/* JSON_HEDLEY_IS_CONSTEXPR_ is for + HEDLEY INTERNAL USE ONLY. API subject to change without notice. 
*/ +#if defined(JSON_HEDLEY_IS_CONSTEXPR_) + #undef JSON_HEDLEY_IS_CONSTEXPR_ +#endif +#if \ + JSON_HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ + (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ + JSON_HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) + #define JSON_HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) +#endif +#if !defined(__cplusplus) +# if \ + JSON_HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ + JSON_HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ + JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,24) +#if defined(__INTPTR_TYPE__) + #define JSON_HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) +#else + #include + #define JSON_HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) +#endif +# elif \ + ( \ + defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ + !defined(JSON_HEDLEY_SUNPRO_VERSION) && \ + !defined(JSON_HEDLEY_PGI_VERSION) && \ + !defined(JSON_HEDLEY_IAR_VERSION)) || \ + (JSON_HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(JSON_HEDLEY_IAR_VERSION)) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ + JSON_HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ + JSON_HEDLEY_ARM_VERSION_CHECK(5,3,0) +#if defined(__INTPTR_TYPE__) + #define JSON_HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? 
(void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) +#else + #include + #define JSON_HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) * 0) : (int*) 0), int*: 1, void*: 0) +#endif +# elif \ + defined(JSON_HEDLEY_GCC_VERSION) || \ + defined(JSON_HEDLEY_INTEL_VERSION) || \ + defined(JSON_HEDLEY_TINYC_VERSION) || \ + defined(JSON_HEDLEY_TI_ARMCL_VERSION) || \ + JSON_HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ + defined(JSON_HEDLEY_TI_CL2000_VERSION) || \ + defined(JSON_HEDLEY_TI_CL6X_VERSION) || \ + defined(JSON_HEDLEY_TI_CL7X_VERSION) || \ + defined(JSON_HEDLEY_TI_CLPRU_VERSION) || \ + defined(__clang__) +# define JSON_HEDLEY_IS_CONSTEXPR_(expr) ( \ + sizeof(void) != \ + sizeof(*( \ + 1 ? \ + ((void*) ((expr) * 0L) ) : \ +((struct { char v[sizeof(void) * 2]; } *) 1) \ + ) \ + ) \ + ) +# endif +#endif +#if defined(JSON_HEDLEY_IS_CONSTEXPR_) + #if !defined(JSON_HEDLEY_IS_CONSTANT) + #define JSON_HEDLEY_IS_CONSTANT(expr) JSON_HEDLEY_IS_CONSTEXPR_(expr) + #endif + #define JSON_HEDLEY_REQUIRE_CONSTEXPR(expr) (JSON_HEDLEY_IS_CONSTEXPR_(expr) ? 
(expr) : (-1)) +#else + #if !defined(JSON_HEDLEY_IS_CONSTANT) + #define JSON_HEDLEY_IS_CONSTANT(expr) (0) + #endif + #define JSON_HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) +#endif + +#if defined(JSON_HEDLEY_BEGIN_C_DECLS) + #undef JSON_HEDLEY_BEGIN_C_DECLS +#endif +#if defined(JSON_HEDLEY_END_C_DECLS) + #undef JSON_HEDLEY_END_C_DECLS +#endif +#if defined(JSON_HEDLEY_C_DECL) + #undef JSON_HEDLEY_C_DECL +#endif +#if defined(__cplusplus) + #define JSON_HEDLEY_BEGIN_C_DECLS extern "C" { + #define JSON_HEDLEY_END_C_DECLS } + #define JSON_HEDLEY_C_DECL extern "C" +#else + #define JSON_HEDLEY_BEGIN_C_DECLS + #define JSON_HEDLEY_END_C_DECLS + #define JSON_HEDLEY_C_DECL +#endif + +#if defined(JSON_HEDLEY_STATIC_ASSERT) + #undef JSON_HEDLEY_STATIC_ASSERT +#endif +#if \ + !defined(__cplusplus) && ( \ + (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ + (JSON_HEDLEY_HAS_FEATURE(c_static_assert) && !defined(JSON_HEDLEY_INTEL_CL_VERSION)) || \ + JSON_HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ + defined(_Static_assert) \ + ) +# define JSON_HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) +#elif \ + (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ + JSON_HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define JSON_HEDLEY_STATIC_ASSERT(expr, message) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) +#else +# define JSON_HEDLEY_STATIC_ASSERT(expr, message) +#endif + +#if defined(JSON_HEDLEY_NULL) + #undef JSON_HEDLEY_NULL +#endif +#if defined(__cplusplus) + #if __cplusplus >= 201103L + #define JSON_HEDLEY_NULL JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) + #elif defined(NULL) + #define JSON_HEDLEY_NULL NULL + #else + #define JSON_HEDLEY_NULL JSON_HEDLEY_STATIC_CAST(void*, 0) + #endif +#elif defined(NULL) + #define JSON_HEDLEY_NULL NULL +#else + #define JSON_HEDLEY_NULL ((void*) 0) +#endif + +#if 
defined(JSON_HEDLEY_MESSAGE) + #undef JSON_HEDLEY_MESSAGE +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define JSON_HEDLEY_MESSAGE(msg) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + JSON_HEDLEY_PRAGMA(message msg) \ + JSON_HEDLEY_DIAGNOSTIC_POP +#elif \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(message msg) +#elif JSON_HEDLEY_CRAY_VERSION_CHECK(5,0,0) +# define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(_CRI message msg) +#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) +# define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(message(msg)) +#elif JSON_HEDLEY_PELLES_VERSION_CHECK(2,0,0) +# define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(message(msg)) +#else +# define JSON_HEDLEY_MESSAGE(msg) +#endif + +#if defined(JSON_HEDLEY_WARNING) + #undef JSON_HEDLEY_WARNING +#endif +#if JSON_HEDLEY_HAS_WARNING("-Wunknown-pragmas") +# define JSON_HEDLEY_WARNING(msg) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ + JSON_HEDLEY_PRAGMA(clang warning msg) \ + JSON_HEDLEY_DIAGNOSTIC_POP +#elif \ + JSON_HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ + JSON_HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ + JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) +# define JSON_HEDLEY_WARNING(msg) JSON_HEDLEY_PRAGMA(GCC warning msg) +#elif \ + JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) +# define JSON_HEDLEY_WARNING(msg) JSON_HEDLEY_PRAGMA(message(msg)) +#else +# define JSON_HEDLEY_WARNING(msg) JSON_HEDLEY_MESSAGE(msg) +#endif + +#if defined(JSON_HEDLEY_REQUIRE) + #undef JSON_HEDLEY_REQUIRE +#endif +#if defined(JSON_HEDLEY_REQUIRE_MSG) + #undef JSON_HEDLEY_REQUIRE_MSG +#endif +#if JSON_HEDLEY_HAS_ATTRIBUTE(diagnose_if) +# if JSON_HEDLEY_HAS_WARNING("-Wgcc-compat") +# define JSON_HEDLEY_REQUIRE(expr) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + 
__attribute__((diagnose_if(!(expr), #expr, "error"))) \ + JSON_HEDLEY_DIAGNOSTIC_POP +# define JSON_HEDLEY_REQUIRE_MSG(expr,msg) \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ + __attribute__((diagnose_if(!(expr), msg, "error"))) \ + JSON_HEDLEY_DIAGNOSTIC_POP +# else +# define JSON_HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) +# define JSON_HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) +# endif +#else +# define JSON_HEDLEY_REQUIRE(expr) +# define JSON_HEDLEY_REQUIRE_MSG(expr,msg) +#endif + +#if defined(JSON_HEDLEY_FLAGS) + #undef JSON_HEDLEY_FLAGS +#endif +#if JSON_HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || JSON_HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) + #define JSON_HEDLEY_FLAGS __attribute__((__flag_enum__)) +#else + #define JSON_HEDLEY_FLAGS +#endif + +#if defined(JSON_HEDLEY_FLAGS_CAST) + #undef JSON_HEDLEY_FLAGS_CAST +#endif +#if JSON_HEDLEY_INTEL_VERSION_CHECK(19,0,0) +# define JSON_HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ + JSON_HEDLEY_DIAGNOSTIC_PUSH \ + _Pragma("warning(disable:188)") \ + ((T) (expr)); \ + JSON_HEDLEY_DIAGNOSTIC_POP \ + })) +#else +# define JSON_HEDLEY_FLAGS_CAST(T, expr) JSON_HEDLEY_STATIC_CAST(T, expr) +#endif + +#if defined(JSON_HEDLEY_EMPTY_BASES) + #undef JSON_HEDLEY_EMPTY_BASES +#endif +#if \ + (JSON_HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !JSON_HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ + JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) + #define JSON_HEDLEY_EMPTY_BASES __declspec(empty_bases) +#else + #define JSON_HEDLEY_EMPTY_BASES +#endif + +/* Remaining macros are deprecated. 
*/ + +#if defined(JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) + #undef JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK +#endif +#if defined(__clang__) + #define JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) +#else + #define JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) +#endif + +#if defined(JSON_HEDLEY_CLANG_HAS_ATTRIBUTE) + #undef JSON_HEDLEY_CLANG_HAS_ATTRIBUTE +#endif +#define JSON_HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) JSON_HEDLEY_HAS_ATTRIBUTE(attribute) + +#if defined(JSON_HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) + #undef JSON_HEDLEY_CLANG_HAS_CPP_ATTRIBUTE +#endif +#define JSON_HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) JSON_HEDLEY_HAS_CPP_ATTRIBUTE(attribute) + +#if defined(JSON_HEDLEY_CLANG_HAS_BUILTIN) + #undef JSON_HEDLEY_CLANG_HAS_BUILTIN +#endif +#define JSON_HEDLEY_CLANG_HAS_BUILTIN(builtin) JSON_HEDLEY_HAS_BUILTIN(builtin) + +#if defined(JSON_HEDLEY_CLANG_HAS_FEATURE) + #undef JSON_HEDLEY_CLANG_HAS_FEATURE +#endif +#define JSON_HEDLEY_CLANG_HAS_FEATURE(feature) JSON_HEDLEY_HAS_FEATURE(feature) + +#if defined(JSON_HEDLEY_CLANG_HAS_EXTENSION) + #undef JSON_HEDLEY_CLANG_HAS_EXTENSION +#endif +#define JSON_HEDLEY_CLANG_HAS_EXTENSION(extension) JSON_HEDLEY_HAS_EXTENSION(extension) + +#if defined(JSON_HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) + #undef JSON_HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE +#endif +#define JSON_HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) + +#if defined(JSON_HEDLEY_CLANG_HAS_WARNING) + #undef JSON_HEDLEY_CLANG_HAS_WARNING +#endif +#define JSON_HEDLEY_CLANG_HAS_WARNING(warning) JSON_HEDLEY_HAS_WARNING(warning) + +#endif /* !defined(JSON_HEDLEY_VERSION) || (JSON_HEDLEY_VERSION < X) */ + + +// This file contains all internal macro definitions (except those affecting ABI) +// You MUST include macro_unscope.hpp at the end of json.hpp to undef all of them + +// #include + + +// exclude unsupported compilers +#if 
!defined(JSON_SKIP_UNSUPPORTED_COMPILER_CHECK) + #if defined(__clang__) + #if (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) < 30400 + #error "unsupported Clang version - see https://github.com/nlohmann/json#supported-compilers" + #endif + #elif defined(__GNUC__) && !(defined(__ICC) || defined(__INTEL_COMPILER)) + #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) < 40800 + #error "unsupported GCC version - see https://github.com/nlohmann/json#supported-compilers" + #endif + #endif +#endif + +// C++ language standard detection +// if the user manually specified the used c++ version this is skipped +#if !defined(JSON_HAS_CPP_20) && !defined(JSON_HAS_CPP_17) && !defined(JSON_HAS_CPP_14) && !defined(JSON_HAS_CPP_11) + #if (defined(__cplusplus) && __cplusplus >= 202002L) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L) + #define JSON_HAS_CPP_20 + #define JSON_HAS_CPP_17 + #define JSON_HAS_CPP_14 + #elif (defined(__cplusplus) && __cplusplus >= 201703L) || (defined(_HAS_CXX17) && _HAS_CXX17 == 1) // fix for issue #464 + #define JSON_HAS_CPP_17 + #define JSON_HAS_CPP_14 + #elif (defined(__cplusplus) && __cplusplus >= 201402L) || (defined(_HAS_CXX14) && _HAS_CXX14 == 1) + #define JSON_HAS_CPP_14 + #endif + // the cpp 11 flag is always specified because it is the minimal required version + #define JSON_HAS_CPP_11 +#endif + +#ifdef __has_include + #if __has_include() + #include + #endif +#endif + +#if !defined(JSON_HAS_FILESYSTEM) && !defined(JSON_HAS_EXPERIMENTAL_FILESYSTEM) + #ifdef JSON_HAS_CPP_17 + #if defined(__cpp_lib_filesystem) + #define JSON_HAS_FILESYSTEM 1 + #elif defined(__cpp_lib_experimental_filesystem) + #define JSON_HAS_EXPERIMENTAL_FILESYSTEM 1 + #elif !defined(__has_include) + #define JSON_HAS_EXPERIMENTAL_FILESYSTEM 1 + #elif __has_include() + #define JSON_HAS_FILESYSTEM 1 + #elif __has_include() + #define JSON_HAS_EXPERIMENTAL_FILESYSTEM 1 + #endif + + // std::filesystem does not work on MinGW GCC 8: 
https://sourceforge.net/p/mingw-w64/bugs/737/ + #if defined(__MINGW32__) && defined(__GNUC__) && __GNUC__ == 8 + #undef JSON_HAS_FILESYSTEM + #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM + #endif + + // no filesystem support before GCC 8: https://en.cppreference.com/w/cpp/compiler_support + #if defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 8 + #undef JSON_HAS_FILESYSTEM + #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM + #endif + + // no filesystem support before Clang 7: https://en.cppreference.com/w/cpp/compiler_support + #if defined(__clang_major__) && __clang_major__ < 7 + #undef JSON_HAS_FILESYSTEM + #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM + #endif + + // no filesystem support before MSVC 19.14: https://en.cppreference.com/w/cpp/compiler_support + #if defined(_MSC_VER) && _MSC_VER < 1914 + #undef JSON_HAS_FILESYSTEM + #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM + #endif + + // no filesystem support before iOS 13 + #if defined(__IPHONE_OS_VERSION_MIN_REQUIRED) && __IPHONE_OS_VERSION_MIN_REQUIRED < 130000 + #undef JSON_HAS_FILESYSTEM + #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM + #endif + + // no filesystem support before macOS Catalina + #if defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && __MAC_OS_X_VERSION_MIN_REQUIRED < 101500 + #undef JSON_HAS_FILESYSTEM + #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM + #endif + #endif +#endif + +#ifndef JSON_HAS_EXPERIMENTAL_FILESYSTEM + #define JSON_HAS_EXPERIMENTAL_FILESYSTEM 0 +#endif + +#ifndef JSON_HAS_FILESYSTEM + #define JSON_HAS_FILESYSTEM 0 +#endif + +#ifndef JSON_HAS_THREE_WAY_COMPARISON + #if defined(__cpp_impl_three_way_comparison) && __cpp_impl_three_way_comparison >= 201907L \ + && defined(__cpp_lib_three_way_comparison) && __cpp_lib_three_way_comparison >= 201907L + #define JSON_HAS_THREE_WAY_COMPARISON 1 + #else + #define JSON_HAS_THREE_WAY_COMPARISON 0 + #endif +#endif + +#ifndef JSON_HAS_RANGES + // ranges header shipping in GCC 11.1.0 (released 2021-04-27) has syntax error + #if defined(__GLIBCXX__) && __GLIBCXX__ == 20210427 
+ #define JSON_HAS_RANGES 0 + #elif defined(__cpp_lib_ranges) + #define JSON_HAS_RANGES 1 + #else + #define JSON_HAS_RANGES 0 + #endif +#endif + +#ifdef JSON_HAS_CPP_17 + #define JSON_INLINE_VARIABLE inline +#else + #define JSON_INLINE_VARIABLE +#endif + +#if JSON_HEDLEY_HAS_ATTRIBUTE(no_unique_address) + #define JSON_NO_UNIQUE_ADDRESS [[no_unique_address]] +#else + #define JSON_NO_UNIQUE_ADDRESS +#endif + +// disable documentation warnings on clang +#if defined(__clang__) + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wdocumentation" + #pragma clang diagnostic ignored "-Wdocumentation-unknown-command" +#endif + +// allow disabling exceptions +#if (defined(__cpp_exceptions) || defined(__EXCEPTIONS) || defined(_CPPUNWIND)) && !defined(JSON_NOEXCEPTION) + #define JSON_THROW(exception) throw exception + #define JSON_TRY try + #define JSON_CATCH(exception) catch(exception) + #define JSON_INTERNAL_CATCH(exception) catch(exception) +#else + #include + #define JSON_THROW(exception) std::abort() + #define JSON_TRY if(true) + #define JSON_CATCH(exception) if(false) + #define JSON_INTERNAL_CATCH(exception) if(false) +#endif + +// override exception macros +#if defined(JSON_THROW_USER) + #undef JSON_THROW + #define JSON_THROW JSON_THROW_USER +#endif +#if defined(JSON_TRY_USER) + #undef JSON_TRY + #define JSON_TRY JSON_TRY_USER +#endif +#if defined(JSON_CATCH_USER) + #undef JSON_CATCH + #define JSON_CATCH JSON_CATCH_USER + #undef JSON_INTERNAL_CATCH + #define JSON_INTERNAL_CATCH JSON_CATCH_USER +#endif +#if defined(JSON_INTERNAL_CATCH_USER) + #undef JSON_INTERNAL_CATCH + #define JSON_INTERNAL_CATCH JSON_INTERNAL_CATCH_USER +#endif + +// allow overriding assert +#if !defined(JSON_ASSERT) + #include // assert + #define JSON_ASSERT(x) assert(x) +#endif + +// allow to access some private functions (needed by the test suite) +#if defined(JSON_TESTS_PRIVATE) + #define JSON_PRIVATE_UNLESS_TESTED public +#else + #define JSON_PRIVATE_UNLESS_TESTED private +#endif 
+ +/*! +@brief macro to briefly define a mapping between an enum and JSON +@def NLOHMANN_JSON_SERIALIZE_ENUM +@since version 3.4.0 +*/ +#define NLOHMANN_JSON_SERIALIZE_ENUM(ENUM_TYPE, ...) \ + template \ + inline void to_json(BasicJsonType& j, const ENUM_TYPE& e) \ + { \ + static_assert(std::is_enum::value, #ENUM_TYPE " must be an enum!"); \ + static const std::pair m[] = __VA_ARGS__; \ + auto it = std::find_if(std::begin(m), std::end(m), \ + [e](const std::pair& ej_pair) -> bool \ + { \ + return ej_pair.first == e; \ + }); \ + j = ((it != std::end(m)) ? it : std::begin(m))->second; \ + } \ + template \ + inline void from_json(const BasicJsonType& j, ENUM_TYPE& e) \ + { \ + static_assert(std::is_enum::value, #ENUM_TYPE " must be an enum!"); \ + static const std::pair m[] = __VA_ARGS__; \ + auto it = std::find_if(std::begin(m), std::end(m), \ + [&j](const std::pair& ej_pair) -> bool \ + { \ + return ej_pair.second == j; \ + }); \ + e = ((it != std::end(m)) ? it : std::begin(m))->first; \ + } + +// Ugly macros to avoid uglier copy-paste when specializing basic_json. They +// may be removed in the future once the class is split. + +#define NLOHMANN_BASIC_JSON_TPL_DECLARATION \ + template class ObjectType, \ + template class ArrayType, \ + class StringType, class BooleanType, class NumberIntegerType, \ + class NumberUnsignedType, class NumberFloatType, \ + template class AllocatorType, \ + template class JSONSerializer, \ + class BinaryType, \ + class CustomBaseClass> + +#define NLOHMANN_BASIC_JSON_TPL \ + basic_json + +// Macros to simplify conversion from/to types + +#define NLOHMANN_JSON_EXPAND( x ) x +#define NLOHMANN_JSON_GET_MACRO(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52, _53, _54, _55, _56, _57, _58, _59, _60, _61, _62, _63, _64, NAME,...) 
NAME +#define NLOHMANN_JSON_PASTE(...) NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_GET_MACRO(__VA_ARGS__, \ + NLOHMANN_JSON_PASTE64, \ + NLOHMANN_JSON_PASTE63, \ + NLOHMANN_JSON_PASTE62, \ + NLOHMANN_JSON_PASTE61, \ + NLOHMANN_JSON_PASTE60, \ + NLOHMANN_JSON_PASTE59, \ + NLOHMANN_JSON_PASTE58, \ + NLOHMANN_JSON_PASTE57, \ + NLOHMANN_JSON_PASTE56, \ + NLOHMANN_JSON_PASTE55, \ + NLOHMANN_JSON_PASTE54, \ + NLOHMANN_JSON_PASTE53, \ + NLOHMANN_JSON_PASTE52, \ + NLOHMANN_JSON_PASTE51, \ + NLOHMANN_JSON_PASTE50, \ + NLOHMANN_JSON_PASTE49, \ + NLOHMANN_JSON_PASTE48, \ + NLOHMANN_JSON_PASTE47, \ + NLOHMANN_JSON_PASTE46, \ + NLOHMANN_JSON_PASTE45, \ + NLOHMANN_JSON_PASTE44, \ + NLOHMANN_JSON_PASTE43, \ + NLOHMANN_JSON_PASTE42, \ + NLOHMANN_JSON_PASTE41, \ + NLOHMANN_JSON_PASTE40, \ + NLOHMANN_JSON_PASTE39, \ + NLOHMANN_JSON_PASTE38, \ + NLOHMANN_JSON_PASTE37, \ + NLOHMANN_JSON_PASTE36, \ + NLOHMANN_JSON_PASTE35, \ + NLOHMANN_JSON_PASTE34, \ + NLOHMANN_JSON_PASTE33, \ + NLOHMANN_JSON_PASTE32, \ + NLOHMANN_JSON_PASTE31, \ + NLOHMANN_JSON_PASTE30, \ + NLOHMANN_JSON_PASTE29, \ + NLOHMANN_JSON_PASTE28, \ + NLOHMANN_JSON_PASTE27, \ + NLOHMANN_JSON_PASTE26, \ + NLOHMANN_JSON_PASTE25, \ + NLOHMANN_JSON_PASTE24, \ + NLOHMANN_JSON_PASTE23, \ + NLOHMANN_JSON_PASTE22, \ + NLOHMANN_JSON_PASTE21, \ + NLOHMANN_JSON_PASTE20, \ + NLOHMANN_JSON_PASTE19, \ + NLOHMANN_JSON_PASTE18, \ + NLOHMANN_JSON_PASTE17, \ + NLOHMANN_JSON_PASTE16, \ + NLOHMANN_JSON_PASTE15, \ + NLOHMANN_JSON_PASTE14, \ + NLOHMANN_JSON_PASTE13, \ + NLOHMANN_JSON_PASTE12, \ + NLOHMANN_JSON_PASTE11, \ + NLOHMANN_JSON_PASTE10, \ + NLOHMANN_JSON_PASTE9, \ + NLOHMANN_JSON_PASTE8, \ + NLOHMANN_JSON_PASTE7, \ + NLOHMANN_JSON_PASTE6, \ + NLOHMANN_JSON_PASTE5, \ + NLOHMANN_JSON_PASTE4, \ + NLOHMANN_JSON_PASTE3, \ + NLOHMANN_JSON_PASTE2, \ + NLOHMANN_JSON_PASTE1)(__VA_ARGS__)) +#define NLOHMANN_JSON_PASTE2(func, v1) func(v1) +#define NLOHMANN_JSON_PASTE3(func, v1, v2) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE2(func, v2) +#define 
NLOHMANN_JSON_PASTE4(func, v1, v2, v3) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE3(func, v2, v3) +#define NLOHMANN_JSON_PASTE5(func, v1, v2, v3, v4) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE4(func, v2, v3, v4) +#define NLOHMANN_JSON_PASTE6(func, v1, v2, v3, v4, v5) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE5(func, v2, v3, v4, v5) +#define NLOHMANN_JSON_PASTE7(func, v1, v2, v3, v4, v5, v6) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE6(func, v2, v3, v4, v5, v6) +#define NLOHMANN_JSON_PASTE8(func, v1, v2, v3, v4, v5, v6, v7) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE7(func, v2, v3, v4, v5, v6, v7) +#define NLOHMANN_JSON_PASTE9(func, v1, v2, v3, v4, v5, v6, v7, v8) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE8(func, v2, v3, v4, v5, v6, v7, v8) +#define NLOHMANN_JSON_PASTE10(func, v1, v2, v3, v4, v5, v6, v7, v8, v9) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE9(func, v2, v3, v4, v5, v6, v7, v8, v9) +#define NLOHMANN_JSON_PASTE11(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE10(func, v2, v3, v4, v5, v6, v7, v8, v9, v10) +#define NLOHMANN_JSON_PASTE12(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE11(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11) +#define NLOHMANN_JSON_PASTE13(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE12(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12) +#define NLOHMANN_JSON_PASTE14(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE13(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13) +#define NLOHMANN_JSON_PASTE15(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE14(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14) +#define NLOHMANN_JSON_PASTE16(func, v1, v2, v3, v4, v5, v6, v7, v8, 
v9, v10, v11, v12, v13, v14, v15) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE15(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15) +#define NLOHMANN_JSON_PASTE17(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE16(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16) +#define NLOHMANN_JSON_PASTE18(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE17(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17) +#define NLOHMANN_JSON_PASTE19(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE18(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18) +#define NLOHMANN_JSON_PASTE20(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE19(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19) +#define NLOHMANN_JSON_PASTE21(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE20(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20) +#define NLOHMANN_JSON_PASTE22(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE21(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21) +#define NLOHMANN_JSON_PASTE23(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE22(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, 
v22) +#define NLOHMANN_JSON_PASTE24(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE23(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23) +#define NLOHMANN_JSON_PASTE25(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE24(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24) +#define NLOHMANN_JSON_PASTE26(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE25(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25) +#define NLOHMANN_JSON_PASTE27(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE26(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26) +#define NLOHMANN_JSON_PASTE28(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE27(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27) +#define NLOHMANN_JSON_PASTE29(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE28(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28) +#define 
NLOHMANN_JSON_PASTE30(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE29(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29) +#define NLOHMANN_JSON_PASTE31(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE30(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30) +#define NLOHMANN_JSON_PASTE32(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE31(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31) +#define NLOHMANN_JSON_PASTE33(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE32(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32) +#define NLOHMANN_JSON_PASTE34(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE33(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33) +#define NLOHMANN_JSON_PASTE35(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, 
v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE34(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34) +#define NLOHMANN_JSON_PASTE36(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE35(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35) +#define NLOHMANN_JSON_PASTE37(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE36(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36) +#define NLOHMANN_JSON_PASTE38(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE37(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37) +#define NLOHMANN_JSON_PASTE39(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE38(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, 
v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38) +#define NLOHMANN_JSON_PASTE40(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE39(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39) +#define NLOHMANN_JSON_PASTE41(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE40(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40) +#define NLOHMANN_JSON_PASTE42(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE41(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41) +#define NLOHMANN_JSON_PASTE43(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE42(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, 
v42) +#define NLOHMANN_JSON_PASTE44(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE43(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43) +#define NLOHMANN_JSON_PASTE45(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE44(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44) +#define NLOHMANN_JSON_PASTE46(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE45(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45) +#define NLOHMANN_JSON_PASTE47(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE46(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, 
v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46) +#define NLOHMANN_JSON_PASTE48(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE47(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47) +#define NLOHMANN_JSON_PASTE49(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE48(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48) +#define NLOHMANN_JSON_PASTE50(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE49(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49) +#define NLOHMANN_JSON_PASTE51(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, 
v46, v47, v48, v49, v50) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE50(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50) +#define NLOHMANN_JSON_PASTE52(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE51(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51) +#define NLOHMANN_JSON_PASTE53(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE52(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52) +#define NLOHMANN_JSON_PASTE54(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE53(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, 
v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53) +#define NLOHMANN_JSON_PASTE55(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE54(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54) +#define NLOHMANN_JSON_PASTE56(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE55(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55) +#define NLOHMANN_JSON_PASTE57(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE56(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56) +#define 
NLOHMANN_JSON_PASTE58(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE57(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57) +#define NLOHMANN_JSON_PASTE59(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE58(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58) +#define NLOHMANN_JSON_PASTE60(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE59(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59) +#define NLOHMANN_JSON_PASTE61(func, v1, v2, 
v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE60(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60) +#define NLOHMANN_JSON_PASTE62(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE61(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61) +#define NLOHMANN_JSON_PASTE63(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE62(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, 
v61, v62) +#define NLOHMANN_JSON_PASTE64(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62, v63) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE63(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62, v63) + +#define NLOHMANN_JSON_TO(v1) nlohmann_json_j[#v1] = nlohmann_json_t.v1; +#define NLOHMANN_JSON_FROM(v1) nlohmann_json_j.at(#v1).get_to(nlohmann_json_t.v1); +#define NLOHMANN_JSON_FROM_WITH_DEFAULT(v1) nlohmann_json_t.v1 = nlohmann_json_j.value(#v1, nlohmann_json_default_obj.v1); + +/*! +@brief macro +@def NLOHMANN_DEFINE_TYPE_INTRUSIVE +@since version 3.9.0 +*/ +#define NLOHMANN_DEFINE_TYPE_INTRUSIVE(Type, ...) \ + friend void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ + friend void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM, __VA_ARGS__)) } + +#define NLOHMANN_DEFINE_TYPE_INTRUSIVE_WITH_DEFAULT(Type, ...) \ + friend void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ + friend void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { const Type nlohmann_json_default_obj{}; NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM_WITH_DEFAULT, __VA_ARGS__)) } + +/*! 
+@brief macro +@def NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE +@since version 3.9.0 +*/ +#define NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(Type, ...) \ + inline void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ + inline void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM, __VA_ARGS__)) } + +#define NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT(Type, ...) \ + inline void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ + inline void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { const Type nlohmann_json_default_obj{}; NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM_WITH_DEFAULT, __VA_ARGS__)) } + + +// inspired from https://stackoverflow.com/a/26745591 +// allows to call any std function as if (e.g. 
with begin): +// using std::begin; begin(x); +// +// it allows using the detected idiom to retrieve the return type +// of such an expression +#define NLOHMANN_CAN_CALL_STD_FUNC_IMPL(std_name) \ + namespace detail { \ + using std::std_name; \ + \ + template \ + using result_of_##std_name = decltype(std_name(std::declval()...)); \ + } \ + \ + namespace detail2 { \ + struct std_name##_tag \ + { \ + }; \ + \ + template \ + std_name##_tag std_name(T&&...); \ + \ + template \ + using result_of_##std_name = decltype(std_name(std::declval()...)); \ + \ + template \ + struct would_call_std_##std_name \ + { \ + static constexpr auto const value = ::nlohmann::detail:: \ + is_detected_exact::value; \ + }; \ + } /* namespace detail2 */ \ + \ + template \ + struct would_call_std_##std_name : detail2::would_call_std_##std_name \ + { \ + } + +#ifndef JSON_USE_IMPLICIT_CONVERSIONS + #define JSON_USE_IMPLICIT_CONVERSIONS 1 +#endif + +#if JSON_USE_IMPLICIT_CONVERSIONS + #define JSON_EXPLICIT +#else + #define JSON_EXPLICIT explicit +#endif + +#ifndef JSON_DISABLE_ENUM_SERIALIZATION + #define JSON_DISABLE_ENUM_SERIALIZATION 0 +#endif + +#ifndef JSON_USE_GLOBAL_UDLS + #define JSON_USE_GLOBAL_UDLS 1 +#endif + +#if JSON_HAS_THREE_WAY_COMPARISON + #include // partial_ordering +#endif + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +/////////////////////////// +// JSON type enumeration // +/////////////////////////// + +/*! +@brief the JSON type enumeration + +This enumeration collects the different JSON types. 
It is internally used to +distinguish the stored values, and the functions @ref basic_json::is_null(), +@ref basic_json::is_object(), @ref basic_json::is_array(), +@ref basic_json::is_string(), @ref basic_json::is_boolean(), +@ref basic_json::is_number() (with @ref basic_json::is_number_integer(), +@ref basic_json::is_number_unsigned(), and @ref basic_json::is_number_float()), +@ref basic_json::is_discarded(), @ref basic_json::is_primitive(), and +@ref basic_json::is_structured() rely on it. + +@note There are three enumeration entries (number_integer, number_unsigned, and +number_float), because the library distinguishes these three types for numbers: +@ref basic_json::number_unsigned_t is used for unsigned integers, +@ref basic_json::number_integer_t is used for signed integers, and +@ref basic_json::number_float_t is used for floating-point numbers or to +approximate integers which do not fit in the limits of their respective type. + +@sa see @ref basic_json::basic_json(const value_t value_type) -- create a JSON +value with the default value for a given type + +@since version 1.0.0 +*/ +enum class value_t : std::uint8_t +{ + null, ///< null value + object, ///< object (unordered set of name/value pairs) + array, ///< array (ordered collection of values) + string, ///< string value + boolean, ///< boolean value + number_integer, ///< number value (signed integer) + number_unsigned, ///< number value (unsigned integer) + number_float, ///< number value (floating-point) + binary, ///< binary array (ordered collection of bytes) + discarded ///< discarded by the parser callback function +}; + +/*! 
+@brief comparison operator for JSON types + +Returns an ordering that is similar to Python: +- order: null < boolean < number < object < array < string < binary +- furthermore, each type is not smaller than itself +- discarded values are not comparable +- binary is represented as a b"" string in python and directly comparable to a + string; however, making a binary array directly comparable with a string would + be surprising behavior in a JSON file. + +@since version 1.0.0 +*/ +#if JSON_HAS_THREE_WAY_COMPARISON + inline std::partial_ordering operator<=>(const value_t lhs, const value_t rhs) noexcept // *NOPAD* +#else + inline bool operator<(const value_t lhs, const value_t rhs) noexcept +#endif +{ + static constexpr std::array order = {{ + 0 /* null */, 3 /* object */, 4 /* array */, 5 /* string */, + 1 /* boolean */, 2 /* integer */, 2 /* unsigned */, 2 /* float */, + 6 /* binary */ + } + }; + + const auto l_index = static_cast(lhs); + const auto r_index = static_cast(rhs); +#if JSON_HAS_THREE_WAY_COMPARISON + if (l_index < order.size() && r_index < order.size()) + { + return order[l_index] <=> order[r_index]; // *NOPAD* + } + return std::partial_ordering::unordered; +#else + return l_index < order.size() && r_index < order.size() && order[l_index] < order[r_index]; +#endif +} + +// GCC selects the built-in operator< over an operator rewritten from +// a user-defined spaceship operator +// Clang, MSVC, and ICC select the rewritten candidate +// (see GCC bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105200) +#if JSON_HAS_THREE_WAY_COMPARISON && defined(__GNUC__) +inline bool operator<(const value_t lhs, const value_t rhs) noexcept +{ + return std::is_lt(lhs <=> rhs); // *NOPAD* +} +#endif + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 
2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +// #include + + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +/*! +@brief replace all occurrences of a substring by another string + +@param[in,out] s the string to manipulate; changed so that all + occurrences of @a f are replaced with @a t +@param[in] f the substring to replace with @a t +@param[in] t the string to replace @a f + +@pre The search string @a f must not be empty. **This precondition is +enforced with an assertion.** + +@since version 2.0.0 +*/ +template +inline void replace_substring(StringType& s, const StringType& f, + const StringType& t) +{ + JSON_ASSERT(!f.empty()); + for (auto pos = s.find(f); // find first occurrence of f + pos != StringType::npos; // make sure f was found + s.replace(pos, f.size(), t), // replace with t, and + pos = s.find(f, pos + t.size())) // find next occurrence of f + {} +} + +/*! + * @brief string escaping as described in RFC 6901 (Sect. 4) + * @param[in] s string to escape + * @return escaped string + * + * Note the order of escaping "~" to "~0" and "/" to "~1" is important. + */ +template +inline StringType escape(StringType s) +{ + replace_substring(s, StringType{"~"}, StringType{"~0"}); + replace_substring(s, StringType{"/"}, StringType{"~1"}); + return s; +} + +/*! + * @brief string unescaping as described in RFC 6901 (Sect. 4) + * @param[in] s string to unescape + * @return unescaped string + * + * Note the order of escaping "~1" to "/" and "~0" to "~" is important. 
+ */ +template +static void unescape(StringType& s) +{ + replace_substring(s, StringType{"~1"}, StringType{"/"}); + replace_substring(s, StringType{"~0"}, StringType{"~"}); +} + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include // size_t + +// #include + + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +/// struct to capture the start position of the current token +struct position_t +{ + /// the total number of characters read + std::size_t chars_read_total = 0; + /// the number of characters read in the current line + std::size_t chars_read_current_line = 0; + /// the number of lines read + std::size_t lines_read = 0; + + /// conversion to size_t to preserve SAX interface + constexpr operator size_t() const + { + return chars_read_total; + } +}; + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END + +// #include + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-FileCopyrightText: 2018 The Abseil Authors +// SPDX-License-Identifier: MIT + + + +#include // array +#include // size_t +#include // conditional, enable_if, false_type, integral_constant, is_constructible, is_integral, is_same, remove_cv, remove_reference, true_type +#include // index_sequence, make_index_sequence, index_sequence_for + +// #include + + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +template +using uncvref_t = typename std::remove_cv::type>::type; + +#ifdef JSON_HAS_CPP_14 + +// the following utilities are natively available in C++14 +using std::enable_if_t; +using std::index_sequence; +using 
std::make_index_sequence; +using std::index_sequence_for; + +#else + +// alias templates to reduce boilerplate +template +using enable_if_t = typename std::enable_if::type; + +// The following code is taken from https://github.com/abseil/abseil-cpp/blob/10cb35e459f5ecca5b2ff107635da0bfa41011b4/absl/utility/utility.h +// which is part of Google Abseil (https://github.com/abseil/abseil-cpp), licensed under the Apache License 2.0. + +//// START OF CODE FROM GOOGLE ABSEIL + +// integer_sequence +// +// Class template representing a compile-time integer sequence. An instantiation +// of `integer_sequence` has a sequence of integers encoded in its +// type through its template arguments (which is a common need when +// working with C++11 variadic templates). `absl::integer_sequence` is designed +// to be a drop-in replacement for C++14's `std::integer_sequence`. +// +// Example: +// +// template< class T, T... Ints > +// void user_function(integer_sequence); +// +// int main() +// { +// // user_function's `T` will be deduced to `int` and `Ints...` +// // will be deduced to `0, 1, 2, 3, 4`. +// user_function(make_integer_sequence()); +// } +template +struct integer_sequence +{ + using value_type = T; + static constexpr std::size_t size() noexcept + { + return sizeof...(Ints); + } +}; + +// index_sequence +// +// A helper template for an `integer_sequence` of `size_t`, +// `absl::index_sequence` is designed to be a drop-in replacement for C++14's +// `std::index_sequence`. +template +using index_sequence = integer_sequence; + +namespace utility_internal +{ + +template +struct Extend; + +// Note that SeqSize == sizeof...(Ints). It's passed explicitly for efficiency. +template +struct Extend, SeqSize, 0> +{ + using type = integer_sequence < T, Ints..., (Ints + SeqSize)... >; +}; + +template +struct Extend, SeqSize, 1> +{ + using type = integer_sequence < T, Ints..., (Ints + SeqSize)..., 2 * SeqSize >; +}; + +// Recursion helper for 'make_integer_sequence'. 
+// 'Gen::type' is an alias for 'integer_sequence'. +template +struct Gen +{ + using type = + typename Extend < typename Gen < T, N / 2 >::type, N / 2, N % 2 >::type; +}; + +template +struct Gen +{ + using type = integer_sequence; +}; + +} // namespace utility_internal + +// Compile-time sequences of integers + +// make_integer_sequence +// +// This template alias is equivalent to +// `integer_sequence`, and is designed to be a drop-in +// replacement for C++14's `std::make_integer_sequence`. +template +using make_integer_sequence = typename utility_internal::Gen::type; + +// make_index_sequence +// +// This template alias is equivalent to `index_sequence<0, 1, ..., N-1>`, +// and is designed to be a drop-in replacement for C++14's +// `std::make_index_sequence`. +template +using make_index_sequence = make_integer_sequence; + +// index_sequence_for +// +// Converts a typename pack into an index sequence of the same length, and +// is designed to be a drop-in replacement for C++14's +// `std::index_sequence_for()` +template +using index_sequence_for = make_index_sequence; + +//// END OF CODE FROM GOOGLE ABSEIL + +#endif + +// dispatch utility (taken from ranges-v3) +template struct priority_tag : priority_tag < N - 1 > {}; +template<> struct priority_tag<0> {}; + +// taken from ranges-v3 +template +struct static_const +{ + static JSON_INLINE_VARIABLE constexpr T value{}; +}; + +#ifndef JSON_HAS_CPP_17 + template + constexpr T static_const::value; +#endif + +template +inline constexpr std::array make_array(Args&& ... 
args) +{ + return std::array {{static_cast(std::forward(args))...}}; +} + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include // numeric_limits +#include // false_type, is_constructible, is_integral, is_same, true_type +#include // declval +#include // tuple + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +#include // random_access_iterator_tag + +// #include + +// #include + +// #include + + +NLOHMANN_JSON_NAMESPACE_BEGIN +namespace detail +{ + +template +struct iterator_types {}; + +template +struct iterator_types < + It, + void_t> +{ + using difference_type = typename It::difference_type; + using value_type = typename It::value_type; + using pointer = typename It::pointer; + using reference = typename It::reference; + using iterator_category = typename It::iterator_category; +}; + +// This is required as some compilers implement std::iterator_traits in a way that +// doesn't work with SFINAE. See https://github.com/nlohmann/json/issues/1341. 
+template +struct iterator_traits +{ +}; + +template +struct iterator_traits < T, enable_if_t < !std::is_pointer::value >> + : iterator_types +{ +}; + +template +struct iterator_traits::value>> +{ + using iterator_category = std::random_access_iterator_tag; + using value_type = T; + using difference_type = ptrdiff_t; + using pointer = T*; + using reference = T&; +}; + +} // namespace detail +NLOHMANN_JSON_NAMESPACE_END + +// #include + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +// #include + + +NLOHMANN_JSON_NAMESPACE_BEGIN + +NLOHMANN_CAN_CALL_STD_FUNC_IMPL(begin); + +NLOHMANN_JSON_NAMESPACE_END + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + + + +// #include + + +NLOHMANN_JSON_NAMESPACE_BEGIN + +NLOHMANN_CAN_CALL_STD_FUNC_IMPL(end); + +NLOHMANN_JSON_NAMESPACE_END + +// #include + +// #include + +// #include +// __ _____ _____ _____ +// __| | __| | | | JSON for Modern C++ +// | | |__ | | | | | | version 3.11.2 +// |_____|_____|_____|_|___| https://github.com/nlohmann/json +// +// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann +// SPDX-License-Identifier: MIT + +#ifndef INCLUDE_NLOHMANN_JSON_FWD_HPP_ + #define INCLUDE_NLOHMANN_JSON_FWD_HPP_ + + #include // int64_t, uint64_t + #include // map + #include // allocator + #include // string + #include // vector + + // #include + + + /*! + @brief namespace for Niels Lohmann + @see https://github.com/nlohmann + @since version 1.0.0 + */ + NLOHMANN_JSON_NAMESPACE_BEGIN + + /*! 
+ @brief default JSONSerializer template argument + + This serializer ignores the template arguments and uses ADL + ([argument-dependent lookup](https://en.cppreference.com/w/cpp/language/adl)) + for serialization. + */ + template + struct adl_serializer; + + /// a class to store JSON values + /// @sa https://json.nlohmann.me/api/basic_json/ + template class ObjectType = + std::map, + template class ArrayType = std::vector, + class StringType = std::string, class BooleanType = bool, + class NumberIntegerType = std::int64_t, + class NumberUnsignedType = std::uint64_t, + class NumberFloatType = double, + template class AllocatorType = std::allocator, + template class JSONSerializer = + adl_serializer, + class BinaryType = std::vector, // cppcheck-suppress syntaxError + class CustomBaseClass = void> + class basic_json; + + /// @brief JSON Pointer defines a string syntax for identifying a specific value within a JSON document + /// @sa https://json.nlohmann.me/api/json_pointer/ + template + class json_pointer; + + /*! + @brief default specialization + @sa https://json.nlohmann.me/api/json/ + */ + using json = basic_json<>; + + /// @brief a minimal map-like container that preserves insertion order + /// @sa https://json.nlohmann.me/api/ordered_map/ + template + struct ordered_map; + + /// @brief specialization that maintains the insertion order of object keys + /// @sa https://json.nlohmann.me/api/ordered_json/ + using ordered_json = basic_json; + + NLOHMANN_JSON_NAMESPACE_END + +#endif // INCLUDE_NLOHMANN_JSON_FWD_HPP_ + + +NLOHMANN_JSON_NAMESPACE_BEGIN +/*! +@brief detail namespace with internal helper functions + +This namespace collects functions that should not be exposed, +implementations of some @ref basic_json methods, and meta-programming helpers. + +@since version 2.1.0 +*/ +namespace detail +{ + +///////////// +// helpers // +///////////// + +// Note to maintainers: +// +// Every trait in this file expects a non CV-qualified type. 
+// The only exceptions are in the 'aliases for detected' section +// (i.e. those of the form: decltype(T::member_function(std::declval()))) +// +// In this case, T has to be properly CV-qualified to constraint the function arguments +// (e.g. to_json(BasicJsonType&, const T&)) + +template struct is_basic_json : std::false_type {}; + +NLOHMANN_BASIC_JSON_TPL_DECLARATION +struct is_basic_json : std::true_type {}; + +// used by exceptions create() member functions +// true_type for pointer to possibly cv-qualified basic_json or std::nullptr_t +// false_type otherwise +template +struct is_basic_json_context : + std::integral_constant < bool, + is_basic_json::type>::type>::value + || std::is_same::value > +{}; + +////////////////////// +// json_ref helpers // +////////////////////// + +template +class json_ref; + +template +struct is_json_ref : std::false_type {}; + +template +struct is_json_ref> : std::true_type {}; + +////////////////////////// +// aliases for detected // +////////////////////////// + +template +using mapped_type_t = typename T::mapped_type; + +template +using key_type_t = typename T::key_type; + +template +using value_type_t = typename T::value_type; + +template +using difference_type_t = typename T::difference_type; + +template +using pointer_t = typename T::pointer; + +template +using reference_t = typename T::reference; + +template +using iterator_category_t = typename T::iterator_category; + +template +using to_json_function = decltype(T::to_json(std::declval()...)); + +template +using from_json_function = decltype(T::from_json(std::declval()...)); + +template +using get_template_function = decltype(std::declval().template get()); + +// trait checking if JSONSerializer::from_json(json const&, udt&) exists +template +struct has_from_json : std::false_type {}; + +// trait checking if j.get is valid +// use this trait instead of std::is_constructible or std::is_convertible, +// both rely on, or make use of implicit conversions, and thus fail when T 
+// has several constructors/operator= (see https://github.com/nlohmann/json/issues/958) +template +struct is_getable +{ + static constexpr bool value = is_detected::value; +}; + +template +struct has_from_json < BasicJsonType, T, enable_if_t < !is_basic_json::value >> +{ + using serializer = typename BasicJsonType::template json_serializer; + + static constexpr bool value = + is_detected_exact::value; +}; + +// This trait checks if JSONSerializer::from_json(json const&) exists +// this overload is used for non-default-constructible user-defined-types +template +struct has_non_default_from_json : std::false_type {}; + +template +struct has_non_default_from_json < BasicJsonType, T, enable_if_t < !is_basic_json::value >> +{ + using serializer = typename BasicJsonType::template json_serializer; + + static constexpr bool value = + is_detected_exact::value; +}; + +// This trait checks if BasicJsonType::json_serializer::to_json exists +// Do not evaluate the trait when T is a basic_json type, to avoid template instantiation infinite recursion. 
+template +struct has_to_json : std::false_type {}; + +template +struct has_to_json < BasicJsonType, T, enable_if_t < !is_basic_json::value >> +{ + using serializer = typename BasicJsonType::template json_serializer; + + static constexpr bool value = + is_detected_exact::value; +}; + +template +using detect_key_compare = typename T::key_compare; + +template +struct has_key_compare : std::integral_constant::value> {}; + +// obtains the actual object key comparator +template +struct actual_object_comparator +{ + using object_t = typename BasicJsonType::object_t; + using object_comparator_t = typename BasicJsonType::default_object_comparator_t; + using type = typename std::conditional < has_key_compare::value, + typename object_t::key_compare, object_comparator_t>::type; +}; + +template +using actual_object_comparator_t = typename actual_object_comparator::type; + +/////////////////// +// is_ functions // +/////////////////// + +// https://en.cppreference.com/w/cpp/types/conjunction +template struct conjunction : std::true_type { }; +template struct conjunction : B { }; +template +struct conjunction +: std::conditional(B::value), conjunction, B>::type {}; + +// https://en.cppreference.com/w/cpp/types/negation +template struct negation : std::integral_constant < bool, !B::value > { }; + +// Reimplementation of is_constructible and is_default_constructible, due to them being broken for +// std::pair and std::tuple until LWG 2367 fix (see https://cplusplus.github.io/LWG/lwg-defects.html#2367). +// This causes compile errors in e.g. clang 3.5 or gcc 4.9. 
+template +struct is_default_constructible : std::is_default_constructible {}; + +template +struct is_default_constructible> + : conjunction, is_default_constructible> {}; + +template +struct is_default_constructible> + : conjunction, is_default_constructible> {}; + +template +struct is_default_constructible> + : conjunction...> {}; + +template +struct is_default_constructible> + : conjunction...> {}; + + +template +struct is_constructible : std::is_constructible {}; + +template +struct is_constructible> : is_default_constructible> {}; + +template +struct is_constructible> : is_default_constructible> {}; + +template +struct is_constructible> : is_default_constructible> {}; + +template +struct is_constructible> : is_default_constructible> {}; + + +template +struct is_iterator_traits : std::false_type {}; + +template +struct is_iterator_traits> +{ + private: + using traits = iterator_traits; + + public: + static constexpr auto value = + is_detected::value && + is_detected::value && + is_detected::value && + is_detected::value && + is_detected::value; +}; + +template +struct is_range +{ + private: + using t_ref = typename std::add_lvalue_reference::type; + + using iterator = detected_t; + using sentinel = detected_t; + + // to be 100% correct, it should use https://en.cppreference.com/w/cpp/iterator/input_or_output_iterator + // and https://en.cppreference.com/w/cpp/iterator/sentinel_for + // but reimplementing these would be too much work, as a lot of other concepts are used underneath + static constexpr auto is_iterator_begin = + is_iterator_traits>::value; + + public: + static constexpr bool value = !std::is_same::value && !std::is_same::value && is_iterator_begin; +}; + +template +using iterator_t = enable_if_t::value, result_of_begin())>>; + +template +using range_value_t = value_type_t>>; + +// The following implementation of is_complete_type is taken from +// 
https://blogs.msdn.microsoft.com/vcblog/2015/12/02/partial-support-for-expression-sfinae-in-vs-2015-update-1/ +// and is written by Xiang Fan who agreed to using it in this library. + +template +struct is_complete_type : std::false_type {}; + +template +struct is_complete_type : std::true_type {}; + +template +struct is_compatible_object_type_impl : std::false_type {}; + +template +struct is_compatible_object_type_impl < + BasicJsonType, CompatibleObjectType, + enable_if_t < is_detected::value&& + is_detected::value >> +{ + using object_t = typename BasicJsonType::object_t; + + // macOS's is_constructible does not play well with nonesuch... + static constexpr bool value = + is_constructible::value && + is_constructible::value; +}; + +template +struct is_compatible_object_type + : is_compatible_object_type_impl {}; + +template +struct is_constructible_object_type_impl : std::false_type {}; + +template +struct is_constructible_object_type_impl < + BasicJsonType, ConstructibleObjectType, + enable_if_t < is_detected::value&& + is_detected::value >> +{ + using object_t = typename BasicJsonType::object_t; + + static constexpr bool value = + (is_default_constructible::value && + (std::is_move_assignable::value || + std::is_copy_assignable::value) && + (is_constructible::value && + std::is_same < + typename object_t::mapped_type, + typename ConstructibleObjectType::mapped_type >::value)) || + (has_from_json::value || + has_non_default_from_json < + BasicJsonType, + typename ConstructibleObjectType::mapped_type >::value); +}; + +template +struct is_constructible_object_type + : is_constructible_object_type_impl {}; + +template +struct is_compatible_string_type +{ + static constexpr auto value = + is_constructible::value; +}; + +template +struct is_constructible_string_type +{ + // launder type through decltype() to fix compilation failure on ICPC +#ifdef __INTEL_COMPILER + using laundered_type = decltype(std::declval()); +#else + using laundered_type = 
ConstructibleStringType; +#endif + + static constexpr auto value = + conjunction < + is_constructible, + is_detected_exact>::value; +}; + +template +struct is_compatible_array_type_impl : std::false_type {}; + +template +struct is_compatible_array_type_impl < + BasicJsonType, CompatibleArrayType, + enable_if_t < + is_detected::value&& + is_iterator_traits>>::value&& +// special case for types like std::filesystem::path whose iterator's value_type are themselves +// c.f. https://github.com/nlohmann/json/pull/3073 + !std::is_same>::value >> +{ + static constexpr bool value = + is_constructible>::value; +}; + +template +struct is_compatible_array_type + : is_compatible_array_type_impl {}; + +template +struct is_constructible_array_type_impl : std::false_type {}; + +template +struct is_constructible_array_type_impl < + BasicJsonType, ConstructibleArrayType, + enable_if_t::value >> + : std::true_type {}; + +template +struct is_constructible_array_type_impl < + BasicJsonType, ConstructibleArrayType, + enable_if_t < !std::is_same::value&& + !is_compatible_string_type::value&& + is_default_constructible::value&& +(std::is_move_assignable::value || + std::is_copy_assignable::value)&& +is_detected::value&& +is_iterator_traits>>::value&& +is_detected::value&& +// special case for types like std::filesystem::path whose iterator's value_type are themselves +// c.f. 
https://github.com/nlohmann/json/pull/3073 +!std::is_same>::value&& + is_complete_type < + detected_t>::value >> +{ + using value_type = range_value_t; + + static constexpr bool value = + std::is_same::value || + has_from_json::value || + has_non_default_from_json < + BasicJsonType, + value_type >::value; +}; + +template +struct is_constructible_array_type + : is_constructible_array_type_impl {}; + +template +struct is_compatible_integer_type_impl : std::false_type {}; + +template +struct is_compatible_integer_type_impl < + RealIntegerType, CompatibleNumberIntegerType, + enable_if_t < std::is_integral::value&& + std::is_integral::value&& + !std::is_same::value >> +{ + // is there an assert somewhere on overflows? + using RealLimits = std::numeric_limits; + using CompatibleLimits = std::numeric_limits; + + static constexpr auto value = + is_constructible::value && + CompatibleLimits::is_integer && + RealLimits::is_signed == CompatibleLimits::is_signed; +}; + +template +struct is_compatible_integer_type + : is_compatible_integer_type_impl {}; + +template +struct is_compatible_type_impl: std::false_type {}; + +template +struct is_compatible_type_impl < + BasicJsonType, CompatibleType, + enable_if_t::value >> +{ + static constexpr bool value = + has_to_json::value; +}; + +template +struct is_compatible_type + : is_compatible_type_impl {}; + +template +struct is_constructible_tuple : std::false_type {}; + +template +struct is_constructible_tuple> : conjunction...> {}; + +template +struct is_json_iterator_of : std::false_type {}; + +template +struct is_json_iterator_of : std::true_type {}; + +template +struct is_json_iterator_of : std::true_type +{}; + +// checks if a given type T is a template specialization of Primary +template