Bug Fix

adipandas · adipandas · commit 3a584b58a81a · 2020-07-21T22:07:17.000-04:00
diff --git a/examples/IOUTracker-caffe-model-mobilenetSSD.ipynb b/examples/IOUTracker-caffe-model-mobilenetSSD.ipynb
@@ -0,0 +1,301 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import cv2 as cv\n",
+    "from motrackers import IOUTracker\n",
+    "from motrackers.utils import select_caffemodel, select_videofile"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Setup \n",
+    "\n",
+    "#### Choose file paths\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# video_file = select_videofile('..')\n",
+    "# prototxt, weights = select_caffemodel('..')\n",
+    "# display(video_file, prototxt, weights)\n",
+    "\n",
+    "video_file = \"/home/adi/git_clones/multi-object-tracker/video_data/people.mp4\"\n",
+    "prototxt = \"/home/adi/git_clones/multi-object-tracker/pretrained_models/caffemodel_weights/MobileNetSSD_deploy.prototxt\" \n",
+    "weights = \"/home/adi/git_clones/multi-object-tracker/pretrained_models/caffemodel_weights/MobileNetSSD_deploy.caffemodel\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Set hyper parameters and constants"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print_tracks = False   # print the tracker output if True\n",
+    "\n",
+    "video = video_file\n",
+    "\n",
+    "model = {\"prototxt\": prototxt,\n",
+    "         \"weights\": weights,\n",
+    "         \"object_names\": {0: 'background', \n",
+    "                          1: 'aeroplane', \n",
+    "                          2: 'bicycle', \n",
+    "                          3: 'bird',\n",
+    "                          4: 'boat',\n",
+    "                          5: 'bottle',\n",
+    "                          6: 'bus', \n",
+    "                          7: 'car', \n",
+    "                          8: 'cat', \n",
+    "                          9: 'chair',\n",
+    "                          10: 'cow', \n",
+    "                          11: 'diningtable', \n",
+    "                          12: 'dog', \n",
+    "                          13: 'horse',\n",
+    "                          14: 'motorbike', \n",
+    "                          15: 'person', \n",
+    "                          16: 'pottedplant',\n",
+    "                          17: 'sheep', \n",
+    "                          18: 'sofa', \n",
+    "                          19: 'train',\n",
+    "                          20: 'tvmonitor'},\n",
+    "         \"threshold\": 0.2,\n",
+    "         \"confidence_threshold\": 0.2,\n",
+    "         \"pixel_std\":1/127.5,\n",
+    "         \"pixel_mean\": 127.5,\n",
+    "         \"input_size\": (300, 300)\n",
+    "        }\n",
+    "\n",
+    "max_object_lost_count = 5   # maximum number of object losts counted when the object is being tracked\n",
+    "\n",
+    "np.random.seed(12345)\n",
+    "bbox_colors = {key: np.random.randint(0, 255, size=(3,)).tolist() for key in model['object_names'].keys()}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Initialize\n",
+    "* Initialize video\n",
+    "* Initialize network\n",
+    "* Initialize tracker"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cap = cv.VideoCapture(video)\n",
+    "net = cv.dnn.readNetFromCaffe(model[\"prototxt\"], model[\"weights\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tracker = IOUTracker(max_lost=2, \n",
+    "                     iou_threshold=0.5, \n",
+    "                     min_detection_confidence=0.7,\n",
+    "                     max_detection_confidence=0.9)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Cannot read the video feed.\n"
+     ]
+    }
+   ],
+   "source": [
+    "(H, W) = (None, None)\n",
+    "writer = None\n",
+    "\n",
+    "while True:\n",
+    "    ok, image = cap.read()\n",
+    "    \n",
+    "    if not ok:\n",
+    "        print(\"Cannot read the video feed.\")\n",
+    "        break\n",
+    "    \n",
+    "    if W is None or H is None: \n",
+    "        (H, W) = image.shape[:2]\n",
+    "    \n",
+    "    image_resized = cv.resize(image, model[\"input_size\"])\n",
+    "\n",
+    "    blob = cv.dnn.blobFromImage(\n",
+    "        image_resized, model[\"pixel_std\"], model[\"input_size\"], \n",
+    "        (model[\"pixel_mean\"], model[\"pixel_mean\"], model[\"pixel_mean\"]), False\n",
+    "    )\n",
+    "    net.setInput(blob)\n",
+    "    detections = net.forward()\n",
+    "\n",
+    "    rows = image_resized.shape[0]\n",
+    "    cols = image_resized.shape[1]\n",
+    "    \n",
+    "    boxes, confidences, classIDs, detections_bbox = [], [], [], []\n",
+    "\n",
+    "    for i in range(detections.shape[2]):\n",
+    "        confidence = detections[0, 0, i, 2]\n",
+    "        if confidence > model['confidence_threshold']:\n",
+    "            class_id = int(detections[0, 0, i, 1])\n",
+    "\n",
+    "            # object location \n",
+    "            left = int(detections[0, 0, i, 3] * cols) \n",
+    "            top = int(detections[0, 0, i, 4] * rows)\n",
+    "            right = int(detections[0, 0, i, 5] * cols)\n",
+    "            bottom = int(detections[0, 0, i, 6] * rows)\n",
+    "            \n",
+    "            # scaling factor of image\n",
+    "            height_factor = image.shape[0]/float(model[\"input_size\"][0])\n",
+    "            width_factor = image.shape[1]/float(model[\"input_size\"][1])\n",
+    "            \n",
+    "            # scale object detection bounding box to original image\n",
+    "            left = int(width_factor * left) \n",
+    "            top = int(height_factor * top)\n",
+    "            \n",
+    "            right = int(width_factor * right)\n",
+    "            bottom = int(height_factor * bottom)\n",
+    "            \n",
+    "            width = right - left\n",
+    "            height = bottom - top\n",
+    "            \n",
+    "            boxes.append([left, top, width, height])\n",
+    "            confidences.append(float(confidence))\n",
+    "            classIDs.append(int(class_id))\n",
+    "        \n",
+    "    indices = cv.dnn.NMSBoxes(boxes, confidences, model[\"confidence_threshold\"], model[\"threshold\"])\n",
+    "    \n",
+    "    class_ids_to_track = []\n",
+    "    confidences_track = []\n",
+    "    if len(indices)>0:\n",
+    "        for i in indices.flatten():\n",
+    "            x, y, w, h = boxes[i][0], boxes[i][1], boxes[i][2], boxes[i][3]\n",
+    "            \n",
+    "            detections_bbox.append((x, y, x+w, y+h))\n",
+    "            class_ids_to_track.append(classIDs[i])\n",
+    "            \n",
+    "            clr = [int(c) for c in bbox_colors[classIDs[i]]]\n",
+    "            cv.rectangle(image, (x, y), (x+w, y+h), clr, 2)\n",
+    "            \n",
+    "            confidences_track.append(confidences[i])\n",
+    "            label = \"{}:{:.4f}\".format(model[\"object_names\"][classIDs[i]], confidences[i])\n",
+    "            \n",
+    "            (label_width, label_height), baseLine = cv.getTextSize(\n",
+    "                label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 2)\n",
+    "            \n",
+    "            y_label = max(y, label_height)\n",
+    "            \n",
+    "            cv.rectangle(image, \n",
+    "                         (x, y_label-label_height),\n",
+    "                         (x+label_width, y_label+baseLine), \n",
+    "                         (255, 255, 255), \n",
+    "                         cv.FILLED)\n",
+    "            \n",
+    "            cv.putText(image, label, (x, y_label), cv.FONT_HERSHEY_SIMPLEX, 0.5, clr, 2)\n",
+    "        \n",
+    "    tracks = tracker.update(detections_bbox, class_ids_to_track, confidences_track)\n",
+    "    \n",
+    "    if print_tracks:\n",
+    "        print(\"Tracker Outputs:\\n\", tracks, \"\\n\")\n",
+    "    \n",
+    "    for track in tracks:\n",
+    "        track_count, trackid, class_id, centroid, track_bbox, track_info = track\n",
+    "        \n",
+    "        text = \"ID {}\".format(trackid)\n",
+    "        \n",
+    "        cv.putText(image, \n",
+    "                   text, \n",
+    "                   (centroid[0] - 10, centroid[1] - 10),\n",
+    "                   cv.FONT_HERSHEY_SIMPLEX,\n",
+    "                   0.5, \n",
+    "                   (0, 255, 0), \n",
+    "                   2)\n",
+    "        \n",
+    "        cv.circle(image, \n",
+    "                  (centroid[0], centroid[1]), \n",
+    "                  4, \n",
+    "                  (0, 255, 0), \n",
+    "                  -1)\n",
+    "    \n",
+    "    cv.imshow(\"image\", image)\n",
+    "    \n",
+    "    if cv.waitKey(1) & 0xFF == ord('q'):\n",
+    "        break\n",
+    "        \n",
+    "    if writer is None:\n",
+    "        fourcc = cv.VideoWriter_fourcc(*\"MJPG\")\n",
+    "        writer = cv.VideoWriter(\"output.avi\", fourcc, 30, (W, H), True)\n",
+    "    writer.write(image)\n",
+    "\n",
+    "writer.release()\n",
+    "cap.release()\n",
+    "cv.destroyWindow(\"image\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/motrackers/iou_tracker.py b/motrackers/iou_tracker.py
@@ -54,37 +54,30 @@ def update(self, bboxes: list, class_ids: list, detection_scores: list):
         new_bboxes = np.array(bboxes, dtype='int')
         new_class_ids = np.array(class_ids, dtype='int')
         new_detection_scores = np.array(detection_scores)
-
         new_centroids = get_centroids(new_bboxes)
 
-        new_detections = list(zip(
-            range(len(bboxes)), new_bboxes, new_class_ids, new_centroids, new_detection_scores
-        ))
-
+        new_detections = list(zip(new_bboxes, new_class_ids, new_centroids, new_detection_scores))
         track_ids = list(self.tracks.keys())
 
         updated_tracks = []
         for track_id in track_ids:
             if len(new_detections) > 0:
-                idx, bb, cid, ctrd, scr = max(new_detections, key=lambda x: iou(self.tracks[track_id].bbox, x[1]))
+                idx, best_match = max(enumerate(new_detections), key=lambda x: iou(self.tracks[track_id].bbox, x[1][0]))
+                (bb, cid, ctrd, scr) = best_match
 
-                if iou(self.tracks[track_id].bbox, bb) > self.iou_threshold and self.tracks[track_id].class_id == cid:
+                if iou(self.tracks[track_id].bbox, bb) > self.iou_threshold:
                     max_score = max(self.tracks[track_id].info['max_score'], scr)
                     self._update_track(track_id, ctrd, bb, score=scr, max_score=max_score)
-
                     updated_tracks.append(track_id)
-
                     del new_detections[idx]
 
             if len(updated_tracks) == 0 or track_id is not updated_tracks[-1]:
                 self.tracks[track_id].lost += 1
-
-                if self.tracks[track_id].lost > self.max_lost and \
-                        self.tracks[track_id].info['max_score'] >= self.max_detection_confidence:
+                if self.tracks[track_id].lost > self.max_lost:
                     self._remove_track(track_id)
 
-        for idx, bb, cid, ctrd, scr in new_detections:
-            self._add_track(ctrd, bb, cid, score=scr, max_score=scr)
+        for bb_, cid_, ctrd_, scr_ in new_detections:
+            self._add_track(ctrd_, bb_, cid_, score=scr_, max_score=scr_)
 
         outputs = self._get_tracks(self.tracks)
         return outputs