mjtaheri11
diff --git a/‎examples/SimpleTracker-caffe-model-mobilenetSSD.ipynb‎
Lines changed: 265 additions & 0 deletions b/‎examples/SimpleTracker-caffe-model-mobilenetSSD.ipynb‎
Lines changed: 265 additions & 0 deletions
@@ -0,0 +1,265 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import cv2 as cv\n",
+    "from motrackers import SimpleTracker\n",
+    "from motrackers.utils import select_caffemodel, select_videofile"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "1d3c3a3a86324074b3461a40bda983ea",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "FileChooser(path='..', filename='', show_hidden='False')"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "46b604d3ae4d4853aa0a644f9edfe463",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "FileChooser(path='..', filename='', show_hidden='False')"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "9f31f72dfb7446b29e258b97fa8f1a2f",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "FileChooser(path='..', filename='', show_hidden='False')"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "video_file = select_videofile('..')\n",
+    "prototxt, weights = select_caffemodel('..')\n",
+    "display(video_file, prototxt, weights)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "video = video_file.selected"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = {\"prototxt\": prototxt.selected,\n",
+    "         \"weights\": weights.selected,\n",
+    "         \"object_names\": {0: 'background', \n",
+    "                          1: 'aeroplane', \n",
+    "                          2: 'bicycle', \n",
+    "                          3: 'bird',\n",
+    "                          4: 'boat',\n",
+    "                          5: 'bottle',\n",
+    "                          6: 'bus', \n",
+    "                          7: 'car', \n",
+    "                          8: 'cat', \n",
+    "                          9: 'chair',\n",
+    "                          10: 'cow', \n",
+    "                          11: 'diningtable', \n",
+    "                          12: 'dog', \n",
+    "                          13: 'horse',\n",
+    "                          14: 'motorbike', \n",
+    "                          15: 'person', \n",
+    "                          16: 'pottedplant',\n",
+    "                          17: 'sheep', \n",
+    "                          18: 'sofa', \n",
+    "                          19: 'train',\n",
+    "                          20: 'tvmonitor'},\n",
+    "         \"threshold\": 0.2,\n",
+    "         \"confidence_threshold\": 0.2,\n",
+    "         \"pixel_std\":1/127.5,\n",
+    "         \"pixel_mean\": 127.5,\n",
+    "         \"input_size\": (300, 300)\n",
+    "        }\n",
+    "\n",
+    "max_object_lost_count = 5   # maximum number of object losts counted when the object is being tracked\n",
+    "\n",
+    "np.random.seed(12345)\n",
+    "bbox_colors = {key: np.random.randint(0, 255, size=(3,)).tolist() for key in model['object_names'].keys()}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cap = cv.VideoCapture(video)\n",
+    "net = cv.dnn.readNetFromCaffe(model[\"prototxt\"], model[\"weights\"])\n",
+    "tracker = SimpleTracker(max_lost=max_object_lost_count)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "(H, W) = (None, None)\n",
+    "writer = None\n",
+    "\n",
+    "while True:\n",
+    "    ok, image = cap.read()\n",
+    "    \n",
+    "    if not ok:\n",
+    "        print(\"Cannot read the video feed.\")\n",
+    "        break\n",
+    "    \n",
+    "    if W is None or H is None: \n",
+    "        (H, W) = image.shape[:2]\n",
+    "    \n",
+    "    image_resized = cv.resize(image, model[\"input_size\"])\n",
+    "\n",
+    "    blob = cv.dnn.blobFromImage(image_resized, \n",
+    "                                 model[\"pixel_std\"], \n",
+    "                                 model[\"input_size\"], \n",
+    "                                 (model[\"pixel_mean\"], model[\"pixel_mean\"], model[\"pixel_mean\"]), \n",
+    "                                 False)\n",
+    "\n",
+    "    net.setInput(blob)\n",
+    "    detections = net.forward()\n",
+    "\n",
+    "    rows = image_resized.shape[0]\n",
+    "    cols = image_resized.shape[1]\n",
+    "    \n",
+    "    boxes, confidences, classIDs, detections_bbox = [], [], [], []\n",
+    "\n",
+    "    for i in range(detections.shape[2]):\n",
+    "        confidence = detections[0, 0, i, 2]\n",
+    "        if confidence > model['confidence_threshold']:\n",
+    "            class_id = int(detections[0, 0, i, 1])\n",
+    "\n",
+    "            # object location \n",
+    "            left = int(detections[0, 0, i, 3] * cols) \n",
+    "            top = int(detections[0, 0, i, 4] * rows)\n",
+    "            right = int(detections[0, 0, i, 5] * cols)\n",
+    "            bottom = int(detections[0, 0, i, 6] * rows)\n",
+    "            \n",
+    "            # scaling factor of image\n",
+    "            height_factor = image.shape[0]/float(model[\"input_size\"][0])\n",
+    "            width_factor = image.shape[1]/float(model[\"input_size\"][1])\n",
+    "            \n",
+    "            # scale object detection bounding box to original image\n",
+    "            left = int(width_factor * left) \n",
+    "            top = int(height_factor * top)\n",
+    "            right = int(width_factor * right)\n",
+    "            bottom = int(height_factor * bottom)\n",
+    "            \n",
+    "            width, height = right - left, bottom-top\n",
+    "            \n",
+    "            boxes.append([left, top, width, height])\n",
+    "            confidences.append(float(confidence))\n",
+    "            classIDs.append(int(class_id))\n",
+    "        \n",
+    "    indices = cv.dnn.NMSBoxes(boxes, confidences, model[\"confidence_threshold\"], model[\"threshold\"])\n",
+    "    \n",
+    "    if len(indices)>0:\n",
+    "        for i in indices.flatten():\n",
+    "            x, y, w, h = boxes[i][0], boxes[i][1], boxes[i][2], boxes[i][3]\n",
+    "            \n",
+    "            detections_bbox.append((x, y, x+w, y+h))\n",
+    "            \n",
+    "            clr = [int(c) for c in bbox_colors[classIDs[i]]]\n",
+    "            cv.rectangle(image, (x, y), (x+w, y+h), clr, 2)\n",
+    "            \n",
+    "            label = \"{}:{:.4f}\".format(model[\"object_names\"][classIDs[i]], confidences[i])\n",
+    "            (label_width, label_height), baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 2)\n",
+    "            y_label = max(y, label_height)\n",
+    "            cv.rectangle(image, (x, y_label-label_height),\n",
+    "                                 (x+label_width, y_label+baseLine), (255, 255, 255), cv.FILLED)\n",
+    "            cv.putText(image, label, (x, y_label), cv.FONT_HERSHEY_SIMPLEX, 0.5, clr, 2)\n",
+    "        \n",
+    "    objects = tracker.update(detections_bbox)\n",
+    "    \n",
+    "    for (objectID, centroid) in objects.items():\n",
+    "        text = \"ID {}\".format(objectID)\n",
+    "        cv.putText(image, text, (centroid[0] - 10, centroid[1] - 10), cv.FONT_HERSHEY_SIMPLEX,\n",
+    "                    0.5, (0, 255, 0), 2)\n",
+    "        cv.circle(image, (centroid[0], centroid[1]), 4, (0, 255, 0), -1)\n",
+    "        \n",
+    "    cv.imshow(\"image\", image)\n",
+    "    \n",
+    "    if cv.waitKey(1) & 0xFF == ord('q'):\n",
+    "        break\n",
+    "        \n",
+    "    if writer is None:\n",
+    "        fourcc = cv.VideoWriter_fourcc(*\"MJPG\")\n",
+    "        writer = cv.VideoWriter(\"output.avi\", fourcc, 30, (W, H), True)\n",
+    "    writer.write(image)\n",
+    "\n",
+    "writer.release()\n",
+    "cap.release()\n",
+    "cv.destroyWindow(\"image\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}