Skip to content

Commit 3a584b5

Browse files
committed
Bug Fix
1 parent 1e1bb74 commit 3a584b5

File tree

2 files changed

+308
-14
lines changed

2 files changed

+308
-14
lines changed
Lines changed: 301 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,301 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"import numpy as np\n",
10+
"import cv2 as cv\n",
11+
"from motrackers import IOUTracker\n",
12+
"from motrackers.utils import select_caffemodel, select_videofile"
13+
]
14+
},
15+
{
16+
"cell_type": "markdown",
17+
"metadata": {},
18+
"source": [
19+
"### Setup \n",
20+
"\n",
21+
"#### Choose file paths\n"
22+
]
23+
},
24+
{
25+
"cell_type": "code",
26+
"execution_count": 2,
27+
"metadata": {},
28+
"outputs": [],
29+
"source": [
30+
"# video_file = select_videofile('..')\n",
31+
"# prototxt, weights = select_caffemodel('..')\n",
32+
"# display(video_file, prototxt, weights)\n",
33+
"\n",
34+
"video_file = \"/home/adi/git_clones/multi-object-tracker/video_data/people.mp4\"\n",
35+
"prototxt = \"/home/adi/git_clones/multi-object-tracker/pretrained_models/caffemodel_weights/MobileNetSSD_deploy.prototxt\" \n",
36+
"weights = \"/home/adi/git_clones/multi-object-tracker/pretrained_models/caffemodel_weights/MobileNetSSD_deploy.caffemodel\""
37+
]
38+
},
39+
{
40+
"cell_type": "markdown",
41+
"metadata": {},
42+
"source": [
43+
"#### Set hyperparameters and constants"
44+
]
45+
},
46+
{
47+
"cell_type": "code",
48+
"execution_count": 3,
49+
"metadata": {},
50+
"outputs": [],
51+
"source": [
52+
"print_tracks = False # print the tracker output if True\n",
53+
"\n",
54+
"video = video_file\n",
55+
"\n",
56+
"model = {\"prototxt\": prototxt,\n",
57+
" \"weights\": weights,\n",
58+
" \"object_names\": {0: 'background', \n",
59+
" 1: 'aeroplane', \n",
60+
" 2: 'bicycle', \n",
61+
" 3: 'bird',\n",
62+
" 4: 'boat',\n",
63+
" 5: 'bottle',\n",
64+
" 6: 'bus', \n",
65+
" 7: 'car', \n",
66+
" 8: 'cat', \n",
67+
" 9: 'chair',\n",
68+
" 10: 'cow', \n",
69+
" 11: 'diningtable', \n",
70+
" 12: 'dog', \n",
71+
" 13: 'horse',\n",
72+
" 14: 'motorbike', \n",
73+
" 15: 'person', \n",
74+
" 16: 'pottedplant',\n",
75+
" 17: 'sheep', \n",
76+
" 18: 'sofa', \n",
77+
" 19: 'train',\n",
78+
" 20: 'tvmonitor'},\n",
79+
" \"threshold\": 0.2,\n",
80+
" \"confidence_threshold\": 0.2,\n",
81+
" \"pixel_std\":1/127.5,\n",
82+
" \"pixel_mean\": 127.5,\n",
83+
" \"input_size\": (300, 300)\n",
84+
" }\n",
85+
"\n",
86+
"max_object_lost_count = 5 # maximum number of times an object may be counted as lost while it is being tracked\n",
87+
"\n",
88+
"np.random.seed(12345)\n",
89+
"bbox_colors = {key: np.random.randint(0, 255, size=(3,)).tolist() for key in model['object_names'].keys()}"
90+
]
91+
},
92+
{
93+
"cell_type": "markdown",
94+
"metadata": {},
95+
"source": [
96+
"### Initialize\n",
97+
"* Initialize video\n",
98+
"* Initialize network\n",
99+
"* Initialize tracker"
100+
]
101+
},
102+
{
103+
"cell_type": "code",
104+
"execution_count": 4,
105+
"metadata": {},
106+
"outputs": [],
107+
"source": [
108+
"cap = cv.VideoCapture(video)\n",
109+
"net = cv.dnn.readNetFromCaffe(model[\"prototxt\"], model[\"weights\"])"
110+
]
111+
},
112+
{
113+
"cell_type": "code",
114+
"execution_count": 5,
115+
"metadata": {},
116+
"outputs": [],
117+
"source": [
118+
"tracker = IOUTracker(max_lost=2, \n",
119+
" iou_threshold=0.5, \n",
120+
" min_detection_confidence=0.7,\n",
121+
" max_detection_confidence=0.9)"
122+
]
123+
},
124+
{
125+
"cell_type": "code",
126+
"execution_count": 6,
127+
"metadata": {
128+
"scrolled": false
129+
},
130+
"outputs": [
131+
{
132+
"name": "stdout",
133+
"output_type": "stream",
134+
"text": [
135+
"Cannot read the video feed.\n"
136+
]
137+
}
138+
],
139+
"source": [
140+
"(H, W) = (None, None)\n",
141+
"writer = None\n",
142+
"\n",
143+
"while True:\n",
144+
" ok, image = cap.read()\n",
145+
" \n",
146+
" if not ok:\n",
147+
" print(\"Cannot read the video feed.\")\n",
148+
" break\n",
149+
" \n",
150+
" if W is None or H is None: \n",
151+
" (H, W) = image.shape[:2]\n",
152+
" \n",
153+
" image_resized = cv.resize(image, model[\"input_size\"])\n",
154+
"\n",
155+
" blob = cv.dnn.blobFromImage(\n",
156+
" image_resized, model[\"pixel_std\"], model[\"input_size\"], \n",
157+
" (model[\"pixel_mean\"], model[\"pixel_mean\"], model[\"pixel_mean\"]), False\n",
158+
" )\n",
159+
" net.setInput(blob)\n",
160+
" detections = net.forward()\n",
161+
"\n",
162+
" rows = image_resized.shape[0]\n",
163+
" cols = image_resized.shape[1]\n",
164+
" \n",
165+
" boxes, confidences, classIDs, detections_bbox = [], [], [], []\n",
166+
"\n",
167+
" for i in range(detections.shape[2]):\n",
168+
" confidence = detections[0, 0, i, 2]\n",
169+
" if confidence > model['confidence_threshold']:\n",
170+
" class_id = int(detections[0, 0, i, 1])\n",
171+
"\n",
172+
" # object location \n",
173+
" left = int(detections[0, 0, i, 3] * cols) \n",
174+
" top = int(detections[0, 0, i, 4] * rows)\n",
175+
" right = int(detections[0, 0, i, 5] * cols)\n",
176+
" bottom = int(detections[0, 0, i, 6] * rows)\n",
177+
" \n",
178+
" # scaling factor of image\n",
179+
" height_factor = image.shape[0]/float(model[\"input_size\"][0])\n",
180+
" width_factor = image.shape[1]/float(model[\"input_size\"][1])\n",
181+
" \n",
182+
" # scale object detection bounding box to original image\n",
183+
" left = int(width_factor * left) \n",
184+
" top = int(height_factor * top)\n",
185+
" \n",
186+
" right = int(width_factor * right)\n",
187+
" bottom = int(height_factor * bottom)\n",
188+
" \n",
189+
" width = right - left\n",
190+
" height = bottom - top\n",
191+
" \n",
192+
" boxes.append([left, top, width, height])\n",
193+
" confidences.append(float(confidence))\n",
194+
" classIDs.append(int(class_id))\n",
195+
" \n",
196+
" indices = cv.dnn.NMSBoxes(boxes, confidences, model[\"confidence_threshold\"], model[\"threshold\"])\n",
197+
" \n",
198+
" class_ids_to_track = []\n",
199+
" confidences_track = []\n",
200+
" if len(indices)>0:\n",
201+
" for i in indices.flatten():\n",
202+
" x, y, w, h = boxes[i][0], boxes[i][1], boxes[i][2], boxes[i][3]\n",
203+
" \n",
204+
" detections_bbox.append((x, y, x+w, y+h))\n",
205+
" class_ids_to_track.append(classIDs[i])\n",
206+
" \n",
207+
" clr = [int(c) for c in bbox_colors[classIDs[i]]]\n",
208+
" cv.rectangle(image, (x, y), (x+w, y+h), clr, 2)\n",
209+
" \n",
210+
" confidences_track.append(confidences[i])\n",
211+
" label = \"{}:{:.4f}\".format(model[\"object_names\"][classIDs[i]], confidences[i])\n",
212+
" \n",
213+
" (label_width, label_height), baseLine = cv.getTextSize(\n",
214+
" label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 2)\n",
215+
" \n",
216+
" y_label = max(y, label_height)\n",
217+
" \n",
218+
" cv.rectangle(image, \n",
219+
" (x, y_label-label_height),\n",
220+
" (x+label_width, y_label+baseLine), \n",
221+
" (255, 255, 255), \n",
222+
" cv.FILLED)\n",
223+
" \n",
224+
" cv.putText(image, label, (x, y_label), cv.FONT_HERSHEY_SIMPLEX, 0.5, clr, 2)\n",
225+
" \n",
226+
" tracks = tracker.update(detections_bbox, class_ids_to_track, confidences_track)\n",
227+
" \n",
228+
" if print_tracks:\n",
229+
" print(\"Tracker Outputs:\\n\", tracks, \"\\n\")\n",
230+
" \n",
231+
" for track in tracks:\n",
232+
" track_count, trackid, class_id, centroid, track_bbox, track_info = track\n",
233+
" \n",
234+
" text = \"ID {}\".format(trackid)\n",
235+
" \n",
236+
" cv.putText(image, \n",
237+
" text, \n",
238+
" (centroid[0] - 10, centroid[1] - 10),\n",
239+
" cv.FONT_HERSHEY_SIMPLEX,\n",
240+
" 0.5, \n",
241+
" (0, 255, 0), \n",
242+
" 2)\n",
243+
" \n",
244+
" cv.circle(image, \n",
245+
" (centroid[0], centroid[1]), \n",
246+
" 4, \n",
247+
" (0, 255, 0), \n",
248+
" -1)\n",
249+
" \n",
250+
" cv.imshow(\"image\", image)\n",
251+
" \n",
252+
" if cv.waitKey(1) & 0xFF == ord('q'):\n",
253+
" break\n",
254+
" \n",
255+
" if writer is None:\n",
256+
" fourcc = cv.VideoWriter_fourcc(*\"MJPG\")\n",
257+
" writer = cv.VideoWriter(\"output.avi\", fourcc, 30, (W, H), True)\n",
258+
" writer.write(image)\n",
259+
"\n",
260+
"writer.release()\n",
261+
"cap.release()\n",
262+
"cv.destroyWindow(\"image\")"
263+
]
264+
},
265+
{
266+
"cell_type": "code",
267+
"execution_count": null,
268+
"metadata": {},
269+
"outputs": [],
270+
"source": []
271+
},
272+
{
273+
"cell_type": "code",
274+
"execution_count": null,
275+
"metadata": {},
276+
"outputs": [],
277+
"source": []
278+
}
279+
],
280+
"metadata": {
281+
"kernelspec": {
282+
"display_name": "Python 3",
283+
"language": "python",
284+
"name": "python3"
285+
},
286+
"language_info": {
287+
"codemirror_mode": {
288+
"name": "ipython",
289+
"version": 3
290+
},
291+
"file_extension": ".py",
292+
"mimetype": "text/x-python",
293+
"name": "python",
294+
"nbconvert_exporter": "python",
295+
"pygments_lexer": "ipython3",
296+
"version": "3.6.9"
297+
}
298+
},
299+
"nbformat": 4,
300+
"nbformat_minor": 2
301+
}

motrackers/iou_tracker.py

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -54,37 +54,30 @@ def update(self, bboxes: list, class_ids: list, detection_scores: list):
5454
new_bboxes = np.array(bboxes, dtype='int')
5555
new_class_ids = np.array(class_ids, dtype='int')
5656
new_detection_scores = np.array(detection_scores)
57-
5857
new_centroids = get_centroids(new_bboxes)
5958

60-
new_detections = list(zip(
61-
range(len(bboxes)), new_bboxes, new_class_ids, new_centroids, new_detection_scores
62-
))
63-
59+
new_detections = list(zip(new_bboxes, new_class_ids, new_centroids, new_detection_scores))
6460
track_ids = list(self.tracks.keys())
6561

6662
updated_tracks = []
6763
for track_id in track_ids:
6864
if len(new_detections) > 0:
69-
idx, bb, cid, ctrd, scr = max(new_detections, key=lambda x: iou(self.tracks[track_id].bbox, x[1]))
65+
idx, best_match = max(enumerate(new_detections), key=lambda x: iou(self.tracks[track_id].bbox, x[1][0]))
66+
(bb, cid, ctrd, scr) = best_match
7067

71-
if iou(self.tracks[track_id].bbox, bb) > self.iou_threshold and self.tracks[track_id].class_id == cid:
68+
if iou(self.tracks[track_id].bbox, bb) > self.iou_threshold:
7269
max_score = max(self.tracks[track_id].info['max_score'], scr)
7370
self._update_track(track_id, ctrd, bb, score=scr, max_score=max_score)
74-
7571
updated_tracks.append(track_id)
76-
7772
del new_detections[idx]
7873

7974
if len(updated_tracks) == 0 or track_id is not updated_tracks[-1]:
8075
self.tracks[track_id].lost += 1
81-
82-
if self.tracks[track_id].lost > self.max_lost and \
83-
self.tracks[track_id].info['max_score'] >= self.max_detection_confidence:
76+
if self.tracks[track_id].lost > self.max_lost:
8477
self._remove_track(track_id)
8578

86-
for idx, bb, cid, ctrd, scr in new_detections:
87-
self._add_track(ctrd, bb, cid, score=scr, max_score=scr)
79+
for bb_, cid_, ctrd_, scr_ in new_detections:
80+
self._add_track(ctrd_, bb_, cid_, score=scr_, max_score=scr_)
8881

8982
outputs = self._get_tracks(self.tracks)
9083
return outputs

0 commit comments

Comments
 (0)