From 0afb41053009c9a719670dbd2ab8c2ecb60b5b64 Mon Sep 17 00:00:00 2001 From: JackWoo0831 Date: Thu, 24 Oct 2024 10:54:10 +0800 Subject: [PATCH 1/7] add hybrid sort and fix errors of OC sort --- README.md | 13 +- README_CN.md | 6 + tracker/track.py | 2 + tracker/trackers/basetrack.py | 6 +- tracker/trackers/hybridsort_tracker.py | 252 ++++++++++++++++++ .../kalman_filters/hybridsort_kalman.py | 144 ++++++++++ .../trackers/kalman_filters/ocsort_kalman.py | 1 - tracker/trackers/matching.py | 234 +++++++++++----- tracker/trackers/ocsort_tracker.py | 35 ++- tracker/trackers/tracklet.py | 197 +++++++++++++- 10 files changed, 794 insertions(+), 96 deletions(-) create mode 100644 tracker/trackers/hybridsort_tracker.py create mode 100644 tracker/trackers/kalman_filters/hybridsort_kalman.py diff --git a/README.md b/README.md index 17756c7..86e6a5b 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,10 @@ git checkout v2 # change to v2 branch !! +## 🗺️ Latest News + +- ***2024.10.24*** Add Hybrid SORT and fix some errors and bugs of OC-SORT. + ## ❤️ Introduction This repo is a toolbox that implements the **tracking-by-detection paradigm multi-object tracker**. The detector supports: @@ -38,6 +42,7 @@ and the tracker supports: - Strong SORT ([IEEE TMM 2023](https://arxiv.org/pdf/2202.13514)) - Sparse Track ([arxiv 2306](https://arxiv.org/pdf/2306.05238)) - UCMC Track ([AAAI 2024](http://arxiv.org/abs/2312.08952)) +- Hybrid SORT([AAAI 2024](https://ojs.aaai.org/index.php/AAAI/article/view/28471)) and the reid model supports: @@ -51,10 +56,6 @@ The highlights are: ![gif](figure/demo.gif) -## 🗺️ Roadmap - -- [ x ] Add UCMC Track -- [] Add more ReID modules. ## 🔨 Installation @@ -178,7 +179,7 @@ For example: - ByteTrack: `python tracker/track.py --dataset uavdt --detector yolov8 --tracker bytetrack --kalman_format byte --detector_model_path weights/yolov8l_UAVDT_60epochs_20230509.pt` -- OCSort: `python tracker/track.py --dataset uavdt --detector yolov8 --tracker ocsort --kalman_format ocsort --detector_model_path weights/yolov8l_UAVDT_60epochs_20230509.pt` +- OCSort: `python tracker/track.py --dataset mot17 --detector yolox --tracker ocsort --kalman_format ocsort --detector_model_path weights/bytetrack_m_mot17.pth.tar` - C-BIoU Track: `python tracker/track.py --dataset uavdt --detector yolov8 --tracker c_bioutrack --kalman_format bot --detector_model_path weights/yolov8l_UAVDT_60epochs_20230509.pt` @@ -190,6 +191,8 @@ For example: - UCMC Track: `python tracker/track.py --dataset mot17 --detector yolox --tracker ucmctrack --kalman_format ucmc --detector_model_path weights/bytetrack_m_mot17.pth.tar --camera_parameter_folder ./tracker/cam_param_files` +- Hybrid SORT: `python tracker/track.py --dataset mot17 --detector yolox --tracker hybridsort --kalman_format hybridsort --detector_model_path weights/bytetrack_m_mot17.pth.tar --save_images` + > **Important notes for UCMC Track:** > > 1. Camera parameters. The UCMC Track need the intrinsic and extrinsic parameter of camera. Please organize like the format of `tracker/cam_param_files/uavdt/M0101.txt`. One video sequence corresponds to one txt file. If you do not have the labelled parameters, you can refer to the estimating toolbox in original repo ([https://github.com/corfyi/UCMCTrack](https://github.com/corfyi/UCMCTrack)). diff --git a/README_CN.md b/README_CN.md index b20b3b8..483c5f8 100644 --- a/README_CN.md +++ b/README_CN.md @@ -13,6 +13,9 @@ git checkout v2 # change to v2 branch !! 
🙌 ***如果您有任何关于添加跟踪器的建议***,请在Issues部分留言并附上论文标题或链接!欢迎大家一起来让这个repo变得更好 +## 🗺️ 最近更新 + +- ***2024.10.24*** 增加了 Hybrid SORT 并且修复了OC-SORT的一些bug和错误。 ## ❤️ 介绍 @@ -34,6 +37,7 @@ git checkout v2 # change to v2 branch !! - Strong SORT ([IEEE TMM 2023](https://arxiv.org/pdf/2202.13514)) - Sparse Track ([arxiv 2306](https://arxiv.org/pdf/2306.05238)) - UCMC Track ([AAAI 2024](http://arxiv.org/abs/2312.08952)) +- Hybrid SORT([AAAI 2024](https://ojs.aaai.org/index.php/AAAI/article/view/28471)) REID模型支持: @@ -182,6 +186,8 @@ python tracker/track.py --dataset ${dataset name, related with the yaml file} -- - UCMC Track: `python tracker/track.py --dataset mot17 --detector yolox --tracker ucmctrack --kalman_format ucmc --detector_model_path weights/bytetrack_m_mot17.pth.tar --camera_parameter_folder ./tracker/cam_param_files` +- Hybrid SORT: `python tracker/track.py --dataset mot17 --detector yolox --tracker hybridsort --kalman_format hybridsort --detector_model_path weights/bytetrack_m_mot17.pth.tar --save_images` + >**UCMC Track的重要提示:** > > 1. 相机参数. UCMC Track需要相机的内参和外参. 请按照`tracker/cam_ram_files/uavdt/M0101.txt`的格式组织. 一个视频序列对应一个txt文件. 如果您没有标记的参数, 可以参考原始仓库中的估算工具箱([https://github.com/corfyi/UCMCTrack](https://github.com/corfyi/UCMCTrack)). diff --git a/tracker/track.py b/tracker/track.py index 45be9e0..fe102ed 100644 --- a/tracker/track.py +++ b/tracker/track.py @@ -29,6 +29,7 @@ from trackers.strongsort_tracker import StrongSortTracker from trackers.sparse_tracker import SparseTracker from trackers.ucmc_tracker import UCMCTracker +from trackers.hybridsort_tracker import HybridSortTracker # YOLOX modules try: @@ -73,6 +74,7 @@ 'strongsort': StrongSortTracker, 'sparsetrack': SparseTracker, 'ucmctrack': UCMCTracker, + 'hybridsort': HybridSortTracker } def get_args(): diff --git a/tracker/trackers/basetrack.py b/tracker/trackers/basetrack.py index 23afa9c..19f1aa5 100644 --- a/tracker/trackers/basetrack.py +++ b/tracker/trackers/basetrack.py @@ -56,11 +56,7 @@ def tlwh(self): """Get current position in bounding box format `(top left x, top left y, width, height)`. 
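+        Concrete tracklet subclasses implement this property; the base class only raises NotImplementedError.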
""" - if self.mean is None: - return self._tlwh.copy() - ret = self.mean[:4].copy() - ret[:2] -= ret[2:] / 2 - return ret + raise NotImplementedError # implented in class Tracklet(BaseTrack) @property def tlbr(self): diff --git a/tracker/trackers/hybridsort_tracker.py b/tracker/trackers/hybridsort_tracker.py new file mode 100644 index 0000000..711d293 --- /dev/null +++ b/tracker/trackers/hybridsort_tracker.py @@ -0,0 +1,252 @@ +""" +Hybrid Sort +""" + +import numpy as np +from collections import deque +from .basetrack import BaseTrack, TrackState +from .tracklet import Tracklet, Tracklet_w_velocity_four_corner +from .matching import * + +from cython_bbox import bbox_overlaps as bbox_ious + +class HybridSortTracker(object): + def __init__(self, args, frame_rate=30): + self.tracked_tracklets = [] # type: list[Tracklet] + self.lost_tracklets = [] # type: list[Tracklet] + self.removed_tracklets = [] # type: list[Tracklet] + + self.frame_id = 0 + self.args = args + + self.det_thresh = args.conf_thresh + 0.1 + self.buffer_size = int(frame_rate / 30.0 * args.track_buffer) + self.max_time_lost = self.buffer_size + + self.motion = args.kalman_format + + self.delta_t = 3 + + @staticmethod + def k_previous_obs(observations, cur_age, k): + if len(observations) == 0: + return [-1, -1, -1, -1, -1] + for i in range(k): + dt = k - i + if cur_age - dt in observations: + return observations[cur_age-dt] + max_age = max(observations.keys()) + return observations[max_age] + + def update(self, output_results, img, ori_img): + """ + output_results: processed detections (scale to original size) tlbr format + """ + + self.frame_id += 1 + activated_tracklets = [] + refind_tracklets = [] + lost_tracklets = [] + removed_tracklets = [] + + scores = output_results[:, 4] + bboxes = output_results[:, :4] + categories = output_results[:, -1] + + remain_inds = scores > self.args.conf_thresh + inds_low = scores > 0.1 + inds_high = scores < self.args.conf_thresh + + inds_second = np.logical_and(inds_low, inds_high) + dets_second = bboxes[inds_second] + dets = bboxes[remain_inds] + + cates = categories[remain_inds] + cates_second = categories[inds_second] + + scores_keep = scores[remain_inds] + scores_second = scores[inds_second] + + if len(dets) > 0: + '''Detections''' + detections = [Tracklet_w_velocity_four_corner(tlwh, s, cate, motion=self.motion) for + (tlwh, s, cate) in zip(dets, scores_keep, cates)] + else: + detections = [] + + ''' Add newly detected tracklets to tracked_tracklets''' + unconfirmed = [] + tracked_tracklets = [] # type: list[Tracklet] + for track in self.tracked_tracklets: + if not track.is_activated: + unconfirmed.append(track) + else: + tracked_tracklets.append(track) + + ''' Step 2: First association, Weak Cues (four corner confidence and score)''' + tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets) + + velocities = np.array( + [trk.get_velocity() for trk in tracklet_pool]) # (N, 4, 2) + + # last observation, obervation-centric + # last_boxes = np.array([trk.last_observation for trk in tracklet_pool]) + + # historical observations + k_observations = np.array( + [self.k_previous_obs(trk.observations, trk.age, self.delta_t) for trk in tracklet_pool]) + + + # Predict the current location with Kalman + for tracklet in tracklet_pool: + tracklet.predict() + + # weak cues cost matrix (hmiou + four corner velocity) and assignment + matches, u_track, u_detection = association_weak_cues( + tracklets=tracklet_pool, detections=detections, velocities=velocities, + 
previous_obs=k_observations, vdc_weight=0.05) + + for itracked, idet in matches: + track = tracklet_pool[itracked] + det = detections[idet] + if track.state == TrackState.Tracked: + track.update(detections[idet], self.frame_id) + activated_tracklets.append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refind_tracklets.append(track) + + ''' Step 3: Second association, with low score detection boxes''' + # association the untrack to the low score detections + if len(dets_second) > 0: + '''Detections''' + detections_second = [Tracklet_w_velocity_four_corner(tlwh, s, cate, motion=self.motion) for + (tlwh, s, cate) in zip(dets_second, scores_second, cates_second)] + else: + detections_second = [] + r_tracked_tracklets = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked] + + dists = hm_iou_distance(r_tracked_tracklets, detections_second) - score_distance(r_tracked_tracklets, detections_second) + + matches, u_track, u_detection_second = linear_assignment(dists, thresh=0.5) + for itracked, idet in matches: + track = r_tracked_tracklets[itracked] + det = detections_second[idet] + if track.state == TrackState.Tracked: + track.update(det, self.frame_id) + activated_tracklets.append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refind_tracklets.append(track) + + + '''Step 4: Third association, match high-conf remain detections with last observation of tracks''' + r_tracked_tracklets = [r_tracked_tracklets[i] for i in u_track] # remain tracklets from last step + r_detections = [detections[i] for i in u_detection] # high-conf remain detections + + dists = hm_iou_distance(atracks=[t.last_observation[: 4] for t in r_tracked_tracklets], # parse bbox directly + btracks=[d.tlbr for d in r_detections]) + + matches, u_track, u_detection = linear_assignment(dists, thresh=0.5) + + for itracked, idet in matches: + track = r_tracked_tracklets[itracked] + det = r_detections[idet] + if track.state == TrackState.Tracked: + track.update(det, self.frame_id) + activated_tracklets.append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refind_tracklets.append(track) + + # for tracks still failed, mark lost + for it in u_track: + track = r_tracked_tracklets[it] + if not track.state == TrackState.Lost: + track.mark_lost() + lost_tracklets.append(track) + + + '''Deal with unconfirmed tracks, usually tracks with only one beginning frame''' + detections = [r_detections[i] for i in u_detection] + dists = iou_distance(unconfirmed, detections) + + matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7) + + for itracked, idet in matches: + unconfirmed[itracked].update(detections[idet], self.frame_id) + activated_tracklets.append(unconfirmed[itracked]) + for it in u_unconfirmed: + track = unconfirmed[it] + track.mark_removed() + removed_tracklets.append(track) + + """ Step 4: Init new tracklets""" + for inew in u_detection: + track = detections[inew] + if track.score < self.det_thresh: + continue + track.activate(self.frame_id) + activated_tracklets.append(track) + + """ Step 5: Update state""" + for track in self.lost_tracklets: + if self.frame_id - track.end_frame > self.max_time_lost: + track.mark_removed() + removed_tracklets.append(track) + + self.tracked_tracklets = [t for t in self.tracked_tracklets if t.state == TrackState.Tracked] + self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, activated_tracklets) + self.tracked_tracklets = joint_tracklets(self.tracked_tracklets, refind_tracklets) 
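+        # pool bookkeeping: tracklets re-activated or refound this frame must leave the lost pool,
+        # anything already in the removed pool stays out of it, and tracked/lost duplicates are
+        # resolved in favour of the longer-lived tracklet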
+ self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.tracked_tracklets) + self.lost_tracklets.extend(lost_tracklets) + self.lost_tracklets = sub_tracklets(self.lost_tracklets, self.removed_tracklets) + self.removed_tracklets.extend(removed_tracklets) + self.tracked_tracklets, self.lost_tracklets = remove_duplicate_tracklets(self.tracked_tracklets, self.lost_tracklets) + # get scores of lost tracks + output_tracklets = [track for track in self.tracked_tracklets if track.is_activated] + + return output_tracklets + + + + +def joint_tracklets(tlista, tlistb): + exists = {} + res = [] + for t in tlista: + exists[t.track_id] = 1 + res.append(t) + for t in tlistb: + tid = t.track_id + if not exists.get(tid, 0): + exists[tid] = 1 + res.append(t) + return res + + +def sub_tracklets(tlista, tlistb): + tracklets = {} + for t in tlista: + tracklets[t.track_id] = t + for t in tlistb: + tid = t.track_id + if tracklets.get(tid, 0): + del tracklets[tid] + return list(tracklets.values()) + + +def remove_duplicate_tracklets(trackletsa, trackletsb): + pdist = iou_distance(trackletsa, trackletsb) + pairs = np.where(pdist < 0.15) + dupa, dupb = list(), list() + for p, q in zip(*pairs): + timep = trackletsa[p].frame_id - trackletsa[p].start_frame + timeq = trackletsb[q].frame_id - trackletsb[q].start_frame + if timep > timeq: + dupb.append(q) + else: + dupa.append(p) + resa = [t for i, t in enumerate(trackletsa) if not i in dupa] + resb = [t for i, t in enumerate(trackletsb) if not i in dupb] + return resa, resb \ No newline at end of file diff --git a/tracker/trackers/kalman_filters/hybridsort_kalman.py b/tracker/trackers/kalman_filters/hybridsort_kalman.py new file mode 100644 index 0000000..751c185 --- /dev/null +++ b/tracker/trackers/kalman_filters/hybridsort_kalman.py @@ -0,0 +1,144 @@ +from .base_kalman import BaseKalman +import numpy as np +from copy import deepcopy + +class HybridSORTKalman(BaseKalman): + + def __init__(self, ): + + state_dim = 9 # [x, y, s, c, a, vx, vy, vs, vc] s: area c: confidence score + observation_dim = 5 # confidence score is additional + + F = np.eye(state_dim) + for i in range(4): + F[i, (state_dim + 1) // 2 + i] = 1 # x = x + vx, y = y + vy, s = s + vs, c = c + vc in predict step + + H = np.eye(state_dim // 2 + 1, state_dim) + + super().__init__(state_dim=state_dim, + observation_dim=observation_dim, + F=F, + H=H) + + # TODO check + # give high uncertainty to the unobservable initial velocities + self.kf.R[2:, 2:] *= 10 # [[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 10, 0], [0, 0, 0, 10]] + self.kf.P[5:, 5:] *= 1000 + self.kf.P *= 10 + self.kf.Q[-1, -1] *= 0.01 # score + self.kf.Q[-2, -2] *= 0.01 + self.kf.Q[5:, 5:] *= 0.01 + + # keep all observations + self.history_obs = [] + self.attr_saved = None + self.observed = False + + def initialize(self, observation): + """ + Args: + observation: x-y-s-c-a + """ + self.kf.x = self.kf.x.flatten() + self.kf.x[:5] = observation + + + def predict(self, ): + """ predict step + + """ + + # s + vs + if (self.kf.x[7] + self.kf.x[2] <= 0): + self.kf.x[7] *= 0.0 + + self.kf.predict() + + def _freeze(self, ): + """ freeze all the param of Kalman + + """ + self.attr_saved = deepcopy(self.kf.__dict__) + + def _unfreeze(self, ): + """ when observe an lost object again, use the virtual trajectory + + """ + if self.attr_saved is not None: + new_history = deepcopy(self.history_obs) + self.kf.__dict__ = self.attr_saved + + self.history_obs = self.history_obs[:-1] + + occur = [int(d is None) for d in new_history] + indices = 
np.where(np.array(occur)==0)[0] + index1 = indices[-2] + index2 = indices[-1] + box1 = new_history[index1] + x1, y1, s1, c1, r1 = box1 + w1 = np.sqrt(s1 * r1) + h1 = np.sqrt(s1 / r1) + box2 = new_history[index2] + x2, y2, s2, c2, r2 = box2 + w2 = np.sqrt(s2 * r2) + h2 = np.sqrt(s2 / r2) + time_gap = index2 - index1 + dx = (x2-x1)/time_gap + dy = (y2-y1)/time_gap + dw = (w2-w1)/time_gap + dh = (h2-h1)/time_gap + dc = (c2-c1)/time_gap + + for i in range(index2 - index1): + """ + The default virtual trajectory generation is by linear + motion (constant speed hypothesis), you could modify this + part to implement your own. + """ + x = x1 + (i+1) * dx + y = y1 + (i+1) * dy + w = w1 + (i+1) * dw + h = h1 + (i+1) * dh + s = w * h + r = w / float(h) + + c = c1 + (i+1) * dc + new_box = np.array([x, y, s, c, r]).reshape((5, 1)) + """ + I still use predict-update loop here to refresh the parameters, + but this can be faster by directly modifying the internal parameters + as suggested in the paper. I keep this naive but slow way for + easy read and understanding + """ + self.kf.update(new_box) + if not i == (index2-index1-1): + self.kf.predict() + + + def update(self, z): + """ update step + + For simplicity, directly change the self.kf as OCSORT modify the intrinsic Kalman + + Args: + z: observation x-y-s-a format + """ + + self.history_obs.append(z) + + if z is None: + if self.observed: + self._freeze() + self.observed = False + + self.kf.update(z) + + else: + if not self.observed: # Get observation, use online smoothing to re-update parameters + self._unfreeze() + + self.kf.update(z) + + self.observed = True + + diff --git a/tracker/trackers/kalman_filters/ocsort_kalman.py b/tracker/trackers/kalman_filters/ocsort_kalman.py index a83b5b7..6ded258 100644 --- a/tracker/trackers/kalman_filters/ocsort_kalman.py +++ b/tracker/trackers/kalman_filters/ocsort_kalman.py @@ -1,4 +1,3 @@ -from numpy.core.multiarray import zeros as zeros from .base_kalman import BaseKalman import numpy as np from copy import deepcopy diff --git a/tracker/trackers/matching.py b/tracker/trackers/matching.py index 4eb1997..6ff8147 100644 --- a/tracker/trackers/matching.py +++ b/tracker/trackers/matching.py @@ -18,34 +18,9 @@ 8: 15.507, 9: 16.919} - -def merge_matches(m1, m2, shape): - O,P,Q = shape - m1 = np.asarray(m1) - m2 = np.asarray(m2) - - M1 = scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P)) - M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q)) - - mask = M1*M2 - match = mask.nonzero() - match = list(zip(match[0], match[1])) - unmatched_O = tuple(set(range(O)) - set([i for i, j in match])) - unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match])) - - return match, unmatched_O, unmatched_Q - - -def _indices_to_matches(cost_matrix, indices, thresh): - matched_cost = cost_matrix[tuple(zip(*indices))] - matched_mask = (matched_cost <= thresh) - - matches = indices[matched_mask] - unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0])) - unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1])) - - return matches, unmatched_a, unmatched_b - +""" +Some basic functions +""" def linear_assignment(cost_matrix, thresh): if cost_matrix.size == 0: @@ -90,7 +65,7 @@ def iou_distance(atracks, btracks): :rtype cost_matrix np.ndarray """ - if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)): + if (len(atracks) > 0 and isinstance(atracks[0], np.ndarray)) or 
(len(btracks) > 0 and isinstance(btracks[0], np.ndarray)): atlbrs = atracks btlbrs = btracks else: @@ -101,25 +76,6 @@ def iou_distance(atracks, btracks): return cost_matrix -def v_iou_distance(atracks, btracks): - """ - Compute cost based on IoU - :type atracks: list[STrack] - :type btracks: list[STrack] - - :rtype cost_matrix np.ndarray - """ - - if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)): - atlbrs = atracks - btlbrs = btracks - else: - atlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in atracks] - btlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in btracks] - _ious = ious(atlbrs, btlbrs) - cost_matrix = 1 - _ious - - return cost_matrix def embedding_distance(tracks, detections, metric='cosine'): """ @@ -168,7 +124,8 @@ def fuse_iou(cost_matrix, tracks, detections): return fuse_cost -def fuse_score(cost_matrix, detections): +def fuse_det_score(cost_matrix, detections): + # weight detection score into cost matrix if cost_matrix.size == 0: return cost_matrix iou_sim = 1 - cost_matrix @@ -179,6 +136,22 @@ def fuse_score(cost_matrix, detections): return fuse_cost +def fuse_det_trk_score(cost_matrix, detections, tracks): + # weight detection and tracklet score into cost matrix + if cost_matrix.size == 0: + return cost_matrix + iou_sim = 1 - cost_matrix + + det_scores = np.array([det.score for det in detections]) + det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0) + trk_scores = np.array([trk.score for trk in tracks]) + trk_scores = np.expand_dims(trk_scores, axis=1).repeat(cost_matrix.shape[1], axis=1) + mid_scores = (det_scores + trk_scores) / 2 + fuse_sim = iou_sim * mid_scores + fuse_cost = 1 - fuse_sim + + return fuse_cost + def greedy_assignment_iou(dist, thresh): matched_indices = [] if dist.shape[1] == 0: @@ -196,20 +169,6 @@ def greedy_assignment(dists, threshs): u_track = [d for d in range(dists.shape[0]) if not (d in matches[:, 0])] return matches, u_track, u_det -def fuse_score_matrix(cost_matrix, detections, tracks): - if cost_matrix.size == 0: - return cost_matrix - iou_sim = 1 - cost_matrix - - det_scores = np.array([det.score for det in detections]) - det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0) - trk_scores = np.array([trk.score for trk in tracks]) - trk_scores = np.expand_dims(trk_scores, axis=1).repeat(cost_matrix.shape[1], axis=1) - mid_scores = (det_scores + trk_scores) / 2 - fuse_sim = iou_sim * mid_scores - fuse_cost = 1 - fuse_sim - - return fuse_cost """ calculate buffered IoU, used in C_BIoU_Tracker @@ -235,7 +194,7 @@ def buffered_iou_distance(atracks, btracks, level=1): """ observation centric association, with velocity, for OC Sort """ -def observation_centric_association(tracklets, detections, iou_threshold, velocities, previous_obs, vdc_weight): +def observation_centric_association(tracklets, detections, velocities, previous_obs, vdc_weight=0.05, iou_threshold=0.3): if(len(tracklets) == 0): return np.empty((0, 2), dtype=int), tuple(range(len(tracklets))), tuple(range(len(detections))) @@ -247,6 +206,10 @@ def observation_centric_association(tracklets, detections, iou_threshold, veloci iou_matrix = bbox_ious(trk_tlbrs, det_tlbrs) + # NOTE for iou < iou_threshold, directly set to -inf, otherwise after solving the linear assignment, + # some matched pairs will have no overlaps + iou_matrix[iou_matrix < iou_threshold] = - 1e5 + Y, X = speed_direction_batch(det_tlbrs, previous_obs) inertia_Y, inertia_X = 
velocities[:,0], velocities[:,1] inertia_Y = np.repeat(inertia_Y[:, np.newaxis], Y.shape[1], axis=1) @@ -265,18 +228,32 @@ def observation_centric_association(tracklets, detections, iou_threshold, veloci angle_diff_cost = (valid_mask * diff_angle) * vdc_weight angle_diff_cost = angle_diff_cost * scores.T - matches, unmatched_a, unmatched_b = linear_assignment(- (iou_matrix + angle_diff_cost), thresh=0.9) + matches, unmatched_a, unmatched_b = linear_assignment(- (iou_matrix + angle_diff_cost), thresh=0.0) return matches, unmatched_a, unmatched_b """ -helper func of observation_centric_association +helper func of observation_centric_association (OC Sort) and association_weak_cues (Hybrid Sort) """ -def speed_direction_batch(dets, tracks): +def speed_direction_batch(dets, tracks, mode='center'): tracks = tracks[..., np.newaxis] - CX1, CY1 = (dets[:, 0] + dets[:, 2]) / 2.0, (dets[:,1] + dets[:,3]) / 2.0 - CX2, CY2 = (tracks[:, 0] + tracks[:, 2]) / 2.0, (tracks[:, 1] + tracks[:, 3]) / 2.0 + if mode == 'center': + CX1, CY1 = (dets[:, 0] + dets[:, 2]) / 2.0, (dets[:,1] + dets[:,3]) / 2.0 + CX2, CY2 = (tracks[:, 0] + tracks[:, 2]) / 2.0, (tracks[:, 1] + tracks[:, 3]) / 2.0 + elif mode == 'tl': + CX1, CY1 = dets[:,0], dets[:,1] + CX2, CY2 = tracks[:,0], tracks[:,1] + elif mode == 'tr': + CX1, CY1 = dets[:,2], dets[:,1] + CX2, CY2 = tracks[:,2], tracks[:,1] + elif mode == 'bl': + CX1, CY1 = dets[:,0], dets[:,3] + CX2, CY2 = tracks[:,0], tracks[:,3] + else: + CX1, CY1 = dets[:,2], dets[:,3] + CX2, CY2 = tracks[:,2], tracks[:,3] + dx = CX2 - CX1 dy = CY2 - CY1 norm = np.sqrt(dx**2 + dy**2) + 1e-6 @@ -284,6 +261,125 @@ def speed_direction_batch(dets, tracks): dy = dy / norm return dy, dx # size: num_track x num_det +""" +helper func of association_weak_cues (Hybrid Sort) +""" +def score_diff_batch(det_scores, track_scores): + """ + Args: + det_scores, np.ndarray, shape (N, ) + track_scores, np.ndarray, shape (M, ) + """ + track_scores = track_scores[:, None] + det_scores = det_scores[None, :] + return np.abs(track_scores - det_scores) + +def score_distance(atracks, btracks): + """ + calculate the confidence score difference, c_{i, j} = abs(atracks[i].score - btracks[j].score) + """ + if (len(atracks) > 0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)): + ascores = atracks + bscores = btracks + else: + ascores = [track.score for track in atracks] + bscores = [track.score for track in btracks] + + return score_diff_batch(det_scores=np.ascontiguousarray(bscores), + track_scores=np.ascontiguousarray(ascores)) + +""" +calculate HM IoU, used in Hybrid Sort +""" +def hm_iou_distance(atracks, btracks): + # hm iou = iou * hright iou + if (len(atracks) > 0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)): + atlbrs = atracks + btlbrs = btracks + else: + atlbrs = [track.tlbr for track in atracks] + btlbrs = [track.tlbr for track in btracks] + + _ious = ious(atlbrs, btlbrs) # original iou + + if _ious.size == 0: + return _ious # case if len of tracks == 0, no need to further calculating + + if isinstance(atlbrs, list): atlbrs = np.ascontiguousarray(atlbrs) + if isinstance(btlbrs, list): btlbrs = np.ascontiguousarray(btlbrs) + + # height iou = (y2_min - y1_max) / (y2_max - y1_min) + atlbrs_ = np.expand_dims(atlbrs, axis=1) # (M, 4) -> (M, 1, 4) to apply boardcast mechanism + btlbrs_ = np.expand_dims(btlbrs, axis=0) # (N, 4) -> (1, N, 4) + + y2_min = np.minimum(atlbrs_[..., 3], btlbrs_[..., 3]) # (M, N) + y1_max = 
np.maximum(atlbrs_[..., 1], btlbrs_[..., 1]) + + y2_max = np.maximum(atlbrs_[..., 3], btlbrs_[..., 3]) + y1_min = np.minimum(atlbrs_[..., 1], btlbrs_[..., 1]) + + _h_ious = (y2_min - y1_max) / (y2_max - y1_min) + + return _ious * _h_ious + + +""" +observation centric association with four corner point velocity, confidence score and HM IoU, for Hybrid Sort +""" +def association_weak_cues(tracklets, detections, velocities, previous_obs, + score_diff_weight=1.0, vdc_weight=0.05, iou_threshold=0.25): + + if(len(tracklets) == 0): + return np.empty((0, 2), dtype=int), tuple(range(len(tracklets))), tuple(range(len(detections))) + + # get numpy format bboxes + trk_tlbrs = np.array([track.tlbr for track in tracklets]) + det_tlbrs = np.array([det.tlbr for det in detections]) + det_scores = np.array([det.score for det in detections]) + # Note that the kalman-predicted score is used in first round assocication + trk_scores = np.array([trk.kalman_score for trk in tracklets]) + + # hm iou + iou_matrix = hm_iou_distance(trk_tlbrs, det_tlbrs) + + # NOTE for iou < iou_threshold, directly set to -inf, otherwise after solving the linear assignment, + # some matched pairs will have no overlaps + iou_matrix[iou_matrix < iou_threshold] = - 1e5 + + # cal four corner distance + velocity_cost = np.zeros((len(tracklets), len(detections))) + for idx, corner in enumerate(['tl', 'tr', 'bl', 'br']): # tl, tr, bl, br + # get the velocity directoin between detections and historical observations + Y, X = speed_direction_batch(det_tlbrs, previous_obs, mode=corner) # shape (num track, num det) + inertia_Y, inertia_X = velocities[:, idx, 0], velocities[:, idx, 1] # velocities: shape (N, 4, 2) + inertia_Y = np.repeat(inertia_Y[:, np.newaxis], Y.shape[1], axis=1) + inertia_X = np.repeat(inertia_X[:, np.newaxis], X.shape[1], axis=1) + + diff_angle_cos = inertia_X * X + inertia_Y * Y + diff_angle_cos = np.clip(diff_angle_cos, a_min=-1, a_max=1) + diff_angle = np.arccos(diff_angle_cos) + diff_angle = (np.pi / 2.0 - np.abs(diff_angle)) / np.pi + + valid_mask = np.ones(previous_obs.shape[0]) + valid_mask[np.where(previous_obs[:, 4] < 0)] = 0 + + scores = np.repeat(det_scores[:, np.newaxis], trk_tlbrs.shape[0], axis=1) + valid_mask = np.repeat(valid_mask[:, np.newaxis], X.shape[1], axis=1) + + angle_diff_cost = (valid_mask * diff_angle) * vdc_weight + angle_diff_cost = angle_diff_cost * scores.T + + # add all angle diff cost from four corners + velocity_cost += angle_diff_cost + + # minus the score difference + velocity_cost -= score_diff_batch(det_scores, trk_scores) * score_diff_weight + + matches, unmatched_a, unmatched_b = linear_assignment(- (iou_matrix + velocity_cost), thresh=0.0) + + return matches, unmatched_a, unmatched_b + + def matching_cascade( distance_metric, matching_thresh, cascade_depth, tracks, detections, diff --git a/tracker/trackers/ocsort_tracker.py b/tracker/trackers/ocsort_tracker.py index cccbc84..648c04a 100644 --- a/tracker/trackers/ocsort_tracker.py +++ b/tracker/trackers/ocsort_tracker.py @@ -104,7 +104,7 @@ def update(self, output_results, img, ori_img): # Observation centric cost matrix and assignment matches, u_track, u_detection = observation_centric_association( tracklets=tracklet_pool, detections=detections, iou_threshold=0.3, - velocities=velocities, previous_obs=k_observations, vdc_weight=0.2 + velocities=velocities, previous_obs=k_observations, vdc_weight=0.05 ) for itracked, idet in matches: @@ -125,13 +125,10 @@ def update(self, output_results, img, ori_img): (tlwh, s, cate) in 
zip(dets_second, scores_second, cates_second)] else: detections_second = [] - r_tracked_tracklets = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked] - # for unmatched tracks in the first round, use last obervation - r_tracked_tracklets_last_observ = [tracklet_pool[i].last_observation[:4] for i in u_track if tracklet_pool[i].state == TrackState.Tracked] - detections_second_bbox = [det.tlbr for det in detections_second] + r_tracked_tracklets = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked] - dists = 1. - ious(r_tracked_tracklets_last_observ, detections_second_bbox) + dists = iou_distance(r_tracked_tracklets, detections_second) matches, u_track, u_detection_second = linear_assignment(dists, thresh=0.5) for itracked, idet in matches: @@ -144,14 +141,36 @@ def update(self, output_results, img, ori_img): track.re_activate(det, self.frame_id, new_id=False) refind_tracklets.append(track) + + '''Step 4: Third association, match high-conf remain detections with last observation of tracks''' + r_tracked_tracklets = [r_tracked_tracklets[i] for i in u_track] # remain tracklets from last step + r_detections = [detections[i] for i in u_detection] # high-conf remain detections + + dists = 1. - ious(atlbrs=[t.last_observation[: 4] for t in r_tracked_tracklets], # parse bbox directly + btlbrs=[d.tlbr for d in r_detections]) + + matches, u_track, u_detection = linear_assignment(dists, thresh=0.5) + + for itracked, idet in matches: + track = r_tracked_tracklets[itracked] + det = r_detections[idet] + if track.state == TrackState.Tracked: + track.update(det, self.frame_id) + activated_tracklets.append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refind_tracklets.append(track) + + # for tracks still failed, mark lost for it in u_track: track = r_tracked_tracklets[it] if not track.state == TrackState.Lost: track.mark_lost() - lost_tracklets.append(track) + lost_tracklets.append(track) + '''Deal with unconfirmed tracks, usually tracks with only one beginning frame''' - detections = [detections[i] for i in u_detection] + detections = [r_detections[i] for i in u_detection] dists = iou_distance(unconfirmed, detections) matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7) diff --git a/tracker/trackers/tracklet.py b/tracker/trackers/tracklet.py index 255bee0..c657ef9 100644 --- a/tracker/trackers/tracklet.py +++ b/tracker/trackers/tracklet.py @@ -12,6 +12,7 @@ from .kalman_filters.sort_kalman import SORTKalman from .kalman_filters.strongsort_kalman import NSAKalman from .kalman_filters.ucmctrack_kalman import UCMCKalman +from .kalman_filters.hybridsort_kalman import HybridSORTKalman MOTION_MODEL_DICT = { 'sort': SORTKalman, @@ -20,6 +21,7 @@ 'ocsort': OCSORTKalman, 'strongsort': NSAKalman, 'ucmc': UCMCKalman, + 'hybridsort': HybridSORTKalman } STATE_CONVERT_DICT = { @@ -28,7 +30,8 @@ 'bot': 'xywh', 'ocsort': 'xysa', 'strongsort': 'xyah', - 'ucmc': 'ground' + 'ucmc': 'ground', + 'hybridsort': 'xysca' } class Tracklet(BaseTrack): @@ -187,7 +190,7 @@ def update(self, new_track, frame_id): class Tracklet_w_velocity(Tracklet): """ - Tracklet class with reid features, for ocsort. + Tracklet class with center point velocity, for ocsort. 
""" def __init__(self, tlwh, score, category, motion='byte', delta_t=3): @@ -201,6 +204,17 @@ def __init__(self, tlwh, score, category, motion='byte', delta_t=3): self.age = 0 # mark the age + @property + def tlwh(self): + """ + NOTE: note that for OC Sort, when querying tlwh, instead of returning the kalman state, + directly return the last observation (so is called observation-centric) + """ + if self.last_observation.sum() < 0: # no last observation + return self.__getattribute__(STATE_CONVERT_DICT[self.motion] + '_to_tlwh')() + + return self.tlbr_to_tlwh(self.last_observation[: 4]) + @staticmethod def speed_direction(bbox1, bbox2): cx1, cy1 = (bbox1[0] + bbox1[2]) / 2.0, (bbox1[1] + bbox1[3]) / 2.0 @@ -228,14 +242,13 @@ def update(self, new_track, frame_id): self.time_since_update = 0 # update velocity and history buffer - new_tlbr = Tracklet_w_bbox_buffer.tlwh_to_tlbr(new_tlwh) + new_tlbr = self.tlwh_to_tlbr(new_tlwh) - if self.last_observation.sum() >= 0: # no previous observation + if self.last_observation.sum() >= 0: # exists previous observation previous_box = None - for i in range(self.delta_t): - dt = self.delta_t - i + for dt in range(self.delta_t, 0, -1): # from old to new if self.age - dt in self.observations: - previous_box = self.observations[self.age-dt] + previous_box = self.observations[self.age - dt] break if previous_box is None: previous_box = self.last_observation @@ -250,7 +263,175 @@ def update(self, new_track, frame_id): self.history_observations.append(new_observation) +class Tracklet_w_velocity_four_corner(Tracklet): + """ + Tracklet class with four corner points velocity and previous confidence, for hybrid sort. + """ + def __init__(self, tlwh, score, category, motion='byte', delta_t=3, score_thresh=0.4): + # initial position + self._tlwh = np.asarray(tlwh, dtype=np.float) + self.is_activated = False + + self.score = score + self.category = category + + # kalman + self.motion = motion + self.kalman_filter = MOTION_MODEL_DICT[motion]() + + self.convert_func = self.__getattribute__('tlwh_to_' + STATE_CONVERT_DICT[motion]) + + # init kalman + self.kalman_filter.initialize(self.convert_func(np.r_[self._tlwh, self.score])) # confidence score is addtional + + self.last_observation = np.array([-1, -1, -1, -1, -1]) # placeholder + self.observations = dict() + self.history_observations = [] + + # velocity of top-left, top-right, bottom-left, bottom-right + self.velocity_tl, self.velocity_tr, self.velocity_bl, self.velocity_br = None, None, None, None + # prev score + self.prev_score = None + + self.score_thresh = score_thresh # score threshold to limit the range of kalman-predicted score and observation score + + self.delta_t = delta_t + + self.age = 0 # mark the age + + @property + def tlwh(self): + """ + NOTE: note that for Hybrid Sort, same as OC Sort, when querying tlwh, instead of returning the kalman state, + directly return the last observation + """ + if self.last_observation.sum() < 0: # no last observation + return self.__getattribute__(STATE_CONVERT_DICT[self.motion] + '_to_tlwh')() + + return self.tlbr_to_tlwh(self.last_observation[: 4]) + + @staticmethod + def speed_direction(point1, point2): + """ + In order to jointly calculating the four corner velocity, parse point coordinate as input. 
+ + Args: + point1, point2: list or np.ndarray, shape: (2, ) + """ + x1, y1 = point1[0], point1[1] + x2, y2 = point2[0], point2[1] + speed = np.array([y2 - y1, x2 - x1]) + norm = np.sqrt((y2 - y1)**2 + (x2 - x1)**2) + 1e-6 + return speed / norm + + def predict(self): + self.kalman_filter.predict() + + self.age += 1 + self.time_since_update += 1 + + # update score with linear model + if not self.prev_score: + self.score = np.clip(self.score, 0.1, self.score_thresh) + else: + self.score = np.clip(self.score + (self.score - self.prev_score), 0.1, self.score_thresh) + + def update(self, new_track, frame_id): + self.frame_id = frame_id + + new_tlwh = new_track.tlwh + self.prev_score = self.score # save previous score + self.score = new_track.score + + self.kalman_filter.update(self.convert_func(np.r_[new_tlwh, new_track.score])) + + self.state = TrackState.Tracked + self.is_activated = True + self.time_since_update = 0 + + # get four corner velocity + new_tlbr = self.tlwh_to_tlbr(new_tlwh) + + self.velocity_tl, self.velocity_tr = np.array([0, 0], dtype=float), np.array([0, 0], dtype=float) + self.velocity_bl, self.velocity_br = np.array([0, 0], dtype=float), np.array([0, 0], dtype=float) + + if self.last_observation.sum() >= 0: # exists previous observation + previous_box = None + for dt in range(1, self.delta_t + 1): # from new to old + if self.age - dt in self.observations: + previous_box = self.observations[self.age - dt] # t-l-b-r + + self.velocity_tl += self.speed_direction([previous_box[0], previous_box[1]], [new_tlbr[0], new_tlbr[1]]) + self.velocity_tr += self.speed_direction([previous_box[2], previous_box[1]], [new_tlbr[2], new_tlbr[1]]) + self.velocity_bl += self.speed_direction([previous_box[0], previous_box[3]], [new_tlbr[0], new_tlbr[3]]) + self.velocity_br += self.speed_direction([previous_box[2], previous_box[3]], [new_tlbr[2], new_tlbr[3]]) + + if previous_box is None: + previous_box = self.last_observation + + self.velocity_tl += self.speed_direction([previous_box[0], previous_box[1]], [new_tlbr[0], new_tlbr[1]]) + self.velocity_tr += self.speed_direction([previous_box[2], previous_box[1]], [new_tlbr[2], new_tlbr[1]]) + self.velocity_bl += self.speed_direction([previous_box[0], previous_box[3]], [new_tlbr[0], new_tlbr[3]]) + self.velocity_br += self.speed_direction([previous_box[2], previous_box[3]], [new_tlbr[2], new_tlbr[3]]) + + new_observation = np.r_[new_tlbr, new_track.score] + self.last_observation = new_observation + self.observations[self.age] = new_observation + self.history_observations.append(new_observation) + + def re_activate(self, new_track, frame_id, new_id=False): + + self.kalman_filter.update(self.convert_func(np.r_[new_track.tlwh, new_track.score])) + + self.state = TrackState.Tracked + self.is_activated = True + self.frame_id = frame_id + if new_id: + self.track_id = self.next_id() + self.score = new_track.score + + def get_velocity(self, ): + """ + Get four corner velocity + Return: + np.ndarray, shape (4, 2) + """ + if self.velocity_bl is None: + return np.zeros((4, 2)) + + return np.vstack([self.velocity_bl, + self.velocity_br, + self.velocity_tl, + self.velocity_tr]) + + @property + def kalman_score(self, ): + # return kalman-predicted score + return np.clip(self.kalman_filter.kf.x[3], self.score_thresh, 1.0) + def xysca_to_tlwh(self, ): + # used in @property tlwh() + x = self.kalman_filter.kf.x + ret = x[:5].copy() + ret[3], ret[4] = ret[4], ret[3] + ret = ret[:4] # xysa + + ret[2] = np.sqrt(x[2] * x[4]) # s * a = w + ret[3] = x[2] / ret[2] # s / w = h 
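+        # shift the center point by half the size: (cx, cy, w, h) -> (top-left x, top-left y, w, h)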
+ + ret[:2] -= ret[2:] / 2 + + return ret + + @staticmethod + def tlwh_to_xysca(tlwh): + # note that tlwh is actually tlwhc + ret = np.asarray(tlwh).copy() + ret[:2] += ret[2: 4] / 2 + ret[2] = tlwh[2] * tlwh[3] + ret[3] = tlwh[2] / tlwh[3] + ret[3], ret[4] = ret[4], ret[3] # xysac -> xysca + return ret class Tracklet_w_bbox_buffer(Tracklet): """ @@ -371,7 +552,7 @@ def deep_vec(self): class Tracklet_w_UCMC(Tracklet): """ - tracklet with a grounding map and uniform camera motion compensation + tracklet with a grounding map and uniform camera motion compensation, for UCMC Track """ configs = dict( From 0a18efb0c078c4f099001954f63d9aae6734eedb Mon Sep 17 00:00:00 2001 From: JackWoo0831 Date: Thu, 31 Oct 2024 11:55:27 +0800 Subject: [PATCH 2/7] update track_demo --- tracker/track_demo.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tracker/track_demo.py b/tracker/track_demo.py index 40a940c..18df472 100644 --- a/tracker/track_demo.py +++ b/tracker/track_demo.py @@ -25,6 +25,10 @@ from trackers.c_biou_tracker import C_BIoUTracker from trackers.ocsort_tracker import OCSortTracker from trackers.deepsort_tracker import DeepSortTracker +from trackers.strongsort_tracker import StrongSortTracker +from trackers.sparse_tracker import SparseTracker +from trackers.ucmc_tracker import UCMCTracker +from trackers.hybridsort_tracker import HybridSortTracker # YOLOX modules try: @@ -65,7 +69,11 @@ 'botsort': BotTracker, 'c_bioutrack': C_BIoUTracker, 'ocsort': OCSortTracker, - 'deepsort': DeepSortTracker + 'deepsort': DeepSortTracker, + 'strongsort': StrongSortTracker, + 'sparsetrack': SparseTracker, + 'ucmctrack': UCMCTracker, + 'hybridsort': HybridSortTracker } def get_args(): From 8b01276ca1d15b6a229449d94963362561309883 Mon Sep 17 00:00:00 2001 From: JackWoo0831 Date: Fri, 29 Nov 2024 14:10:12 +0800 Subject: [PATCH 3/7] fix bugs in c_biou track --- README.md | 1 + README_CN.md | 1 + tracker/trackers/c_biou_tracker.py | 44 +++++++--------------- tracker/trackers/tracklet.py | 60 ++++++++++++++++++------------ 4 files changed, 52 insertions(+), 54 deletions(-) diff --git a/README.md b/README.md index 86e6a5b..5674e79 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,7 @@ git checkout v2 # change to v2 branch !! ## 🗺️ Latest News +- ***2024.11.29*** Fix bugs of C-BIoU Track (the state prediction and updating bugs) - ***2024.10.24*** Add Hybrid SORT and fix some errors and bugs of OC-SORT. ## ❤️ Introduction diff --git a/README_CN.md b/README_CN.md index 483c5f8..8a92932 100644 --- a/README_CN.md +++ b/README_CN.md @@ -15,6 +15,7 @@ git checkout v2 # change to v2 branch !! 
## 🗺️ 最近更新 +- ***2024.11.29*** 修复了C-BIoU Tracker中轨迹状态的更新和预测的错误 - ***2024.10.24*** 增加了 Hybrid SORT 并且修复了OC-SORT的一些bug和错误。 diff --git a/tracker/trackers/c_biou_tracker.py b/tracker/trackers/c_biou_tracker.py index e0f4b77..222570a 100644 --- a/tracker/trackers/c_biou_tracker.py +++ b/tracker/trackers/c_biou_tracker.py @@ -39,18 +39,11 @@ def update(self, output_results, img, ori_img): categories = output_results[:, -1] remain_inds = scores > self.args.conf_thresh - inds_low = scores > 0.1 - inds_high = scores < self.args.conf_thresh - - inds_second = np.logical_and(inds_low, inds_high) - dets_second = bboxes[inds_second] dets = bboxes[remain_inds] cates = categories[remain_inds] - cates_second = categories[inds_second] scores_keep = scores[remain_inds] - scores_second = scores[inds_second] if len(dets) > 0: '''Detections''' @@ -68,7 +61,7 @@ def update(self, output_results, img, ori_img): else: tracked_tracklets.append(track) - ''' Step 2: First association, with high score detection boxes''' + ''' Step 2: First association, with small buffer IoU''' tracklet_pool = joint_tracklets(tracked_tracklets, self.lost_tracklets) # Predict the current location with Kalman @@ -89,23 +82,18 @@ def update(self, output_results, img, ori_img): track.re_activate(det, self.frame_id, new_id=False) refind_tracklets.append(track) - ''' Step 3: Second association, with low score detection boxes''' - # association the untrack to the low score detections - if len(dets_second) > 0: - '''Detections''' - detections_second = [Tracklet_w_bbox_buffer(tlwh, s, cate, motion=self.motion) for - (tlwh, s, cate) in zip(dets_second, scores_second, cates_second)] - else: - detections_second = [] - r_tracked_tracklets = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked] + unmatched_tracklets = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked] + unmatched_detections = [detections[i] for i in u_detection] + '''Step 3: Second association, with large buffer IoU''' - dists = buffered_iou_distance(r_tracked_tracklets, detections_second, level=2) + dists = buffered_iou_distance(unmatched_tracklets, unmatched_detections, level=2) + + matches, u_track, u_detection = linear_assignment(dists, thresh=0.5) - matches, u_track, u_detection_second = linear_assignment(dists, thresh=0.5) for itracked, idet in matches: - track = r_tracked_tracklets[itracked] - det = detections_second[idet] + track = unmatched_tracklets[itracked] + det = unmatched_detections[idet] if track.state == TrackState.Tracked: track.update(det, self.frame_id) activated_tracklets.append(track) @@ -113,16 +101,10 @@ def update(self, output_results, img, ori_img): track.re_activate(det, self.frame_id, new_id=False) refind_tracklets.append(track) - for it in u_track: - track = r_tracked_tracklets[it] - if not track.state == TrackState.Lost: - track.mark_lost() - lost_tracklets.append(track) - '''Deal with unconfirmed tracks, usually tracks with only one beginning frame''' - detections = [detections[i] for i in u_detection] + detections = [unmatched_detections[i] for i in u_detection] dists = buffered_iou_distance(unconfirmed, detections, level=1) - + matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7) for itracked, idet in matches: @@ -133,7 +115,7 @@ def update(self, output_results, img, ori_img): track.mark_removed() removed_tracklets.append(track) - """ Step 4: Init new tracklets""" + '''Step 4. 
Inital new tracks''' for inew in u_detection: track = detections[inew] if track.score < self.det_thresh: @@ -141,7 +123,7 @@ def update(self, output_results, img, ori_img): track.activate(self.frame_id) activated_tracklets.append(track) - """ Step 5: Update state""" + ''' Step 5: Update state''' for track in self.lost_tracklets: if self.frame_id - track.end_frame > self.max_time_lost: track.mark_removed() diff --git a/tracker/trackers/tracklet.py b/tracker/trackers/tracklet.py index c657ef9..50c2bc5 100644 --- a/tracker/trackers/tracklet.py +++ b/tracker/trackers/tracklet.py @@ -438,7 +438,14 @@ class Tracklet_w_bbox_buffer(Tracklet): Tracklet class with buffer of bbox, for C_BIoU track. """ def __init__(self, tlwh, score, category, motion='byte'): - super().__init__(tlwh, score, category, motion) + # initial position + self._tlwh = np.asarray(tlwh, dtype=np.float) + self.is_activated = False + + self.score = score + self.category = category + + # Note in C-BIoU tracker the kalman filter is abandoned # params in motion state self.b1, self.b2, self.n = 0.3, 0.5, 5 @@ -448,6 +455,7 @@ def __init__(self, tlwh, score, category, motion='byte'): self.buffer_bbox1 = self.get_buffer_bbox(level=1) self.buffer_bbox2 = self.get_buffer_bbox(level=2) # motion state, s^{t + \delta} = o^t + (\delta / n) * \sum_{i=t-n+1}^t(o^i - o^{i-1}) = o^t + (\delta / n) * (o^t - o^{t - n}) + self.motion_state0 = self._tlwh # original tlwh self.motion_state1 = self.buffer_bbox1.copy() self.motion_state2 = self.buffer_bbox2.copy() @@ -483,22 +491,38 @@ def re_activate(self, new_track, frame_id, new_id=False): # update stored bbox if (len(self.origin_bbox_buffer) > self.n): self.origin_bbox_buffer.popleft() - self.origin_bbox_buffer.append(self._tlwh) - else: - self.origin_bbox_buffer.append(self._tlwh) + + self.origin_bbox_buffer.append(self._tlwh) self.buffer_bbox1 = self.get_buffer_bbox(level=1) self.buffer_bbox2 = self.get_buffer_bbox(level=2) + + self.motion_state0 = self._tlwh self.motion_state1 = self.buffer_bbox1.copy() self.motion_state2 = self.buffer_bbox2.copy() + def predict(self): + # Note that in C-BIoU Tracker, no need to use Kalman Filter + self.time_since_update += 1 + + # Average motion model: s^{t + \delta} = o^t + (\delta / n) * (o^t - o^{t - n}) + assert len(self.origin_bbox_buffer), 'The bbox buffer is empty' + + motion_state = self.origin_bbox_buffer[-1] + \ + (self.time_since_update / len(self.origin_bbox_buffer)) * (self.origin_bbox_buffer[-1] - self.origin_bbox_buffer[0]) + + self.motion_state0 = motion_state + self.motion_state1 = self.get_buffer_bbox(level=1, bbox=motion_state) + self.motion_state2 = self.get_buffer_bbox(level=2, bbox=motion_state) + + def update(self, new_track, frame_id): self.frame_id = frame_id new_tlwh = new_track.tlwh self.score = new_track.score - self.kalman_filter.update(self.convert_func(new_tlwh)) + # self.kalman_filter.update(self.convert_func(new_tlwh)) # no need to use Kalman Filter self.state = TrackState.Tracked self.is_activated = True @@ -508,25 +532,15 @@ def update(self, new_track, frame_id): # update stored bbox if (len(self.origin_bbox_buffer) > self.n): self.origin_bbox_buffer.popleft() - self.origin_bbox_buffer.append(new_tlwh) - else: - self.origin_bbox_buffer.append(new_tlwh) - - # update motion state - if self.time_since_update: # have some unmatched frames - if len(self.origin_bbox_buffer) < self.n: - self.motion_state1 = self.get_buffer_bbox(level=1, bbox=new_tlwh) - self.motion_state2 = self.get_buffer_bbox(level=2, bbox=new_tlwh) - else: # s^{t 
+ \delta} = o^t + (\delta / n) * (o^t - o^{t - n}) - motion_state = self.origin_bbox_buffer[-1] + \ - (self.time_since_update / self.n) * (self.origin_bbox_buffer[-1] - self.origin_bbox_buffer[0]) - self.motion_state1 = self.get_buffer_bbox(level=1, bbox=motion_state) - self.motion_state2 = self.get_buffer_bbox(level=2, bbox=motion_state) - - else: # no unmatched frames, use current detection as motion state - self.motion_state1 = self.get_buffer_bbox(level=1, bbox=new_tlwh) - self.motion_state2 = self.get_buffer_bbox(level=2, bbox=new_tlwh) + self.origin_bbox_buffer.append(new_tlwh) + # Drop kalman filter, rewrite the tlwh function + @property + def tlwh(self): + """Get current position in bounding box format `(top left x, top left y, + width, height)`. + """ + return self.motion_state0 class Tracklet_w_depth(Tracklet): """ From c3d0c2f1daef8f647661bb637ffe7344f019c07b Mon Sep 17 00:00:00 2001 From: JackWoo0831 Date: Thu, 3 Apr 2025 21:38:48 +0800 Subject: [PATCH 4/7] support the newest ultralytics --- README.md | 44 ++++++---- README_CN.md | 44 ++++++---- tracker/config_files/visdrone_part.yaml | 2 +- tracker/track.py | 23 ++--- tracker/track_demo.py | 19 +++-- tracker/tracker_dataloader.py | 12 +-- tracker/trackers/hybridsort_tracker.py | 6 +- .../kalman_filters/hybridsort_kalman.py | 84 +------------------ .../trackers/kalman_filters/ocsort_kalman.py | 2 + tracker/trackers/matching.py | 9 +- tracker/trackers/ocsort_tracker.py | 2 +- tracker/trackers/tracklet.py | 8 +- .../data_cfgs/airmot.yaml | 0 .../data_cfgs/uavdt.yaml | 0 .../data_cfgs/visdrone.yaml | 0 .../data_cfgs/visdrone_det.yaml | 0 .../postprocess.py | 0 .../train_yolo_ultralytics.py} | 6 +- 18 files changed, 109 insertions(+), 152 deletions(-) rename tracker/{yolov8_utils => yolo_ultralytics_utils}/data_cfgs/airmot.yaml (100%) rename tracker/{yolov8_utils => yolo_ultralytics_utils}/data_cfgs/uavdt.yaml (100%) rename tracker/{yolov8_utils => yolo_ultralytics_utils}/data_cfgs/visdrone.yaml (100%) rename tracker/{yolov8_utils => yolo_ultralytics_utils}/data_cfgs/visdrone_det.yaml (100%) rename tracker/{yolov8_utils => yolo_ultralytics_utils}/postprocess.py (100%) rename tracker/{yolov8_utils/train_yolov8.py => yolo_ultralytics_utils/train_yolo_ultralytics.py} (78%) diff --git a/README.md b/README.md index 5674e79..fe40c3a 100644 --- a/README.md +++ b/README.md @@ -21,8 +21,7 @@ git checkout v2 # change to v2 branch !! ## 🗺️ Latest News -- ***2024.11.29*** Fix bugs of C-BIoU Track (the state prediction and updating bugs) -- ***2024.10.24*** Add Hybrid SORT and fix some errors and bugs of OC-SORT. +- ***2025.4.3*** Support the newest ultralytics version (YOLO v3 ~ v12) and fix some bugs of hybrid sort. ## ❤️ Introduction @@ -30,7 +29,7 @@ This repo is a toolbox that implements the **tracking-by-detection paradigm mult - YOLOX - YOLO v7 -- YOLO v8, +- YOLO v3 ~ v12 by [ultralytics](https://docs.ultralytics.com/), and the tracker supports: @@ -61,7 +60,7 @@ The highlights are: ## 🔨 Installation The basic env is: -- Ubuntu 18.04 +- Ubuntu 20.04 - Python:3.9, Pytorch: 1.12 Run following commond to install other packages: @@ -86,12 +85,14 @@ python3 setup.py develop There is no need to execute addtional steps as the repo itself is based on YOLOv7. -3. YOLO v8: +3. 
YOLO series by ultralytics: Please run: ```bash -pip3 install ultralytics==8.0.94 +pip3 install ultralytics +or +pip3 install --upgrade ultralytics ``` ### 📑 Data preparation @@ -148,7 +149,11 @@ Some references may help you: python train_aux.py --dataset visdrone --workers 8 --device <$GPU_id$> --batch-size 16 --data data/visdrone_all.yaml --img 1280 1280 --cfg cfg/training/yolov7-w6.yaml --weights <$YOLO v7 pretrained model path$> --name yolov7-w6-custom --hyp data/hyp.scratch.custom.yaml ``` -- YOLO v8: `tracker/yolov8_utils/train_yolov8.py` +- YOLO series (YOLO v3 ~ v12) by ultralytics:: `tracker/yolo_ultralytics_utils/train_yolo_ultralytics.py` + +```shell +python tracker/yolo_ultralytics_utils/train_yolo_ultralytics.py --model_weight weights/yolo11m.pt --data_cfg tracker/yolo_ultralytics_utils/data_cfgs/visdrone_det.yaml --epochs 30 --batch_size 8 --img_sz 1280 --device 0 +``` @@ -157,42 +162,47 @@ python train_aux.py --dataset visdrone --workers 8 --device <$GPU_id$> --batch-s If you only want to run a demo: ```bash -python tracker/track_demo.py --obj ${video path or images folder path} --detector ${yolox, yolov8 or yolov7} --tracker ${tracker name} --kalman_format ${kalman format, sort, byte, ...} --detector_model_path ${detector weight path} --save_images +python tracker/track_demo.py --obj ${video path or images folder path} --detector ${yolox, yolov7 or yolo_ultra} --tracker ${tracker name} --kalman_format ${kalman format, sort, byte, ...} --detector_model_path ${detector weight path} --save_images ``` +> ❗❗Important Notes +> +> If you want to use the detector trained by **ultralytics**, the `--detector` argument **must include** the substring `ultra`, such as +> `--detector yolo_ultra`, `--detector yolo_ultra_v8`, `--detector yolov11_ultra`, `--detector yolo12_ultralytics`, etc. 
+ For example: ```bash -python tracker/track_demo.py --obj M0203.mp4 --detector yolov8 --tracker deepsort --kalman_format byte --detector_model_path weights/yolov8l_UAVDT_60epochs_20230509.pt --save_images +python tracker/track_demo.py --obj M0203.mp4 --detector yolo_ultra_v8 --tracker deepsort --kalman_format byte --detector_model_path weights/yolov8l_UAVDT_60epochs_20230509.pt --save_images ``` If you want to run trackers on dataset: ```bash -python tracker/track.py --dataset ${dataset name, related with the yaml file} --detector ${yolox, yolov8 or yolov7} --tracker ${tracker name} --kalman_format ${kalman format, sort, byte, ...} --detector_model_path ${detector weight path} +python tracker/track.py --dataset ${dataset name, related with the yaml file} --detector ${yolox, yolo_ultra_v8 or yolov7} --tracker ${tracker name} --kalman_format ${kalman format, sort, byte, ...} --detector_model_path ${detector weight path} ``` For example: -- SORT: `python tracker/track.py --dataset uavdt --detector yolov8 --tracker sort --kalman_format sort --detector_model_path weights/yolov8l_UAVDT_60epochs_20230509.pt ` +- SORT: `python tracker/track.py --dataset uavdt --detector yolo_ultra_v8 --tracker sort --kalman_format sort --detector_model_path weights/yolov8l_UAVDT_60epochs_20230509.pt ` -- DeepSORT: `python tracker/track.py --dataset uavdt --detector yolov7 --tracker deepsort --kalman_format byte --detector_model_path weights/yolov7_UAVDT_35epochs_20230507.pt` +- DeepSORT: `python tracker/track.py --dataset visdrone_part --detector yolov7 --tracker deepsort --kalman_format byte --detector_model_path weights/yolov8l_VisDroneDet_35epochs_20230605.pt` -- ByteTrack: `python tracker/track.py --dataset uavdt --detector yolov8 --tracker bytetrack --kalman_format byte --detector_model_path weights/yolov8l_UAVDT_60epochs_20230509.pt` +- ByteTrack: `python tracker/track.py --dataset uavdt --detector yolo_ultra_v8 --tracker bytetrack --kalman_format byte --detector_model_path weights/yolov8l_UAVDT_60epochs_20230509.pt` - OCSort: `python tracker/track.py --dataset mot17 --detector yolox --tracker ocsort --kalman_format ocsort --detector_model_path weights/bytetrack_m_mot17.pth.tar` -- C-BIoU Track: `python tracker/track.py --dataset uavdt --detector yolov8 --tracker c_bioutrack --kalman_format bot --detector_model_path weights/yolov8l_UAVDT_60epochs_20230509.pt` +- C-BIoU Track: `python tracker/track.py --dataset uavdt --detector yolo_ultra_v8 --tracker c_bioutrack --kalman_format bot --detector_model_path weights/yolov8l_UAVDT_60epochs_20230509.pt` - BoT-SORT: `python tracker/track.py --dataset uavdt --detector yolox --tracker botsort --kalman_format bot --detector_model_path weights/yolox_m_uavdt_50epochs.pth.tar` -- Strong SORT: `python tracker/track.py --dataset uavdt --detector yolov8 --tracker strongsort --kalman_format strongsort --detector_model_path weights/yolov8l_UAVDT_60epochs_20230509.pt` +- Strong SORT: `python tracker/track.py --dataset visdrone_part --detector yolo_ultra_v8 --tracker strongsort --kalman_format strongsort --detector_model_path weights/yolov8l_VisDrone_35epochs_20230509.pt` -- Sparse Track: `python tracker/track.py --dataset uavdt --detector yolov8 --tracker sparsetrack --kalman_format bot --detector_model_path weights/yolov8l_UAVDT_60epochs_20230509.pt` +- Sparse Track: `python tracker/track.py --dataset uavdt --detector yolo_ultra_v11 --tracker sparsetrack --kalman_format bot --detector_model_path weights/yolov8l_UAVDT_60epochs_20230509.pt` - UCMC Track: `python 
tracker/track.py --dataset mot17 --detector yolox --tracker ucmctrack --kalman_format ucmc --detector_model_path weights/bytetrack_m_mot17.pth.tar --camera_parameter_folder ./tracker/cam_param_files` -- Hybrid SORT: `python tracker/track.py --dataset mot17 --detector yolox --tracker hybridsort --kalman_format hybridsort --detector_model_path weights/bytetrack_m_mot17.pth.tar --save_images` +- Hybrid SORT: `python tracker/track.py --dataset visdrone_part --detector yolo_ultra --tracker hybridsort --kalman_format hybridsort --detector_model_path weights/yolov8l_VisDrone_35epochs_20230509.pt --save_images` > **Important notes for UCMC Track:** > diff --git a/README_CN.md b/README_CN.md index 8a92932..3920b75 100644 --- a/README_CN.md +++ b/README_CN.md @@ -15,8 +15,7 @@ git checkout v2 # change to v2 branch !! ## 🗺️ 最近更新 -- ***2024.11.29*** 修复了C-BIoU Tracker中轨迹状态的更新和预测的错误 -- ***2024.10.24*** 增加了 Hybrid SORT 并且修复了OC-SORT的一些bug和错误。 +- ***2025.4.3*** 增加了ultralytics库最新版本的支持,修复了hybrid sort中的一些bug. ## ❤️ 介绍 @@ -25,7 +24,7 @@ git checkout v2 # change to v2 branch !! - YOLOX - YOLO v7 -- YOLO v8, +- YOLO v3 ~ v12 by [ultralytics](https://docs.ultralytics.com/), 跟踪器支持: @@ -60,7 +59,7 @@ REID模型支持: ## 🔨 安装 基本环境是: -- Ubuntu 18.04 +- Ubuntu 20.04 - Python:3.9, Pytorch: 1.12 运行以下命令安装其他包: @@ -85,12 +84,14 @@ python3 setup.py develop 由于仓库本身就是基于YOLOv7的,因此无需执行额外的步骤。 -3. YOLO v8: +3. Ultralytics的YOLO系列模型: 请运行: ```bash -pip3 install ultralytics==8.0.94 +pip3 install ultralytics +or +pip3 install --upgrade ultralytics ``` ### 📑 数据准备 @@ -147,7 +148,11 @@ CATEGORY_DICT: python train_aux.py --dataset visdrone --workers 8 --device <$GPU_id$> --batch-size 16 --data data/visdrone_all.yaml --img 1280 1280 --cfg cfg/training/yolov7-w6.yaml --weights <$YOLO v7 pretrained model path$> --name yolov7-w6-custom --hyp data/hyp.scratch.custom.yaml ``` -- YOLO v8: `tracker/yolov8_utils/train_yolov8.py` +- Ultralytics的YOLO系列模型 (YOLO v3 ~ v12): `tracker/yolo_ultralytics_utils/train_yolo_ultralytics.py` + +```shell +python tracker/yolo_ultralytics_utils/train_yolo_ultralytics.py --model_weight weights/yolo11m.pt --data_cfg tracker/yolo_ultralytics_utils/data_cfgs/visdrone_det.yaml --epochs 30 --batch_size 8 --img_sz 1280 --device 0 +``` @@ -156,9 +161,14 @@ python train_aux.py --dataset visdrone --workers 8 --device <$GPU_id$> --batch-s 如果你只是想运行一个demo: ```bash -python tracker/track_demo.py --obj ${video path or images folder path} --detector ${yolox, yolov8 or yolov7} --tracker ${tracker name} --kalman_format ${kalman format, sort, byte, ...} --detector_model_path ${detector weight path} --save_images +python tracker/track_demo.py --obj ${video path or images folder path} --detector ${yolox, yolov7 or yolo_ultra} --tracker ${tracker name} --kalman_format ${kalman format, sort, byte, ...} --detector_model_path ${detector weight path} --save_images ``` +> ❗❗重要提示 +> +> 如果你是通过 **ultralytics** 库训练检测模型, 命令里的`--detector`参数 **必须包含**`ultra`字段, 例如 +> `--detector yolo_ultra`, `--detector yolo_ultra_v8`, `--detector yolov11_ultra`, `--detector yolo12_ultralytics`, 等等. 
+ 例如: ```bash @@ -168,26 +178,30 @@ python tracker/track_demo.py --obj M0203.mp4 --detector yolov8 --tracker deepsor 如果你想在数据集上测试: ```bash -python tracker/track.py --dataset ${dataset name, related with the yaml file} --detector ${yolox, yolov8 or yolov7} --tracker ${tracker name} --kalman_format ${kalman format, sort, byte, ...} --detector_model_path ${detector weight path} +python tracker/track.py --dataset ${dataset name, related with the yaml file} --detector ${yolox, yolo_ultra_v8 or yolov7} --tracker ${tracker name} --kalman_format ${kalman format, sort, byte, ...} --detector_model_path ${detector weight path} ``` 例如: -- SORT: `python tracker/track.py --dataset uavdt --detector yolov8 --tracker sort --kalman_format sort --detector_model_path weights/yolov8l_UAVDT_60epochs_20230509.pt ` +- SORT: `python tracker/track.py --dataset uavdt --detector yolo_ultra_v8 --tracker sort --kalman_format sort --detector_model_path weights/yolov8l_UAVDT_60epochs_20230509.pt ` -- DeepSORT: `python tracker/track.py --dataset uavdt --detector yolov7 --tracker deepsort --kalman_format byte --detector_model_path weights/yolov7_UAVDT_35epochs_20230507.pt` +- DeepSORT: `python tracker/track.py --dataset visdrone_part --detector yolov7 --tracker deepsort --kalman_format byte --detector_model_path weights/yolov8l_VisDroneDet_35epochs_20230605.pt` -- ByteTrack: `python tracker/track.py --dataset uavdt --detector yolov8 --tracker bytetrack --kalman_format byte --detector_model_path weights/yolov8l_UAVDT_60epochs_20230509.pt` +- ByteTrack: `python tracker/track.py --dataset uavdt --detector yolo_ultra_v8 --tracker bytetrack --kalman_format byte --detector_model_path weights/yolov8l_UAVDT_60epochs_20230509.pt` -- OCSort: `python tracker/track.py --dataset uavdt --detector yolov8 --tracker ocsort --kalman_format ocsort --detector_model_path weights/yolov8l_UAVDT_60epochs_20230509.pt` +- OCSort: `python tracker/track.py --dataset mot17 --detector yolox --tracker ocsort --kalman_format ocsort --detector_model_path weights/bytetrack_m_mot17.pth.tar` -- C-BIoU Track: `python tracker/track.py --dataset uavdt --detector yolov8 --tracker c_bioutrack --kalman_format bot --detector_model_path weights/yolov8l_UAVDT_60epochs_20230509.pt` +- C-BIoU Track: `python tracker/track.py --dataset uavdt --detector yolo_ultra_v8 --tracker c_bioutrack --kalman_format bot --detector_model_path weights/yolov8l_UAVDT_60epochs_20230509.pt` - BoT-SORT: `python tracker/track.py --dataset uavdt --detector yolox --tracker botsort --kalman_format bot --detector_model_path weights/yolox_m_uavdt_50epochs.pth.tar` +- Strong SORT: `python tracker/track.py --dataset visdrone_part --detector yolo_ultra_v8 --tracker strongsort --kalman_format strongsort --detector_model_path weights/yolov8l_VisDroneDet_35epochs_20230605.pt` + +- Sparse Track: `python tracker/track.py --dataset uavdt --detector yolo_ultra_v11 --tracker sparsetrack --kalman_format bot --detector_model_path weights/yolov8l_UAVDT_60epochs_20230509.pt` + - UCMC Track: `python tracker/track.py --dataset mot17 --detector yolox --tracker ucmctrack --kalman_format ucmc --detector_model_path weights/bytetrack_m_mot17.pth.tar --camera_parameter_folder ./tracker/cam_param_files` -- Hybrid SORT: `python tracker/track.py --dataset mot17 --detector yolox --tracker hybridsort --kalman_format hybridsort --detector_model_path weights/bytetrack_m_mot17.pth.tar --save_images` +- Hybrid SORT: `python tracker/track.py --dataset visdrone_part --detector yolo_ultra --tracker hybridsort --kalman_format 
hybridsort --detector_model_path weights/yolov8l_VisDrone_35epochs_20230509.pt --save_images` >**UCMC Track的重要提示:** > diff --git a/tracker/config_files/visdrone_part.yaml b/tracker/config_files/visdrone_part.yaml index 5b2ea60..3dbb70d 100644 --- a/tracker/config_files/visdrone_part.yaml +++ b/tracker/config_files/visdrone_part.yaml @@ -1,7 +1,7 @@ # Config file of VisDrone dataset DATASET_ROOT: '/data/wujiapeng/datasets/VisDrone2019/VisDrone2019' -SPLIT: test +SPLIT: val CATEGORY_NAMES: - 'pedestrain' - 'car' diff --git a/tracker/track.py b/tracker/track.py index fe102ed..1f91194 100644 --- a/tracker/track.py +++ b/tracker/track.py @@ -54,14 +54,14 @@ logger.warning('Load yolov7 fail. If you want to use yolov7, please check the installation.') pass -# YOLOv8 modules +# Ultralytics YOLO modules (support YOLOv3 ~ YOLOv12) try: from ultralytics import YOLO - from yolov8_utils.postprocess import postprocess as postprocess_yolov8 + from yolo_ultralytics_utils.postprocess import postprocess as postprocess_ultralytics except Exception as e: logger.warning(e) - logger.warning('Load yolov8 fail. If you want to use yolov8, please check the installation.') + logger.warning('Load ultralytics fail. If you want to use ultralytics, please check the installation.') pass TRACKER_DICT = { @@ -83,7 +83,7 @@ def get_args(): """general""" parser.add_argument('--dataset', type=str, default='visdrone_part', help='visdrone, mot17, etc.') - parser.add_argument('--detector', type=str, default='yolov8', help='yolov7, yolox, etc.') + parser.add_argument('--detector', type=str, default='yolo_ultralytics_v8', help='yolov7, yolox, etc.') parser.add_argument('--tracker', type=str, default='sort', help='sort, deepsort, etc') parser.add_argument('--reid_model', type=str, default='osnet_x0_25', help='osnet or deppsort') @@ -170,7 +170,7 @@ def main(args, dataset_cfgs): logger.info(f'Now detector is on device {next(model.parameters()).device}') - elif args.detector == 'yolov8': + elif 'ultra' in args.detector: logger.info(f"loading detector {args.detector} checkpoint {args.detector_model_path}") model = YOLO(args.detector_model_path) @@ -182,6 +182,9 @@ def main(args, dataset_cfgs): else: logger.error(f"detector {args.detector} is not supprted") + logger.error("If you want to use the yolo v8 by ultralytics, please specify the `--detector` \ + as the string including the substring `ultra`, \ + such as `yolo_ultra_v8` or `yolo11_ultralytics`") exit(0) """3. 
load sequences""" @@ -226,7 +229,7 @@ def main(args, dataset_cfgs): # start timing this frame timer.tic() - if args.detector == 'yolov8': + if 'ultra' in args.detector: img = img.squeeze(0).cpu().numpy() else: @@ -237,8 +240,8 @@ def main(args, dataset_cfgs): # get detector output with torch.no_grad(): - if args.detector == 'yolov8': - output = model.predict(img, conf=args.conf_thresh, iou=args.nms_thresh) + if 'ultra' in args.detector: + output = model.predict(img, conf=args.conf_thresh, iou=args.nms_thresh, verbose=False) else: output = model(img) @@ -250,8 +253,8 @@ def main(args, dataset_cfgs): elif args.detector == 'yolov7': output = postprocess_yolov7(output, args.conf_thresh, args.nms_thresh, img.shape[2:], ori_img.shape) - elif args.detector == 'yolov8': - output = postprocess_yolov8(output) + elif 'ultra' in args.detector: + output = postprocess_ultralytics(output) else: raise NotImplementedError diff --git a/tracker/track_demo.py b/tracker/track_demo.py index 18df472..1e29191 100644 --- a/tracker/track_demo.py +++ b/tracker/track_demo.py @@ -53,10 +53,10 @@ logger.warning('Load yolov7 fail. If you want to use yolov7, please check the installation.') pass -# YOLOv8 modules +# Ultralytics YOLO modules (support YOLOv3 ~ YOLOv12) try: from ultralytics import YOLO - from yolov8_utils.postprocess import postprocess as postprocess_yolov8 + from yolo_ultralytics_utils.postprocess import postprocess as postprocess_ultralytics except Exception as e: logger.warning(e) @@ -83,7 +83,7 @@ def get_args(): """general""" parser.add_argument('--obj', type=str, required=True, default='demo.mp4', help='video or images folder PATH') - parser.add_argument('--detector', type=str, default='yolov8', help='yolov7, yolox, etc.') + parser.add_argument('--detector', type=str, default='yolo_ultralytics_v8', help='yolov7, yolox, etc.') parser.add_argument('--tracker', type=str, default='sort', help='sort, deepsort, etc') parser.add_argument('--reid_model', type=str, default='osnet_x0_25', help='osnet or deppsort') @@ -168,7 +168,7 @@ def main(args): logger.info(f'Now detector is on device {next(model.parameters()).device}') - elif args.detector == 'yolov8': + elif 'ultra' in args.detector: logger.info(f"loading detector {args.detector} checkpoint {args.detector_model_path}") model = YOLO(args.detector_model_path) @@ -180,6 +180,9 @@ def main(args): else: logger.error(f"detector {args.detector} is not supprted") + logger.error("If you want to use the yolo v8 by ultralytics, please specify the `--detector` \ + as the string including the substring `ultra`, \ + such as `yolo_ultra_v8` or `yolo11_ultralytics`") exit(0) """3. load sequences""" @@ -200,7 +203,7 @@ def main(args): """4. 
Tracking""" for frame_idx, (ori_img, img) in process_bar: - if args.detector == 'yolov8': + if 'ultra' in args.detector: img = img.squeeze(0).cpu().numpy() else: @@ -211,7 +214,7 @@ def main(args): # get detector output with torch.no_grad(): - if args.detector == 'yolov8': + if 'ultra' in args.detector: output = model.predict(img, conf=args.conf_thresh, iou=args.nms_thresh) else: output = model(img) @@ -224,8 +227,8 @@ def main(args): elif args.detector == 'yolov7': output = postprocess_yolov7(output, args.conf_thresh, args.nms_thresh, img.shape[2:], ori_img.shape) - elif args.detector == 'yolov8': - output = postprocess_yolov8(output) + elif 'ultra' in args.detector: + output = postprocess_ultralytics(output) else: raise NotImplementedError diff --git a/tracker/tracker_dataloader.py b/tracker/tracker_dataloader.py index de5da0e..7490cb3 100644 --- a/tracker/tracker_dataloader.py +++ b/tracker/tracker_dataloader.py @@ -44,8 +44,8 @@ def __getitem__(self, idx): return self._getitem_yolox(idx) elif self.model == 'yolov7': return self._getitem_yolov7(idx) - elif self.model == 'yolov8': - return self._getitem_yolov8(idx) + else: + return self._getitem_yolo_ultralytics(idx) def _getitem_yolox(self, idx): @@ -67,7 +67,7 @@ def _getitem_yolov7(self, idx): return torch.from_numpy(img), img_resized - def _getitem_yolov8(self, idx): + def _getitem_yolo_ultralytics(self, idx): img = cv2.imread(osp.join(self.seq_path, self.imgs_in_seq[idx])) # (h, w, c) # img = self._preprocess_yolov8(img) @@ -196,8 +196,8 @@ def __getitem__(self, idx): return self._getitem_yolox(img) elif self.model == 'yolov7': return self._getitem_yolov7(img) - elif self.model == 'yolov8': - return self._getitem_yolov8(img) + else: + return self._getitem_yolo_ultralytics(img) def _getitem_yolox(self, img): @@ -216,7 +216,7 @@ def _getitem_yolov7(self, img): return torch.from_numpy(img), img_resized - def _getitem_yolov8(self, img): + def _getitem_yolo_ultralytics(self, img): # img = self._preprocess_yolov8(img) diff --git a/tracker/trackers/hybridsort_tracker.py b/tracker/trackers/hybridsort_tracker.py index 711d293..0dc54a8 100644 --- a/tracker/trackers/hybridsort_tracker.py +++ b/tracker/trackers/hybridsort_tracker.py @@ -34,7 +34,7 @@ def k_previous_obs(observations, cur_age, k): for i in range(k): dt = k - i if cur_age - dt in observations: - return observations[cur_age-dt] + return observations[cur_age - dt] max_age = max(observations.keys()) return observations[max_age] @@ -128,7 +128,7 @@ def update(self, output_results, img, ori_img): dists = hm_iou_distance(r_tracked_tracklets, detections_second) - score_distance(r_tracked_tracklets, detections_second) - matches, u_track, u_detection_second = linear_assignment(dists, thresh=0.5) + matches, u_track, u_detection_second = linear_assignment(-1 * dists, thresh=0.0) for itracked, idet in matches: track = r_tracked_tracklets[itracked] det = detections_second[idet] @@ -147,7 +147,7 @@ def update(self, output_results, img, ori_img): dists = hm_iou_distance(atracks=[t.last_observation[: 4] for t in r_tracked_tracklets], # parse bbox directly btracks=[d.tlbr for d in r_detections]) - matches, u_track, u_detection = linear_assignment(dists, thresh=0.5) + matches, u_track, u_detection = linear_assignment(-1 * dists, thresh=0.0) for itracked, idet in matches: track = r_tracked_tracklets[itracked] diff --git a/tracker/trackers/kalman_filters/hybridsort_kalman.py b/tracker/trackers/kalman_filters/hybridsort_kalman.py index 751c185..fd6ff86 100644 --- 
a/tracker/trackers/kalman_filters/hybridsort_kalman.py +++ b/tracker/trackers/kalman_filters/hybridsort_kalman.py @@ -28,11 +28,6 @@ def __init__(self, ): self.kf.Q[-1, -1] *= 0.01 # score self.kf.Q[-2, -2] *= 0.01 self.kf.Q[5:, 5:] *= 0.01 - - # keep all observations - self.history_obs = [] - self.attr_saved = None - self.observed = False def initialize(self, observation): """ @@ -54,91 +49,14 @@ def predict(self, ): self.kf.predict() - def _freeze(self, ): - """ freeze all the param of Kalman - - """ - self.attr_saved = deepcopy(self.kf.__dict__) - - def _unfreeze(self, ): - """ when observe an lost object again, use the virtual trajectory - - """ - if self.attr_saved is not None: - new_history = deepcopy(self.history_obs) - self.kf.__dict__ = self.attr_saved - - self.history_obs = self.history_obs[:-1] - - occur = [int(d is None) for d in new_history] - indices = np.where(np.array(occur)==0)[0] - index1 = indices[-2] - index2 = indices[-1] - box1 = new_history[index1] - x1, y1, s1, c1, r1 = box1 - w1 = np.sqrt(s1 * r1) - h1 = np.sqrt(s1 / r1) - box2 = new_history[index2] - x2, y2, s2, c2, r2 = box2 - w2 = np.sqrt(s2 * r2) - h2 = np.sqrt(s2 / r2) - time_gap = index2 - index1 - dx = (x2-x1)/time_gap - dy = (y2-y1)/time_gap - dw = (w2-w1)/time_gap - dh = (h2-h1)/time_gap - dc = (c2-c1)/time_gap - - for i in range(index2 - index1): - """ - The default virtual trajectory generation is by linear - motion (constant speed hypothesis), you could modify this - part to implement your own. - """ - x = x1 + (i+1) * dx - y = y1 + (i+1) * dy - w = w1 + (i+1) * dw - h = h1 + (i+1) * dh - s = w * h - r = w / float(h) - - c = c1 + (i+1) * dc - new_box = np.array([x, y, s, c, r]).reshape((5, 1)) - """ - I still use predict-update loop here to refresh the parameters, - but this can be faster by directly modifying the internal parameters - as suggested in the paper. 
I keep this naive but slow way for - easy read and understanding - """ - self.kf.update(new_box) - if not i == (index2-index1-1): - self.kf.predict() - def update(self, z): """ update step - - For simplicity, directly change the self.kf as OCSORT modify the intrinsic Kalman Args: z: observation x-y-s-a format """ - self.history_obs.append(z) - - if z is None: - if self.observed: - self._freeze() - self.observed = False - - self.kf.update(z) - - else: - if not self.observed: # Get observation, use online smoothing to re-update parameters - self._unfreeze() - - self.kf.update(z) - - self.observed = True + self.kf.update(z) diff --git a/tracker/trackers/kalman_filters/ocsort_kalman.py b/tracker/trackers/kalman_filters/ocsort_kalman.py index 6ded258..c5e01a5 100644 --- a/tracker/trackers/kalman_filters/ocsort_kalman.py +++ b/tracker/trackers/kalman_filters/ocsort_kalman.py @@ -132,6 +132,8 @@ def update(self, z): self.kf.update(z) + return + else: if not self.observed: # Get observation, use online smoothing to re-update parameters self._unfreeze() diff --git a/tracker/trackers/matching.py b/tracker/trackers/matching.py index 6ff8147..c3c66d1 100644 --- a/tracker/trackers/matching.py +++ b/tracker/trackers/matching.py @@ -196,7 +196,7 @@ def buffered_iou_distance(atracks, btracks, level=1): """ def observation_centric_association(tracklets, detections, velocities, previous_obs, vdc_weight=0.05, iou_threshold=0.3): - if(len(tracklets) == 0): + if len(tracklets) == 0 or len(detections) == 0: return np.empty((0, 2), dtype=int), tuple(range(len(tracklets))), tuple(range(len(detections))) # get numpy format bboxes @@ -237,7 +237,12 @@ def observation_centric_association(tracklets, detections, velocities, previous_ helper func of observation_centric_association (OC Sort) and association_weak_cues (Hybrid Sort) """ def speed_direction_batch(dets, tracks, mode='center'): + # check the dim of dets + if len(dets.shape) == 1: + dets = dets[np.newaxis, ...] 
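+    # note: the trailing axis added to `tracks` below turns the track centres into
+    # column vectors, so the centre differences with the detections broadcast to a
+    # (num_tracks, num_dets) matrix of pairwise speed directions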
+ tracks = tracks[..., np.newaxis] + if mode == 'center': CX1, CY1 = (dets[:, 0] + dets[:, 2]) / 2.0, (dets[:,1] + dets[:,3]) / 2.0 CX2, CY2 = (tracks[:, 0] + tracks[:, 2]) / 2.0, (tracks[:, 1] + tracks[:, 3]) / 2.0 @@ -329,7 +334,7 @@ def hm_iou_distance(atracks, btracks): def association_weak_cues(tracklets, detections, velocities, previous_obs, score_diff_weight=1.0, vdc_weight=0.05, iou_threshold=0.25): - if(len(tracklets) == 0): + if len(tracklets) == 0 or len(detections) == 0: return np.empty((0, 2), dtype=int), tuple(range(len(tracklets))), tuple(range(len(detections))) # get numpy format bboxes diff --git a/tracker/trackers/ocsort_tracker.py b/tracker/trackers/ocsort_tracker.py index 648c04a..c2084cc 100644 --- a/tracker/trackers/ocsort_tracker.py +++ b/tracker/trackers/ocsort_tracker.py @@ -34,7 +34,7 @@ def k_previous_obs(observations, cur_age, k): for i in range(k): dt = k - i if cur_age - dt in observations: - return observations[cur_age-dt] + return observations[cur_age - dt] max_age = max(observations.keys()) return observations[max_age] diff --git a/tracker/trackers/tracklet.py b/tracker/trackers/tracklet.py index 50c2bc5..5acfa67 100644 --- a/tracker/trackers/tracklet.py +++ b/tracker/trackers/tracklet.py @@ -399,10 +399,10 @@ def get_velocity(self, ): if self.velocity_bl is None: return np.zeros((4, 2)) - return np.vstack([self.velocity_bl, - self.velocity_br, - self.velocity_tl, - self.velocity_tr]) + return np.vstack([self.velocity_tl, + self.velocity_tr, + self.velocity_bl, + self.velocity_br, ]) @property def kalman_score(self, ): diff --git a/tracker/yolov8_utils/data_cfgs/airmot.yaml b/tracker/yolo_ultralytics_utils/data_cfgs/airmot.yaml similarity index 100% rename from tracker/yolov8_utils/data_cfgs/airmot.yaml rename to tracker/yolo_ultralytics_utils/data_cfgs/airmot.yaml diff --git a/tracker/yolov8_utils/data_cfgs/uavdt.yaml b/tracker/yolo_ultralytics_utils/data_cfgs/uavdt.yaml similarity index 100% rename from tracker/yolov8_utils/data_cfgs/uavdt.yaml rename to tracker/yolo_ultralytics_utils/data_cfgs/uavdt.yaml diff --git a/tracker/yolov8_utils/data_cfgs/visdrone.yaml b/tracker/yolo_ultralytics_utils/data_cfgs/visdrone.yaml similarity index 100% rename from tracker/yolov8_utils/data_cfgs/visdrone.yaml rename to tracker/yolo_ultralytics_utils/data_cfgs/visdrone.yaml diff --git a/tracker/yolov8_utils/data_cfgs/visdrone_det.yaml b/tracker/yolo_ultralytics_utils/data_cfgs/visdrone_det.yaml similarity index 100% rename from tracker/yolov8_utils/data_cfgs/visdrone_det.yaml rename to tracker/yolo_ultralytics_utils/data_cfgs/visdrone_det.yaml diff --git a/tracker/yolov8_utils/postprocess.py b/tracker/yolo_ultralytics_utils/postprocess.py similarity index 100% rename from tracker/yolov8_utils/postprocess.py rename to tracker/yolo_ultralytics_utils/postprocess.py diff --git a/tracker/yolov8_utils/train_yolov8.py b/tracker/yolo_ultralytics_utils/train_yolo_ultralytics.py similarity index 78% rename from tracker/yolov8_utils/train_yolov8.py rename to tracker/yolo_ultralytics_utils/train_yolo_ultralytics.py index c1665d1..50534f9 100644 --- a/tracker/yolov8_utils/train_yolov8.py +++ b/tracker/yolo_ultralytics_utils/train_yolo_ultralytics.py @@ -21,7 +21,7 @@ def main(args): if __name__ == '__main__': - parser = argparse.ArgumentParser("YOLO v8 train parser") + parser = argparse.ArgumentParser("YOLO train parser") parser.add_argument('--model', type=str, default='yolov8s.yaml', help='yaml or pt file') parser.add_argument('--model_weight', type=str, default='yolov8s.pt', 
help='') @@ -33,4 +33,6 @@ def main(args): args = parser.parse_args() - main(args) \ No newline at end of file + main(args) + +# python tracker/yolo_ultralytics_utils/train_yolo_ultralytics.py --model_weight weights/yolo11m.pt --data_cfg tracker/yolo_ultralytics_utils/data_cfgs/visdrone_det.yaml --epochs 30 --batch_size 8 --img_sz 1280 --device 0 \ No newline at end of file From 36581980604c80ebfa5c21c4cdda171f07d0582a Mon Sep 17 00:00:00 2001 From: JackWoo0831 Date: Mon, 7 Apr 2025 20:48:56 +0800 Subject: [PATCH 5/7] add more reid models and fix bugs --- tracker/track.py | 6 +- tracker/track_demo.py | 5 +- tracker/trackers/basetrack.py | 16 +- tracker/trackers/botsort_tracker.py | 96 ++---- tracker/trackers/byte_tracker.py | 61 +++- tracker/trackers/c_biou_tracker.py | 3 + .../cmc.py} | 0 tracker/trackers/deepsort_tracker.py | 84 +----- tracker/trackers/hybridsort_tracker.py | 5 +- .../trackers/kalman_filters/botsort_kalman.py | 14 +- .../kalman_filters/bytetrack_kalman.py | 7 +- .../kalman_filters/hybridsort_kalman.py | 2 +- .../trackers/kalman_filters/ocsort_kalman.py | 2 +- .../trackers/kalman_filters/sort_kalman.py | 2 +- .../kalman_filters/strongsort_kalman.py | 2 +- .../kalman_filters/ucmctrack_kalman.py | 2 +- tracker/trackers/matching.py | 90 ++++++ tracker/trackers/ocsort_tracker.py | 75 ++++- .../{deepsort_reid.py => DeepsortReID.py} | 6 +- tracker/trackers/reid_models/MobileNetv2.py | 279 ++++++++++++++++++ tracker/trackers/reid_models/OSNet.py | 4 + tracker/trackers/reid_models/ShuffleNetv2.py | 263 +++++++++++++++++ tracker/trackers/reid_models/VehicleNet.py | 122 ++++++++ tracker/trackers/reid_models/engine.py | 209 +++++++++++++ .../trackers/reid_models/load_model_tools.py | 273 ----------------- tracker/trackers/sort_tracker.py | 3 + tracker/trackers/sparse_tracker.py | 36 +-- tracker/trackers/strongsort_tracker.py | 82 +---- tracker/trackers/tracklet.py | 51 +++- tracker/trackers/ucmc_tracker.py | 5 +- 30 files changed, 1247 insertions(+), 558 deletions(-) rename tracker/trackers/{camera_motion_compensation.py => camera_motion_compensation/cmc.py} (100%) rename tracker/trackers/reid_models/{deepsort_reid.py => DeepsortReID.py} (96%) create mode 100644 tracker/trackers/reid_models/MobileNetv2.py create mode 100644 tracker/trackers/reid_models/ShuffleNetv2.py create mode 100644 tracker/trackers/reid_models/VehicleNet.py create mode 100644 tracker/trackers/reid_models/engine.py delete mode 100644 tracker/trackers/reid_models/load_model_tools.py diff --git a/tracker/track.py b/tracker/track.py index 1f91194..849f7de 100644 --- a/tracker/track.py +++ b/tracker/track.py @@ -85,12 +85,14 @@ def get_args(): parser.add_argument('--dataset', type=str, default='visdrone_part', help='visdrone, mot17, etc.') parser.add_argument('--detector', type=str, default='yolo_ultralytics_v8', help='yolov7, yolox, etc.') parser.add_argument('--tracker', type=str, default='sort', help='sort, deepsort, etc') + parser.add_argument('--reid', action='store_true', help='enable reid model, work in bot, byte, ocsort and hybridsort') parser.add_argument('--reid_model', type=str, default='osnet_x0_25', help='osnet or deppsort') parser.add_argument('--kalman_format', type=str, default='default', help='use what kind of Kalman, sort, deepsort, byte, etc.') parser.add_argument('--img_size', type=int, default=1280, help='image size, [h, w]') - parser.add_argument('--conf_thresh', type=float, default=0.2, help='filter tracks') + parser.add_argument('--conf_thresh', type=float, default=0.2, help='filter 
detections') + parser.add_argument('--conf_thresh_low', type=float, default=0.1, help='filter low conf detections, used in two-stage association') parser.add_argument('--nms_thresh', type=float, default=0.7, help='thresh for NMS') parser.add_argument('--iou_thresh', type=float, default=0.5, help='IOU thresh to filter tracks') @@ -108,7 +110,7 @@ def get_args(): """other options""" - parser.add_argument('--discard_reid', action='store_true', help='discard reid model, only work in bot-sort etc. which need a reid part') + parser.add_argument('--fuse_detection_score', action='store_true', help='fuse detection conf with iou score') parser.add_argument('--track_buffer', type=int, default=30, help='tracking buffer') parser.add_argument('--gamma', type=float, default=0.1, help='param to control fusing motion and apperance dist') parser.add_argument('--min_area', type=float, default=150, help='use to filter small bboxs') diff --git a/tracker/track_demo.py b/tracker/track_demo.py index 1e29191..fc0029d 100644 --- a/tracker/track_demo.py +++ b/tracker/track_demo.py @@ -85,6 +85,7 @@ def get_args(): parser.add_argument('--detector', type=str, default='yolo_ultralytics_v8', help='yolov7, yolox, etc.') parser.add_argument('--tracker', type=str, default='sort', help='sort, deepsort, etc') + parser.add_argument('--reid', action='store_true', help='enable reid model, work in bot, byte, ocsort and hybridsort') parser.add_argument('--reid_model', type=str, default='osnet_x0_25', help='osnet or deppsort') parser.add_argument('--kalman_format', type=str, default='default', help='use what kind of Kalman, sort, deepsort, byte, etc.') @@ -109,7 +110,7 @@ def get_args(): """other options""" - parser.add_argument('--discard_reid', action='store_true', help='discard reid model, only work in bot-sort etc. 
which need a reid part') + parser.add_argument('--fuse_detection_score', action='store_true', help='fuse detection conf with iou score') parser.add_argument('--track_buffer', type=int, default=30, help='tracking buffer') parser.add_argument('--gamma', type=float, default=0.1, help='param to control fusing motion and apperance dist') parser.add_argument('--min_area', type=float, default=150, help='use to filter small bboxs') @@ -120,6 +121,8 @@ def get_args(): parser.add_argument('--track_eval', type=bool, default=True, help='Use TrackEval to evaluate') + """camera parameter""" + parser.add_argument('--camera_parameter_folder', type=str, default='./tracker/cam_param_files', help='folder path of camera parameter files') return parser.parse_args() def main(args): diff --git a/tracker/trackers/basetrack.py b/tracker/trackers/basetrack.py index 19f1aa5..78cd357 100644 --- a/tracker/trackers/basetrack.py +++ b/tracker/trackers/basetrack.py @@ -35,6 +35,10 @@ def end_frame(self): def next_id(): BaseTrack._count += 1 return BaseTrack._count + + @staticmethod + def clear_count(): + BaseTrack._count = 0 def activate(self, *args): raise NotImplementedError @@ -106,12 +110,6 @@ def tlwh_to_xysa(tlwh): ret[3] = tlwh[2] / tlwh[3] return ret - def to_xyah(self): - return self.tlwh_to_xyah(self.tlwh) - - def to_xywh(self): - return self.tlwh_to_xywh(self.tlwh) - @staticmethod def tlbr_to_tlwh(tlbr): ret = np.asarray(tlbr).copy() @@ -124,6 +122,12 @@ def tlwh_to_tlbr(tlwh): ret = np.asarray(tlwh).copy() ret[2:] += ret[:2] return ret + + def to_xyah(self): + return self.tlwh_to_xyah(self.tlwh) + + def to_xywh(self): + return self.tlwh_to_xywh(self.tlwh) def __repr__(self): return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame) diff --git a/tracker/trackers/botsort_tracker.py b/tracker/trackers/botsort_tracker.py index 30204e5..20b541d 100644 --- a/tracker/trackers/botsort_tracker.py +++ b/tracker/trackers/botsort_tracker.py @@ -13,36 +13,10 @@ from .tracklet import Tracklet, Tracklet_w_reid from .matching import * -from .reid_models.OSNet import * -from .reid_models.load_model_tools import load_pretrained_weights -from .reid_models.deepsort_reid import Extractor - -from .camera_motion_compensation import GMC - -REID_MODEL_DICT = { - 'osnet_x1_0': osnet_x1_0, - 'osnet_x0_75': osnet_x0_75, - 'osnet_x0_5': osnet_x0_5, - 'osnet_x0_25': osnet_x0_25, - 'deepsort': Extractor -} - - -def load_reid_model(reid_model, reid_model_path): - - if 'osnet' in reid_model: - func = REID_MODEL_DICT[reid_model] - model = func(num_classes=1, pretrained=False, ) - load_pretrained_weights(model, reid_model_path) - model.cuda().eval() - - elif 'deepsort' in reid_model: - model = REID_MODEL_DICT[reid_model](reid_model_path, use_cuda=True) +# for reid +from .reid_models.engine import load_reid_model, crop_and_resize, select_device - else: - raise NotImplementedError - - return model +from .camera_motion_compensation.cmc import GMC class BotTracker(object): def __init__(self, args, frame_rate=30): @@ -59,60 +33,34 @@ def __init__(self, args, frame_rate=30): self.motion = args.kalman_format - self.with_reid = not args.discard_reid + self.with_reid = args.reid - self.reid_model, self.crop_transforms = None, None + self.reid_model = None if self.with_reid: - self.reid_model = load_reid_model(args.reid_model, args.reid_model_path) - self.crop_transforms = T.Compose([ - # T.ToPILImage(), - # T.Resize(size=(256, 128)), - T.ToTensor(), # (c, 128, 256) - T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 
0.225]) - ]) - + self.reid_model = load_reid_model(args.reid_model, args.reid_model_path, device=args.device) + self.reid_model.eval() # camera motion compensation module self.gmc = GMC(method='orb', downscale=2, verbose=None) - def reid_preprocess(self, obj_bbox): - """ - preprocess cropped object bboxes - - obj_bbox: np.ndarray, shape=(h_obj, w_obj, c) - - return: - torch.Tensor of shape (c, 128, 256) - """ - obj_bbox = cv2.resize(obj_bbox.astype(np.float32) / 255.0, dsize=(128, 128)) # shape: (128, 256, c) - - return self.crop_transforms(obj_bbox) + # once init, clear all trackid count to avoid large id + BaseTrack.clear_count() + @torch.no_grad() def get_feature(self, tlwhs, ori_img): """ get apperance feature of an object tlwhs: shape (num_of_objects, 4) ori_img: original image, np.ndarray, shape(H, W, C) """ - obj_bbox = [] - - for tlwh in tlwhs: - tlwh = list(map(int, tlwh)) - # if any(tlbr_ == -1 for tlbr_ in tlwh): - # print(tlwh) - - tlbr_tensor = self.reid_preprocess(ori_img[tlwh[1]: tlwh[1] + tlwh[3], tlwh[0]: tlwh[0] + tlwh[2]]) - obj_bbox.append(tlbr_tensor) - - if not obj_bbox: - return np.array([]) - - obj_bbox = torch.stack(obj_bbox, dim=0) - obj_bbox = obj_bbox.cuda() - - features = self.reid_model(obj_bbox) # shape: (num_of_objects, feature_dim) - return features.cpu().detach().numpy() + if tlwhs.size == 0: + return np.empty((0, 512)) + + crop_bboxes = crop_and_resize(tlwhs, ori_img, input_format='tlwh', sz=(64, 128)) + features = self.reid_model(crop_bboxes).cpu().numpy() + + return features def update(self, output_results, img, ori_img): """ @@ -181,10 +129,13 @@ def update(self, output_results, img, ori_img): ious_dists = iou_distance(tracklet_pool, detections) ious_dists_mask = (ious_dists > 0.5) # high conf iou + # fuse detection conf into iou dist + if self.args.fuse_detection_score: + ious_dists = fuse_det_score(ious_dists, detections) + if self.with_reid: # mixed cost matrix emb_dists = embedding_distance(tracklet_pool, detections) / 2.0 - raw_emb_dists = emb_dists.copy() emb_dists[emb_dists > 0.25] = 1.0 emb_dists[ious_dists_mask] = 1.0 dists = np.minimum(ious_dists, emb_dists) @@ -238,9 +189,12 @@ def update(self, output_results, img, ori_img): ious_dists = iou_distance(unconfirmed, detections) ious_dists_mask = (ious_dists > 0.5) + # fuse detection conf into iou dist + if self.args.fuse_detection_score: + ious_dists = fuse_det_score(ious_dists, detections) + if self.with_reid: emb_dists = embedding_distance(unconfirmed, detections) / 2.0 - raw_emb_dists = emb_dists.copy() emb_dists[emb_dists > 0.25] = 1.0 emb_dists[ious_dists_mask] = 1.0 dists = np.minimum(ious_dists, emb_dists) diff --git a/tracker/trackers/byte_tracker.py b/tracker/trackers/byte_tracker.py index c820bd4..bd2598a 100644 --- a/tracker/trackers/byte_tracker.py +++ b/tracker/trackers/byte_tracker.py @@ -5,9 +5,14 @@ import numpy as np from collections import deque from .basetrack import BaseTrack, TrackState -from .tracklet import Tracklet +from .tracklet import Tracklet, Tracklet_w_reid from .matching import * +# for reid +import torch +import torchvision.transforms as T +from .reid_models.engine import load_reid_model, crop_and_resize + class ByteTracker(object): def __init__(self, args, frame_rate=30): self.tracked_tracklets = [] # type: list[Tracklet] @@ -23,6 +28,31 @@ def __init__(self, args, frame_rate=30): self.motion = args.kalman_format + # whether to use reid + self.with_reid = args.reid + self.reid_model = None + if self.with_reid: + self.reid_model = load_reid_model(args.reid_model, 
args.reid_model_path, device=args.device) + + # once init, clear all trackid count to avoid large id + BaseTrack.clear_count() + + @torch.no_grad() + def get_feature(self, tlwhs, ori_img): + """ + get apperance feature of an object + tlwhs: shape (num_of_objects, 4) + ori_img: original image, np.ndarray, shape(H, W, C) + """ + + if tlwhs.size == 0: + return np.empty((0, 512)) + + crop_bboxes = crop_and_resize(tlwhs, ori_img, input_format='tlwh', sz=(64, 128)) + features = self.reid_model(crop_bboxes).cpu().numpy() + + return features + def update(self, output_results, img, ori_img): """ output_results: processed detections (scale to original size) tlbr format @@ -39,7 +69,7 @@ def update(self, output_results, img, ori_img): categories = output_results[:, -1] remain_inds = scores > self.args.conf_thresh - inds_low = scores > 0.1 + inds_low = scores > self.args.conf_thresh_low inds_high = scores < self.args.conf_thresh inds_second = np.logical_and(inds_low, inds_high) @@ -52,10 +82,17 @@ def update(self, output_results, img, ori_img): scores_keep = scores[remain_inds] scores_second = scores[inds_second] + """Step 1: Extract reid features""" + if self.with_reid: + features_keep = self.get_feature(tlwhs=dets[:, :4], ori_img=ori_img) + if len(dets) > 0: - '''Detections''' - detections = [Tracklet(tlwh, s, cate, motion=self.motion) for - (tlwh, s, cate) in zip(dets, scores_keep, cates)] + if self.with_reid: + detections = [Tracklet_w_reid(tlwh, s, cate, motion=self.motion, feat=feat) for + (tlwh, s, cate, feat) in zip(dets, scores_keep, cates, features_keep)] + else: + detections = [Tracklet(tlwh, s, cate, motion=self.motion) for + (tlwh, s, cate) in zip(dets, scores_keep, cates)] else: detections = [] @@ -76,6 +113,16 @@ def update(self, output_results, img, ori_img): tracklet.predict() dists = iou_distance(tracklet_pool, detections) + + # fuse detection conf into iou dist + if self.args.fuse_detection_score: + dists = fuse_det_score(dists, detections) + + if self.with_reid: + # eq. 11 in Bot-SORT paper, i.e., the common method of + # fusing reid and motion. 
you can adjust the weight here + emb_dists = embedding_distance(tracklet_pool, detections) + dists = 0.9 * dists + 0.1 * emb_dists matches, u_track, u_detection = linear_assignment(dists, thresh=0.9) @@ -119,6 +166,10 @@ def update(self, output_results, img, ori_img): '''Deal with unconfirmed tracks, usually tracks with only one beginning frame''' detections = [detections[i] for i in u_detection] dists = iou_distance(unconfirmed, detections) + + # fuse detection conf into iou dist + if self.args.fuse_detection_score: + dists = fuse_det_score(dists, detections) matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7) diff --git a/tracker/trackers/c_biou_tracker.py b/tracker/trackers/c_biou_tracker.py index 222570a..bbbd107 100644 --- a/tracker/trackers/c_biou_tracker.py +++ b/tracker/trackers/c_biou_tracker.py @@ -23,6 +23,9 @@ def __init__(self, args, frame_rate=30): self.motion = args.kalman_format + # once init, clear all trackid count to avoid large id + BaseTrack.clear_count() + def update(self, output_results, img, ori_img): """ output_results: processed detections (scale to original size) tlbr format diff --git a/tracker/trackers/camera_motion_compensation.py b/tracker/trackers/camera_motion_compensation/cmc.py similarity index 100% rename from tracker/trackers/camera_motion_compensation.py rename to tracker/trackers/camera_motion_compensation/cmc.py diff --git a/tracker/trackers/deepsort_tracker.py b/tracker/trackers/deepsort_tracker.py index e81b145..69475e4 100644 --- a/tracker/trackers/deepsort_tracker.py +++ b/tracker/trackers/deepsort_tracker.py @@ -13,35 +13,8 @@ from .tracklet import Tracklet, Tracklet_w_reid from .matching import * -from .reid_models.OSNet import * -from .reid_models.load_model_tools import load_pretrained_weights -from .reid_models.deepsort_reid import Extractor - -REID_MODEL_DICT = { - 'osnet_x1_0': osnet_x1_0, - 'osnet_x0_75': osnet_x0_75, - 'osnet_x0_5': osnet_x0_5, - 'osnet_x0_25': osnet_x0_25, - 'deepsort': Extractor -} - - -def load_reid_model(reid_model, reid_model_path): - - if 'osnet' in reid_model: - func = REID_MODEL_DICT[reid_model] - model = func(num_classes=1, pretrained=False, ) - load_pretrained_weights(model, reid_model_path) - model.cuda().eval() - - elif 'deepsort' in reid_model: - model = REID_MODEL_DICT[reid_model](reid_model_path, use_cuda=True) - - else: - raise NotImplementedError - - return model - +# for reid +from .reid_models.engine import load_reid_model, crop_and_resize, select_device class DeepSortTracker(object): @@ -59,61 +32,34 @@ def __init__(self, args, frame_rate=30): self.motion = args.kalman_format - self.with_reid = not args.discard_reid + self.with_reid = args.reid - self.reid_model, self.crop_transforms = None, None + self.reid_model = None if self.with_reid: - self.reid_model = load_reid_model(args.reid_model, args.reid_model_path) - self.crop_transforms = T.Compose([ - # T.ToPILImage(), - # T.Resize(size=(256, 128)), - T.ToTensor(), # (c, 128, 256) - T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) - ]) + self.reid_model = load_reid_model(args.reid_model, args.reid_model_path, device=args.device) + self.reid_model.eval() + self.bbox_crop_size = (64, 128) if 'deepsort' in args.reid_model else (128, 128) - - def reid_preprocess(self, obj_bbox): - """ - preprocess cropped object bboxes + # once init, clear all trackid count to avoid large id + BaseTrack.clear_count() - obj_bbox: np.ndarray, shape=(h_obj, w_obj, c) - - return: - torch.Tensor of shape (c, 128, 256) - """ - - obj_bbox 
= cv2.resize(obj_bbox.astype(np.float32) / 255.0, dsize=self.bbox_crop_size) # shape: (h, w, c) - - return self.crop_transforms(obj_bbox) + @torch.no_grad() def get_feature(self, tlwhs, ori_img): """ get apperance feature of an object tlwhs: shape (num_of_objects, 4) ori_img: original image, np.ndarray, shape(H, W, C) """ - obj_bbox = [] + if tlwhs.size == 0: + return np.empty((0, 512)) - for tlwh in tlwhs: - tlwh = list(map(int, tlwh)) + crop_bboxes = crop_and_resize(tlwhs, ori_img, input_format='tlwh', sz=(64, 128)) + features = self.reid_model(crop_bboxes).cpu().numpy() - # limit to the legal range - tlwh[0], tlwh[1] = max(tlwh[0], 0), max(tlwh[1], 0) - - tlbr_tensor = self.reid_preprocess(ori_img[tlwh[1]: tlwh[1] + tlwh[3], tlwh[0]: tlwh[0] + tlwh[2]]) - - obj_bbox.append(tlbr_tensor) - - if not obj_bbox: - return np.array([]) - - obj_bbox = torch.stack(obj_bbox, dim=0) - obj_bbox = obj_bbox.cuda() - - features = self.reid_model(obj_bbox) # shape: (num_of_objects, feature_dim) - return features.cpu().detach().numpy() + return features def update(self, output_results, img, ori_img): """ diff --git a/tracker/trackers/hybridsort_tracker.py b/tracker/trackers/hybridsort_tracker.py index 0dc54a8..90ab115 100644 --- a/tracker/trackers/hybridsort_tracker.py +++ b/tracker/trackers/hybridsort_tracker.py @@ -27,6 +27,9 @@ def __init__(self, args, frame_rate=30): self.delta_t = 3 + # once init, clear all trackid count to avoid large id + BaseTrack.clear_count() + @staticmethod def k_previous_obs(observations, cur_age, k): if len(observations) == 0: @@ -54,7 +57,7 @@ def update(self, output_results, img, ori_img): categories = output_results[:, -1] remain_inds = scores > self.args.conf_thresh - inds_low = scores > 0.1 + inds_low = scores > self.args.conf_thresh_low inds_high = scores < self.args.conf_thresh inds_second = np.logical_and(inds_low, inds_high) diff --git a/tracker/trackers/kalman_filters/botsort_kalman.py b/tracker/trackers/kalman_filters/botsort_kalman.py index f9fdfe8..dae5a26 100644 --- a/tracker/trackers/kalman_filters/botsort_kalman.py +++ b/tracker/trackers/kalman_filters/botsort_kalman.py @@ -12,8 +12,8 @@ def __init__(self, ): F = np.eye(state_dim, state_dim) ''' - [1, 0, 0, 0, 1, 0, 0] - [0, 1, 0, 0, 0, 1, 0] + [1, 0, 0, 0, 1, 0, 0, 0] + [0, 1, 0, 0, 0, 1, 0, 0] ... 
''' for i in range(state_dim // 2): @@ -54,13 +54,19 @@ def initialize(self, observation): self.kf.P = np.diag(np.square(std)) # P_{0, 0} - def predict(self, ): + def predict(self, is_activated=True): """ predict step x_{n + 1, n} = F * x_{n, n} P_{n + 1, n} = F * P_{n, n} * F^T + Q """ + + if not is_activated: + # if not activated, set the velocity of w and h to 0 + self.kf.x[6] = 0.0 + self.kf.x[7] = 0.0 + std_pos = [ self._std_weight_position * self.kf.x[2], self._std_weight_position * self.kf.x[3], @@ -80,7 +86,7 @@ def update(self, z): """ update step Args: - z: observation x-y-a-h format + z: observation x-y-w-h format K_n = P_{n, n - 1} * H^T * (H P_{n, n - 1} H^T + R)^{-1} x_{n, n} = x_{n, n - 1} + K_n * (z - H * x_{n, n - 1}) diff --git a/tracker/trackers/kalman_filters/bytetrack_kalman.py b/tracker/trackers/kalman_filters/bytetrack_kalman.py index e37d3aa..53f563c 100644 --- a/tracker/trackers/kalman_filters/bytetrack_kalman.py +++ b/tracker/trackers/kalman_filters/bytetrack_kalman.py @@ -52,13 +52,18 @@ def initialize(self, observation): self.kf.P = np.diag(np.square(std)) # P_{0, 0} - def predict(self, ): + def predict(self, is_activated=True): """ predict step x_{n + 1, n} = F * x_{n, n} P_{n + 1, n} = F * P_{n, n} * F^T + Q """ + + if not is_activated: + # if not activated, set the velocity of h to 0 + self.kf.x[7] = 0.0 + std_pos = [ self._std_weight_position * self.kf.x[3], self._std_weight_position * self.kf.x[3], diff --git a/tracker/trackers/kalman_filters/hybridsort_kalman.py b/tracker/trackers/kalman_filters/hybridsort_kalman.py index fd6ff86..5edf320 100644 --- a/tracker/trackers/kalman_filters/hybridsort_kalman.py +++ b/tracker/trackers/kalman_filters/hybridsort_kalman.py @@ -38,7 +38,7 @@ def initialize(self, observation): self.kf.x[:5] = observation - def predict(self, ): + def predict(self, is_activated=True): """ predict step """ diff --git a/tracker/trackers/kalman_filters/ocsort_kalman.py b/tracker/trackers/kalman_filters/ocsort_kalman.py index c5e01a5..58a0cdd 100644 --- a/tracker/trackers/kalman_filters/ocsort_kalman.py +++ b/tracker/trackers/kalman_filters/ocsort_kalman.py @@ -46,7 +46,7 @@ def initialize(self, observation): self.kf.x[:4] = observation - def predict(self, ): + def predict(self, is_activated=True): """ predict step """ diff --git a/tracker/trackers/kalman_filters/sort_kalman.py b/tracker/trackers/kalman_filters/sort_kalman.py index c593bfa..d8cd4fd 100644 --- a/tracker/trackers/kalman_filters/sort_kalman.py +++ b/tracker/trackers/kalman_filters/sort_kalman.py @@ -47,7 +47,7 @@ def initialize(self, observation): self.kf.x[:4] = observation - def predict(self, ): + def predict(self, is_activated=True): """ predict step """ diff --git a/tracker/trackers/kalman_filters/strongsort_kalman.py b/tracker/trackers/kalman_filters/strongsort_kalman.py index dee8394..e3e8f75 100644 --- a/tracker/trackers/kalman_filters/strongsort_kalman.py +++ b/tracker/trackers/kalman_filters/strongsort_kalman.py @@ -52,7 +52,7 @@ def initialize(self, observation): self.kf.P = np.diag(np.square(std)) # P_{0, 0} - def predict(self, ): + def predict(self, is_activated=True): """ predict step x_{n + 1, n} = F * x_{n, n} diff --git a/tracker/trackers/kalman_filters/ucmctrack_kalman.py b/tracker/trackers/kalman_filters/ucmctrack_kalman.py index a8f648f..ef1a11d 100644 --- a/tracker/trackers/kalman_filters/ucmctrack_kalman.py +++ b/tracker/trackers/kalman_filters/ucmctrack_kalman.py @@ -54,7 +54,7 @@ def initialize(self, observation, R): self.kf.R = R - def predict(self): + 
def predict(self, is_activated=True): self.kf.predict() def update(self, z, R): diff --git a/tracker/trackers/matching.py b/tracker/trackers/matching.py index c3c66d1..d3c16e0 100644 --- a/tracker/trackers/matching.py +++ b/tracker/trackers/matching.py @@ -233,6 +233,96 @@ def observation_centric_association(tracklets, detections, velocities, previous_ return matches, unmatched_a, unmatched_b +""" +observation centric association, with velocity and reid feature, for Deep OC Sort +""" +def compute_aw_max_metric(embed_cost, w_association_emb, bottom=0.5): + ''' + helper func of observation_centric_association_w_reid + ''' + w_emb = np.full_like(embed_cost, w_association_emb) + + for idx in range(embed_cost.shape[0]): + inds = np.argsort(-embed_cost[idx]) + # If there's less than two matches, just keep original weight + if len(inds) < 2: + continue + if embed_cost[idx, inds[0]] == 0: + row_weight = 0 + else: + row_weight = 1 - max( + (embed_cost[idx, inds[1]] / embed_cost[idx, inds[0]]) - bottom, 0 + ) / (1 - bottom) + w_emb[idx] *= row_weight + + for idj in range(embed_cost.shape[1]): + inds = np.argsort(-embed_cost[:, idj]) + # If there's less than two matches, just keep original weight + if len(inds) < 2: + continue + if embed_cost[inds[0], idj] == 0: + col_weight = 0 + else: + col_weight = 1 - max( + (embed_cost[inds[1], idj] / embed_cost[inds[0], idj]) - bottom, 0 + ) / (1 - bottom) + w_emb[:, idj] *= col_weight + + return w_emb * embed_cost + +def observation_centric_association_w_reid(tracklets, detections, velocities, previous_obs, vdc_weight=0.05, iou_threshold=0.3, + aw_off=False, w_assoc_emb=0.5, aw_param=0.5): + + if len(tracklets) == 0 or len(detections) == 0: + return np.empty((0, 2), dtype=int), tuple(range(len(tracklets))), tuple(range(len(detections))) + + # get numpy format bboxes + trk_tlbrs = np.array([track.tlbr for track in tracklets]) + det_tlbrs = np.array([det.tlbr for det in detections]) + det_scores = np.array([det.score for det in detections]) + + iou_matrix = bbox_ious(trk_tlbrs, det_tlbrs) + + # cal embedding distance + embed_cost = 1. - embedding_distance(tracklets, detections, metric='cosine') + + # NOTE for iou < iou_threshold, directly set to -inf, otherwise after solving the linear assignment, + # some matched pairs will have no overlaps + iou_matrix[iou_matrix < iou_threshold] = - 1e5 + + Y, X = speed_direction_batch(det_tlbrs, previous_obs) + inertia_Y, inertia_X = velocities[:,0], velocities[:,1] + inertia_Y = np.repeat(inertia_Y[:, np.newaxis], Y.shape[1], axis=1) + inertia_X = np.repeat(inertia_X[:, np.newaxis], X.shape[1], axis=1) + diff_angle_cos = inertia_X * X + inertia_Y * Y + diff_angle_cos = np.clip(diff_angle_cos, a_min=-1, a_max=1) + diff_angle = np.arccos(diff_angle_cos) + diff_angle = (np.pi / 2.0 - np.abs(diff_angle)) / np.pi + + valid_mask = np.ones(previous_obs.shape[0]) + valid_mask[np.where(previous_obs[:, 4] < 0)] = 0 + + scores = np.repeat(det_scores[:, np.newaxis], trk_tlbrs.shape[0], axis=1) + valid_mask = np.repeat(valid_mask[:, np.newaxis], X.shape[1], axis=1) + + angle_diff_cost = (valid_mask * diff_angle) * vdc_weight + angle_diff_cost = angle_diff_cost * scores.T + + + # cal embedding cost, eq. 
4~6 in paper + embed_cost[iou_matrix <= 0] = 0 + if not aw_off: + embed_cost = compute_aw_max_metric(embed_cost, w_assoc_emb, bottom=aw_param) + else: + embed_cost *= w_assoc_emb + + matches, unmatched_a, unmatched_b = linear_assignment(- (iou_matrix + angle_diff_cost + embed_cost), thresh=0.0) + + + return matches, unmatched_a, unmatched_b + + + """ helper func of observation_centric_association (OC Sort) and association_weak_cues (Hybrid Sort) """ diff --git a/tracker/trackers/ocsort_tracker.py b/tracker/trackers/ocsort_tracker.py index c2084cc..bdf46c9 100644 --- a/tracker/trackers/ocsort_tracker.py +++ b/tracker/trackers/ocsort_tracker.py @@ -10,6 +10,11 @@ from cython_bbox import bbox_overlaps as bbox_ious +# for reid +import torch +import torchvision.transforms as T +from .reid_models.engine import load_reid_model, crop_and_resize + class OCSortTracker(object): def __init__(self, args, frame_rate=30): self.tracked_tracklets = [] # type: list[Tracklet] @@ -27,6 +32,31 @@ def __init__(self, args, frame_rate=30): self.delta_t = 3 + # whether to use reid + self.with_reid = args.reid + self.reid_model = None + if self.with_reid: + self.reid_model = load_reid_model(args.reid_model, args.reid_model_path, device=args.device) + + # once init, clear all trackid count to avoid large id + BaseTrack.clear_count() + + @torch.no_grad() + def get_feature(self, tlwhs, ori_img): + """ + get apperance feature of an object + tlwhs: shape (num_of_objects, 4) + ori_img: original image, np.ndarray, shape(H, W, C) + """ + + if tlwhs.size == 0: + return np.empty((0, 512)) + + crop_bboxes = crop_and_resize(tlwhs, ori_img, input_format='tlwh', sz=(64, 128)) + features = self.reid_model(crop_bboxes).cpu().numpy() + + return features + @staticmethod def k_previous_obs(observations, cur_age, k): if len(observations) == 0: @@ -67,10 +97,20 @@ def update(self, output_results, img, ori_img): scores_keep = scores[remain_inds] scores_second = scores[inds_second] + """Step 1: Extract reid features""" + if self.with_reid: + features_keep = self.get_feature(tlwhs=dets[:, :4], ori_img=ori_img) + features_second = self.get_feature(tlwhs=dets_second[:, :4], ori_img=ori_img) + # in deep oc sort, low conf detections also need reid features + if len(dets) > 0: '''Detections''' - detections = [Tracklet_w_velocity(tlwh, s, cate, motion=self.motion) for - (tlwh, s, cate) in zip(dets, scores_keep, cates)] + if self.with_reid: + detections = [Tracklet_w_velocity(tlwh, s, cate, motion=self.motion, feat=feat) for + (tlwh, s, cate, feat) in zip(dets, scores_keep, cates, features_keep)] + else: + detections = [Tracklet_w_velocity(tlwh, s, cate, motion=self.motion) for + (tlwh, s, cate) in zip(dets, scores_keep, cates)] else: detections = [] @@ -102,10 +142,17 @@ def update(self, output_results, img, ori_img): tracklet.predict() # Observation centric cost matrix and assignment - matches, u_track, u_detection = observation_centric_association( - tracklets=tracklet_pool, detections=detections, iou_threshold=0.3, - velocities=velocities, previous_obs=k_observations, vdc_weight=0.05 - ) + if self.with_reid: + matches, u_track, u_detection = observation_centric_association_w_reid( + tracklets=tracklet_pool, detections=detections, iou_threshold=0.3, + velocities=velocities, previous_obs=k_observations, vdc_weight=0.05 + ) + + else: + matches, u_track, u_detection = observation_centric_association( + tracklets=tracklet_pool, detections=detections, iou_threshold=0.3, + velocities=velocities, previous_obs=k_observations, vdc_weight=0.05 + ) for 
itracked, idet in matches: track = tracklet_pool[itracked] @@ -121,16 +168,24 @@ def update(self, output_results, img, ori_img): # association the untrack to the low score detections if len(dets_second) > 0: '''Detections''' - detections_second = [Tracklet_w_velocity(tlwh, s, cate, motion=self.motion) for - (tlwh, s, cate) in zip(dets_second, scores_second, cates_second)] + if self.with_reid: + detections_second = [Tracklet_w_velocity(tlwh, s, cate, motion=self.motion, feat=feat) for + (tlwh, s, cate, feat) in zip(dets_second, scores_second, cates_second, features_second)] + else: + detections_second = [Tracklet_w_velocity(tlwh, s, cate, motion=self.motion) for + (tlwh, s, cate) in zip(dets_second, scores_second, cates_second)] else: detections_second = [] r_tracked_tracklets = [tracklet_pool[i] for i in u_track if tracklet_pool[i].state == TrackState.Tracked] - dists = iou_distance(r_tracked_tracklets, detections_second) + dists = 1. - iou_distance(r_tracked_tracklets, detections_second) + if self.with_reid: # for low confidence detections, we also use reid and add directly + # note that embedding_distance calculate the 1. - cosine, not cosine + emb_dists = 1. - embedding_distance(r_tracked_tracklets, detections_second, metric='cosine') + dists = dists + emb_dists - matches, u_track, u_detection_second = linear_assignment(dists, thresh=0.5) + matches, u_track, u_detection_second = linear_assignment(-1 * dists, thresh=0.0) for itracked, idet in matches: track = r_tracked_tracklets[itracked] det = detections_second[idet] diff --git a/tracker/trackers/reid_models/deepsort_reid.py b/tracker/trackers/reid_models/DeepsortReID.py similarity index 96% rename from tracker/trackers/reid_models/deepsort_reid.py rename to tracker/trackers/reid_models/DeepsortReID.py index 6571a28..8a10133 100644 --- a/tracker/trackers/reid_models/deepsort_reid.py +++ b/tracker/trackers/reid_models/DeepsortReID.py @@ -110,9 +110,9 @@ def forward(self, x): class Extractor(object): - def __init__(self, model_path, use_cuda=True): + def __init__(self, model_path, device=None): self.net = Net(reid=True) - self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu" + self.device = device state_dict = torch.load(model_path, map_location=torch.device(self.device))[ 'net_dict'] self.net.load_state_dict(state_dict) @@ -146,7 +146,7 @@ def _resize(im, size): return im_batch def __call__(self, im_crops): - if isinstance(im_crops, list): + if isinstance(im_crops, list): # always false because im_crops is a tensor im_batch = self._preprocess(im_crops) else: im_batch = im_crops diff --git a/tracker/trackers/reid_models/MobileNetv2.py b/tracker/trackers/reid_models/MobileNetv2.py new file mode 100644 index 0000000..cbc730b --- /dev/null +++ b/tracker/trackers/reid_models/MobileNetv2.py @@ -0,0 +1,279 @@ +''' +Copied from torch-reid repo https://github.com/KaiyangZhou/deep-person-reid/ +''' + + +from __future__ import division, absolute_import +import torch.utils.model_zoo as model_zoo +from torch import nn +from torch.nn import functional as F + +__all__ = ['mobilenetv2_x1_0', 'mobilenetv2_x1_4'] + +model_urls = { + # 1.0: top-1 71.3 + 'mobilenetv2_x1_0': + 'https://drive.google.com/uc?id=1q1WU2FETRJ3BXcpVtfJUuqq4z3psetds', + # 1.4: top-1 73.9 + 'mobilenetv2_x1_4': + 'https://drive.google.com/uc?id=12uD5FeVqLg9-AFDju2L7SQxjmPb4zpBN', +} + + +class ConvBlock(nn.Module): + """Basic convolutional block. + + convolution (bias discarded) + batch normalization + relu6. + + Args: + in_c (int): number of input channels. 
+ out_c (int): number of output channels. + k (int or tuple): kernel size. + s (int or tuple): stride. + p (int or tuple): padding. + g (int): number of blocked connections from input channels + to output channels (default: 1). + """ + + def __init__(self, in_c, out_c, k, s=1, p=0, g=1): + super(ConvBlock, self).__init__() + self.conv = nn.Conv2d( + in_c, out_c, k, stride=s, padding=p, bias=False, groups=g + ) + self.bn = nn.BatchNorm2d(out_c) + + def forward(self, x): + return F.relu6(self.bn(self.conv(x))) + + +class Bottleneck(nn.Module): + + def __init__(self, in_channels, out_channels, expansion_factor, stride=1): + super(Bottleneck, self).__init__() + mid_channels = in_channels * expansion_factor + self.use_residual = stride == 1 and in_channels == out_channels + self.conv1 = ConvBlock(in_channels, mid_channels, 1) + self.dwconv2 = ConvBlock( + mid_channels, mid_channels, 3, stride, 1, g=mid_channels + ) + self.conv3 = nn.Sequential( + nn.Conv2d(mid_channels, out_channels, 1, bias=False), + nn.BatchNorm2d(out_channels), + ) + + def forward(self, x): + m = self.conv1(x) + m = self.dwconv2(m) + m = self.conv3(m) + if self.use_residual: + return x + m + else: + return m + + +class MobileNetV2(nn.Module): + """MobileNetV2. + + Reference: + Sandler et al. MobileNetV2: Inverted Residuals and + Linear Bottlenecks. CVPR 2018. + + Public keys: + - ``mobilenetv2_x1_0``: MobileNetV2 x1.0. + - ``mobilenetv2_x1_4``: MobileNetV2 x1.4. + """ + + def __init__( + self, + num_classes, + width_mult=1, + loss='softmax', + fc_dims=None, + dropout_p=None, + **kwargs + ): + super(MobileNetV2, self).__init__() + self.loss = loss + self.in_channels = int(32 * width_mult) + self.feature_dim = int(1280 * width_mult) if width_mult > 1 else 1280 + + # construct layers + self.conv1 = ConvBlock(3, self.in_channels, 3, s=2, p=1) + self.conv2 = self._make_layer( + Bottleneck, 1, int(16 * width_mult), 1, 1 + ) + self.conv3 = self._make_layer( + Bottleneck, 6, int(24 * width_mult), 2, 2 + ) + self.conv4 = self._make_layer( + Bottleneck, 6, int(32 * width_mult), 3, 2 + ) + self.conv5 = self._make_layer( + Bottleneck, 6, int(64 * width_mult), 4, 2 + ) + self.conv6 = self._make_layer( + Bottleneck, 6, int(96 * width_mult), 3, 1 + ) + self.conv7 = self._make_layer( + Bottleneck, 6, int(160 * width_mult), 3, 2 + ) + self.conv8 = self._make_layer( + Bottleneck, 6, int(320 * width_mult), 1, 1 + ) + self.conv9 = ConvBlock(self.in_channels, self.feature_dim, 1) + + self.global_avgpool = nn.AdaptiveAvgPool2d(1) + self.fc = self._construct_fc_layer( + fc_dims, self.feature_dim, dropout_p + ) + self.classifier = nn.Linear(self.feature_dim, num_classes) + + self._init_params() + + def _make_layer(self, block, t, c, n, s): + # t: expansion factor + # c: output channels + # n: number of blocks + # s: stride for first layer + layers = [] + layers.append(block(self.in_channels, c, t, s)) + self.in_channels = c + for i in range(1, n): + layers.append(block(self.in_channels, c, t)) + return nn.Sequential(*layers) + + def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None): + """Constructs fully connected layer. 
+ + Args: + fc_dims (list or tuple): dimensions of fc layers, if None, no fc layers are constructed + input_dim (int): input dimension + dropout_p (float): dropout probability, if None, dropout is unused + """ + if fc_dims is None: + self.feature_dim = input_dim + return None + + assert isinstance( + fc_dims, (list, tuple) + ), 'fc_dims must be either list or tuple, but got {}'.format( + type(fc_dims) + ) + + layers = [] + for dim in fc_dims: + layers.append(nn.Linear(input_dim, dim)) + layers.append(nn.BatchNorm1d(dim)) + layers.append(nn.ReLU(inplace=True)) + if dropout_p is not None: + layers.append(nn.Dropout(p=dropout_p)) + input_dim = dim + + self.feature_dim = fc_dims[-1] + + return nn.Sequential(*layers) + + def _init_params(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_( + m.weight, mode='fan_out', nonlinearity='relu' + ) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm1d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + + def featuremaps(self, x): + x = self.conv1(x) + x = self.conv2(x) + x = self.conv3(x) + x = self.conv4(x) + x = self.conv5(x) + x = self.conv6(x) + x = self.conv7(x) + x = self.conv8(x) + x = self.conv9(x) + return x + + def forward(self, x): + f = self.featuremaps(x) + v = self.global_avgpool(f) + v = v.view(v.size(0), -1) + + if self.fc is not None: + v = self.fc(v) + + if not self.training: + return v + + y = self.classifier(v) + + if self.loss == 'softmax': + return y + elif self.loss == 'triplet': + return y, v + else: + raise KeyError("Unsupported loss: {}".format(self.loss)) + + +def init_pretrained_weights(model, model_url): + """Initializes model with pretrained weights. + + Layers that don't match with pretrained layers in name or size are kept unchanged. 
+ """ + pretrain_dict = model_zoo.load_url(model_url) + model_dict = model.state_dict() + pretrain_dict = { + k: v + for k, v in pretrain_dict.items() + if k in model_dict and model_dict[k].size() == v.size() + } + model_dict.update(pretrain_dict) + model.load_state_dict(model_dict) + + +def mobilenetv2_x1_0(num_classes, loss='softmax', pretrained=True, **kwargs): + model = MobileNetV2( + num_classes, + loss=loss, + width_mult=1, + fc_dims=None, + dropout_p=None, + **kwargs + ) + if pretrained: + # init_pretrained_weights(model, model_urls['mobilenetv2_x1_0']) + import warnings + warnings.warn( + 'The imagenet pretrained weights need to be manually downloaded from {}' + .format(model_urls['mobilenetv2_x1_0']) + ) + return model + + +def mobilenetv2_x1_4(num_classes, loss='softmax', pretrained=True, **kwargs): + model = MobileNetV2( + num_classes, + loss=loss, + width_mult=1.4, + fc_dims=None, + dropout_p=None, + **kwargs + ) + if pretrained: + # init_pretrained_weights(model, model_urls['mobilenetv2_x1_4']) + import warnings + warnings.warn( + 'The imagenet pretrained weights need to be manually downloaded from {}' + .format(model_urls['mobilenetv2_x1_4']) + ) + return model \ No newline at end of file diff --git a/tracker/trackers/reid_models/OSNet.py b/tracker/trackers/reid_models/OSNet.py index b77388f..618ad32 100644 --- a/tracker/trackers/reid_models/OSNet.py +++ b/tracker/trackers/reid_models/OSNet.py @@ -1,3 +1,7 @@ +''' +Copied from torch-reid repo https://github.com/KaiyangZhou/deep-person-reid/ +''' + from __future__ import division, absolute_import import warnings import torch diff --git a/tracker/trackers/reid_models/ShuffleNetv2.py b/tracker/trackers/reid_models/ShuffleNetv2.py new file mode 100644 index 0000000..9d9cfd9 --- /dev/null +++ b/tracker/trackers/reid_models/ShuffleNetv2.py @@ -0,0 +1,263 @@ +''' +Copied from torch-reid repo https://github.com/KaiyangZhou/deep-person-reid/ +''' + +from __future__ import division, absolute_import +import torch +import torch.utils.model_zoo as model_zoo +from torch import nn + +__all__ = [ + 'shufflenet_v2_x0_5', 'shufflenet_v2_x1_0', 'shufflenet_v2_x1_5', + 'shufflenet_v2_x2_0' +] + +model_urls = { + 'shufflenetv2_x0.5': + 'https://download.pytorch.org/models/shufflenetv2_x0.5-f707e7126e.pth', + 'shufflenetv2_x1.0': + 'https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth', + 'shufflenetv2_x1.5': None, + 'shufflenetv2_x2.0': None, +} + + +def channel_shuffle(x, groups): + batchsize, num_channels, height, width = x.data.size() + channels_per_group = num_channels // groups + + # reshape + x = x.view(batchsize, groups, channels_per_group, height, width) + + x = torch.transpose(x, 1, 2).contiguous() + + # flatten + x = x.view(batchsize, -1, height, width) + + return x + + +class InvertedResidual(nn.Module): + + def __init__(self, inp, oup, stride): + super(InvertedResidual, self).__init__() + + if not (1 <= stride <= 3): + raise ValueError('illegal stride value') + self.stride = stride + + branch_features = oup // 2 + assert (self.stride != 1) or (inp == branch_features << 1) + + if self.stride > 1: + self.branch1 = nn.Sequential( + self.depthwise_conv( + inp, inp, kernel_size=3, stride=self.stride, padding=1 + ), + nn.BatchNorm2d(inp), + nn.Conv2d( + inp, + branch_features, + kernel_size=1, + stride=1, + padding=0, + bias=False + ), + nn.BatchNorm2d(branch_features), + nn.ReLU(inplace=True), + ) + + self.branch2 = nn.Sequential( + nn.Conv2d( + inp if (self.stride > 1) else branch_features, + branch_features, + 
kernel_size=1, + stride=1, + padding=0, + bias=False + ), + nn.BatchNorm2d(branch_features), + nn.ReLU(inplace=True), + self.depthwise_conv( + branch_features, + branch_features, + kernel_size=3, + stride=self.stride, + padding=1 + ), + nn.BatchNorm2d(branch_features), + nn.Conv2d( + branch_features, + branch_features, + kernel_size=1, + stride=1, + padding=0, + bias=False + ), + nn.BatchNorm2d(branch_features), + nn.ReLU(inplace=True), + ) + + @staticmethod + def depthwise_conv(i, o, kernel_size, stride=1, padding=0, bias=False): + return nn.Conv2d( + i, o, kernel_size, stride, padding, bias=bias, groups=i + ) + + def forward(self, x): + if self.stride == 1: + x1, x2 = x.chunk(2, dim=1) + out = torch.cat((x1, self.branch2(x2)), dim=1) + else: + out = torch.cat((self.branch1(x), self.branch2(x)), dim=1) + + out = channel_shuffle(out, 2) + + return out + + +class ShuffleNetV2(nn.Module): + """ShuffleNetV2. + + Reference: + Ma et al. ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design. ECCV 2018. + + Public keys: + - ``shufflenet_v2_x0_5``: ShuffleNetV2 x0.5. + - ``shufflenet_v2_x1_0``: ShuffleNetV2 x1.0. + - ``shufflenet_v2_x1_5``: ShuffleNetV2 x1.5. + - ``shufflenet_v2_x2_0``: ShuffleNetV2 x2.0. + """ + + def __init__( + self, num_classes, loss, stages_repeats, stages_out_channels, **kwargs + ): + super(ShuffleNetV2, self).__init__() + self.loss = loss + + if len(stages_repeats) != 3: + raise ValueError( + 'expected stages_repeats as list of 3 positive ints' + ) + if len(stages_out_channels) != 5: + raise ValueError( + 'expected stages_out_channels as list of 5 positive ints' + ) + self._stage_out_channels = stages_out_channels + + input_channels = 3 + output_channels = self._stage_out_channels[0] + self.conv1 = nn.Sequential( + nn.Conv2d(input_channels, output_channels, 3, 2, 1, bias=False), + nn.BatchNorm2d(output_channels), + nn.ReLU(inplace=True), + ) + input_channels = output_channels + + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + + stage_names = ['stage{}'.format(i) for i in [2, 3, 4]] + for name, repeats, output_channels in zip( + stage_names, stages_repeats, self._stage_out_channels[1:] + ): + seq = [InvertedResidual(input_channels, output_channels, 2)] + for i in range(repeats - 1): + seq.append( + InvertedResidual(output_channels, output_channels, 1) + ) + setattr(self, name, nn.Sequential(*seq)) + input_channels = output_channels + + output_channels = self._stage_out_channels[-1] + self.conv5 = nn.Sequential( + nn.Conv2d(input_channels, output_channels, 1, 1, 0, bias=False), + nn.BatchNorm2d(output_channels), + nn.ReLU(inplace=True), + ) + self.global_avgpool = nn.AdaptiveAvgPool2d((1, 1)) + + self.classifier = nn.Linear(output_channels, num_classes) + + def featuremaps(self, x): + x = self.conv1(x) + x = self.maxpool(x) + x = self.stage2(x) + x = self.stage3(x) + x = self.stage4(x) + x = self.conv5(x) + return x + + def forward(self, x): + f = self.featuremaps(x) + v = self.global_avgpool(f) + v = v.view(v.size(0), -1) + + if not self.training: + return v + + y = self.classifier(v) + + if self.loss == 'softmax': + return y + elif self.loss == 'triplet': + return y, v + else: + raise KeyError("Unsupported loss: {}".format(self.loss)) + + +def init_pretrained_weights(model, model_url): + """Initializes model with pretrained weights. + + Layers that don't match with pretrained layers in name or size are kept unchanged. 
+ """ + if model_url is None: + import warnings + warnings.warn( + 'ImageNet pretrained weights are unavailable for this model' + ) + return + pretrain_dict = model_zoo.load_url(model_url) + model_dict = model.state_dict() + pretrain_dict = { + k: v + for k, v in pretrain_dict.items() + if k in model_dict and model_dict[k].size() == v.size() + } + model_dict.update(pretrain_dict) + model.load_state_dict(model_dict) + + +def shufflenet_v2_x0_5(num_classes, loss='softmax', pretrained=True, **kwargs): + model = ShuffleNetV2( + num_classes, loss, [4, 8, 4], [24, 48, 96, 192, 1024], **kwargs + ) + if pretrained: + init_pretrained_weights(model, model_urls['shufflenetv2_x0.5']) + return model + + +def shufflenet_v2_x1_0(num_classes, loss='softmax', pretrained=True, **kwargs): + model = ShuffleNetV2( + num_classes, loss, [4, 8, 4], [24, 116, 232, 464, 1024], **kwargs + ) + if pretrained: + init_pretrained_weights(model, model_urls['shufflenetv2_x1.0']) + return model + + +def shufflenet_v2_x1_5(num_classes, loss='softmax', pretrained=True, **kwargs): + model = ShuffleNetV2( + num_classes, loss, [4, 8, 4], [24, 176, 352, 704, 1024], **kwargs + ) + if pretrained: + init_pretrained_weights(model, model_urls['shufflenetv2_x1.5']) + return model + + +def shufflenet_v2_x2_0(num_classes, loss='softmax', pretrained=True, **kwargs): + model = ShuffleNetV2( + num_classes, loss, [4, 8, 4], [24, 244, 488, 976, 2048], **kwargs + ) + if pretrained: + init_pretrained_weights(model, model_urls['shufflenetv2_x2.0']) + return model \ No newline at end of file diff --git a/tracker/trackers/reid_models/VehicleNet.py b/tracker/trackers/reid_models/VehicleNet.py new file mode 100644 index 0000000..6a3035e --- /dev/null +++ b/tracker/trackers/reid_models/VehicleNet.py @@ -0,0 +1,122 @@ +''' +Vehicle net for AICity challenge 2020 +https://github.com/layumi/AICIty-reID-2020/ +''' +import torch +import torch.nn as nn +from torch.nn import init +from torchvision.models import resnet50 + +def weights_init_kaiming(m): + classname = m.__class__.__name__ + # print(classname) + if classname.find('Conv') != -1: + init.kaiming_normal_(m.weight.data, a=0, mode='fan_in') # For old pytorch, you may use kaiming_normal. 
+ elif classname.find('Linear') != -1: + init.kaiming_normal_(m.weight.data, a=0, mode='fan_out') + init.constant_(m.bias.data, 0.0) + elif classname.find('BatchNorm1d') != -1: + init.normal_(m.weight.data, 1.0, 0.02) + init.constant_(m.bias.data, 0.0) + +def weights_init_classifier(m): + classname = m.__class__.__name__ + if classname.find('Linear') != -1: + init.normal_(m.weight.data, std=0.001) + init.constant_(m.bias.data, 0.0) + +class ClassBlock(nn.Module): + def __init__(self, input_dim, num_classes, droprate, relu=False, bnorm=True, num_bottleneck=512, linear=True, return_f = False): + super(ClassBlock, self).__init__() + self.return_f = return_f + add_block = [] + if linear: + add_block += [nn.Linear(input_dim, num_bottleneck)] + else: + num_bottleneck = input_dim + if bnorm: + add_block += [nn.BatchNorm1d(num_bottleneck)] + if relu: + add_block += [nn.LeakyReLU(0.1)] + if droprate>0: + add_block += [nn.Dropout(p=droprate)] + add_block = nn.Sequential(*add_block) + add_block.apply(weights_init_kaiming) + + classifier = [] + classifier += [nn.Linear(num_bottleneck, num_classes)] + classifier = nn.Sequential(*classifier) + classifier.apply(weights_init_classifier) + + self.add_block = add_block + self.classifier = classifier + def forward(self, x): + x = self.add_block(x) + if self.return_f: + f = x + x = self.classifier(x) + return x,f + else: + x = self.classifier(x) + return x + +class ft_net(nn.Module): + + def __init__(self, num_classes, droprate=0.5, stride=2, init_model=None, pool='avg', pretrained=False): + super(ft_net, self).__init__() + model_ft = resnet50(pretrained=pretrained) + # avg pooling to global pooling + if stride == 1: + model_ft.layer4[0].downsample[0].stride = (1,1) + model_ft.layer4[0].conv2.stride = (1,1) + + self.pool = pool + if pool == 'avg+max': # False + model_ft.avgpool2 = nn.AdaptiveAvgPool2d((1,1)) + model_ft.maxpool2 = nn.AdaptiveMaxPool2d((1,1)) + self.model = model_ft + self.classifier = ClassBlock(4096, num_classes, droprate) + elif pool == 'avg': + model_ft.avgpool = nn.AdaptiveAvgPool2d((1,1)) + self.model = model_ft + self.classifier = ClassBlock(2048, num_classes, droprate) + + self.flag = False + if init_model is not None: # False + self.flag = True + self.model = init_model.model + self.pool = init_model.pool + self.classifier.add_block = init_model.classifier.add_block + self.new_dropout = nn.Sequential(nn.Dropout(p=droprate)) + # avg pooling to global pooling + + def forward(self, x): + x = self.model.conv1(x) + x = self.model.bn1(x) + x = self.model.relu(x) + x = self.model.maxpool(x) + x = self.model.layer1(x) + x = self.model.layer2(x) + x = self.model.layer3(x) + x = self.model.layer4(x) + if self.pool == 'avg+max': + x1 = self.model.avgpool2(x) + x2 = self.model.maxpool2(x) + x = torch.cat((x1,x2), dim = 1) + x = x.view(x.size(0), x.size(1)) + elif self.pool == 'avg': + x = self.model.avgpool(x) + x = x.view(x.size(0), x.size(1)) + + + # return embedding in test stage + if not self.training: + return x + + if self.flag: + x = self.classifier.add_block(x) + x = self.new_dropout(x) + x = self.classifier.classifier(x) + else: + x = self.classifier(x) + return x \ No newline at end of file diff --git a/tracker/trackers/reid_models/engine.py b/tracker/trackers/reid_models/engine.py new file mode 100644 index 0000000..45f9e12 --- /dev/null +++ b/tracker/trackers/reid_models/engine.py @@ -0,0 +1,209 @@ +""" +load reid model according to model name and checkpoint +""" + +import os +import os.path as osp + +import pickle +from functools 
import partial +import torch +import torch.nn as nn + +from collections import OrderedDict +from loguru import logger + +import cv2 +import numpy as np +from .OSNet import * +from .DeepsortReID import Extractor +from .ShuffleNetv2 import * +from .MobileNetv2 import * +from .VehicleNet import ft_net + +# All reid models +REID_MODEL_DICT = { + 'osnet_x1_0': osnet_x1_0, + 'osnet_x0_75': osnet_x0_75, + 'osnet_x0_5': osnet_x0_5, + 'osnet_x0_25': osnet_x0_25, + 'shufflenet_v2_x0_5': shufflenet_v2_x0_5, + 'shufflenet_v2_x1_0': shufflenet_v2_x1_0, + 'shufflenet_v2_x1_5': shufflenet_v2_x1_5, + 'shufflenet_v2_x2_0': shufflenet_v2_x2_0, + 'mobilenetv2_x1_0': mobilenetv2_x1_0, + 'mobilenetv2_x1_4': mobilenetv2_x1_4, + 'vehiclenet': ft_net, + 'deepsort': Extractor +} + + +def load_reid_model(reid_model, reid_model_path, device=None): + """ + load reid model according to model name and checkpoint + """ + + device = select_device(device) + + if not reid_model in REID_MODEL_DICT.keys(): + raise NotImplementedError + + if 'deepsort' in reid_model: + model = REID_MODEL_DICT[reid_model](reid_model_path, device=device) + + else: + func = REID_MODEL_DICT[reid_model] + model = func(num_classes=1, pretrained=False, ) + load_pretrained_weights(model, reid_model_path) + model.to(device).eval() + + return model + +def crop_and_resize(bboxes, ori_img, input_format='tlwh', sz=(128, 256), + mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]): + """ + crop the bounding boxes from original image + + Arguments: + bboxes: np.ndarray: (n, 4) + ori_img: np.ndarray: (h, w, c) + sz: tuple: (w, h) + + Returns: + cropped_img: torch.Tensor: (n, c, h, w) + """ + # clone the bboxes to avoid modifying the original one + bboxes = bboxes.copy() + + # convert bbox to xyxy first + if not input_format == 'tlbr': + bboxes[:, 2] = bboxes[:, 0] + bboxes[:, 2] + bboxes[:, 3] = bboxes[:, 1] + bboxes[:, 3] + + img_h, img_w = ori_img.shape[:2] + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + + mean_array = torch.tensor(mean, device=device).view(1, 3, 1, 1) + std_array = torch.tensor(std, device=device).view(1, 3, 1, 1) + + num_crops = len(bboxes) + crops = torch.empty((num_crops, 3, sz[1], sz[0]), + dtype=torch.float, device=device) + + for i, box in enumerate(bboxes): + x1, y1, x2, y2 = box.round().astype('int') + x1, y1, x2, y2 = max(0, x1), max(0, y1), min(img_w, x2), min(img_h, y2) + crop = ori_img[y1:y2, x1:x2] + + # Resize and convert color in one step + crop = cv2.resize(crop, sz, interpolation=cv2.INTER_LINEAR) + crop = cv2.cvtColor(crop, cv2.COLOR_BGR2RGB) + + # Convert to tensor and normalize (convert to [0, 1] by dividing by 255 in batch later) + crop = torch.from_numpy(crop).to(device, dtype=torch.float) + crops[i] = torch.permute(crop, (2, 0, 1)) # Change to (C, H, W) + + crops = crops / 255.0 + + # Normalize the crops as experience + crops = (crops - mean_array) / std_array + + return crops + + + +# auxiliary functions +def load_checkpoint(fpath): + """ + loads checkpoint + copy from https://github.com/KaiyangZhou/deep-person-reid + """ + if fpath is None: + raise ValueError('File path is None') + fpath = osp.abspath(osp.expanduser(fpath)) + if not osp.exists(fpath): + raise FileNotFoundError('File is not found at "{}"'.format(fpath)) + map_location = None if torch.cuda.is_available() else 'cpu' + try: + checkpoint = torch.load(fpath, map_location=map_location) + except UnicodeDecodeError: + pickle.load = partial(pickle.load, encoding="latin1") + pickle.Unpickler = partial(pickle.Unpickler, 
encoding="latin1") + checkpoint = torch.load( + fpath, pickle_module=pickle, map_location=map_location + ) + except Exception: + print('Unable to load checkpoint from "{}"'.format(fpath)) + raise + return checkpoint + +def load_pretrained_weights(model, weight_path): + """ + load pretrained weights for OSNet + """ + checkpoint = load_checkpoint(weight_path) + if 'state_dict' in checkpoint: + state_dict = checkpoint['state_dict'] + else: + state_dict = checkpoint + + model_dict = model.state_dict() + new_state_dict = OrderedDict() + matched_layers, discarded_layers = [], [] + + for k, v in state_dict.items(): + if k.startswith('module.'): + k = k[7:] # discard module. + + if k in model_dict and model_dict[k].size() == v.size(): + new_state_dict[k] = v + matched_layers.append(k) + else: + discarded_layers.append(k) + + model_dict.update(new_state_dict) + model.load_state_dict(model_dict) + + if len(matched_layers) == 0: + logger.warning( + 'The pretrained weights "{}" cannot be loaded, ' + 'please check the key names manually ' + '(** ignored and continue **)'.format(weight_path) + ) + else: + print( + 'Successfully loaded pretrained weights from "{}"'. + format(weight_path) + ) + if len(discarded_layers) > 0: + print( + '** The following layers are discarded ' + 'due to unmatched keys or layer size: {}'. + format(discarded_layers) + ) + +def select_device(device): + """ set device + same as the function in tracker/tracking_utils/envs.py + Args: + device: str, 'cpu' or '0' or '1,2,3'-like + + Return: + torch.device + + """ + + if device == 'cpu': + logger.info('Use CPU for training') + + elif ',' in device: # multi-gpu + logger.error('Multi-GPU currently not supported') + + else: + logger.info(f'set gpu {device}') + os.environ['CUDA_VISIBLE_DEVICES'] = device + assert torch.cuda.is_available() + + cuda = device != 'cpu' and torch.cuda.is_available() + device = torch.device('cuda:0' if cuda else 'cpu') + return device \ No newline at end of file diff --git a/tracker/trackers/reid_models/load_model_tools.py b/tracker/trackers/reid_models/load_model_tools.py deleted file mode 100644 index 49cb0fe..0000000 --- a/tracker/trackers/reid_models/load_model_tools.py +++ /dev/null @@ -1,273 +0,0 @@ -""" -load checkpoint file -copied from https://github.com/mikel-brostrom/Yolov5_StrongSORT_OSNet -""" -from __future__ import division, print_function, absolute_import -import pickle -import shutil -import os.path as osp -import warnings -from functools import partial -from collections import OrderedDict -import torch -import torch.nn as nn - - -__all__ = [ - 'save_checkpoint', 'load_checkpoint', 'resume_from_checkpoint', - 'open_all_layers', 'open_specified_layers', 'count_num_param', - 'load_pretrained_weights' -] - -def load_checkpoint(fpath): - r"""Loads checkpoint. - - ``UnicodeDecodeError`` can be well handled, which means - python2-saved files can be read from python3. - - Args: - fpath (str): path to checkpoint. 
- - Returns: - dict - - Examples:: - >>> from torchreid.utils import load_checkpoint - >>> fpath = 'log/my_model/model.pth.tar-10' - >>> checkpoint = load_checkpoint(fpath) - """ - if fpath is None: - raise ValueError('File path is None') - fpath = osp.abspath(osp.expanduser(fpath)) - if not osp.exists(fpath): - raise FileNotFoundError('File is not found at "{}"'.format(fpath)) - map_location = None if torch.cuda.is_available() else 'cpu' - try: - checkpoint = torch.load(fpath, map_location=map_location) - except UnicodeDecodeError: - pickle.load = partial(pickle.load, encoding="latin1") - pickle.Unpickler = partial(pickle.Unpickler, encoding="latin1") - checkpoint = torch.load( - fpath, pickle_module=pickle, map_location=map_location - ) - except Exception: - print('Unable to load checkpoint from "{}"'.format(fpath)) - raise - return checkpoint - - -def resume_from_checkpoint(fpath, model, optimizer=None, scheduler=None): - r"""Resumes training from a checkpoint. - - This will load (1) model weights and (2) ``state_dict`` - of optimizer if ``optimizer`` is not None. - - Args: - fpath (str): path to checkpoint. - model (nn.Module): model. - optimizer (Optimizer, optional): an Optimizer. - scheduler (LRScheduler, optional): an LRScheduler. - - Returns: - int: start_epoch. - - Examples:: - >>> from torchreid.utils import resume_from_checkpoint - >>> fpath = 'log/my_model/model.pth.tar-10' - >>> start_epoch = resume_from_checkpoint( - >>> fpath, model, optimizer, scheduler - >>> ) - """ - print('Loading checkpoint from "{}"'.format(fpath)) - checkpoint = load_checkpoint(fpath) - model.load_state_dict(checkpoint['state_dict']) - print('Loaded model weights') - if optimizer is not None and 'optimizer' in checkpoint.keys(): - optimizer.load_state_dict(checkpoint['optimizer']) - print('Loaded optimizer') - if scheduler is not None and 'scheduler' in checkpoint.keys(): - scheduler.load_state_dict(checkpoint['scheduler']) - print('Loaded scheduler') - start_epoch = checkpoint['epoch'] - print('Last epoch = {}'.format(start_epoch)) - if 'rank1' in checkpoint.keys(): - print('Last rank1 = {:.1%}'.format(checkpoint['rank1'])) - return start_epoch - - -def adjust_learning_rate( - optimizer, - base_lr, - epoch, - stepsize=20, - gamma=0.1, - linear_decay=False, - final_lr=0, - max_epoch=100 -): - r"""Adjusts learning rate. - - Deprecated. - """ - if linear_decay: - # linearly decay learning rate from base_lr to final_lr - frac_done = epoch / max_epoch - lr = frac_done*final_lr + (1.-frac_done) * base_lr - else: - # decay learning rate by gamma for every stepsize - lr = base_lr * (gamma**(epoch // stepsize)) - - for param_group in optimizer.param_groups: - param_group['lr'] = lr - - -def set_bn_to_eval(m): - r"""Sets BatchNorm layers to eval mode.""" - # 1. no update for running mean and var - # 2. scale and shift parameters are still trainable - classname = m.__class__.__name__ - if classname.find('BatchNorm') != -1: - m.eval() - - -def open_all_layers(model): - r"""Opens all layers in model for training. - - Examples:: - >>> from torchreid.utils import open_all_layers - >>> open_all_layers(model) - """ - model.train() - for p in model.parameters(): - p.requires_grad = True - - -def open_specified_layers(model, open_layers): - r"""Opens specified layers in model for training while keeping - other layers frozen. - - Args: - model (nn.Module): neural net model. - open_layers (str or list): layers open for training. 
- - Examples:: - >>> from torchreid.utils import open_specified_layers - >>> # Only model.classifier will be updated. - >>> open_layers = 'classifier' - >>> open_specified_layers(model, open_layers) - >>> # Only model.fc and model.classifier will be updated. - >>> open_layers = ['fc', 'classifier'] - >>> open_specified_layers(model, open_layers) - """ - if isinstance(model, nn.DataParallel): - model = model.module - - if isinstance(open_layers, str): - open_layers = [open_layers] - - for layer in open_layers: - assert hasattr( - model, layer - ), '"{}" is not an attribute of the model, please provide the correct name'.format( - layer - ) - - for name, module in model.named_children(): - if name in open_layers: - module.train() - for p in module.parameters(): - p.requires_grad = True - else: - module.eval() - for p in module.parameters(): - p.requires_grad = False - - -def count_num_param(model): - r"""Counts number of parameters in a model while ignoring ``self.classifier``. - - Args: - model (nn.Module): network model. - - Examples:: - >>> from torchreid.utils import count_num_param - >>> model_size = count_num_param(model) - - .. warning:: - - This method is deprecated in favor of - ``torchreid.utils.compute_model_complexity``. - """ - warnings.warn( - 'This method is deprecated and will be removed in the future.' - ) - - num_param = sum(p.numel() for p in model.parameters()) - - if isinstance(model, nn.DataParallel): - model = model.module - - if hasattr(model, - 'classifier') and isinstance(model.classifier, nn.Module): - # we ignore the classifier because it is unused at test time - num_param -= sum(p.numel() for p in model.classifier.parameters()) - - return num_param - - -def load_pretrained_weights(model, weight_path): - r"""Loads pretrianed weights to model. - - Features:: - - Incompatible layers (unmatched in name or size) will be ignored. - - Can automatically deal with keys containing "module.". - - Args: - model (nn.Module): network model. - weight_path (str): path to pretrained weights. - - Examples:: - >>> from torchreid.utils import load_pretrained_weights - >>> weight_path = 'log/my_model/model-best.pth.tar' - >>> load_pretrained_weights(model, weight_path) - """ - checkpoint = load_checkpoint(weight_path) - if 'state_dict' in checkpoint: - state_dict = checkpoint['state_dict'] - else: - state_dict = checkpoint - - model_dict = model.state_dict() - new_state_dict = OrderedDict() - matched_layers, discarded_layers = [], [] - - for k, v in state_dict.items(): - if k.startswith('module.'): - k = k[7:] # discard module. - - if k in model_dict and model_dict[k].size() == v.size(): - new_state_dict[k] = v - matched_layers.append(k) - else: - discarded_layers.append(k) - - model_dict.update(new_state_dict) - model.load_state_dict(model_dict) - - if len(matched_layers) == 0: - warnings.warn( - 'The pretrained weights "{}" cannot be loaded, ' - 'please check the key names manually ' - '(** ignored and continue **)'.format(weight_path) - ) - else: - print( - 'Successfully loaded pretrained weights from "{}"'. - format(weight_path) - ) - if len(discarded_layers) > 0: - print( - '** The following layers are discarded ' - 'due to unmatched keys or layer size: {}'. 
- format(discarded_layers) - ) diff --git a/tracker/trackers/sort_tracker.py b/tracker/trackers/sort_tracker.py index 3d40410..a93e99c 100644 --- a/tracker/trackers/sort_tracker.py +++ b/tracker/trackers/sort_tracker.py @@ -23,6 +23,9 @@ def __init__(self, args, frame_rate=30): self.motion = args.kalman_format + # once init, clear all trackid count to avoid large id + BaseTrack.clear_count() + def update(self, output_results, img, ori_img): """ output_results: processed detections (scale to original size) tlbr format diff --git a/tracker/trackers/sparse_tracker.py b/tracker/trackers/sparse_tracker.py index 0a4d7ee..39e4fd0 100644 --- a/tracker/trackers/sparse_tracker.py +++ b/tracker/trackers/sparse_tracker.py @@ -13,36 +13,7 @@ from .tracklet import Tracklet, Tracklet_w_depth from .matching import * -from .reid_models.OSNet import * -from .reid_models.load_model_tools import load_pretrained_weights -from .reid_models.deepsort_reid import Extractor - -from .camera_motion_compensation import GMC - -REID_MODEL_DICT = { - 'osnet_x1_0': osnet_x1_0, - 'osnet_x0_75': osnet_x0_75, - 'osnet_x0_5': osnet_x0_5, - 'osnet_x0_25': osnet_x0_25, - 'deepsort': Extractor -} - - -def load_reid_model(reid_model, reid_model_path): - - if 'osnet' in reid_model: - func = REID_MODEL_DICT[reid_model] - model = func(num_classes=1, pretrained=False, ) - load_pretrained_weights(model, reid_model_path) - model.cuda().eval() - - elif 'deepsort' in reid_model: - model = REID_MODEL_DICT[reid_model](reid_model_path, use_cuda=True) - - else: - raise NotImplementedError - - return model +from .camera_motion_compensation.cmc import GMC class SparseTracker(object): def __init__(self, args, frame_rate=30): @@ -62,6 +33,9 @@ def __init__(self, args, frame_rate=30): # camera motion compensation module self.gmc = GMC(method='orb', downscale=2, verbose=None) + # once init, clear all trackid count to avoid large id + BaseTrack.clear_count() + def get_deep_range(self, obj, step): col = [] for t in obj: @@ -178,7 +152,7 @@ def update(self, output_results, img, ori_img): categories = output_results[:, -1] remain_inds = scores > self.args.conf_thresh - inds_low = scores > 0.1 + inds_low = scores > self.args.conf_thresh_low inds_high = scores < self.args.conf_thresh inds_second = np.logical_and(inds_low, inds_high) diff --git a/tracker/trackers/strongsort_tracker.py b/tracker/trackers/strongsort_tracker.py index e2c9dce..3e759ed 100644 --- a/tracker/trackers/strongsort_tracker.py +++ b/tracker/trackers/strongsort_tracker.py @@ -13,35 +13,8 @@ from .tracklet import Tracklet, Tracklet_w_reid from .matching import * -from .reid_models.OSNet import * -from .reid_models.load_model_tools import load_pretrained_weights -from .reid_models.deepsort_reid import Extractor - -REID_MODEL_DICT = { - 'osnet_x1_0': osnet_x1_0, - 'osnet_x0_75': osnet_x0_75, - 'osnet_x0_5': osnet_x0_5, - 'osnet_x0_25': osnet_x0_25, - 'deepsort': Extractor -} - - -def load_reid_model(reid_model, reid_model_path): - - if 'osnet' in reid_model: - func = REID_MODEL_DICT[reid_model] - model = func(num_classes=1, pretrained=False, ) - load_pretrained_weights(model, reid_model_path) - model.cuda().eval() - - elif 'deepsort' in reid_model: - model = REID_MODEL_DICT[reid_model](reid_model_path, use_cuda=True) - - else: - raise NotImplementedError - - return model - +# for reid +from .reid_models.engine import load_reid_model, crop_and_resize, select_device class StrongSortTracker(object): @@ -61,61 +34,36 @@ def __init__(self, args, frame_rate=30): self.with_reid = not 
args.discard_reid - self.reid_model, self.crop_transforms = None, None + self.with_reid = args.reid + + self.reid_model = None if self.with_reid: - self.reid_model = load_reid_model(args.reid_model, args.reid_model_path) - self.crop_transforms = T.Compose([ - # T.ToPILImage(), - # T.Resize(size=(256, 128)), - T.ToTensor(), # (c, 128, 256) - T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) - ]) + self.reid_model = load_reid_model(args.reid_model, args.reid_model_path, device=args.device) + self.reid_model.eval() self.bbox_crop_size = (64, 128) if 'deepsort' in args.reid_model else (128, 128) self.lambda_ = 0.98 # the coef of cost mix in eq. 10 in paper - - def reid_preprocess(self, obj_bbox): - """ - preprocess cropped object bboxes + # once init, clear all trackid count to avoid large id + BaseTrack.clear_count() - obj_bbox: np.ndarray, shape=(h_obj, w_obj, c) - - return: - torch.Tensor of shape (c, 128, 256) - """ - - obj_bbox = cv2.resize(obj_bbox.astype(np.float32) / 255.0, dsize=self.bbox_crop_size) # shape: (h, w, c) - - return self.crop_transforms(obj_bbox) + @torch.no_grad() def get_feature(self, tlwhs, ori_img): """ get apperance feature of an object tlwhs: shape (num_of_objects, 4) ori_img: original image, np.ndarray, shape(H, W, C) """ - obj_bbox = [] - for tlwh in tlwhs: - tlwh = list(map(int, tlwh)) + if tlwhs.size == 0: + return np.empty((0, 512)) - # limit to the legal range - tlwh[0], tlwh[1] = max(tlwh[0], 0), max(tlwh[1], 0) - - tlbr_tensor = self.reid_preprocess(ori_img[tlwh[1]: tlwh[1] + tlwh[3], tlwh[0]: tlwh[0] + tlwh[2]]) + crop_bboxes = crop_and_resize(tlwhs, ori_img, input_format='tlwh', sz=(64, 128)) + features = self.reid_model(crop_bboxes).cpu().numpy() - obj_bbox.append(tlbr_tensor) - - if not obj_bbox: - return np.array([]) - - obj_bbox = torch.stack(obj_bbox, dim=0) - obj_bbox = obj_bbox.cuda() - - features = self.reid_model(obj_bbox) # shape: (num_of_objects, feature_dim) - return features.cpu().detach().numpy() + return features def update(self, output_results, img, ori_img): """ diff --git a/tracker/trackers/tracklet.py b/tracker/trackers/tracklet.py index 5acfa67..413c80a 100644 --- a/tracker/trackers/tracklet.py +++ b/tracker/trackers/tracklet.py @@ -54,7 +54,7 @@ def __init__(self, tlwh, score, category, motion='byte'): self.kalman_filter.initialize(self.convert_func(self._tlwh)) def predict(self): - self.kalman_filter.predict() + self.kalman_filter.predict(is_activated=self.state == TrackState.Tracked) self.time_since_update += 1 def activate(self, frame_id): @@ -68,13 +68,14 @@ def activate(self, frame_id): def re_activate(self, new_track, frame_id, new_id=False): + self.frame_id = frame_id # TODO different convert self.kalman_filter.update(self.convert_func(new_track.tlwh)) self.state = TrackState.Tracked self.is_activated = True - self.frame_id = frame_id + if new_id: self.track_id = self.next_id() self.score = new_track.score @@ -139,13 +140,17 @@ def __init__(self, tlwh, score, category, motion='byte', self.alpha = 0.9 - def update_features(self, feat): + def update_features(self, feat, alpha=None): + ''' + alpha: if specified, use alpha instead of self.alpha + ''' feat /= np.linalg.norm(feat) self.curr_feat = feat if self.smooth_feat is None: self.smooth_feat = feat else: - self.smooth_feat = self.alpha * self.smooth_feat + (1 - self.alpha) * feat + alpha_ = self.alpha if alpha is None else alpha + self.smooth_feat = alpha_ * self.smooth_feat + (1 - alpha_) * feat self.features.append(feat) self.smooth_feat /= 
np.linalg.norm(self.smooth_feat) @@ -190,10 +195,11 @@ def update(self, new_track, frame_id): class Tracklet_w_velocity(Tracklet): """ - Tracklet class with center point velocity, for ocsort. + Tracklet class with center point velocity, for ocsort or deep ocsort """ - def __init__(self, tlwh, score, category, motion='byte', delta_t=3): + def __init__(self, tlwh, score, category, motion='byte', delta_t=3, + feat=None, feat_history=50, det_conf_thresh=0.1): super().__init__(tlwh, score, category, motion) self.last_observation = np.array([-1, -1, -1, -1, -1]) # placeholder @@ -204,6 +210,31 @@ def __init__(self, tlwh, score, category, motion='byte', delta_t=3): self.age = 0 # mark the age + # reid featurs, for deep ocsort + self.features = deque([], maxlen=feat_history) # all features + self.smooth_feat = None # EMA feature + self.curr_feat = None # current feature + if feat is not None: + self.update_features(feat) + + # the dynamic alpha in eq.2~3 in deep ocsort paper + self.alpha_fixed_emb = 0.95 # defult param + trust = (score - det_conf_thresh) / (1. - det_conf_thresh) + self.dynamic_alpha = self.alpha_fixed_emb + (1. - self.alpha_fixed_emb) * (1. - trust) + + def update_features(self, feat, alpha=1.0): + ''' + alpha: if specified, use alpha instead of self.alpha + ''' + feat /= np.linalg.norm(feat) + self.curr_feat = feat + if self.smooth_feat is None: + self.smooth_feat = feat + else: + self.smooth_feat = alpha * self.smooth_feat + (1 - alpha) * feat + self.features.append(feat) + self.smooth_feat /= np.linalg.norm(self.smooth_feat) + @property def tlwh(self): """ @@ -224,7 +255,7 @@ def speed_direction(bbox1, bbox2): return speed / norm def predict(self): - self.kalman_filter.predict() + self.kalman_filter.predict(is_activated=self.state == TrackState.Tracked) self.age += 1 self.time_since_update += 1 @@ -262,6 +293,10 @@ def update(self, new_track, frame_id): self.observations[self.age] = new_observation self.history_observations.append(new_observation) + # update reid features + if self.curr_feat is not None: + self.update_features(self.curr_feat, alpha=new_track.dynamic_alpha) + class Tracklet_w_velocity_four_corner(Tracklet): """ @@ -325,7 +360,7 @@ def speed_direction(point1, point2): return speed / norm def predict(self): - self.kalman_filter.predict() + self.kalman_filter.predict(is_activated=self.state == TrackState.Tracked) self.age += 1 self.time_since_update += 1 diff --git a/tracker/trackers/ucmc_tracker.py b/tracker/trackers/ucmc_tracker.py index 58b9c3c..4930fa6 100644 --- a/tracker/trackers/ucmc_tracker.py +++ b/tracker/trackers/ucmc_tracker.py @@ -45,6 +45,9 @@ def __init__(self, args, frame_rate=30): Tracklet_w_UCMC.A = A Tracklet_w_UCMC.InvA = InvA + # once init, clear all trackid count to avoid large id + BaseTrack.clear_count() + def _read_cam_param(self, ): """ read the camera param, borrowed from https://github.com/corfyi/UCMCTrack @@ -106,7 +109,7 @@ def update(self, output_results, img, ori_img): categories = output_results[:, -1] remain_inds = scores > self.args.conf_thresh - inds_low = scores > 0.1 + inds_low = scores > self.args.conf_thresh_low inds_high = scores < self.args.conf_thresh inds_second = np.logical_and(inds_low, inds_high) From 96bc57592907e18a2dbb374ade1bc85a1f0de92b Mon Sep 17 00:00:00 2001 From: JackWoo0831 Date: Mon, 7 Apr 2025 23:06:22 +0800 Subject: [PATCH 6/7] update readme and optimize the save_dir --- README.md | 37 +++++++++++++++++++++---- README_CN.md | 49 +++++++++++++++++++++++++-------- tracker/track.py | 8 ++++-- 
tracker/track_demo.py | 1 + tracker/tracking_utils/tools.py | 11 ++++---- 5 files changed, 80 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index fe40c3a..f8b62ab 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,7 @@ git checkout v2 # change to v2 branch !! ## 🗺️ Latest News +- ***2025.4.7*** Add more Re-ID modules (ShuffleNet, VehicleNet, MobileNet), fix some bugs (such as abandon bbox aspect ratio updating if the tracklet is not activated), and add some functions (customized low filter threshold, fuse detection score, etc.) - ***2025.4.3*** Support the newest ultralytics version (YOLO v3 ~ v12) and fix some bugs of hybrid sort. ## ❤️ Introduction @@ -35,9 +36,10 @@ and the tracker supports: - SORT - DeepSORT -- ByteTrack ([ECCV2022](https://arxiv.org/pdf/2110.06864)) -- Bot-SORT ([arxiv2206](https://arxiv.org/pdf/2206.14651.pdf)) +- ByteTrack ([ECCV2022](https://arxiv.org/pdf/2110.06864)) and ByetTrack-ReID +- Bot-SORT ([arxiv2206](https://arxiv.org/pdf/2206.14651.pdf)) and Bot-SORT-ReID - OCSORT ([CVPR2023](https://openaccess.thecvf.com/content/CVPR2023/papers/Cao_Observation-Centric_SORT_Rethinking_SORT_for_Robust_Multi-Object_Tracking_CVPR_2023_paper.pdf)) +- DeepOCSORT ([ICIP2023](https://arxiv.org/abs/2302.11813)) - C_BIoU Track ([arxiv2211](https://arxiv.org/pdf/2211.14317v2.pdf)) - Strong SORT ([IEEE TMM 2023](https://arxiv.org/pdf/2202.13514)) - Sparse Track ([arxiv 2306](https://arxiv.org/pdf/2306.05238)) @@ -46,8 +48,17 @@ and the tracker supports: and the reid model supports: +Pedestrain Re-ID: - OSNet - Extractor from DeepSort +- ShuffleNet +- MobileNet + +Vehicle Re-ID: +- VehicleNet ([AICIty-reID-2020](https://github.com/layumi/AICIty-reID-2020)) + +> **checkpoitns of some Re-ID models**: [Baidu Disk](https://pan.baidu.com/s/1QbVoBz4mPpf4Qsqq1PYXkQ) Code: c655 + The highlights are: - Supporting more trackers than MMTracking @@ -155,11 +166,11 @@ python train_aux.py --dataset visdrone --workers 8 --device <$GPU_id$> --batch-s python tracker/yolo_ultralytics_utils/train_yolo_ultralytics.py --model_weight weights/yolo11m.pt --data_cfg tracker/yolo_ultralytics_utils/data_cfgs/visdrone_det.yaml --epochs 30 --batch_size 8 --img_sz 1280 --device 0 ``` - +> The training of Re-ID model please refer to its original paper or github repo. The pedestrain Re-ID model such as ShuffleNet, OSNet please refer to [torchreid](https://github.com/KaiyangZhou/deep-person-reid), the vehicle Re-ID model please refer to [AICIty-reID-2020](https://github.com/layumi/AICIty-reID-2020). ### 😊 Tracking ! 
-If you only want to run a demo: +**If you only want to run a demo**: ```bash python tracker/track_demo.py --obj ${video path or images folder path} --detector ${yolox, yolov7 or yolo_ultra} --tracker ${tracker name} --kalman_format ${kalman format, sort, byte, ...} --detector_model_path ${detector weight path} --save_images @@ -176,13 +187,27 @@ For example: python tracker/track_demo.py --obj M0203.mp4 --detector yolo_ultra_v8 --tracker deepsort --kalman_format byte --detector_model_path weights/yolov8l_UAVDT_60epochs_20230509.pt --save_images ``` -If you want to run trackers on dataset: +**If you want to run trackers on dataset**: ```bash python tracker/track.py --dataset ${dataset name, related with the yaml file} --detector ${yolox, yolo_ultra_v8 or yolov7} --tracker ${tracker name} --kalman_format ${kalman format, sort, byte, ...} --detector_model_path ${detector weight path} ``` -For example: +In addition, you can also specify + +`--reid`: Enable the reid model (currently useful for ByteTrack, BoT-SORT, OCSORT) + +`--reid_model`: Which model to use: Refer to `REID_MODEL_DICT` in `tracker/trackers/reid_models/engine.py` to select + +`--reid_model_path`: Loaded re-identification model weight path + +`--conf_thresh_low`: For two-stage association models (ByteTrack, BoT-SORT, etc.), the minimum confidence threshold (default 0.1) + +`--fuse_detection_score`: If added, the IoU value and the detection confidence value are fused, for example, the source code of BoT-SORT does this + +`--save_images`: Save the result image + +***Examples of tracking algorithms***: - SORT: `python tracker/track.py --dataset uavdt --detector yolo_ultra_v8 --tracker sort --kalman_format sort --detector_model_path weights/yolov8l_UAVDT_60epochs_20230509.pt ` diff --git a/README_CN.md b/README_CN.md index 3920b75..fc33815 100644 --- a/README_CN.md +++ b/README_CN.md @@ -15,6 +15,7 @@ git checkout v2 # change to v2 branch !! ## 🗺️ 最近更新 +- ***2025.4.7*** 增加更多Re-ID模型 (ShuffleNet, VehicleNet, MobileNet), 修复一些bug (例如在轨迹为非活动状态时停止更新边界框长宽), 增加一些小功能 (例如可以修改两阶段关联策略的最低阈值,原来是固定的0.1; 增加将IoU和检测置信度融合的选项) - ***2025.4.3*** 增加了ultralytics库最新版本的支持,修复了hybrid sort中的一些bug. @@ -30,9 +31,10 @@ git checkout v2 # change to v2 branch !! - SORT - DeepSORT -- ByteTrack ([ECCV2022](https://arxiv.org/pdf/2110.06864)) -- Bot-SORT ([arxiv2206](https://arxiv.org/pdf/2206.14651.pdf)) +- ByteTrack ([ECCV2022](https://arxiv.org/pdf/2110.06864)) 以及 ByetTrack-ReID +- Bot-SORT ([arxiv2206](https://arxiv.org/pdf/2206.14651.pdf)) 以及 Bot-SORT-ReID - OCSORT ([CVPR2023](https://openaccess.thecvf.com/content/CVPR2023/papers/Cao_Observation-Centric_SORT_Rethinking_SORT_for_Robust_Multi-Object_Tracking_CVPR_2023_paper.pdf)) +- DeepOCSORT ([ICIP2023](https://arxiv.org/abs/2302.11813)) - C_BIoU Track ([arxiv2211](https://arxiv.org/pdf/2211.14317v2.pdf)) - Strong SORT ([IEEE TMM 2023](https://arxiv.org/pdf/2202.13514)) - Sparse Track ([arxiv 2306](https://arxiv.org/pdf/2306.05238)) @@ -41,8 +43,17 @@ git checkout v2 # change to v2 branch !! REID模型支持: +行人重识别模型: - OSNet -- DeepSORT中的 +- Extractor from DeepSort +- ShuffleNet +- MobileNet + +车辆重识别模型: + +- VehicleNet ([AICIty-reID-2020](https://github.com/layumi/AICIty-reID-2020)) + +> **部分重识别模型的权重**: [百度网盘](https://pan.baidu.com/s/1QbVoBz4mPpf4Qsqq1PYXkQ) 提取码: c655 亮点包括: - 支持的跟踪器比MMTracking多 @@ -51,10 +62,6 @@ REID模型支持: ![gif](figure/demo.gif) -## 🗺️ 路线图 - -- [ x ] Add UCMC Track -- [] Add more ReID modules. 
## 🔨 安装 @@ -154,11 +161,11 @@ python train_aux.py --dataset visdrone --workers 8 --device <$GPU_id$> --batch-s python tracker/yolo_ultralytics_utils/train_yolo_ultralytics.py --model_weight weights/yolo11m.pt --data_cfg tracker/yolo_ultralytics_utils/data_cfgs/visdrone_det.yaml --epochs 30 --batch_size 8 --img_sz 1280 --device 0 ``` - +> 关于重识别模型的训练, 请先参照对应模型的原论文或代码. 行人重识别模型例如 ShuffleNet, OSNet 参考 [torchreid](https://github.com/KaiyangZhou/deep-person-reid), 车辆重识别模型参考 [AICIty-reID-2020](https://github.com/layumi/AICIty-reID-2020). ### 😊 跟踪! -如果你只是想运行一个demo: +**如果你只是想运行一个demo**: ```bash python tracker/track_demo.py --obj ${video path or images folder path} --detector ${yolox, yolov7 or yolo_ultra} --tracker ${tracker name} --kalman_format ${kalman format, sort, byte, ...} --detector_model_path ${detector weight path} --save_images @@ -175,13 +182,27 @@ python tracker/track_demo.py --obj ${video path or images folder path} --detecto python tracker/track_demo.py --obj M0203.mp4 --detector yolov8 --tracker deepsort --kalman_format byte --detector_model_path weights/yolov8l_UAVDT_60epochs_20230509.pt --save_images ``` -如果你想在数据集上测试: +**如果你想在数据集上测试**: ```bash python tracker/track.py --dataset ${dataset name, related with the yaml file} --detector ${yolox, yolo_ultra_v8 or yolov7} --tracker ${tracker name} --kalman_format ${kalman format, sort, byte, ...} --detector_model_path ${detector weight path} ``` -例如: +此外, 还可以指定 + +`--reid`: 启用reid模型(目前对ByteTrack, BoT-SORT, OCSORT有用) + +`--reid_model`: 采用那种模型: 参照`tracker/trackers/reid_models/engine.py`中的`REID_MODEL_DICT`选取 + +`--reid_model_path`: 加载的重识别模型权重路径 + +`--conf_thresh_low`: 对于两阶段关联模型(ByteTrack, BoT-SORT等), 最低置信度阈值(默认0.1) + +`--fuse_detection_score`: 如果加上, 就融合IoU的值和检测置信度的值, 例如BoT-SORT的源码是这样做的 + +`--save_images`: 保存结果图片 + +***各种跟踪算法运行示例***: - SORT: `python tracker/track.py --dataset uavdt --detector yolo_ultra_v8 --tracker sort --kalman_format sort --detector_model_path weights/yolov8l_UAVDT_60epochs_20230509.pt ` @@ -189,12 +210,18 @@ python tracker/track.py --dataset ${dataset name, related with the yaml file} -- - ByteTrack: `python tracker/track.py --dataset uavdt --detector yolo_ultra_v8 --tracker bytetrack --kalman_format byte --detector_model_path weights/yolov8l_UAVDT_60epochs_20230509.pt` +- ByteTrack-ReID: `python tracker/track.py --dataset uavdt --detector yolo_ultra_v8 --tracker bytetrack --kalman_format byte --detector_model_path weights/yolov8l_UAVDT_60epochs_20230509.pt --reid --reid_model osnet_x0_25 --reid_model_path weights/osnet_x0_25.pth` + - OCSort: `python tracker/track.py --dataset mot17 --detector yolox --tracker ocsort --kalman_format ocsort --detector_model_path weights/bytetrack_m_mot17.pth.tar` +- DeepOCSORT: `python tracker/track.py --dataset mot17 --detector yolox --tracker ocsort --kalman_format ocsort --detector_model_path weights/bytetrack_m_mot17.pth.tar --reid --reid_model shufflenet_v2_x1_0 --reid_model_path shufflenetv2_x1-5666bf0f80.pth` + - C-BIoU Track: `python tracker/track.py --dataset uavdt --detector yolo_ultra_v8 --tracker c_bioutrack --kalman_format bot --detector_model_path weights/yolov8l_UAVDT_60epochs_20230509.pt` - BoT-SORT: `python tracker/track.py --dataset uavdt --detector yolox --tracker botsort --kalman_format bot --detector_model_path weights/yolox_m_uavdt_50epochs.pth.tar` +- BoT-SORT-ReID: `python tracker/track.py --dataset uavdt --detector yolox --tracker botsort --kalman_format bot --detector_model_path weights/yolox_m_uavdt_50epochs.pth.tar --reid --reid_model vehiclenet --reid_model_path 
vehicle_net_resnet50.pth` + - Strong SORT: `python tracker/track.py --dataset visdrone_part --detector yolo_ultra_v8 --tracker strongsort --kalman_format strongsort --detector_model_path weights/yolov8l_VisDroneDet_35epochs_20230605.pt` - Sparse Track: `python tracker/track.py --dataset uavdt --detector yolo_ultra_v11 --tracker sparsetrack --kalman_format bot --detector_model_path weights/yolov8l_UAVDT_60epochs_20230509.pt` diff --git a/tracker/track.py b/tracker/track.py index 849f7de..a08b6d8 100644 --- a/tracker/track.py +++ b/tracker/track.py @@ -115,7 +115,7 @@ def get_args(): parser.add_argument('--gamma', type=float, default=0.1, help='param to control fusing motion and apperance dist') parser.add_argument('--min_area', type=float, default=150, help='use to filter small bboxs') - parser.add_argument('--save_dir', type=str, default='track_results/{dataset_name}/{split}') + parser.add_argument('--save_dir', type=str, default='track_results/{tracker_name}/{dataset_name}/{split}') parser.add_argument('--save_images', action='store_true', help='save tracking results (image)') parser.add_argument('--save_videos', action='store_true', help='save tracking results (video)') @@ -200,7 +200,9 @@ def main(args, dataset_cfgs): logger.info(f'Total {len(seqs)} seqs will be tracked: {seqs}') - save_dir = args.save_dir.format(dataset_name=args.dataset, split=SPLIT) + save_dir = args.save_dir.format(tracker_name=args.tracker, dataset_name=args.dataset, split=SPLIT) + if not os.path.exists(save_dir): + os.makedirs(save_dir) """4. Tracking""" @@ -292,7 +294,7 @@ def main(args, dataset_cfgs): plot_img(img=ori_img, frame_id=frame_idx, results=[cur_tlwh, cur_id, cur_cls], save_dir=os.path.join(save_dir, 'vis_results')) - save_results(folder_name=os.path.join(args.dataset, SPLIT), + save_results(save_dir=save_dir, seq_name=seq, results=results) diff --git a/tracker/track_demo.py b/tracker/track_demo.py index fc0029d..62c85db 100644 --- a/tracker/track_demo.py +++ b/tracker/track_demo.py @@ -92,6 +92,7 @@ def get_args(): parser.add_argument('--img_size', type=int, default=1280, help='image size, [h, w]') parser.add_argument('--conf_thresh', type=float, default=0.2, help='filter tracks') + parser.add_argument('--conf_thresh_low', type=float, default=0.1, help='filter low conf detections, used in two-stage association') parser.add_argument('--nms_thresh', type=float, default=0.7, help='thresh for NMS') parser.add_argument('--iou_thresh', type=float, default=0.5, help='IOU thresh to filter tracks') diff --git a/tracker/tracking_utils/tools.py b/tracker/tracking_utils/tools.py index 9c42d46..05dc5fa 100644 --- a/tracker/tracking_utils/tools.py +++ b/tracker/tracking_utils/tools.py @@ -2,7 +2,7 @@ import cv2 import os -def save_results(folder_name, seq_name, results, data_type='default'): +def save_results(save_dir, seq_name, results, data_type='default'): """ write results to txt file @@ -12,15 +12,14 @@ def save_results(folder_name, seq_name, results, data_type='default'): """ assert len(results) - if not os.path.exists(f'./track_results/{folder_name}'): + if not os.path.exists(save_dir): + os.makedirs(save_dir) - os.makedirs(f'./track_results/{folder_name}') - - with open(os.path.join('./track_results', folder_name, seq_name + '.txt'), 'w') as f: + with open(os.path.join(save_dir, seq_name + '.txt'), 'w') as f: for frame_id, target_ids, tlwhs, clses, scores in results: for id, tlwh, score in zip(target_ids, tlwhs, scores): 
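                # each row follows the MOT Challenge text format:
                # frame,id,x,y,w,h,conf,-1,-1,-1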
f.write(f'{frame_id},{id},{tlwh[0]:.2f},{tlwh[1]:.2f},{tlwh[2]:.2f},{tlwh[3]:.2f},{score:.2f},-1,-1,-1\n') f.close() - return folder_name + return save_dir From ae281420953a40230a5c97689bcabd53614f9e65 Mon Sep 17 00:00:00 2001 From: JackWoo0831 Date: Mon, 14 Apr 2025 16:21:53 +0800 Subject: [PATCH 7/7] fix some bugs of demo and sort --- README.md | 1 + README_CN.md | 1 + tracker/track_demo.py | 20 +++++++++++--------- tracker/tracker_dataloader.py | 2 ++ tracker/trackers/deepsort_tracker.py | 2 +- tracker/trackers/sort_tracker.py | 6 ++++++ 6 files changed, 22 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index f8b62ab..ec3b613 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,7 @@ git checkout v2 # change to v2 branch !! ## 🗺️ Latest News +- ***2025.4.14*** Fix some minor bugs [issue#144](https://github.com/JackWoo0831/Yolov7-tracker/issues/144), and fix the lost tracklet bugs in sort. - ***2025.4.7*** Add more Re-ID modules (ShuffleNet, VehicleNet, MobileNet), fix some bugs (such as abandon bbox aspect ratio updating if the tracklet is not activated), and add some functions (customized low filter threshold, fuse detection score, etc.) - ***2025.4.3*** Support the newest ultralytics version (YOLO v3 ~ v12) and fix some bugs of hybrid sort. diff --git a/README_CN.md b/README_CN.md index fc33815..23aee18 100644 --- a/README_CN.md +++ b/README_CN.md @@ -15,6 +15,7 @@ git checkout v2 # change to v2 branch !! ## 🗺️ 最近更新 +- ***2025.4.14*** 修复[issue#144](https://github.com/JackWoo0831/Yolov7-tracker/issues/144)中提到的一些bug,修复sort对丢失轨迹处理的bug. - ***2025.4.7*** 增加更多Re-ID模型 (ShuffleNet, VehicleNet, MobileNet), 修复一些bug (例如在轨迹为非活动状态时停止更新边界框长宽), 增加一些小功能 (例如可以修改两阶段关联策略的最低阈值,原来是固定的0.1; 增加将IoU和检测置信度融合的选项) - ***2025.4.3*** 增加了ultralytics库最新版本的支持,修复了hybrid sort中的一些bug. diff --git a/tracker/track_demo.py b/tracker/track_demo.py index 62c85db..449de54 100644 --- a/tracker/track_demo.py +++ b/tracker/track_demo.py @@ -134,6 +134,11 @@ def main(args): if args.save_videos: args.save_images = True + save_dir = args.save_dir + save_obj_name = args.obj.replace('/', '_') # save seq name w.r.t. the obj name, but replace '/' + logger.info(f'demo result will be saved in {os.path.join(save_dir), save_obj_name}.txt') + logger.info(f"images and videos (if you enable it) will be saved in {os.path.join(save_dir, save_obj_name, 'vis_results')}") + """2. 
load detector""" device = select_device(args.device) @@ -196,9 +201,6 @@ def main(args): tracker = TRACKER_DICT[args.tracker](args, ) - - save_dir = args.save_dir - process_bar = enumerate(data_loader) process_bar = tqdm(process_bar, total=len(data_loader), ncols=150) @@ -219,7 +221,7 @@ def main(args): # get detector output with torch.no_grad(): if 'ultra' in args.detector: - output = model.predict(img, conf=args.conf_thresh, iou=args.nms_thresh) + output = model.predict(img, conf=args.conf_thresh, iou=args.nms_thresh, verbose=False) else: output = model(img) @@ -264,14 +266,14 @@ def main(args): if args.save_images: plot_img(img=ori_img, frame_id=frame_idx, results=[cur_tlwh, cur_id, cur_cls], - save_dir=os.path.join(save_dir, 'vis_results')) + save_dir=os.path.join(save_dir, save_obj_name, 'vis_results')) - save_results(folder_name=os.path.join(save_dir, 'txt_results'), - seq_name='demo', - results=results) + save_results(save_dir=save_dir, + seq_name=save_obj_name, + results=results) if args.save_videos: - save_video(images_path=os.path.join(save_dir, 'vis_results')) + save_video(images_path=os.path.join(save_dir, save_obj_name, 'vis_results')) logger.info(f'save video done') if __name__ == '__main__': diff --git a/tracker/tracker_dataloader.py b/tracker/tracker_dataloader.py index 7490cb3..a4a6647 100644 --- a/tracker/tracker_dataloader.py +++ b/tracker/tracker_dataloader.py @@ -185,6 +185,8 @@ def __init__(self, file_name, img_size=[640, 640], model='yolox', legacy_yolox=T self.legacy = legacy_yolox + self.other_param = kwargs + def __getitem__(self, idx): if not self.is_video: diff --git a/tracker/trackers/deepsort_tracker.py b/tracker/trackers/deepsort_tracker.py index 69475e4..a9d43f6 100644 --- a/tracker/trackers/deepsort_tracker.py +++ b/tracker/trackers/deepsort_tracker.py @@ -32,7 +32,7 @@ def __init__(self, args, frame_rate=30): self.motion = args.kalman_format - self.with_reid = args.reid + self.with_reid = True # In deepsort, reid model must be included self.reid_model = None if self.with_reid: diff --git a/tracker/trackers/sort_tracker.py b/tracker/trackers/sort_tracker.py index a93e99c..8d32353 100644 --- a/tracker/trackers/sort_tracker.py +++ b/tracker/trackers/sort_tracker.py @@ -86,6 +86,12 @@ def update(self, output_results, img, ori_img): track.re_activate(det, self.frame_id, new_id=False) refind_tracklets.append(track) + for it in u_track: + track = tracklet_pool[it] + if not track.state == TrackState.Lost: + track.mark_lost() + lost_tracklets.append(track) + '''Deal with unconfirmed tracks, usually tracks with only one beginning frame''' detections = [detections[i] for i in u_detection]