Commit 3658198

add more reid models and fix bugs
1 parent c3d0c2f commit 3658198

30 files changed, +1247 -558 lines

tracker/track.py

Lines changed: 4 additions & 2 deletions
@@ -85,12 +85,14 @@ def get_args():
     parser.add_argument('--dataset', type=str, default='visdrone_part', help='visdrone, mot17, etc.')
     parser.add_argument('--detector', type=str, default='yolo_ultralytics_v8', help='yolov7, yolox, etc.')
     parser.add_argument('--tracker', type=str, default='sort', help='sort, deepsort, etc')
+    parser.add_argument('--reid', action='store_true', help='enable reid model, work in bot, byte, ocsort and hybridsort')
     parser.add_argument('--reid_model', type=str, default='osnet_x0_25', help='osnet or deppsort')

     parser.add_argument('--kalman_format', type=str, default='default', help='use what kind of Kalman, sort, deepsort, byte, etc.')
     parser.add_argument('--img_size', type=int, default=1280, help='image size, [h, w]')

-    parser.add_argument('--conf_thresh', type=float, default=0.2, help='filter tracks')
+    parser.add_argument('--conf_thresh', type=float, default=0.2, help='filter detections')
+    parser.add_argument('--conf_thresh_low', type=float, default=0.1, help='filter low conf detections, used in two-stage association')
     parser.add_argument('--nms_thresh', type=float, default=0.7, help='thresh for NMS')
     parser.add_argument('--iou_thresh', type=float, default=0.5, help='IOU thresh to filter tracks')

@@ -108,7 +110,7 @@ def get_args():


     """other options"""
-    parser.add_argument('--discard_reid', action='store_true', help='discard reid model, only work in bot-sort etc. which need a reid part')
+    parser.add_argument('--fuse_detection_score', action='store_true', help='fuse detection conf with iou score')
     parser.add_argument('--track_buffer', type=int, default=30, help='tracking buffer')
     parser.add_argument('--gamma', type=float, default=0.1, help='param to control fusing motion and apperance dist')
     parser.add_argument('--min_area', type=float, default=150, help='use to filter small bboxs')
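
The renamed --conf_thresh help and the new --conf_thresh_low flag reflect the two-stage (ByteTrack-style) association performed in tracker/trackers/byte_tracker.py below: detections above conf_thresh enter the first matching round, detections between the two thresholds enter the second. A minimal sketch of that split (the function and variable names here are illustrative, not the repo's):

    import numpy as np

    def split_detections(scores, conf_thresh=0.2, conf_thresh_low=0.1):
        """Mirror of the index logic in byte_tracker.py: high-confidence
        detections go to the first association round, the in-between band
        to the second round."""
        remain_inds = scores > conf_thresh                        # first round
        inds_second = np.logical_and(scores > conf_thresh_low,
                                     scores < conf_thresh)        # second round
        return remain_inds, inds_second

    # e.g. scores = np.array([0.9, 0.15, 0.05]) keeps index 0 for round one
    # and index 1 for round two; index 2 is discarded.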

tracker/track_demo.py

Lines changed: 4 additions & 1 deletion
@@ -85,6 +85,7 @@ def get_args():

     parser.add_argument('--detector', type=str, default='yolo_ultralytics_v8', help='yolov7, yolox, etc.')
     parser.add_argument('--tracker', type=str, default='sort', help='sort, deepsort, etc')
+    parser.add_argument('--reid', action='store_true', help='enable reid model, work in bot, byte, ocsort and hybridsort')
     parser.add_argument('--reid_model', type=str, default='osnet_x0_25', help='osnet or deppsort')

     parser.add_argument('--kalman_format', type=str, default='default', help='use what kind of Kalman, sort, deepsort, byte, etc.')
@@ -109,7 +110,7 @@ def get_args():


     """other options"""
-    parser.add_argument('--discard_reid', action='store_true', help='discard reid model, only work in bot-sort etc. which need a reid part')
+    parser.add_argument('--fuse_detection_score', action='store_true', help='fuse detection conf with iou score')
     parser.add_argument('--track_buffer', type=int, default=30, help='tracking buffer')
     parser.add_argument('--gamma', type=float, default=0.1, help='param to control fusing motion and apperance dist')
     parser.add_argument('--min_area', type=float, default=150, help='use to filter small bboxs')
@@ -120,6 +121,8 @@ def get_args():

     parser.add_argument('--track_eval', type=bool, default=True, help='Use TrackEval to evaluate')

+    """camera parameter"""
+    parser.add_argument('--camera_parameter_folder', type=str, default='./tracker/cam_param_files', help='folder path of camera parameter files')
     return parser.parse_args()

 def main(args):

tracker/trackers/basetrack.py

Lines changed: 10 additions & 6 deletions
@@ -35,6 +35,10 @@ def end_frame(self):
     def next_id():
         BaseTrack._count += 1
         return BaseTrack._count
+
+    @staticmethod
+    def clear_count():
+        BaseTrack._count = 0

     def activate(self, *args):
         raise NotImplementedError
@@ -106,12 +110,6 @@ def tlwh_to_xysa(tlwh):
         ret[3] = tlwh[2] / tlwh[3]
         return ret

-    def to_xyah(self):
-        return self.tlwh_to_xyah(self.tlwh)
-
-    def to_xywh(self):
-        return self.tlwh_to_xywh(self.tlwh)
-
     @staticmethod
     def tlbr_to_tlwh(tlbr):
         ret = np.asarray(tlbr).copy()
@@ -124,6 +122,12 @@ def tlwh_to_tlbr(tlwh):
         ret = np.asarray(tlwh).copy()
         ret[2:] += ret[:2]
         return ret
+
+    def to_xyah(self):
+        return self.tlwh_to_xyah(self.tlwh)
+
+    def to_xywh(self):
+        return self.tlwh_to_xywh(self.tlwh)

     def __repr__(self):
         return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame)
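
clear_count() resets the class-wide ID counter; the trackers below call it from __init__ (see the "once init, clear all trackid count to avoid large id" comments), so track IDs restart from 1 whenever a new tracker instance is built, e.g. per sequence. A minimal sketch of the behaviour this enables:

    class BaseTrack:
        _count = 0  # shared by all tracks

        @staticmethod
        def next_id():
            BaseTrack._count += 1
            return BaseTrack._count

        @staticmethod
        def clear_count():
            BaseTrack._count = 0

    BaseTrack.next_id(); BaseTrack.next_id()   # -> 1, 2 (first sequence)
    BaseTrack.clear_count()                    # new tracker instance
    BaseTrack.next_id()                        # -> 1 again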

tracker/trackers/botsort_tracker.py

Lines changed: 25 additions & 71 deletions
@@ -13,36 +13,10 @@
 from .tracklet import Tracklet, Tracklet_w_reid
 from .matching import *

-from .reid_models.OSNet import *
-from .reid_models.load_model_tools import load_pretrained_weights
-from .reid_models.deepsort_reid import Extractor
-
-from .camera_motion_compensation import GMC
-
-REID_MODEL_DICT = {
-    'osnet_x1_0': osnet_x1_0,
-    'osnet_x0_75': osnet_x0_75,
-    'osnet_x0_5': osnet_x0_5,
-    'osnet_x0_25': osnet_x0_25,
-    'deepsort': Extractor
-}
-
-
-def load_reid_model(reid_model, reid_model_path):
-
-    if 'osnet' in reid_model:
-        func = REID_MODEL_DICT[reid_model]
-        model = func(num_classes=1, pretrained=False, )
-        load_pretrained_weights(model, reid_model_path)
-        model.cuda().eval()
-
-    elif 'deepsort' in reid_model:
-        model = REID_MODEL_DICT[reid_model](reid_model_path, use_cuda=True)
+# for reid
+from .reid_models.engine import load_reid_model, crop_and_resize, select_device

-    else:
-        raise NotImplementedError
-
-    return model
+from .camera_motion_compensation.cmc import GMC

 class BotTracker(object):
     def __init__(self, args, frame_rate=30):
@@ -59,60 +33,34 @@ def __init__(self, args, frame_rate=30):

         self.motion = args.kalman_format

-        self.with_reid = not args.discard_reid
+        self.with_reid = args.reid

-        self.reid_model, self.crop_transforms = None, None
+        self.reid_model = None
         if self.with_reid:
-            self.reid_model = load_reid_model(args.reid_model, args.reid_model_path)
-            self.crop_transforms = T.Compose([
-                # T.ToPILImage(),
-                # T.Resize(size=(256, 128)),
-                T.ToTensor(), # (c, 128, 256)
-                T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
-            ])
-
+            self.reid_model = load_reid_model(args.reid_model, args.reid_model_path, device=args.device)
+            self.reid_model.eval()

         # camera motion compensation module
         self.gmc = GMC(method='orb', downscale=2, verbose=None)

-    def reid_preprocess(self, obj_bbox):
-        """
-        preprocess cropped object bboxes
-
-        obj_bbox: np.ndarray, shape=(h_obj, w_obj, c)
-
-        return:
-        torch.Tensor of shape (c, 128, 256)
-        """
-        obj_bbox = cv2.resize(obj_bbox.astype(np.float32) / 255.0, dsize=(128, 128)) # shape: (128, 256, c)
-
-        return self.crop_transforms(obj_bbox)
+        # once init, clear all trackid count to avoid large id
+        BaseTrack.clear_count()

+    @torch.no_grad()
     def get_feature(self, tlwhs, ori_img):
         """
         get apperance feature of an object
         tlwhs: shape (num_of_objects, 4)
         ori_img: original image, np.ndarray, shape(H, W, C)
         """
-        obj_bbox = []
-
-        for tlwh in tlwhs:
-            tlwh = list(map(int, tlwh))
-            # if any(tlbr_ == -1 for tlbr_ in tlwh):
-            #     print(tlwh)
-
-            tlbr_tensor = self.reid_preprocess(ori_img[tlwh[1]: tlwh[1] + tlwh[3], tlwh[0]: tlwh[0] + tlwh[2]])
-            obj_bbox.append(tlbr_tensor)
-
-        if not obj_bbox:
-            return np.array([])
-
-        obj_bbox = torch.stack(obj_bbox, dim=0)
-        obj_bbox = obj_bbox.cuda()
-
-        features = self.reid_model(obj_bbox) # shape: (num_of_objects, feature_dim)
-        return features.cpu().detach().numpy()

+        if tlwhs.size == 0:
+            return np.empty((0, 512))
+
+        crop_bboxes = crop_and_resize(tlwhs, ori_img, input_format='tlwh', sz=(64, 128))
+        features = self.reid_model(crop_bboxes).cpu().numpy()
+
+        return features

     def update(self, output_results, img, ori_img):
         """
@@ -181,10 +129,13 @@ def update(self, output_results, img, ori_img):
         ious_dists = iou_distance(tracklet_pool, detections)
         ious_dists_mask = (ious_dists > 0.5) # high conf iou

+        # fuse detection conf into iou dist
+        if self.args.fuse_detection_score:
+            ious_dists = fuse_det_score(ious_dists, detections)
+
         if self.with_reid:
             # mixed cost matrix
             emb_dists = embedding_distance(tracklet_pool, detections) / 2.0
-            raw_emb_dists = emb_dists.copy()
             emb_dists[emb_dists > 0.25] = 1.0
             emb_dists[ious_dists_mask] = 1.0
             dists = np.minimum(ious_dists, emb_dists)
@@ -238,9 +189,12 @@ def update(self, output_results, img, ori_img):
         ious_dists = iou_distance(unconfirmed, detections)
         ious_dists_mask = (ious_dists > 0.5)

+        # fuse detection conf into iou dist
+        if self.args.fuse_detection_score:
+            ious_dists = fuse_det_score(ious_dists, detections)
+
         if self.with_reid:
             emb_dists = embedding_distance(unconfirmed, detections) / 2.0
-            raw_emb_dists = emb_dists.copy()
             emb_dists[emb_dists > 0.25] = 1.0
             emb_dists[ious_dists_mask] = 1.0
             dists = np.minimum(ious_dists, emb_dists)
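
With the re-ID path enabled, BoT-SORT keeps its min-fusion of motion and appearance costs: the embedding distance is halved, gated at 0.25 and by the low-IoU mask, then combined with the (optionally score-fused) IoU distance via an element-wise minimum. A standalone sketch of that gating, lifted from the hunks above (the helper name is mine, not the repo's):

    import numpy as np

    def fuse_iou_and_embedding(ious_dists, emb_dists, iou_gate=0.5, emb_gate=0.25):
        """BoT-SORT style cost fusion: reject weak appearance matches and
        spatially implausible pairs, then take the element-wise minimum."""
        ious_dists_mask = ious_dists > iou_gate      # pairs with poor overlap
        emb = emb_dists.copy() / 2.0
        emb[emb > emb_gate] = 1.0                    # weak appearance similarity
        emb[ious_dists_mask] = 1.0                   # far apart in the image
        return np.minimum(ious_dists, emb)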

tracker/trackers/byte_tracker.py

Lines changed: 56 additions & 5 deletions
@@ -5,9 +5,14 @@
 import numpy as np
 from collections import deque
 from .basetrack import BaseTrack, TrackState
-from .tracklet import Tracklet
+from .tracklet import Tracklet, Tracklet_w_reid
 from .matching import *

+# for reid
+import torch
+import torchvision.transforms as T
+from .reid_models.engine import load_reid_model, crop_and_resize
+
 class ByteTracker(object):
     def __init__(self, args, frame_rate=30):
         self.tracked_tracklets = [] # type: list[Tracklet]
@@ -23,6 +28,31 @@ def __init__(self, args, frame_rate=30):

         self.motion = args.kalman_format

+        # whether to use reid
+        self.with_reid = args.reid
+        self.reid_model = None
+        if self.with_reid:
+            self.reid_model = load_reid_model(args.reid_model, args.reid_model_path, device=args.device)
+
+        # once init, clear all trackid count to avoid large id
+        BaseTrack.clear_count()
+
+    @torch.no_grad()
+    def get_feature(self, tlwhs, ori_img):
+        """
+        get apperance feature of an object
+        tlwhs: shape (num_of_objects, 4)
+        ori_img: original image, np.ndarray, shape(H, W, C)
+        """
+
+        if tlwhs.size == 0:
+            return np.empty((0, 512))
+
+        crop_bboxes = crop_and_resize(tlwhs, ori_img, input_format='tlwh', sz=(64, 128))
+        features = self.reid_model(crop_bboxes).cpu().numpy()
+
+        return features
+
     def update(self, output_results, img, ori_img):
         """
         output_results: processed detections (scale to original size) tlbr format
@@ -39,7 +69,7 @@ def update(self, output_results, img, ori_img):
         categories = output_results[:, -1]

         remain_inds = scores > self.args.conf_thresh
-        inds_low = scores > 0.1
+        inds_low = scores > self.args.conf_thresh_low
         inds_high = scores < self.args.conf_thresh

         inds_second = np.logical_and(inds_low, inds_high)
@@ -52,10 +82,17 @@ def update(self, output_results, img, ori_img):
         scores_keep = scores[remain_inds]
         scores_second = scores[inds_second]

+        """Step 1: Extract reid features"""
+        if self.with_reid:
+            features_keep = self.get_feature(tlwhs=dets[:, :4], ori_img=ori_img)
+
         if len(dets) > 0:
-            '''Detections'''
-            detections = [Tracklet(tlwh, s, cate, motion=self.motion) for
-                          (tlwh, s, cate) in zip(dets, scores_keep, cates)]
+            if self.with_reid:
+                detections = [Tracklet_w_reid(tlwh, s, cate, motion=self.motion, feat=feat) for
+                              (tlwh, s, cate, feat) in zip(dets, scores_keep, cates, features_keep)]
+            else:
+                detections = [Tracklet(tlwh, s, cate, motion=self.motion) for
+                              (tlwh, s, cate) in zip(dets, scores_keep, cates)]
         else:
             detections = []

@@ -76,6 +113,16 @@ def update(self, output_results, img, ori_img):
             tracklet.predict()

         dists = iou_distance(tracklet_pool, detections)
+
+        # fuse detection conf into iou dist
+        if self.args.fuse_detection_score:
+            dists = fuse_det_score(dists, detections)
+
+        if self.with_reid:
+            # eq. 11 in Bot-SORT paper, i.e., the common method of
+            # fusing reid and motion. you can adjust the weight here
+            emb_dists = embedding_distance(tracklet_pool, detections)
+            dists = 0.9 * dists + 0.1 * emb_dists

         matches, u_track, u_detection = linear_assignment(dists, thresh=0.9)

@@ -119,6 +166,10 @@ def update(self, output_results, img, ori_img):
         '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
         detections = [detections[i] for i in u_detection]
         dists = iou_distance(unconfirmed, detections)
+
+        # fuse detection conf into iou dist
+        if self.args.fuse_detection_score:
+            dists = fuse_det_score(dists, detections)

         matches, u_unconfirmed, u_detection = linear_assignment(dists, thresh=0.7)

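
Two cost adjustments appear in ByteTracker's first association: an optional 0.9/0.1 blend of IoU and embedding distances (the "eq. 11 in Bot-SORT paper" weighting noted in the hunk), and fuse_det_score(), which folds detection confidence into the IoU cost. fuse_det_score is presumably pulled in via from .matching import * and its implementation is not part of this diff; a sketch under the assumption that it follows ByteTrack's usual fuse_score recipe:

    import numpy as np

    def fuse_det_score(cost_matrix, detections):
        """Assumed behaviour (implementation not shown in this commit): scale the
        IoU similarity of each track-detection pair by the detection's
        confidence, as ByteTrack's fuse_score does."""
        if cost_matrix.size == 0:
            return cost_matrix
        det_scores = np.array([det.score for det in detections])   # assumes a .score attribute
        iou_sim = 1 - cost_matrix
        fused_sim = iou_sim * det_scores[np.newaxis, :]
        return 1 - fused_sim

    # with reid enabled the final cost is then: dists = 0.9 * dists + 0.1 * emb_dists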

tracker/trackers/c_biou_tracker.py

Lines changed: 3 additions & 0 deletions
@@ -23,6 +23,9 @@ def __init__(self, args, frame_rate=30):

         self.motion = args.kalman_format

+        # once init, clear all trackid count to avoid large id
+        BaseTrack.clear_count()
+
     def update(self, output_results, img, ori_img):
         """
         output_results: processed detections (scale to original size) tlbr format
