
Commit bfcd24d

Merge pull request #1 from bochinski/master
Pulled changes, V-IOU
2 parents fe53ac5 + 9929e44 commit bfcd24d

File tree: 8 files changed, +787 −83 lines


README.md

Lines changed: 183 additions & 55 deletions
Large diffs are not rendered by default.

cvpr19.sh

Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+# set these variables according to your setup
+seq_dir=/path/to/cvpr19/train   # base directory of the split (cvpr19/train, cvpr19/test etc.)
+results_dir=results/cvpr19      # output directory, will be created if not existing
+
+
+mkdir -p ${results_dir}
+
+options="-v KCF2 -sl 0.3 -sh 0.8 -si 0.4 -tm 5 --ttl 20 -hr 0.3 -fmt motchallenge"
+for seq in $(ls $seq_dir); do
+    echo $seq
+    python demo.py -f ${seq_dir}/${seq}/img1/{:06d}.jpg -d ${seq_dir}/${seq}/det/det.txt \
+        -o ${results_dir}/${seq}.txt ${options}
+done
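The '{:06d}' in the -f argument above is a plain Python format placeholder that demo.py fills with the frame id. A minimal sketch of how such a template expands (the sequence name and paths are placeholders, not part of the repository):

    # illustrative only: how a frames_path template is resolved per frame
    frames_path = "/path/to/cvpr19/train/SEQ-01/img1/{:06d}.jpg"

    for frame_id in (1, 2, 10):
        # '{:06d}' zero-pads the frame id to six digits, e.g. .../img1/000001.jpg
        print(frames_path.format(frame_id))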

demo.py

Lines changed: 42 additions & 11 deletions
@@ -7,33 +7,53 @@
 # Written by Erik Bochinski
 # ---------------------------------------------------------

-from time import time
 import argparse

 from iou_tracker import track_iou
+from viou_tracker import track_viou
 from util import load_mot, save_to_csv


 def main(args):
-    detections = load_mot(args.detection_path)
+    formats = ['motchallenge', 'visdrone']
+    assert args.format in formats, "format '{}' unknown supported formats are: {}".format(args.format, formats)

-    start = time()
-    tracks = track_iou(detections, args.sigma_l, args.sigma_h, args.sigma_iou, args.t_min)
-    end = time()
+    with_classes = False
+    if args.format == 'visdrone':
+        with_classes = True
+    detections = load_mot(args.detection_path, nms_overlap_thresh=args.nms, with_classes=with_classes)

-    num_frames = len(detections)
-    print("finished at " + str(int(num_frames / (end - start))) + " fps!")
+    if args.visual:
+        tracks = track_viou(args.frames_path, detections, args.sigma_l, args.sigma_h, args.sigma_iou, args.t_min,
+                            args.ttl, args.visual, args.keep_upper_height_ratio)
+    else:
+        if with_classes:
+            # track_viou can also be used without visual tracking, but note that the speed will be much slower compared
+            # to track_iou. However, this way supports the optimal LAP solving and the handling of multiple object classes:
+            tracks = track_viou(args.frames_path, detections, args.sigma_l, args.sigma_h, args.sigma_iou, args.t_min,
+                                args.ttl, 'NONE', args.keep_upper_height_ratio)
+        else:
+            tracks = track_iou(detections, args.sigma_l, args.sigma_h, args.sigma_iou, args.t_min)

-    save_to_csv(args.output_path, tracks)
+    save_to_csv(args.output_path, tracks, fmt=args.format)


 if __name__ == '__main__':
-
-    parser = argparse.ArgumentParser(description="IOU Tracker demo script")
+    parser = argparse.ArgumentParser(description="IOU/V-IOU Tracker demo script")
+    parser.add_argument('-v', '--visual', type=str, help="visual tracker for V-IOU. Currently supported are "
+                                                         "[BOOSTING, MIL, KCF, KCF2, TLD, MEDIANFLOW, GOTURN, NONE], "
+                                                         "see README.md for further details")
+    parser.add_argument('-hr', '--keep_upper_height_ratio', type=float, default=1.0,
+                        help="Ratio of height of the object to track to the total height of the object "
+                             "for visual tracking. e.g. upper 30%%")
+    parser.add_argument('-f', '--frames_path', type=str,
+                        help="sequence frames with format '/path/to/frames/frame_{:04d}.jpg' where '{:04d}' will "
+                             "be replaced with the frame id (zero-padded to 4 digits, use {:05d} for 5 etc.)")
     parser.add_argument('-d', '--detection_path', type=str, required=True,
                         help="full path to CSV file containing the detections")
     parser.add_argument('-o', '--output_path', type=str, required=True,
-                        help="output path to store the tracking results (MOT challenge devkit compatible format)")
+                        help="output path to store the tracking results "
+                             "(MOT challenge/Visdrone devkit compatible format)")
     parser.add_argument('-sl', '--sigma_l', type=float, default=0,
                         help="low detection threshold")
     parser.add_argument('-sh', '--sigma_h', type=float, default=0.5,
@@ -42,6 +62,17 @@ def main(args):
                         help="intersection-over-union threshold")
     parser.add_argument('-tm', '--t_min', type=float, default=2,
                         help="minimum track length")
+    parser.add_argument('-ttl', '--ttl', type=int, default=1,
+                        help="time to live parameter for v-iou")
+    parser.add_argument('-nms', '--nms', type=float, default=None,
+                        help="nms for loading multi-class detections")
+    parser.add_argument('-fmt', '--format', type=str, default='motchallenge',
+                        help='format of the detections [motchallenge, visdrone]')

     args = parser.parse_args()
+    assert not args.visual or args.visual and args.frames_path, "visual tracking requires video frames, " \
+                                                                "please specify via --frames_path"
+
+    assert 0.0 < args.keep_upper_height_ratio <= 1.0, "only values between 0 and 1 are allowed"
+    assert args.nms is None or 0.0 <= args.nms <= 1.0, "only values between 0 and 1 are allowed"
     main(args)
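For reference, the same pipeline can also be driven from Python directly. The sketch below mirrors the calls made in main() above; the paths are placeholders, the thresholds are only illustrative, and the positional arguments of track_viou are assumed to follow the same order used in demo.py:

    from util import load_mot, save_to_csv
    from viou_tracker import track_viou

    # load VisDrone-style detections (with class ids), applying nms at 0.6 overlap
    dets = load_mot('/path/to/seq/det.txt', nms_overlap_thresh=0.6, with_classes=True)

    # argument order as in demo.py: frames_path, detections, sigma_l, sigma_h,
    # sigma_iou, t_min, ttl, visual tracker ('NONE' disables visual tracking),
    # keep_upper_height_ratio
    tracks = track_viou('/path/to/seq/img1/{:07d}.jpg', dets,
                        0.3, 0.8, 0.4, 5, 20, 'KCF2', 0.3)

    save_to_csv('/path/to/results/seq.txt', tracks, fmt='visdrone')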

run_tracker.m

Lines changed: 28 additions & 5 deletions
@@ -1,11 +1,31 @@
 function [stateInfo, speed] = run_tracker(curSequence, baselinedetections)
 %% tracker configuration
+ttl = 0;
+tracker_type = '';

-%% Mask R-CNN (frcnn)
+%% v-iou tracker configurations
+% %% Mask R-CNN (frcnn)
 sigma_l = 0;
-sigma_h = 0.95;
+sigma_h = 0.98;
 sigma_iou = 0.6;
-t_min = 7;
+t_min = 13;
+ttl=6;
+tracker_type='KCF2';
+
+% %% CompACT
+%sigma_l = 0;
+%sigma_h = 0.3;
+%sigma_iou = 0.5;
+%t_min = 3;
+%ttl=12;
+%tracker_type='KCF2';
+
+%% iou tracker configurations
+% %% Mask R-CNN (frcnn)
+%sigma_l = 0;
+%sigma_h = 0.95;
+%sigma_iou = 0.6;
+%t_min = 7;

 % %% R-CNN
 % sigma_l = 0;
@@ -33,8 +53,11 @@

 %% running tracking algorithm
 try
-    ret = py.iou_tracker.track_iou_matlab_wrapper(py.numpy.array(baselinedetections(:).'), sigma_l, sigma_h, sigma_iou, t_min);
-
+    if strcmp(tracker_type, '')
+        ret = py.iou_tracker.track_iou_matlab_wrapper(py.numpy.array(baselinedetections(:).'), sigma_l, sigma_h, sigma_iou, t_min);
+    else
+        ret = py.viou_tracker.track_viou_matlab_wrapper(curSequence.imgFolder, py.numpy.array(baselinedetections(:).'), sigma_l, sigma_h, sigma_iou, t_min, ttl, tracker_type);
+    end
 catch exception
     disp('error while calling the python tracking module: ')
     disp(' ')

util.py

Lines changed: 147 additions & 12 deletions
@@ -9,21 +9,33 @@
 import csv


-def load_mot(detections):
+visdrone_classes = {'car': 4, 'bus': 9, 'truck': 6, 'pedestrian': 1, 'van': 5}
+
+
+def load_mot(detections, nms_overlap_thresh=None, with_classes=True, nms_per_class=False):
     """
     Loads detections stored in a mot-challenge like formatted CSV or numpy array (fieldNames = ['frame', 'id', 'x', 'y',
     'w', 'h', 'score']).

     Args:
-        detections
+        detections (str, numpy.ndarray): path to csv file containing the detections or numpy array containing them.
+        nms_overlap_thresh (float, optional): perform non-maximum suppression on the input detections with this threshold.
+                                              no nms is performed if this parameter is not specified.
+        with_classes (bool, optional): indicates if the detections have classes or not. set to false for motchallenge.
+        nms_per_class (bool, optional): perform non-maximum suppression for each class separately

     Returns:
         list: list containing the detections for each frame.
     """
+    if nms_overlap_thresh:
+        assert with_classes, "currently only works with classes available"

     data = []
     if type(detections) is str:
         raw = np.genfromtxt(detections, delimiter=',', dtype=np.float32)
+        if np.isnan(raw).all():
+            raw = np.genfromtxt(detections, delimiter=' ', dtype=np.float32)
+
     else:
         # assume it is an array
         assert isinstance(detections, np.ndarray), "only numpy arrays or *.csv paths are supported as detections."
@@ -34,16 +46,127 @@ def load_mot(detections):
         idx = raw[:, 0] == i
         bbox = raw[idx, 2:6]
         bbox[:, 2:4] += bbox[:, 0:2]  # x1, y1, w, h -> x1, y1, x2, y2
+        bbox -= 1  # correct 1,1 matlab offset
         scores = raw[idx, 6]
+
+        if with_classes:
+            classes = raw[idx, 7]
+
+            bbox_filtered = None
+            scores_filtered = None
+            classes_filtered = None
+            for coi in visdrone_classes:
+                cids = classes == visdrone_classes[coi]
+                if nms_per_class and nms_overlap_thresh:
+                    bbox_tmp, scores_tmp = nms(bbox[cids], scores[cids], nms_overlap_thresh)
+                else:
+                    bbox_tmp, scores_tmp = bbox[cids], scores[cids]
+
+                if bbox_filtered is None:
+                    bbox_filtered = bbox_tmp
+                    scores_filtered = scores_tmp
+                    classes_filtered = [coi]*bbox_filtered.shape[0]
+                elif len(bbox_tmp) > 0:
+                    bbox_filtered = np.vstack((bbox_filtered, bbox_tmp))
+                    scores_filtered = np.hstack((scores_filtered, scores_tmp))
+                    classes_filtered += [coi] * bbox_tmp.shape[0]
+
+            if bbox_filtered is not None:
+                bbox = bbox_filtered
+                scores = scores_filtered
+                classes = classes_filtered
+
+            if nms_per_class is False and nms_overlap_thresh:
+                bbox, scores, classes = nms(bbox, scores, nms_overlap_thresh, np.array(classes))
+
+        else:
+            classes = ['pedestrian']*bbox.shape[0]
+
         dets = []
-        for bb, s in zip(bbox, scores):
-            dets.append({'bbox': (bb[0], bb[1], bb[2], bb[3]), 'score': s})
+        for bb, s, c in zip(bbox, scores, classes):
+            dets.append({'bbox': (bb[0], bb[1], bb[2], bb[3]), 'score': s, 'class': c})
         data.append(dets)

     return data


-def save_to_csv(out_path, tracks):
+def nms(boxes, scores, overlapThresh, classes=None):
+    """
+    perform non-maximum suppression. based on Malisiewicz et al.
+    Args:
+        boxes (numpy.ndarray): boxes to process
+        scores (numpy.ndarray): corresponding scores for each box
+        overlapThresh (float): overlap threshold for boxes to merge
+        classes (numpy.ndarray, optional): class ids for each box.
+
+    Returns:
+        (tuple): tuple containing:
+
+        boxes (list): nms boxes
+        scores (list): nms scores
+        classes (list, optional): nms classes if specified
+    """
+    # # if there are no boxes, return an empty list
+    # if len(boxes) == 0:
+    #     return [], [], [] if classes else [], []
+
+    # if the bounding boxes are integers, convert them to floats --
+    # this is important since we'll be doing a bunch of divisions
+    if boxes.dtype.kind == "i":
+        boxes = boxes.astype("float")
+
+    if scores.dtype.kind == "i":
+        scores = scores.astype("float")
+
+    # initialize the list of picked indexes
+    pick = []
+
+    # grab the coordinates of the bounding boxes
+    x1 = boxes[:, 0]
+    y1 = boxes[:, 1]
+    x2 = boxes[:, 2]
+    y2 = boxes[:, 3]
+    #score = boxes[:, 4]
+    # compute the area of the bounding boxes and sort the bounding
+    # boxes by their scores
+    area = (x2 - x1 + 1) * (y2 - y1 + 1)
+    idxs = np.argsort(scores)
+
+    # keep looping while some indexes still remain in the indexes
+    # list
+    while len(idxs) > 0:
+        # grab the last index in the indexes list and add the
+        # index value to the list of picked indexes
+        last = len(idxs) - 1
+        i = idxs[last]
+        pick.append(i)
+
+        # find the largest (x, y) coordinates for the start of
+        # the bounding box and the smallest (x, y) coordinates
+        # for the end of the bounding box
+        xx1 = np.maximum(x1[i], x1[idxs[:last]])
+        yy1 = np.maximum(y1[i], y1[idxs[:last]])
+        xx2 = np.minimum(x2[i], x2[idxs[:last]])
+        yy2 = np.minimum(y2[i], y2[idxs[:last]])
+
+        # compute the width and height of the overlap region
+        w = np.maximum(0, xx2 - xx1 + 1)
+        h = np.maximum(0, yy2 - yy1 + 1)
+
+        # compute the ratio of overlap
+        overlap = (w * h) / area[idxs[:last]]
+
+        # delete all indexes from the index list whose overlap exceeds the threshold
+        idxs = np.delete(idxs, np.concatenate(([last],
+                                               np.where(overlap > overlapThresh)[0])))
+
+    if classes is not None:
+        return boxes[pick], scores[pick], classes[pick]
+    else:
+        return boxes[pick], scores[pick]
+
+
+def save_to_csv(out_path, tracks, fmt='motchallenge'):
     """
     Saves tracks to a CSV file.

@@ -53,22 +176,34 @@ def save_to_csv(out_path, tracks):
     """

     with open(out_path, "w") as ofile:
-        field_names = ['frame', 'id', 'x', 'y', 'w', 'h', 'score', 'wx', 'wy', 'wz']
+        if fmt == 'motchallenge':
+            field_names = ['frame', 'id', 'x', 'y', 'w', 'h', 'score', 'wx', 'wy', 'wz']
+        elif fmt == 'visdrone':
+            field_names = ['frame', 'id', 'x', 'y', 'w', 'h', 'score', 'object_category', 'truncation', 'occlusion']
+        else:
+            raise ValueError("unknown format type '{}'".format(fmt))

         odict = csv.DictWriter(ofile, field_names)
         id_ = 1
         for track in tracks:
             for i, bbox in enumerate(track['bboxes']):
                 row = {'id': id_,
                        'frame': track['start_frame'] + i,
-                       'x': bbox[0],
-                       'y': bbox[1],
+                       'x': bbox[0]+1,
+                       'y': bbox[1]+1,
                        'w': bbox[2] - bbox[0],
                        'h': bbox[3] - bbox[1],
-                       'score': track['max_score'],
-                       'wx': -1,
-                       'wy': -1,
-                       'wz': -1}
+                       'score': track['max_score']}
+                if fmt == 'motchallenge':
+                    row['wx'] = -1
+                    row['wy'] = -1
+                    row['wz'] = -1
+                elif fmt == 'visdrone':
+                    row['object_category'] = visdrone_classes[track['class']]
+                    row['truncation'] = -1
+                    row['occlusion'] = -1
+                else:
+                    raise ValueError("unknown format type '{}'".format(fmt))

                 odict.writerow(row)
                 id_ += 1
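A small, self-contained sketch of the nms() helper added above; the boxes and scores are made up purely for illustration:

    import numpy as np
    from util import nms

    # two heavily overlapping boxes plus one separate box, in x1, y1, x2, y2 format
    boxes = np.array([[10., 10., 50., 50.],
                      [12., 12., 52., 52.],
                      [100., 100., 140., 140.]])
    scores = np.array([0.9, 0.6, 0.8])

    # with a 0.5 overlap threshold the lower-scored duplicate is suppressed,
    # leaving only the boxes scored 0.9 and 0.8
    kept_boxes, kept_scores = nms(boxes, scores, 0.5)
    print(kept_boxes, kept_scores)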
