@@ -121,60 +121,60 @@ def train(hyp, opt, device, tb_writer=None):
         elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):
             pg1.append(v.weight)  # apply decay
         if hasattr(v, 'im'):
-            if hasattr(v.im, 'implicit'):
+            if hasattr(v.im, 'implicit'):
                 pg0.append(v.im.implicit)
             else:
                 for iv in v.im:
                     pg0.append(iv.implicit)
         if hasattr(v, 'imc'):
-            if hasattr(v.imc, 'implicit'):
+            if hasattr(v.imc, 'implicit'):
                 pg0.append(v.imc.implicit)
             else:
                 for iv in v.imc:
                     pg0.append(iv.implicit)
         if hasattr(v, 'imb'):
-            if hasattr(v.imb, 'implicit'):
+            if hasattr(v.imb, 'implicit'):
                 pg0.append(v.imb.implicit)
             else:
                 for iv in v.imb:
                     pg0.append(iv.implicit)
         if hasattr(v, 'imo'):
-            if hasattr(v.imo, 'implicit'):
+            if hasattr(v.imo, 'implicit'):
                 pg0.append(v.imo.implicit)
             else:
                 for iv in v.imo:
                     pg0.append(iv.implicit)
         if hasattr(v, 'ia'):
-            if hasattr(v.ia, 'implicit'):
+            if hasattr(v.ia, 'implicit'):
                 pg0.append(v.ia.implicit)
             else:
                 for iv in v.ia:
                     pg0.append(iv.implicit)
         if hasattr(v, 'attn'):
-            if hasattr(v.attn, 'logit_scale'):
+            if hasattr(v.attn, 'logit_scale'):
                 pg0.append(v.attn.logit_scale)
-            if hasattr(v.attn, 'q_bias'):
+            if hasattr(v.attn, 'q_bias'):
                 pg0.append(v.attn.q_bias)
-            if hasattr(v.attn, 'v_bias'):
+            if hasattr(v.attn, 'v_bias'):
                 pg0.append(v.attn.v_bias)
-            if hasattr(v.attn, 'relative_position_bias_table'):
+            if hasattr(v.attn, 'relative_position_bias_table'):
                 pg0.append(v.attn.relative_position_bias_table)
         if hasattr(v, 'rbr_dense'):
-            if hasattr(v.rbr_dense, 'weight_rbr_origin'):
+            if hasattr(v.rbr_dense, 'weight_rbr_origin'):
                 pg0.append(v.rbr_dense.weight_rbr_origin)
-            if hasattr(v.rbr_dense, 'weight_rbr_avg_conv'):
+            if hasattr(v.rbr_dense, 'weight_rbr_avg_conv'):
                 pg0.append(v.rbr_dense.weight_rbr_avg_conv)
-            if hasattr(v.rbr_dense, 'weight_rbr_pfir_conv'):
+            if hasattr(v.rbr_dense, 'weight_rbr_pfir_conv'):
                 pg0.append(v.rbr_dense.weight_rbr_pfir_conv)
-            if hasattr(v.rbr_dense, 'weight_rbr_1x1_kxk_idconv1'):
+            if hasattr(v.rbr_dense, 'weight_rbr_1x1_kxk_idconv1'):
                 pg0.append(v.rbr_dense.weight_rbr_1x1_kxk_idconv1)
-            if hasattr(v.rbr_dense, 'weight_rbr_1x1_kxk_conv2'):
+            if hasattr(v.rbr_dense, 'weight_rbr_1x1_kxk_conv2'):
                 pg0.append(v.rbr_dense.weight_rbr_1x1_kxk_conv2)
-            if hasattr(v.rbr_dense, 'weight_rbr_gconv_dw'):
+            if hasattr(v.rbr_dense, 'weight_rbr_gconv_dw'):
                 pg0.append(v.rbr_dense.weight_rbr_gconv_dw)
-            if hasattr(v.rbr_dense, 'weight_rbr_gconv_pw'):
+            if hasattr(v.rbr_dense, 'weight_rbr_gconv_pw'):
                 pg0.append(v.rbr_dense.weight_rbr_gconv_pw)
-            if hasattr(v.rbr_dense, 'vector'):
+            if hasattr(v.rbr_dense, 'vector'):
                 pg0.append(v.rbr_dense.vector)

     if opt.adam:
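This hunk routes the model's special parameters into pg0, the no-weight-decay group: YOLOR-style implicit-knowledge tensors reached through im/imc/imb/imo/ia, attention parameters (logit_scale, q_bias, v_bias, relative_position_bias_table), and OREPA/RepVGG-style rbr_dense branch weights, while ordinary conv/linear weights go to pg1 with decay. For reference, a minimal sketch of the same bookkeeping written data-driven; it is not part of the commit and assumes exactly the attribute names used above:

    # Sketch (not in the commit): gather no-decay parameters from a table of
    # attribute names instead of repeated hasattr chains.
    IMPLICIT_HOLDERS = ('im', 'imc', 'imb', 'imo', 'ia')
    ATTN_PARAMS = ('logit_scale', 'q_bias', 'v_bias', 'relative_position_bias_table')
    RBR_PARAMS = ('weight_rbr_origin', 'weight_rbr_avg_conv', 'weight_rbr_pfir_conv',
                  'weight_rbr_1x1_kxk_idconv1', 'weight_rbr_1x1_kxk_conv2',
                  'weight_rbr_gconv_dw', 'weight_rbr_gconv_pw', 'vector')

    def collect_no_decay(v, pg0):
        for name in IMPLICIT_HOLDERS:
            if hasattr(v, name):
                holder = getattr(v, name)
                if hasattr(holder, 'implicit'):
                    pg0.append(holder.implicit)  # a single implicit tensor
                else:
                    pg0.extend(iv.implicit for iv in holder)  # iterable of implicit layers
        for owner, names in (('attn', ATTN_PARAMS), ('rbr_dense', RBR_PARAMS)):
            if hasattr(v, owner):
                obj = getattr(v, owner)
                pg0.extend(getattr(obj, n) for n in names if hasattr(obj, n))

Called as collect_no_decay(v, pg0) inside the named_modules() loop, this reproduces the appends above while keeping the attribute list in one place.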
@@ -265,6 +265,9 @@ def train(hyp, opt, device, tb_writer=None):
             if plots:
                 #plot_labels(labels, names, save_dir, loggers)
                 if tb_writer:
+                    # [cui] add_histogram raised "TypeError: no loop matching the specified signature and casting was found for ufunc greater"
+                    # the culprit is np.greater() under numpy >= 1.24; fixed by downgrading numpy from 1.24.2 to 1.23.0 via pip
+                    # tb_writer.add_histogram('classes', c, 0, bins='auto', max_bins=20)
                     tb_writer.add_histogram('classes', c, 0)

             # Anchors
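The [cui] comment in this hunk records a real incompatibility: with numpy >= 1.24, add_histogram in older torch/tensorboard builds can fail inside the histogram code with the quoted "ufunc greater" TypeError, and the commit works around it by downgrading numpy. If pinning numpy is not an option, a defensive wrapper of roughly this shape (an assumption, not what the commit does) keeps training alive by skipping the plot instead of crashing:

    # Sketch (not in the commit): tolerate the numpy>=1.24 ufunc TypeError.
    import numpy as np

    def safe_add_histogram(tb_writer, tag, values, step):
        try:
            tb_writer.add_histogram(tag, values, step)
        except TypeError as e:  # e.g. "no loop matching ... ufunc greater"
            print(f'skipping histogram {tag!r} (numpy {np.__version__}): {e}')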
@@ -409,18 +412,20 @@ def train(hyp, opt, device, tb_writer=None):
             final_epoch = epoch + 1 == epochs
             if not opt.notest or final_epoch:  # Calculate mAP
                 wandb_logger.current_epoch = epoch + 1
-                results, maps, times = test.test(data_dict,
-                                                 batch_size=batch_size * 2,
-                                                 imgsz=imgsz_test,
-                                                 model=ema.ema,
-                                                 single_cls=opt.single_cls,
-                                                 dataloader=testloader,
-                                                 save_dir=save_dir,
-                                                 verbose=nc < 50 and final_epoch,
-                                                 plots=plots and final_epoch,
-                                                 wandb_logger=wandb_logger,
-                                                 compute_loss=compute_loss,
-                                                 is_coco=is_coco)
+                results, maps, times = test.test(
+                    data_dict,
+                    batch_size=batch_size * 2,
+                    imgsz=imgsz_test,
+                    model=ema.ema,
+                    single_cls=opt.single_cls,
+                    dataloader=testloader,
+                    save_dir=save_dir,
+                    verbose=nc < 50 and final_epoch,
+                    plots=plots and final_epoch,
+                    wandb_logger=wandb_logger,
+                    compute_loss=compute_loss,
+                    is_coco=is_coco,
+                )

             # Write
             with open(results_file, 'a') as f:
@@ -432,7 +437,7 @@ def train(hyp, opt, device, tb_writer=None):
             tags = ['train/box_loss', 'train/obj_loss', 'train/cls_loss',  # train loss
                     'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95',
                     'val/box_loss', 'val/obj_loss', 'val/cls_loss',  # val loss
-                    'x/lr0', 'x/lr1', 'x/lr2']  # params
+                    'x/lr0', 'x/lr1', 'x/lr2', ]  # params
             for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags):
                 if tb_writer:
                     tb_writer.add_scalar(tag, x, epoch)  # tensorboard
@@ -447,27 +452,32 @@ def train(hyp, opt, device, tb_writer=None):

             # Save model
             if (not opt.nosave) or (final_epoch and not opt.evolve):  # if save
-                ckpt = {'epoch': epoch,
-                        'best_fitness': best_fitness,
-                        'training_results': results_file.read_text(),
-                        'model': deepcopy(model.module if is_parallel(model) else model).half(),
-                        'ema': deepcopy(ema.ema).half(),
-                        'updates': ema.updates,
-                        'optimizer': optimizer.state_dict(),
-                        'wandb_id': wandb_logger.wandb_run.id if wandb_logger.wandb else None}
+                ckpt = {
+                    'epoch': epoch,
+                    'best_fitness': best_fitness,
+                    'training_results': results_file.read_text(),
+                    'model': deepcopy(model.module if is_parallel(model) else model).half(),
+                    'ema': deepcopy(ema.ema).half(),
+                    'updates': ema.updates,
+                    'optimizer': optimizer.state_dict(),
+                    'wandb_id': wandb_logger.wandb_run.id if wandb_logger.wandb else None,
+                }

                 # Save last, best and delete
+                print(f"current epoch: {epoch}, ckpt epoch: {ckpt['epoch']}")
                 torch.save(ckpt, last)
+                print(f"saved ckpt epoch: {ckpt['epoch']}")
                 if best_fitness == fi:  # 'best' is scored as 0.1*mAP@0.5 + 0.9*mAP@0.5:0.95
                     torch.save(ckpt, best)
                 if (best_fitness == fi) and (epoch >= 200):
                     torch.save(ckpt, wdir / 'best_{:03d}.pt'.format(epoch))
-                # if epoch == 0:
-                #     torch.save(ckpt, wdir / 'epoch_{:03d}.pt'.format(epoch))
-                # elif ((epoch+1) % 25) == 0:
-                #     torch.save(ckpt, wdir / 'epoch_{:03d}.pt'.format(epoch))
-                # elif epoch >= (epochs-5):
-                #     torch.save(ckpt, wdir / 'epoch_{:03d}.pt'.format(epoch))
+                if epoch == 0:
+                    torch.save(ckpt, wdir / 'epoch_{:03d}.pt'.format(epoch))
+                elif ((epoch + 1) % 30) == 0:
+                    torch.save(ckpt, wdir / 'epoch_{:03d}.pt'.format(epoch))
+                # elif epoch >= (epochs-3):
+                elif epoch >= (epochs - opt.save_tail_epochs):
+                    torch.save(ckpt, wdir / 'epoch_{:03d}.pt'.format(epoch))
                 if wandb_logger.wandb:
                     if ((epoch + 1) % opt.save_period == 0 and not final_epoch) and opt.save_period != -1:
                         wandb_logger.log_model(
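For context on the best_fitness == fi comparisons above: fi comes from a fitness() helper applied to the test results, and in this family of training scripts its weighting matches the translated inline comment, 0.1*mAP@0.5 + 0.9*mAP@0.5:0.95. A sketch of that scoring, mirroring the usual utils/metrics.py helper (confirm against the repo's own copy):

    import numpy as np

    def fitness(x):
        # x has shape (n, 4+) holding [P, R, mAP@0.5, mAP@0.5:0.95, ...] per row
        w = [0.0, 0.0, 0.1, 0.9]  # i.e. 0.1*mAP@0.5 + 0.9*mAP@0.5:0.95
        return (np.asarray(x)[:, :4] * w).sum(1)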
@@ -488,18 +498,20 @@ def train(hyp, opt, device, tb_writer=None):
     logger.info('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))
     if opt.data.endswith('coco.yaml') and nc == 80:  # if COCO
         for m in (last, best) if best.exists() else (last):  # speed, mAP tests
-            results, _, _ = test.test(opt.data,
-                                      batch_size=batch_size * 2,
-                                      imgsz=imgsz_test,
-                                      conf_thres=0.001,
-                                      iou_thres=0.7,
-                                      model=attempt_load(m, device).half(),
-                                      single_cls=opt.single_cls,
-                                      dataloader=testloader,
-                                      save_dir=save_dir,
-                                      save_json=True,
-                                      plots=False,
-                                      is_coco=is_coco)
+            results, _, _ = test.test(
+                opt.data,
+                batch_size=batch_size * 2,
+                imgsz=imgsz_test,
+                conf_thres=0.001,
+                iou_thres=0.7,
+                model=attempt_load(m, device).half(),
+                single_cls=opt.single_cls,
+                dataloader=testloader,
+                save_dir=save_dir,
+                save_json=True,
+                plots=False,
+                is_coco=is_coco,
+            )

     # Strip optimizers
     final = best if best.exists() else last  # final model
@@ -557,6 +569,7 @@ def train(hyp, opt, device, tb_writer=None):
     parser.add_argument('--bbox_interval', type=int, default=-1, help='Set bounding-box image logging interval for W&B')
     parser.add_argument('--save_period', type=int, default=-1, help='Log model after every "save_period" epoch')
     parser.add_argument('--artifact_alias', type=str, default="latest", help='version of dataset artifact to be used')
+    parser.add_argument('--save-tail-epochs', type=int, default=0, help='also save a checkpoint for each of the last N epochs')
     opt = parser.parse_args()

     # Set DDP variables
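The new flag feeds the elif epoch >= (epochs - opt.save_tail_epochs) branch added above (argparse exposes --save-tail-epochs as opt.save_tail_epochs), so epoch_NNN.pt checkpoints are written at epoch 0, every 30th epoch, and for the last N epochs; with the default of 0 the tail branch never fires, since epoch only reaches epochs - 1. A hypothetical invocation (other flags elided) that also keeps the final five epochs:

    python train.py --data data/coco.yaml --epochs 300 --save-tail-epochs 5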
@@ -613,34 +626,36 @@ def train(hyp, opt, device, tb_writer=None):
     # Evolve hyperparameters (optional)
     else:
         # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit)
-        meta = {'lr0': (1, 1e-5, 1e-1),  # initial learning rate (SGD=1E-2, Adam=1E-3)
-                'lrf': (1, 0.01, 1.0),  # final OneCycleLR learning rate (lr0 * lrf)
-                'momentum': (0.3, 0.6, 0.98),  # SGD momentum/Adam beta1
-                'weight_decay': (1, 0.0, 0.001),  # optimizer weight decay
-                'warmup_epochs': (1, 0.0, 5.0),  # warmup epochs (fractions ok)
-                'warmup_momentum': (1, 0.0, 0.95),  # warmup initial momentum
-                'warmup_bias_lr': (1, 0.0, 0.2),  # warmup initial bias lr
-                'box': (1, 0.02, 0.2),  # box loss gain
-                'cls': (1, 0.2, 4.0),  # cls loss gain
-                'cls_pw': (1, 0.5, 2.0),  # cls BCELoss positive_weight
-                'obj': (1, 0.2, 4.0),  # obj loss gain (scale with pixels)
-                'obj_pw': (1, 0.5, 2.0),  # obj BCELoss positive_weight
-                'iou_t': (0, 0.1, 0.7),  # IoU training threshold
-                'anchor_t': (1, 2.0, 8.0),  # anchor-multiple threshold
-                'anchors': (2, 2.0, 10.0),  # anchors per output grid (0 to ignore)
-                'fl_gamma': (0, 0.0, 2.0),  # focal loss gamma (efficientDet default gamma=1.5)
-                'hsv_h': (1, 0.0, 0.1),  # image HSV-Hue augmentation (fraction)
-                'hsv_s': (1, 0.0, 0.9),  # image HSV-Saturation augmentation (fraction)
-                'hsv_v': (1, 0.0, 0.9),  # image HSV-Value augmentation (fraction)
-                'degrees': (1, 0.0, 45.0),  # image rotation (+/- deg)
-                'translate': (1, 0.0, 0.9),  # image translation (+/- fraction)
-                'scale': (1, 0.0, 0.9),  # image scale (+/- gain)
-                'shear': (1, 0.0, 10.0),  # image shear (+/- deg)
-                'perspective': (0, 0.0, 0.001),  # image perspective (+/- fraction), range 0-0.001
-                'flipud': (1, 0.0, 1.0),  # image flip up-down (probability)
-                'fliplr': (0, 0.0, 1.0),  # image flip left-right (probability)
-                'mosaic': (1, 0.0, 1.0),  # image mixup (probability)
-                'mixup': (1, 0.0, 1.0)}  # image mixup (probability)
+        meta = {
+            'lr0': (1, 1e-5, 1e-1),  # initial learning rate (SGD=1E-2, Adam=1E-3)
+            'lrf': (1, 0.01, 1.0),  # final OneCycleLR learning rate (lr0 * lrf)
+            'momentum': (0.3, 0.6, 0.98),  # SGD momentum/Adam beta1
+            'weight_decay': (1, 0.0, 0.001),  # optimizer weight decay
+            'warmup_epochs': (1, 0.0, 5.0),  # warmup epochs (fractions ok)
+            'warmup_momentum': (1, 0.0, 0.95),  # warmup initial momentum
+            'warmup_bias_lr': (1, 0.0, 0.2),  # warmup initial bias lr
+            'box': (1, 0.02, 0.2),  # box loss gain
+            'cls': (1, 0.2, 4.0),  # cls loss gain
+            'cls_pw': (1, 0.5, 2.0),  # cls BCELoss positive_weight
+            'obj': (1, 0.2, 4.0),  # obj loss gain (scale with pixels)
+            'obj_pw': (1, 0.5, 2.0),  # obj BCELoss positive_weight
+            'iou_t': (0, 0.1, 0.7),  # IoU training threshold
+            'anchor_t': (1, 2.0, 8.0),  # anchor-multiple threshold
+            'anchors': (2, 2.0, 10.0),  # anchors per output grid (0 to ignore)
+            'fl_gamma': (0, 0.0, 2.0),  # focal loss gamma (efficientDet default gamma=1.5)
+            'hsv_h': (1, 0.0, 0.1),  # image HSV-Hue augmentation (fraction)
+            'hsv_s': (1, 0.0, 0.9),  # image HSV-Saturation augmentation (fraction)
+            'hsv_v': (1, 0.0, 0.9),  # image HSV-Value augmentation (fraction)
+            'degrees': (1, 0.0, 45.0),  # image rotation (+/- deg)
+            'translate': (1, 0.0, 0.9),  # image translation (+/- fraction)
+            'scale': (1, 0.0, 0.9),  # image scale (+/- gain)
+            'shear': (1, 0.0, 10.0),  # image shear (+/- deg)
+            'perspective': (0, 0.0, 0.001),  # image perspective (+/- fraction), range 0-0.001
+            'flipud': (1, 0.0, 1.0),  # image flip up-down (probability)
+            'fliplr': (0, 0.0, 1.0),  # image flip left-right (probability)
+            'mosaic': (1, 0.0, 1.0),  # image mosaic (probability)
+            'mixup': (1, 0.0, 1.0),  # image mixup (probability)
+        }

         assert opt.local_rank == -1, 'DDP mode not implemented for --evolve'
         opt.notest, opt.nosave = True, True  # only test/save final epoch
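Each meta entry is (mutation scale, lower limit, upper limit): the first element gates how strongly that hyperparameter mutates (0 freezes it, as for iou_t, fl_gamma, perspective, and fliplr here), and the limits clamp the result. A sketch of how an evolve step typically consumes this table, written from the usual pattern in these scripts rather than copied from this commit:

    import numpy as np

    def mutate(hyp, meta, mp=0.8, s=0.2):
        # Mutate each gated hyperparameter with probability mp and scale s,
        # then clamp it to the (lower, upper) limits recorded in meta.
        g = np.array([m[0] for m in meta.values()])  # per-key mutation gains
        ng = len(meta)
        v = np.ones(ng)
        while all(v == 1):  # retry until at least one key actually changes
            v = (g * (np.random.random(ng) < mp) * np.random.randn(ng)
                 * np.random.random() * s + 1).clip(0.3, 3.0)
        for i, k in enumerate(meta):
            hyp[k] = float(hyp[k] * v[i])      # mutate
            hyp[k] = max(hyp[k], meta[k][1])   # lower limit
            hyp[k] = min(hyp[k], meta[k][2])   # upper limit
        return hyp

Keys with gain 0 pass through unchanged because their entry in g zeroes the perturbation, which is exactly how the table above can freeze fliplr while still bounding it.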
@@ -689,5 +704,7 @@ def train(hyp, opt, device, tb_writer=None):

     # Plot results
     plot_evolution(yaml_file)
-    print(f'Hyperparameter evolution complete. Best results saved as: {yaml_file}\n'
-          f'Command to train a new model with these hyperparameters: $ python train.py --hyp {yaml_file}')
+    print(
+        f'Hyperparameter evolution complete. Best results saved as: {yaml_file}\n'
+        f'Command to train a new model with these hyperparameters: $ python train.py --hyp {yaml_file}'
+    )