30
30
from matplotlib .dates import julian2num , num2date
31
31
32
32
from py_eddy_tracker .observations import EddiesObservations , VirtualEddiesObservations , TrackEddiesObservations
33
- from numpy import bool_ , array , arange , ones , setdiff1d , zeros , uint16 , where , empty , isin
33
+ from numpy import bool_ , array , arange , ones , setdiff1d , zeros , uint16 , where , empty , isin , unique , concatenate
34
34
from netCDF4 import Dataset
35
35
import logging
36
36
@@ -96,6 +96,21 @@ def __init__(self, datasets, virtual=0, class_method=None, previous_correspondan
96
96
self .nb_obs = 0
97
97
self .eddies = None
98
98
99
def _copy(self):
    """Build a new correspondance object carrying the same content and state.

    The stored correspondance arrays are re-appended as-is (not deep-copied),
    then the tracking counters are restored and merging indices rebuilt.
    """
    clone = self.__class__(
        datasets=self.datasets,
        virtual=self.nb_virtual,
        class_method=self.class_method,
        previous_correspondance=self.filename_previous_correspondance)
    # Transfer every stored correspondance array into the copy
    for correspondance in self:
        clone.append(correspondance)
    # Restore the counters that describe the current tracking state
    clone.current_id = self.current_id
    clone.nb_link_max = self.nb_link_max
    clone.nb_obs = self.nb_obs
    # Recompute the per-track merging indices on the copy
    clone.prepare_merging()
    logging.debug('Copy done')
    return clone
99
114
def reset_dataset_cache (self ):
100
115
self .previous2_obs = None
101
116
self .previous_obs = None
@@ -317,7 +332,7 @@ def track(self):
317
332
# We begin with second file, first one is in previous
318
333
for file_name in self .datasets [first_dataset :]:
319
334
self .swap_dataset (file_name )
320
- logging .debug ('%s match with previous state' , file_name )
335
+ logging .info ('%s match with previous state' , file_name )
321
336
logging .debug ('%d obs to match' , len (self .current_obs ))
322
337
323
338
nb_real_obs = len (self .previous_obs )
@@ -391,6 +406,7 @@ def save(self, filename, dict_completion=None):
391
406
self .previous_virtual_obs .to_netcdf (group )
392
407
h_nc .module = self .class_method .__module__
393
408
h_nc .classname = self .class_method .__qualname__
409
+ logging .info ('Create correspondance file done' )
394
410
395
411
def load_compatible (self , filename ):
396
412
if filename is None :
@@ -456,7 +472,45 @@ def prepare_merging(self):
456
472
logging .info ('%d tracks identified' , self .current_id )
457
473
logging .info ('%d observations will be join' , self .nb_obs )
458
474
459
- def merge (self , until = - 1 , size_min = None ):
475
def longer_than(self, size_min):
    """Remove from correspondance table all associations for eddies shorter than size_min.

    :param int size_min: minimal number of observations a track must have to be kept

    Tracks are re-indexed to compact ids (0..current_id-1) after filtering.
    """
    # Identify tracks at least size_min observations long
    i_keep_track = where(self.nb_obs_by_tracks >= size_min)[0]
    # Reduce arrays to the kept tracks only
    self.nb_obs_by_tracks = self.nb_obs_by_tracks[i_keep_track]
    self.i_current_by_tracks = self.nb_obs_by_tracks.cumsum() - self.nb_obs_by_tracks
    self.nb_obs = self.nb_obs_by_tracks.sum()
    # Give the last id used
    self.current_id = self.nb_obs_by_tracks.shape[0]
    if self.current_id == 0:
        # Nothing survives the filter: empty every correspondance instead of
        # crashing on i_keep_track.max() with an empty selection
        for i, correspondance in enumerate(self):
            self[i] = correspondance[:0]
        logging.debug('Select longer than %d done', size_min)
        return
    # Translation table: old track id -> new compact id
    translate = empty(i_keep_track.max() + 1, dtype='u4')
    translate[i_keep_track] = arange(self.current_id)
    for i, correspondance in enumerate(self):
        # Keep only associations that belong to a kept track, then re-id them
        m_keep = isin(correspondance['id'], i_keep_track)
        self[i] = correspondance[m_keep]
        self[i]['id'] = translate[self[i]['id']]
    logging.debug('Select longer than %d done', size_min)
494
def shorter_than(self, size_max):
    """Remove from correspondance table all associations for eddies longer than or equal to size_max.

    :param int size_max: tracks with nb_obs_by_tracks < size_max are kept

    Tracks are re-indexed to compact ids (0..current_id-1) after filtering.
    """
    # Identify tracks strictly shorter than size_max
    # (comment fixed: previous copy-paste said "longer than")
    i_keep_track = where(self.nb_obs_by_tracks < size_max)[0]
    # Reduce arrays to the kept tracks only
    self.nb_obs_by_tracks = self.nb_obs_by_tracks[i_keep_track]
    self.i_current_by_tracks = self.nb_obs_by_tracks.cumsum() - self.nb_obs_by_tracks
    self.nb_obs = self.nb_obs_by_tracks.sum()
    # Give the last id used
    self.current_id = self.nb_obs_by_tracks.shape[0]
    if self.current_id == 0:
        # Nothing survives the filter: empty every correspondance instead of
        # crashing on i_keep_track.max() with an empty selection
        for i, correspondance in enumerate(self):
            self[i] = correspondance[:0]
        logging.debug('Select shorter than %d done', size_max)
        return
    # Translation table: old track id -> new compact id
    translate = empty(i_keep_track.max() + 1, dtype='u4')
    translate[i_keep_track] = arange(self.current_id)
    for i, correspondance in enumerate(self):
        # Keep only associations that belong to a kept track, then re-id them
        m_keep = isin(correspondance['id'], i_keep_track)
        self[i] = correspondance[m_keep]
        self[i]['id'] = translate[self[i]['id']]
    logging.debug('Select shorter than %d done', size_max)
513
+ def merge (self , until = - 1 ):
460
514
"""Merge all the correspondance in one array with all fields
461
515
"""
462
516
# Start loading identification again to save in the finals tracks
@@ -466,14 +520,6 @@ def merge(self, until=-1, size_min=None):
466
520
467
521
# Start create netcdf to agglomerate all eddy
468
522
logging .debug ('We will create an array (size %d)' , self .nb_obs )
469
- i_keep_track = slice (None )
470
- if size_min is not None :
471
- i_keep_track = where (self .nb_obs_by_tracks >= size_min )
472
- self .nb_obs_by_tracks = self .nb_obs_by_tracks [i_keep_track ]
473
- self .i_current_by_tracks [i_keep_track ] = self .nb_obs_by_tracks .cumsum () - self .nb_obs_by_tracks
474
- self .nb_obs = self .nb_obs_by_tracks .sum ()
475
- # ??
476
- self .current_id = self .nb_obs_by_tracks .shape [0 ]
477
523
eddies = TrackEddiesObservations (
478
524
size = self .nb_obs ,
479
525
track_extra_variables = self .current_obs .track_extra_variables ,
@@ -484,11 +530,9 @@ def merge(self, until=-1, size_min=None):
484
530
# in u2 (which are limited to 65535)
485
531
logging .debug ('Compute global index array (N)' )
486
532
eddies ['n' ][:] = uint16 (
487
- arange (self .nb_obs , dtype = 'u4' ) - self .i_current_by_tracks [ i_keep_track ] .repeat (self .nb_obs_by_tracks ))
533
+ arange (self .nb_obs , dtype = 'u4' ) - self .i_current_by_tracks .repeat (self .nb_obs_by_tracks ))
488
534
logging .debug ('Compute global track array' )
489
535
eddies ['track' ][:] = arange (self .current_id ).repeat (self .nb_obs_by_tracks )
490
- if size_min is not None :
491
- eddies ['track' ][:] += 1
492
536
493
537
# Set type of eddy with first file
494
538
eddies .sign_type = self .current_obs .sign_type
@@ -506,19 +550,15 @@ def merge(self, until=-1, size_min=None):
506
550
self .swap_dataset (file_name )
507
551
# We select the list of id which are involve in the correspondance
508
552
i_id = self [i ]['id' ]
509
- if size_min is not None :
510
- m_id = isin (i_id , i_keep_track )
511
- i_id = i_id [m_id ]
512
- else :
513
- m_id = slice (None )
514
- # Index where we will write in the final object
553
+ # Index where we will write in the final object
554
+ print (i_id .max ())
515
555
index_final = self .i_current_by_tracks [i_id ]
516
556
517
557
# First obs of eddies
518
558
m_first_obs = ~ first_obs_save_in_tracks [i_id ]
519
559
if m_first_obs .any ():
520
560
# Index in the previous file
521
- index_in = self [i ]['in' ][m_id ][ m_first_obs ]
561
+ index_in = self [i ]['in' ][m_first_obs ]
522
562
# Copy all variable
523
563
for field in fields :
524
564
var = field [0 ]
@@ -532,15 +572,15 @@ def merge(self, until=-1, size_min=None):
532
572
if self .virtual :
533
573
# If the flag virtual in correspondance is active,
534
574
# the previous is virtual
535
- m_virtual = self [i ]['virtual' ][ m_id ]
575
+ m_virtual = self [i ]['virtual' ]
536
576
if m_virtual .any ():
537
577
# Incrementing index
538
- self .i_current_by_tracks [i_id [m_virtual ]] += self [i ]['virtual_length' ][m_id ][ m_virtual ]
578
+ self .i_current_by_tracks [i_id [m_virtual ]] += self [i ]['virtual_length' ][m_virtual ]
539
579
# Get new index
540
580
index_final = self .i_current_by_tracks [i_id ]
541
581
542
582
# Index in the current file
543
- index_current = self [i ]['out' ][ m_id ]
583
+ index_current = self [i ]['out' ]
544
584
545
585
# Copy all variable
546
586
for field in fields :
@@ -551,3 +591,56 @@ def merge(self, until=-1, size_min=None):
551
591
self .i_current_by_tracks [i_id ] += 1
552
592
self .previous_obs = self .current_obs
553
593
return eddies
594
+
595
def get_unused_data(self):
    """
    Gather every observation from the input datasets that was never
    associated by the tracking, into one EddiesObservations object.

    Returns: Unused Eddies
    """
    self.reset_dataset_cache()
    self.swap_dataset(self.datasets[0])

    nb_dataset = len(self.datasets)
    # Get the number of obs unused
    nb_obs = 0
    list_mask = list()
    # Correspondance arrays only carry a 'virtual' field when virtual eddies
    # were enabled for this tracking
    has_virtual = 'virtual' in self[0].dtype.names
    for i, filename in enumerate(self.datasets):
        last_dataset = i == (nb_dataset - 1)
        first_dataset = i == 0
        if has_virtual:
            # Exclude virtual associations: their 'in'/'out' indices do not
            # point at real observations in the file.
            # NOTE(review): m_in is not reassigned on the last dataset and
            # m_out not on the first; the stale value from the previous
            # iteration is never read in those cases because of the
            # first/last branches below — presumably intentional, verify.
            if not last_dataset:
                m_in = ~self[i]['virtual']
            if not first_dataset:
                m_out = ~self[i - 1]['virtual']
        else:
            m_in, m_out = slice(None), slice(None)
        if i == 0:
            # First file: only 'in' indices of the first correspondance exist
            eddies_used = self[i]['in'][m_in]
        elif last_dataset:
            # Last file: only 'out' indices of the previous correspondance
            eddies_used = self[i - 1]['out'][m_out]
        else:
            # Middle file: an obs is used if it appears as 'out' of the
            # previous correspondance or 'in' of the current one
            eddies_used = unique(concatenate((self[i - 1]['out'][m_out], self[i]['in'][m_in])))
        # Total number of observations stored in this file
        with Dataset(filename) as h:
            nb_obs_day = len(h.dimensions['Nobs'])
        # Mask of unused observations: start all-True, clear the used ones
        m = ones(nb_obs_day, dtype='bool')
        m[eddies_used] = False
        list_mask.append(m)
        nb_obs += m.sum()

    # Allocate the output with the same variable layout as the inputs
    eddies = EddiesObservations(
        size=nb_obs,
        track_extra_variables=self.current_obs.track_extra_variables,
        track_array_variables=self.current_obs.track_array_variables,
        array_variables=self.current_obs.array_variables)
    # j: write cursor into the output observations array
    j = 0
    for i, dataset in enumerate(self.datasets):
        current_obs = self.class_method.load_from_netcdf(dataset)
        if i == 0:
            # Sign type (cyclonic/anticyclonic) taken from the first file
            eddies.sign_type = current_obs.sign_type
        # Copy the unused observations of this file into the output
        unused_obs = current_obs.observations[list_mask[i]]
        nb = unused_obs.shape[0]
        eddies.observations[j:j + nb] = unused_obs
        j += nb
    return eddies
0 commit comments