 from matplotlib.dates import julian2num, num2date

 from py_eddy_tracker.observations import EddiesObservations, VirtualEddiesObservations, TrackEddiesObservations
-from numpy import bool_, array, arange, ones, setdiff1d, zeros, uint16, where, empty, isin
+from numpy import bool_, array, arange, ones, setdiff1d, zeros, uint16, where, empty, isin, unique, concatenate
 from netCDF4 import Dataset
 import logging

@@ -96,6 +96,21 @@ def __init__(self, datasets, virtual=0, class_method=None, previous_correspondan
         self.nb_obs = 0
         self.eddies = None

+    def _copy(self):
+        new = self.__class__(
+            datasets=self.datasets,
+            virtual=self.nb_virtual,
+            class_method=self.class_method,
+            previous_correspondance=self.filename_previous_correspondance)
+        for i in self:
+            new.append(i)
+        new.current_id = self.current_id
+        new.nb_link_max = self.nb_link_max
+        new.nb_obs = self.nb_obs
+        new.prepare_merging()
+        logging.debug('Copy done')
+        return new
+
     def reset_dataset_cache(self):
         self.previous2_obs = None
         self.previous_obs = None
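The new _copy helper rebuilds a sibling instance from the same datasets, re-appends every per-day correspondance array and calls prepare_merging() so the copy carries its own per-track counters. A minimal usage sketch, assuming correspondances is an instance of this class on which track() has already been run (the variable name is illustrative, not from this commit):

# Illustrative only: branch the correspondance set before any destructive filtering.
filtered = correspondances._copy()   # independent copy with its own counters
# 'correspondances' stays untouched; 'filtered' can now be reduced in place
# (see longer_than / shorter_than added further down in this commit).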
@@ -317,7 +332,7 @@ def track(self):
         # We begin with second file, first one is in previous
         for file_name in self.datasets[first_dataset:]:
             self.swap_dataset(file_name)
-            logging.debug('%s match with previous state', file_name)
+            logging.info('%s match with previous state', file_name)
             logging.debug('%d obs to match', len(self.current_obs))

             nb_real_obs = len(self.previous_obs)
@@ -391,6 +406,7 @@ def save(self, filename, dict_completion=None):
                 self.previous_virtual_obs.to_netcdf(group)
             h_nc.module = self.class_method.__module__
             h_nc.classname = self.class_method.__qualname__
+        logging.info('Create correspondance file done')

     def load_compatible(self, filename):
         if filename is None:
@@ -456,7 +472,45 @@ def prepare_merging(self):
         logging.info('%d tracks identified', self.current_id)
         logging.info('%d observations will be join', self.nb_obs)

-    def merge(self, until=-1, size_min=None):
+    def longer_than(self, size_min):
+        """Remove from the correspondance table all associations belonging to eddies shorter than size_min
+        """
+        # Identify tracks with at least size_min observations
+        i_keep_track = where(self.nb_obs_by_tracks >= size_min)[0]
+        # Reduce array
+        self.nb_obs_by_tracks = self.nb_obs_by_tracks[i_keep_track]
+        self.i_current_by_tracks = self.nb_obs_by_tracks.cumsum() - self.nb_obs_by_tracks
+        self.nb_obs = self.nb_obs_by_tracks.sum()
+        # Give the last id used
+        self.current_id = self.nb_obs_by_tracks.shape[0]
+        translate = empty(i_keep_track.max() + 1, dtype='u4')
+        translate[i_keep_track] = arange(self.current_id)
+        for i, correspondance in enumerate(self):
+            m_keep = isin(correspondance['id'], i_keep_track)
+            self[i] = correspondance[m_keep]
+            self[i]['id'] = translate[self[i]['id']]
+        logging.debug('Select longer than %d done', size_min)
+
+    def shorter_than(self, size_max):
+        """Remove from the correspondance table all associations belonging to eddies longer than or equal to size_max
+        """
+        # Identify tracks with fewer than size_max observations
+        i_keep_track = where(self.nb_obs_by_tracks < size_max)[0]
+        # Reduce array
+        self.nb_obs_by_tracks = self.nb_obs_by_tracks[i_keep_track]
+        self.i_current_by_tracks = self.nb_obs_by_tracks.cumsum() - self.nb_obs_by_tracks
+        self.nb_obs = self.nb_obs_by_tracks.sum()
+        # Give the last id used
+        self.current_id = self.nb_obs_by_tracks.shape[0]
+        translate = empty(i_keep_track.max() + 1, dtype='u4')
+        translate[i_keep_track] = arange(self.current_id)
+        for i, correspondance in enumerate(self):
+            m_keep = isin(correspondance['id'], i_keep_track)
+            self[i] = correspondance[m_keep]
+            self[i]['id'] = translate[self[i]['id']]
+        logging.debug('Select shorter than %d done', size_max)
+
+    def merge(self, until=-1):
         """Merge all the correspondance in one array with all fields
         """

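With these additions, track-length selection is no longer an argument of merge(): it becomes an explicit, in-place step between prepare_merging() and merge(). A minimal sketch of the new call order, assuming correspondances is a tracked instance of this class; the threshold and file name are illustrative:

# Illustrative only: filter first, then merge without a size_min argument.
correspondances.prepare_merging()
correspondances.longer_than(28)        # drop tracks with fewer than 28 observations
eddies = correspondances.merge()       # TrackEddiesObservations of the kept tracks
with Dataset('long_tracks.nc', 'w') as h_nc:
    eddies.to_netcdf(h_nc)             # assuming the observations class exposes to_netcdf, as in save() above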
@@ -466,14 +520,6 @@ def merge(self, until=-1, size_min=None):

         # Start create netcdf to agglomerate all eddy
         logging.debug('We will create an array (size %d)', self.nb_obs)
-        i_keep_track = slice(None)
-        if size_min is not None:
-            i_keep_track = where(self.nb_obs_by_tracks >= size_min)
-            self.nb_obs_by_tracks = self.nb_obs_by_tracks[i_keep_track]
-            self.i_current_by_tracks[i_keep_track] = self.nb_obs_by_tracks.cumsum() - self.nb_obs_by_tracks
-            self.nb_obs = self.nb_obs_by_tracks.sum()
-            # ??
-            self.current_id = self.nb_obs_by_tracks.shape[0]
         eddies = TrackEddiesObservations(
             size=self.nb_obs,
             track_extra_variables=self.current_obs.track_extra_variables,
@@ -484,11 +530,9 @@ def merge(self, until=-1, size_min=None):
         # in u2 (which are limited to 65535)
         logging.debug('Compute global index array (N)')
         eddies['n'][:] = uint16(
-            arange(self.nb_obs, dtype='u4') - self.i_current_by_tracks[i_keep_track].repeat(self.nb_obs_by_tracks))
+            arange(self.nb_obs, dtype='u4') - self.i_current_by_tracks.repeat(self.nb_obs_by_tracks))
         logging.debug('Compute global track array')
         eddies['track'][:] = arange(self.current_id).repeat(self.nb_obs_by_tracks)
-        if size_min is not None:
-            eddies['track'][:] += 1

         # Set type of eddy with first file
         eddies.sign_type = self.current_obs.sign_type
@@ -506,19 +550,15 @@ def merge(self, until=-1, size_min=None):
             self.swap_dataset(file_name)
             # We select the list of id which are involve in the correspondance
             i_id = self[i]['id']
-            if size_min is not None:
-                m_id = isin(i_id, i_keep_track)
-                i_id = i_id[m_id]
-            else:
-                m_id = slice(None)
-                # Index where we will write in the final object
+            # Index where we will write in the final object
             index_final = self.i_current_by_tracks[i_id]

             # First obs of eddies
             m_first_obs = ~first_obs_save_in_tracks[i_id]
             if m_first_obs.any():
                 # Index in the previous file
-                index_in = self[i]['in'][m_id][m_first_obs]
+                index_in = self[i]['in'][m_first_obs]
                 # Copy all variable
                 for field in fields:
                     var = field[0]
@@ -532,15 +572,15 @@ def merge(self, until=-1, size_min=None):
             if self.virtual:
                 # If the flag virtual in correspondance is active,
                 # the previous is virtual
-                m_virtual = self[i]['virtual'][m_id]
+                m_virtual = self[i]['virtual']
                 if m_virtual.any():
                     # Incrementing index
-                    self.i_current_by_tracks[i_id[m_virtual]] += self[i]['virtual_length'][m_id][m_virtual]
+                    self.i_current_by_tracks[i_id[m_virtual]] += self[i]['virtual_length'][m_virtual]
                     # Get new index
                     index_final = self.i_current_by_tracks[i_id]

             # Index in the current file
-            index_current = self[i]['out'][m_id]
+            index_current = self[i]['out']

             # Copy all variable
             for field in fields:
@@ -551,3 +591,56 @@ def merge(self, until=-1, size_min=None):
             self.i_current_by_tracks[i_id] += 1
             self.previous_obs = self.current_obs
         return eddies
+
+    def get_unused_data(self):
+        """
+        Gather all the observations which were not associated with any track
+        Returns: Unused Eddies
+
+        """
+        self.reset_dataset_cache()
+        self.swap_dataset(self.datasets[0])
+
+        nb_dataset = len(self.datasets)
+        # Get the number of unused obs
+        nb_obs = 0
+        list_mask = list()
+        has_virtual = 'virtual' in self[0].dtype.names
+        for i, filename in enumerate(self.datasets):
+            last_dataset = i == (nb_dataset - 1)
+            first_dataset = i == 0
+            if has_virtual:
+                if not last_dataset:
+                    m_in = ~self[i]['virtual']
+                if not first_dataset:
+                    m_out = ~self[i - 1]['virtual']
+            else:
+                m_in, m_out = slice(None), slice(None)
+            if i == 0:
+                eddies_used = self[i]['in'][m_in]
+            elif last_dataset:
+                eddies_used = self[i - 1]['out'][m_out]
+            else:
+                eddies_used = unique(concatenate((self[i - 1]['out'][m_out], self[i]['in'][m_in])))
+            with Dataset(filename) as h:
+                nb_obs_day = len(h.dimensions['Nobs'])
+            m = ones(nb_obs_day, dtype='bool')
+            m[eddies_used] = False
+            list_mask.append(m)
+            nb_obs += m.sum()
+
+        eddies = EddiesObservations(
+            size=nb_obs,
+            track_extra_variables=self.current_obs.track_extra_variables,
+            track_array_variables=self.current_obs.track_array_variables,
+            array_variables=self.current_obs.array_variables)
+        j = 0
+        for i, dataset in enumerate(self.datasets):
+            current_obs = self.class_method.load_from_netcdf(dataset)
+            if i == 0:
+                eddies.sign_type = current_obs.sign_type
+            unused_obs = current_obs.observations[list_mask[i]]
+            nb = unused_obs.shape[0]
+            eddies.observations[j:j + nb] = unused_obs
+            j += nb
+        return eddies
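get_unused_data walks the input files again and keeps every observation whose index never appears in the correspondance table (the 'in' side for the first file, the 'out' side for the last, the union of both in between, with virtual associations masked out). A short sketch of how the result might be saved next to the tracks, still assuming correspondances is a tracked instance and the file name is illustrative:

# Illustrative only: write the eddies that never entered a track to their own file.
untracked = correspondances.get_unused_data()
with Dataset('untracked_eddies.nc', 'w') as h_nc:
    untracked.to_netcdf(h_nc)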