3030from matplotlib .dates import julian2num , num2date
3131
3232from py_eddy_tracker .observations import EddiesObservations , VirtualEddiesObservations , TrackEddiesObservations
33- from numpy import bool_ , array , arange , ones , setdiff1d , zeros , uint16 , where , empty , isin
33+ from numpy import bool_ , array , arange , ones , setdiff1d , zeros , uint16 , where , empty , isin , unique , concatenate
3434from netCDF4 import Dataset
3535import logging
3636
@@ -96,6 +96,21 @@ def __init__(self, datasets, virtual=0, class_method=None, previous_correspondan
9696 self .nb_obs = 0
9797 self .eddies = None
9898
def _copy(self):
    """Build a new correspondance object with the same configuration,
    association tables and tracking state as this one.

    Returns a new instance of the same class, ready for merging.
    """
    # Fresh instance sharing this object's configuration.
    clone = self.__class__(
        datasets=self.datasets,
        virtual=self.nb_virtual,
        class_method=self.class_method,
        previous_correspondance=self.filename_previous_correspondance)
    # Carry over every per-step correspondance table
    # (references are shared, not deep-copied).
    for table in self:
        clone.append(table)
    # Replicate the tracking counters.
    clone.current_id = self.current_id
    clone.nb_link_max = self.nb_link_max
    clone.nb_obs = self.nb_obs
    # Rebuild the per-track merging index on the clone.
    clone.prepare_merging()
    logging.debug('Copy done')
    return clone
99114 def reset_dataset_cache (self ):
100115 self .previous2_obs = None
101116 self .previous_obs = None
@@ -317,7 +332,7 @@ def track(self):
317332 # We begin with second file, first one is in previous
318333 for file_name in self .datasets [first_dataset :]:
319334 self .swap_dataset (file_name )
320- logging .debug ('%s match with previous state' , file_name )
335+ logging .info ('%s match with previous state' , file_name )
321336 logging .debug ('%d obs to match' , len (self .current_obs ))
322337
323338 nb_real_obs = len (self .previous_obs )
@@ -391,6 +406,7 @@ def save(self, filename, dict_completion=None):
391406 self .previous_virtual_obs .to_netcdf (group )
392407 h_nc .module = self .class_method .__module__
393408 h_nc .classname = self .class_method .__qualname__
409+ logging .info ('Create correspondance file done' )
394410
395411 def load_compatible (self , filename ):
396412 if filename is None :
@@ -456,7 +472,45 @@ def prepare_merging(self):
456472 logging .info ('%d tracks identified' , self .current_id )
457473 logging .info ('%d observations will be join' , self .nb_obs )
458474
459- def merge (self , until = - 1 , size_min = None ):
def longer_than(self, size_min):
    """Remove from the correspondance table every association belonging
    to a track with fewer than `size_min` observations.

    Track ids are renumbered compactly (0..n-1) after the removal.

    :param int size_min: minimal number of observations a track must
        have to be kept
    """
    # Identify tracks long enough to keep
    i_keep_track = where(self.nb_obs_by_tracks >= size_min)[0]
    # Reduce the per-track tables to the kept tracks
    self.nb_obs_by_tracks = self.nb_obs_by_tracks[i_keep_track]
    self.i_current_by_tracks = self.nb_obs_by_tracks.cumsum() - self.nb_obs_by_tracks
    self.nb_obs = self.nb_obs_by_tracks.sum()
    # New ids are 0..n-1, so the next free id is the number of kept tracks
    self.current_id = self.nb_obs_by_tracks.shape[0]
    if i_keep_track.size == 0:
        # No track survives the filter: empty every correspondance table
        # coherently instead of crashing on i_keep_track.max() below.
        for i, correspondance in enumerate(self):
            self[i] = correspondance[isin(correspondance['id'], i_keep_track)]
        logging.debug('Select longer than %d done', size_min)
        return
    # Mapping from old track id to the new compact numbering
    translate = empty(i_keep_track.max() + 1, dtype='u4')
    translate[i_keep_track] = arange(self.current_id)
    for i, correspondance in enumerate(self):
        # Drop associations of removed tracks, then renumber the rest
        m_keep = isin(correspondance['id'], i_keep_track)
        self[i] = correspondance[m_keep]
        self[i]['id'] = translate[self[i]['id']]
    logging.debug('Select longer than %d done', size_min)
493+
def shorter_than(self, size_max):
    """Remove from the correspondance table every association belonging
    to a track with `size_max` observations or more.

    Track ids are renumbered compactly (0..n-1) after the removal.

    :param int size_max: tracks strictly shorter than this are kept
    """
    # Identify tracks short enough to keep
    i_keep_track = where(self.nb_obs_by_tracks < size_max)[0]
    # Reduce the per-track tables to the kept tracks
    self.nb_obs_by_tracks = self.nb_obs_by_tracks[i_keep_track]
    self.i_current_by_tracks = self.nb_obs_by_tracks.cumsum() - self.nb_obs_by_tracks
    self.nb_obs = self.nb_obs_by_tracks.sum()
    # New ids are 0..n-1, so the next free id is the number of kept tracks
    self.current_id = self.nb_obs_by_tracks.shape[0]
    if i_keep_track.size == 0:
        # No track survives the filter: empty every correspondance table
        # coherently instead of crashing on i_keep_track.max() below.
        for i, correspondance in enumerate(self):
            self[i] = correspondance[isin(correspondance['id'], i_keep_track)]
        logging.debug('Select shorter than %d done', size_max)
        return
    # Mapping from old track id to the new compact numbering
    translate = empty(i_keep_track.max() + 1, dtype='u4')
    translate[i_keep_track] = arange(self.current_id)
    for i, correspondance in enumerate(self):
        # Drop associations of removed tracks, then renumber the rest
        m_keep = isin(correspondance['id'], i_keep_track)
        self[i] = correspondance[m_keep]
        self[i]['id'] = translate[self[i]['id']]
    logging.debug('Select shorter than %d done', size_max)
513+ def merge (self , until = - 1 ):
460514 """Merge all the correspondance in one array with all fields
461515 """
462516 # Start loading identification again to save in the finals tracks
@@ -466,14 +520,6 @@ def merge(self, until=-1, size_min=None):
466520
467521 # Start create netcdf to agglomerate all eddy
468522 logging .debug ('We will create an array (size %d)' , self .nb_obs )
# NOTE(review): the size_min filtering removed below now lives in
# longer_than()/shorter_than(), which must be called BEFORE merge()
# when a track-length filter is wanted.
469- i_keep_track = slice (None )
470- if size_min is not None :
471- i_keep_track = where (self .nb_obs_by_tracks >= size_min )
472- self .nb_obs_by_tracks = self .nb_obs_by_tracks [i_keep_track ]
473- self .i_current_by_tracks [i_keep_track ] = self .nb_obs_by_tracks .cumsum () - self .nb_obs_by_tracks
474- self .nb_obs = self .nb_obs_by_tracks .sum ()
475- # ??
476- self .current_id = self .nb_obs_by_tracks .shape [0 ]
477523 eddies = TrackEddiesObservations (
478524 size = self .nb_obs ,
479525 track_extra_variables = self .current_obs .track_extra_variables ,
@@ -484,11 +530,9 @@ def merge(self, until=-1, size_min=None):
484530 # in u2 (which are limited to 65535)
485531 logging .debug ('Compute global index array (N)' )
486532 eddies ['n' ][:] = uint16 (
487- arange (self .nb_obs , dtype = 'u4' ) - self .i_current_by_tracks [ i_keep_track ] .repeat (self .nb_obs_by_tracks ))
533+ arange (self .nb_obs , dtype = 'u4' ) - self .i_current_by_tracks .repeat (self .nb_obs_by_tracks ))
488534 logging .debug ('Compute global track array' )
489535 eddies ['track' ][:] = arange (self .current_id ).repeat (self .nb_obs_by_tracks )
# NOTE(review): track ids used to start at 1 when size_min was given;
# after this change they always start at 0 — confirm downstream readers
# of the 'track' variable tolerate the new numbering.
490- if size_min is not None :
491- eddies ['track' ][:] += 1
492536
493537 # Set type of eddy with first file
494538 eddies .sign_type = self .current_obs .sign_type
@@ -506,19 +550,15 @@ def merge(self, until=-1, size_min=None):
506550 self .swap_dataset (file_name )
507551 # We select the list of id which are involve in the correspondance
508552 i_id = self [i ]['id' ]
509- if size_min is not None :
510- m_id = isin (i_id , i_keep_track )
511- i_id = i_id [m_id ]
512- else :
513- m_id = slice (None )
514- # Index where we will write in the final object
553+ # Index where we will write in the final object
# NOTE(review): leftover debug statement added below — it will spam
# stdout once per dataset; remove (or demote to logging.debug) before merge.
554+ print (i_id .max ())
515555 index_final = self .i_current_by_tracks [i_id ]
516556
517557 # First obs of eddies
518558 m_first_obs = ~ first_obs_save_in_tracks [i_id ]
519559 if m_first_obs .any ():
520560 # Index in the previous file
521- index_in = self [i ]['in' ][m_id ][ m_first_obs ]
561+ index_in = self [i ]['in' ][m_first_obs ]
522562 # Copy all variable
523563 for field in fields :
524564 var = field [0 ]
@@ -532,15 +572,15 @@ def merge(self, until=-1, size_min=None):
532572 if self .virtual :
533573 # If the flag virtual in correspondance is active,
534574 # the previous is virtual
535- m_virtual = self [i ]['virtual' ][ m_id ]
575+ m_virtual = self [i ]['virtual' ]
536576 if m_virtual .any ():
537577 # Incrementing index
538- self .i_current_by_tracks [i_id [m_virtual ]] += self [i ]['virtual_length' ][m_id ][ m_virtual ]
578+ self .i_current_by_tracks [i_id [m_virtual ]] += self [i ]['virtual_length' ][m_virtual ]
539579 # Get new index
540580 index_final = self .i_current_by_tracks [i_id ]
541581
542582 # Index in the current file
543- index_current = self [i ]['out' ][ m_id ]
583+ index_current = self [i ]['out' ]
544584
545585 # Copy all variable
546586 for field in fields :
@@ -551,3 +591,56 @@ def merge(self, until=-1, size_min=None):
551591 self .i_current_by_tracks [i_id ] += 1
552592 self .previous_obs = self .current_obs
553593 return eddies
594+
def get_unused_data(self):
    """
    Add in track object all the observations which aren't selected
    Returns: Unused Eddies

    """
    self.reset_dataset_cache()
    self.swap_dataset(self.datasets[0])

    nb_dataset = len(self.datasets)
    has_virtual = 'virtual' in self[0].dtype.names
    # One boolean mask per dataset flagging its unused observations
    masks = []
    nb_unused = 0
    for i, filename in enumerate(self.datasets):
        is_first = i == 0
        is_last = i == (nb_dataset - 1)
        # Virtual associations do not point at a real observation of the
        # day, so they must not mark anything as used.
        if has_virtual:
            if not is_last:
                m_in = ~self[i]['virtual']
            if not is_first:
                m_out = ~self[i - 1]['virtual']
        else:
            m_in, m_out = slice(None), slice(None)
        # Indices of this day's observations referenced by a link:
        # first day only appears as 'in', last day only as 'out',
        # any other day may appear on both sides.
        if is_first:
            used = self[i]['in'][m_in]
        elif is_last:
            used = self[i - 1]['out'][m_out]
        else:
            used = unique(concatenate((self[i - 1]['out'][m_out], self[i]['in'][m_in])))
        # Total number of observations stored for this day
        with Dataset(filename) as h:
            nb_obs_day = len(h.dimensions['Nobs'])
        day_mask = ones(nb_obs_day, dtype='bool')
        day_mask[used] = False
        masks.append(day_mask)
        nb_unused += day_mask.sum()

    eddies = EddiesObservations(
        size=nb_unused,
        track_extra_variables=self.current_obs.track_extra_variables,
        track_array_variables=self.current_obs.track_array_variables,
        array_variables=self.current_obs.array_variables)
    # Concatenate the unused observations of every day into the output
    offset = 0
    for i, dataset in enumerate(self.datasets):
        day_obs = self.class_method.load_from_netcdf(dataset)
        if i == 0:
            # Sign convention comes from the first file
            eddies.sign_type = day_obs.sign_type
        unused_obs = day_obs.observations[masks[i]]
        nb = unused_obs.shape[0]
        eddies.observations[offset:offset + nb] = unused_obs
        offset += nb
    return eddies
0 commit comments