@@ -20,65 +20,72 @@ import datetime as dt
 logger = logging.getLogger("pet")
 
 
-def browse_dataset_in(data_dir, files_model, date_regexp, date_model,
-                      start_date=None, end_date=None, sub_sampling_step=1,
-                      files=None):
+def browse_dataset_in(
+    data_dir,
+    files_model,
+    date_regexp,
+    date_model,
+    start_date=None,
+    end_date=None,
+    sub_sampling_step=1,
+    files=None,
+):
     if files is not None:
-        pattern_regexp = re_compile('.*/' + date_regexp)
+        pattern_regexp = re_compile(".*/" + date_regexp)
         filenames = bytes_(files)
     else:
-        pattern_regexp = re_compile('.*/' + date_regexp)
+        pattern_regexp = re_compile(".*/" + date_regexp)
         full_path = join_path(data_dir, files_model)
-        logger.info('Search files : %s', full_path)
+        logger.info("Search files : %s", full_path)
         filenames = bytes_(glob(full_path))
 
-    dataset_list = empty(len(filenames),
-                         dtype=[('filename', 'S500'),
-                                ('date', 'datetime64[D]'),
-                                ])
-    dataset_list['filename'] = filenames
+    dataset_list = empty(
+        len(filenames), dtype=[("filename", "S500"), ("date", "datetime64[D]"),]
+    )
+    dataset_list["filename"] = filenames
 
-    logger.info('%s grids available', dataset_list.shape[0])
+    logger.info("%s grids available", dataset_list.shape[0])
     mode_attrs = False
-    if '(' not in date_regexp:
-        logger.debug('Attrs date : %s', date_regexp)
-        mode_attrs = date_regexp.strip().split(':')
+    if "(" not in date_regexp:
+        logger.debug("Attrs date : %s", date_regexp)
+        mode_attrs = date_regexp.strip().split(":")
     else:
-        logger.debug('Pattern date : %s', date_regexp)
+        logger.debug("Pattern date : %s", date_regexp)
 
     for item in dataset_list:
         str_date = None
         if mode_attrs:
-            with Dataset(item['filename'].decode("utf-8")) as h:
+            with Dataset(item["filename"].decode("utf-8")) as h:
                 if len(mode_attrs) == 1:
                     str_date = getattr(h, mode_attrs[0])
                 else:
                     str_date = getattr(h.variables[mode_attrs[0]], mode_attrs[1])
         else:
-            result = pattern_regexp.match(str(item['filename']))
+            result = pattern_regexp.match(str(item["filename"]))
             if result:
                 str_date = result.groups()[0]
 
         if str_date is not None:
-            item['date'] = datetime.strptime(str_date, date_model).date()
+            item["date"] = datetime.strptime(str_date, date_model).date()
 
-    dataset_list.sort(order=['date', 'filename'])
+    dataset_list.sort(order=["date", "filename"])
 
-    steps = unique(dataset_list['date'][1:] - dataset_list['date'][:-1])
+    steps = unique(dataset_list["date"][1:] - dataset_list["date"][:-1])
     if len(steps) > 1:
-        raise Exception('Several days steps in grid dataset %s' % steps)
+        raise Exception("Several days steps in grid dataset %s" % steps)
 
     if sub_sampling_step != 1:
-        logger.info('Grid subsampling %d', sub_sampling_step)
+        logger.info("Grid subsampling %d", sub_sampling_step)
         dataset_list = dataset_list[::sub_sampling_step]
 
     if start_date is not None or end_date is not None:
-        logger.info('Available grid from %s to %s',
-                    dataset_list[0]['date'],
-                    dataset_list[-1]['date'])
-        logger.info('Filtering grid by time %s, %s', start_date, end_date)
-        mask = (dataset_list['date'] >= start_date) * (
-            dataset_list['date'] <= end_date)
+        logger.info(
+            "Available grid from %s to %s",
+            dataset_list[0]["date"],
+            dataset_list[-1]["date"],
+        )
+        logger.info("Filtering grid by time %s, %s", start_date, end_date)
+        mask = (dataset_list["date"] >= start_date) * (dataset_list["date"] <= end_date)
 
         dataset_list = dataset_list[mask]
     return dataset_list
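
For readers skimming the hunk above: browse_dataset_in scans a set of grid files, parses one date per file (from the filename or from a netCDF attribute), and returns a numpy record array sorted by date. A minimal usage sketch follows; the directory, glob pattern, and file naming are hypothetical, not taken from this commit:

# Hypothetical call: collect daily grids named like Anticyclonic_20190101.nc
# and extract the YYYYMMDD date via the capture group of date_regexp.
dataset_list = browse_dataset_in(
    data_dir="/data/grids",            # assumed location
    files_model="Anticyclonic_*.nc",   # assumed glob pattern
    date_regexp=".*c_([0-9]*?).nc",    # the captured group holds the date string
    date_model="%Y%m%d",
)
print(dataset_list["filename"][0], dataset_list["date"][0])
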
@@ -88,111 +95,148 @@ def usage():
8895 """Usage
8996 """
9097 # Run using:
91- parser = EddyParser (
92- "Tool to use identification step to compute tracking" )
93- parser .add_argument ('yaml_file' ,
94- help = 'Yaml file to configure py-eddy-tracker' )
95- parser .add_argument ('--correspondance_in' ,
96- help = 'Filename of saved correspondance' )
97- parser .add_argument ('--correspondance_out' ,
98- help = 'Filename to save correspondance' )
99- parser .add_argument ('--save_correspondance_and_stop' ,
100- action = 'store_true' ,
101- help = 'Stop tracking after correspondance computation,'
102- ' merging can be done with EddyFinalTracking' )
103- parser .add_argument ('--zarr' ,
104- action = 'store_true' ,
105- help = 'Output will be wrote in zarr' )
106- parser .add_argument ('--unraw' ,
107- action = 'store_true' ,
108- help = 'Load unraw data' )
109- parser .add_argument ('--blank_period' ,
110- type = int ,
111- default = 0 ,
112- help = 'Nb of detection which will not use at the end of the period' )
98+ parser = EddyParser ("Tool to use identification step to compute tracking" )
99+ parser .add_argument ("yaml_file" , help = "Yaml file to configure py-eddy-tracker" )
100+ parser .add_argument ("--correspondance_in" , help = "Filename of saved correspondance" )
101+ parser .add_argument ("--correspondance_out" , help = "Filename to save correspondance" )
102+ parser .add_argument (
103+ "--save_correspondance_and_stop" ,
104+ action = "store_true" ,
105+ help = "Stop tracking after correspondance computation,"
106+ " merging can be done with EddyFinalTracking" ,
107+ )
108+ parser .add_argument (
109+ "--zarr" , action = "store_true" , help = "Output will be wrote in zarr"
110+ )
111+ parser .add_argument ("--unraw" , action = "store_true" , help = "Load unraw data" )
112+ parser .add_argument (
113+ "--blank_period" ,
114+ type = int ,
115+ default = 0 ,
116+ help = "Nb of detection which will not use at the end of the period" ,
117+ )
113118 args = parser .parse_args ()
114119
115120 # Read yaml configuration file
116- with open (args .yaml_file , 'r' ) as stream :
121+ with open (args .yaml_file , "r" ) as stream :
117122 config = yaml_load (stream )
118123 if args .correspondance_in is not None and not exists (args .correspondance_in ):
119124 args .correspondance_in = None
120- return config , args .save_correspondance_and_stop , args .correspondance_in , args .correspondance_out ,\
121- args .blank_period , args .zarr , not args .unraw
122-
123-
124- if __name__ == '__main__' :
125- CONFIG , SAVE_STOP , CORRESPONDANCES_IN , CORRESPONDANCES_OUT , BLANK_PERIOD , ZARR , RAW = usage ()
125+ return (
126+ config ,
127+ args .save_correspondance_and_stop ,
128+ args .correspondance_in ,
129+ args .correspondance_out ,
130+ args .blank_period ,
131+ args .zarr ,
132+ not args .unraw ,
133+ )
134+
135+
136+ if __name__ == "__main__" :
137+ (
138+ CONFIG ,
139+ SAVE_STOP ,
140+ CORRESPONDANCES_IN ,
141+ CORRESPONDANCES_OUT ,
142+ BLANK_PERIOD ,
143+ ZARR ,
144+ RAW ,
145+ ) = usage ()
126146 # Create output directory
127- SAVE_DIR = CONFIG [' PATHS' ].get (' SAVE_DIR' , None )
147+ SAVE_DIR = CONFIG [" PATHS" ].get (" SAVE_DIR" , None )
128148 if SAVE_DIR is not None and not exists (SAVE_DIR ):
129149 mkdir (SAVE_DIR )
130150
131- YAML_CORRESPONDANCES_IN = CONFIG [' PATHS' ].get (' CORRESPONDANCES_IN' , None )
132- YAML_CORRESPONDANCES_OUT = CONFIG [' PATHS' ].get (' CORRESPONDANCES_OUT' , None )
151+ YAML_CORRESPONDANCES_IN = CONFIG [" PATHS" ].get (" CORRESPONDANCES_IN" , None )
152+ YAML_CORRESPONDANCES_OUT = CONFIG [" PATHS" ].get (" CORRESPONDANCES_OUT" , None )
133153 if CORRESPONDANCES_IN is None :
134154 CORRESPONDANCES_IN = YAML_CORRESPONDANCES_IN
135155 if CORRESPONDANCES_OUT is None :
136156 CORRESPONDANCES_OUT = YAML_CORRESPONDANCES_OUT
137157 if YAML_CORRESPONDANCES_OUT is None and CORRESPONDANCES_OUT is None :
138- CORRESPONDANCES_OUT = ' {path}/{sign_type}_correspondances.nc'
158+ CORRESPONDANCES_OUT = " {path}/{sign_type}_correspondances.nc"
139159
140- if ' CLASS' in CONFIG :
160+ if " CLASS" in CONFIG :
141161 CLASS = getattr (
142- __import__ (CONFIG ['CLASS' ]['MODULE' ], globals (), locals (), CONFIG ['CLASS' ]['CLASS' ]),
143- CONFIG ['CLASS' ]['CLASS' ])
162+ __import__ (
163+ CONFIG ["CLASS" ]["MODULE" ], globals (), locals (), CONFIG ["CLASS" ]["CLASS" ]
164+ ),
165+ CONFIG ["CLASS" ]["CLASS" ],
166+ )
144167 else :
145168 CLASS = None
146169
147- NB_VIRTUAL_OBS_MAX_BY_SEGMENT = int (CONFIG .get (' VIRTUAL_LENGTH_MAX' , 0 ))
170+ NB_VIRTUAL_OBS_MAX_BY_SEGMENT = int (CONFIG .get (" VIRTUAL_LENGTH_MAX" , 0 ))
148171
149- if isinstance (CONFIG [' PATHS' ][ ' FILES_PATTERN' ], list ):
172+ if isinstance (CONFIG [" PATHS" ][ " FILES_PATTERN" ], list ):
150173 DATASET_LIST = browse_dataset_in (
151174 data_dir = None ,
152175 files_model = None ,
153- files = CONFIG ['PATHS' ]['FILES_PATTERN' ],
154- date_regexp = '.*c_([0-9]*?).[nz].*' ,
155- date_model = '%Y%m%d' )
176+ files = CONFIG ["PATHS" ]["FILES_PATTERN" ],
177+ date_regexp = ".*c_([0-9]*?).[nz].*" ,
178+ date_model = "%Y%m%d" ,
179+ )
156180 else :
157181 DATASET_LIST = browse_dataset_in (
158- data_dir = dirname (CONFIG ['PATHS' ]['FILES_PATTERN' ]),
159- files_model = basename (CONFIG ['PATHS' ]['FILES_PATTERN' ]),
160- date_regexp = '.*c_([0-9]*?).[nz].*' ,
161- date_model = '%Y%m%d' )
182+ data_dir = dirname (CONFIG ["PATHS" ]["FILES_PATTERN" ]),
183+ files_model = basename (CONFIG ["PATHS" ]["FILES_PATTERN" ]),
184+ date_regexp = ".*c_([0-9]*?).[nz].*" ,
185+ date_model = "%Y%m%d" ,
186+ )
162187
163188 if BLANK_PERIOD > 0 :
164189 DATASET_LIST = DATASET_LIST [:- BLANK_PERIOD ]
165- logger .info (' Last %d files will be pop' , BLANK_PERIOD )
190+ logger .info (" Last %d files will be pop" , BLANK_PERIOD )
166191
167192 START_TIME = dt .datetime .now ()
168- logger .info ('Start tracking on %d files' , len (DATASET_LIST ))
193+ logger .info ("Start tracking on %d files" , len (DATASET_LIST ))
194+
195+ NB_OBS_MIN = int (CONFIG .get ("TRACK_DURATION_MIN" , 14 ))
196+ if NB_OBS_MIN > len (DATASET_LIST ):
197+ raise Exception (
198+ "Input file number (%s) is shorter than TRACK_DURATION_MIN (%s)."
199+ % (len (DATASET_LIST ), NB_OBS_MIN )
200+ )
169201
170202 CORRESPONDANCES = Correspondances (
171- datasets = DATASET_LIST [' filename' ],
203+ datasets = DATASET_LIST [" filename" ],
172204 virtual = NB_VIRTUAL_OBS_MAX_BY_SEGMENT ,
173205 class_method = CLASS ,
174- previous_correspondance = CORRESPONDANCES_IN )
206+ previous_correspondance = CORRESPONDANCES_IN ,
207+ )
175208
176209 CORRESPONDANCES .track ()
177- logger .info (' Track finish' )
178- logger .info (' Start merging' )
210+ logger .info (" Track finish" )
211+ logger .info (" Start merging" )
179212
180213 DATE_START , DATE_STOP = CORRESPONDANCES .period
181- DICT_COMPLETION = dict (date_start = DATE_START , date_stop = DATE_STOP , date_prod = START_TIME ,
182- path = SAVE_DIR , sign_type = CORRESPONDANCES .current_obs .sign_legend )
214+ DICT_COMPLETION = dict (
215+ date_start = DATE_START ,
216+ date_stop = DATE_STOP ,
217+ date_prod = START_TIME ,
218+ path = SAVE_DIR ,
219+ sign_type = CORRESPONDANCES .current_obs .sign_legend ,
220+ )
183221
184222 CORRESPONDANCES .save (CORRESPONDANCES_OUT , DICT_COMPLETION )
185223 if SAVE_STOP :
186224 exit ()
187225
188226 # Merge correspondance, only do if we stop and store just after compute of correspondance
189- NB_OBS_MIN = int (CONFIG .get ('TRACK_DURATION_MIN' , 14 ))
190227 CORRESPONDANCES .prepare_merging ()
191228
192- logger .info ('Longer track saved have %d obs' , CORRESPONDANCES .nb_obs_by_tracks .max ())
193- logger .info ('The mean length is %d observations before filtering' , CORRESPONDANCES .nb_obs_by_tracks .mean ())
229+ logger .info (
230+ "Longer track saved have %d obs" , CORRESPONDANCES .nb_obs_by_tracks .max ()
231+ )
232+ logger .info (
233+ "The mean length is %d observations before filtering" ,
234+ CORRESPONDANCES .nb_obs_by_tracks .mean (),
235+ )
194236
195- CORRESPONDANCES .get_unused_data (raw_data = RAW ).write_file (path = SAVE_DIR , filename = '%(path)s/%(sign_type)s_untracked.nc' , zarr_flag = ZARR )
237+ CORRESPONDANCES .get_unused_data (raw_data = RAW ).write_file (
238+ path = SAVE_DIR , filename = "%(path)s/%(sign_type)s_untracked.nc" , zarr_flag = ZARR
239+ )
196240
197241 SHORT_CORRESPONDANCES = CORRESPONDANCES ._copy ()
198242 SHORT_CORRESPONDANCES .shorter_than (size_max = NB_OBS_MIN )
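
The CLASS block in the hunk above resolves a tracking class at runtime from module and class names supplied in the YAML configuration. A standalone sketch of that __import__/getattr pattern, using a made-up CONFIG fragment with a standard-library module so it runs anywhere:

# Hypothetical CONFIG fragment; the MODULE/CLASS values are illustrative only.
CONFIG = {"CLASS": {"MODULE": "collections", "CLASS": "OrderedDict"}}
# Import the named module, then pull the named class off it.
CLASS = getattr(
    __import__(
        CONFIG["CLASS"]["MODULE"], globals(), locals(), CONFIG["CLASS"]["CLASS"]
    ),
    CONFIG["CLASS"]["CLASS"],
)
print(CLASS)  # <class 'collections.OrderedDict'>
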
@@ -204,19 +248,28 @@ if __name__ == '__main__':
 
     # We flag obs
     if CORRESPONDANCES.virtual:
-        FINAL_EDDIES['virtual'][:] = FINAL_EDDIES['time'] == 0
-        FINAL_EDDIES.filled_by_interpolation(FINAL_EDDIES['virtual'] == 1)
-        SHORT_TRACK['virtual'][:] = SHORT_TRACK['time'] == 0
-        SHORT_TRACK.filled_by_interpolation(SHORT_TRACK['virtual'] == 1)
+        FINAL_EDDIES["virtual"][:] = FINAL_EDDIES["time"] == 0
+        FINAL_EDDIES.filled_by_interpolation(FINAL_EDDIES["virtual"] == 1)
+        SHORT_TRACK["virtual"][:] = SHORT_TRACK["time"] == 0
+        SHORT_TRACK.filled_by_interpolation(SHORT_TRACK["virtual"] == 1)
 
     # Total running time
     FULL_TIME = dt.datetime.now() - START_TIME
-    logger.info('Mean duration by loop : %s',
-                FULL_TIME / (len(DATASET_LIST) - 1))
-    logger.info('Duration : %s', FULL_TIME)
+    logger.info("Mean duration by loop : %s", FULL_TIME / (len(DATASET_LIST) - 1))
+    logger.info("Duration : %s", FULL_TIME)
 
-    logger.info('Longer track saved have %d obs', CORRESPONDANCES.nb_obs_by_tracks.max())
-    logger.info('The mean length is %d observations after filtering', CORRESPONDANCES.nb_obs_by_tracks.mean())
+    logger.info(
+        "Longer track saved have %d obs", CORRESPONDANCES.nb_obs_by_tracks.max()
+    )
+    logger.info(
+        "The mean length is %d observations after filtering",
+        CORRESPONDANCES.nb_obs_by_tracks.mean(),
+    )
 
     FINAL_EDDIES.write_file(path=SAVE_DIR, zarr_flag=ZARR)
-    SHORT_TRACK.write_file(filename='%(path)s/%(sign_type)s_track_too_short.nc', path=SAVE_DIR, zarr_flag=ZARR)
+    SHORT_TRACK.write_file(
+        filename="%(path)s/%(sign_type)s_track_too_short.nc",
+        path=SAVE_DIR,
+        zarr_flag=ZARR,
+    )
+
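
Beyond the quote and layout normalisation, the one behavioural change in this diff is the relocated TRACK_DURATION_MIN guard: NB_OBS_MIN is now computed and checked before tracking starts rather than only at merge time, so an input series shorter than the minimum track duration fails fast. A standalone sketch of the guard, with made-up CONFIG and file-list values:

# Illustration of the new fail-fast check; CONFIG and DATASET_LIST are invented.
CONFIG = {"TRACK_DURATION_MIN": 14}
DATASET_LIST = ["grid_%02d.nc" % i for i in range(10)]  # only 10 inputs

NB_OBS_MIN = int(CONFIG.get("TRACK_DURATION_MIN", 14))
if NB_OBS_MIN > len(DATASET_LIST):
    # Raises: Input file number (10) is shorter than TRACK_DURATION_MIN (14).
    raise Exception(
        "Input file number (%s) is shorter than TRACK_DURATION_MIN (%s)."
        % (len(DATASET_LIST), NB_OBS_MIN)
    )
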