@@ -20,65 +20,72 @@ import datetime as dt
logger = logging.getLogger("pet")


-def browse_dataset_in(data_dir, files_model, date_regexp, date_model,
-                      start_date=None, end_date=None, sub_sampling_step=1,
-                      files=None):
+def browse_dataset_in(
+    data_dir,
+    files_model,
+    date_regexp,
+    date_model,
+    start_date=None,
+    end_date=None,
+    sub_sampling_step=1,
+    files=None,
+):
    if files is not None:
-        pattern_regexp = re_compile('.*/' + date_regexp)
+        pattern_regexp = re_compile(".*/" + date_regexp)
        filenames = bytes_(files)
    else:
-        pattern_regexp = re_compile('.*/' + date_regexp)
+        pattern_regexp = re_compile(".*/" + date_regexp)
        full_path = join_path(data_dir, files_model)
-        logger.info('Search files : %s', full_path)
+        logger.info("Search files : %s", full_path)
        filenames = bytes_(glob(full_path))

-    dataset_list = empty(len(filenames),
-                         dtype=[('filename', 'S500'),
-                                ('date', 'datetime64[D]'),
-                                ])
-    dataset_list['filename'] = filenames
+    dataset_list = empty(
+        len(filenames), dtype=[("filename", "S500"), ("date", "datetime64[D]"),]
+    )
+    dataset_list["filename"] = filenames

-    logger.info('%s grids available', dataset_list.shape[0])
+    logger.info("%s grids available", dataset_list.shape[0])
    mode_attrs = False
-    if '(' not in date_regexp:
-        logger.debug('Attrs date : %s', date_regexp)
-        mode_attrs = date_regexp.strip().split(':')
+    if "(" not in date_regexp:
+        logger.debug("Attrs date : %s", date_regexp)
+        mode_attrs = date_regexp.strip().split(":")
    else:
-        logger.debug('Pattern date : %s', date_regexp)
+        logger.debug("Pattern date : %s", date_regexp)

    for item in dataset_list:
        str_date = None
        if mode_attrs:
-            with Dataset(item['filename'].decode("utf-8")) as h:
+            with Dataset(item["filename"].decode("utf-8")) as h:
                if len(mode_attrs) == 1:
                    str_date = getattr(h, mode_attrs[0])
                else:
                    str_date = getattr(h.variables[mode_attrs[0]], mode_attrs[1])
        else:
-            result = pattern_regexp.match(str(item['filename']))
+            result = pattern_regexp.match(str(item["filename"]))
            if result:
                str_date = result.groups()[0]

        if str_date is not None:
-            item['date'] = datetime.strptime(str_date, date_model).date()
+            item["date"] = datetime.strptime(str_date, date_model).date()

-    dataset_list.sort(order=['date', 'filename'])
+    dataset_list.sort(order=["date", "filename"])

-    steps = unique(dataset_list['date'][1:] - dataset_list['date'][:-1])
+    steps = unique(dataset_list["date"][1:] - dataset_list["date"][:-1])
    if len(steps) > 1:
-        raise Exception('Several days steps in grid dataset %s' % steps)
+        raise Exception("Several days steps in grid dataset %s" % steps)

    if sub_sampling_step != 1:
-        logger.info('Grid subsampling %d', sub_sampling_step)
+        logger.info("Grid subsampling %d", sub_sampling_step)
        dataset_list = dataset_list[::sub_sampling_step]

    if start_date is not None or end_date is not None:
-        logger.info('Available grid from %s to %s',
-                    dataset_list[0]['date'],
-                    dataset_list[-1]['date'])
-        logger.info('Filtering grid by time %s, %s', start_date, end_date)
-        mask = (dataset_list['date'] >= start_date) * (
-            dataset_list['date'] <= end_date)
+        logger.info(
+            "Available grid from %s to %s",
+            dataset_list[0]["date"],
+            dataset_list[-1]["date"],
+        )
+        logger.info("Filtering grid by time %s, %s", start_date, end_date)
+        mask = (dataset_list["date"] >= start_date) * (dataset_list["date"] <= end_date)

        dataset_list = dataset_list[mask]
    return dataset_list
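# A minimal, runnable sketch of the filename-to-date extraction performed by
# browse_dataset_in above. The file names, the capture group and the "%Y%m%d"
# model are hypothetical placeholders; only the structured-array mechanics
# mirror the function.
import re
from datetime import datetime

from numpy import empty

files = [b"/data/dt_eddies_20190116.nc", b"/data/dt_eddies_20190115.nc"]
pattern = re.compile(".*/" + r"dt_eddies_([0-9]{8})\.nc.*")

dataset_list = empty(len(files), dtype=[("filename", "S500"), ("date", "datetime64[D]")])
dataset_list["filename"] = files
for item in dataset_list:
    result = pattern.match(str(item["filename"]))
    if result:
        # The first capture group holds the date token, parsed with the date model.
        item["date"] = datetime.strptime(result.groups()[0], "%Y%m%d").date()
dataset_list.sort(order=["date", "filename"])
print(dataset_list["date"])  # ['2019-01-15' '2019-01-16']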
@@ -88,111 +95,148 @@ def usage():
    """Usage
    """
    # Run using:
-    parser = EddyParser(
-        "Tool to use identification step to compute tracking")
-    parser.add_argument('yaml_file',
-                        help='Yaml file to configure py-eddy-tracker')
-    parser.add_argument('--correspondance_in',
-                        help='Filename of saved correspondance')
-    parser.add_argument('--correspondance_out',
-                        help='Filename to save correspondance')
-    parser.add_argument('--save_correspondance_and_stop',
-                        action='store_true',
-                        help='Stop tracking after correspondance computation,'
-                             ' merging can be done with EddyFinalTracking')
-    parser.add_argument('--zarr',
-                        action='store_true',
-                        help='Output will be wrote in zarr')
-    parser.add_argument('--unraw',
-                        action='store_true',
-                        help='Load unraw data')
-    parser.add_argument('--blank_period',
-                        type=int,
-                        default=0,
-                        help='Nb of detection which will not use at the end of the period')
+    parser = EddyParser("Tool to use identification step to compute tracking")
+    parser.add_argument("yaml_file", help="Yaml file to configure py-eddy-tracker")
+    parser.add_argument("--correspondance_in", help="Filename of saved correspondance")
+    parser.add_argument("--correspondance_out", help="Filename to save correspondance")
+    parser.add_argument(
+        "--save_correspondance_and_stop",
+        action="store_true",
+        help="Stop tracking after correspondance computation,"
+        " merging can be done with EddyFinalTracking",
+    )
+    parser.add_argument(
+        "--zarr", action="store_true", help="Output will be wrote in zarr"
+    )
+    parser.add_argument("--unraw", action="store_true", help="Load unraw data")
+    parser.add_argument(
+        "--blank_period",
+        type=int,
+        default=0,
+        help="Nb of detection which will not use at the end of the period",
+    )
    args = parser.parse_args()

    # Read yaml configuration file
-    with open(args.yaml_file, 'r') as stream:
+    with open(args.yaml_file, "r") as stream:
        config = yaml_load(stream)
    if args.correspondance_in is not None and not exists(args.correspondance_in):
        args.correspondance_in = None
-    return config, args.save_correspondance_and_stop, args.correspondance_in, args.correspondance_out,\
-        args.blank_period, args.zarr, not args.unraw
-
-
-if __name__ == '__main__':
-    CONFIG, SAVE_STOP, CORRESPONDANCES_IN, CORRESPONDANCES_OUT, BLANK_PERIOD, ZARR, RAW = usage()
+    return (
+        config,
+        args.save_correspondance_and_stop,
+        args.correspondance_in,
+        args.correspondance_out,
+        args.blank_period,
+        args.zarr,
+        not args.unraw,
+    )
+
+
+if __name__ == "__main__":
+    (
+        CONFIG,
+        SAVE_STOP,
+        CORRESPONDANCES_IN,
+        CORRESPONDANCES_OUT,
+        BLANK_PERIOD,
+        ZARR,
+        RAW,
+    ) = usage()
    # Create output directory
-    SAVE_DIR = CONFIG['PATHS'].get('SAVE_DIR', None)
+    SAVE_DIR = CONFIG["PATHS"].get("SAVE_DIR", None)
    if SAVE_DIR is not None and not exists(SAVE_DIR):
        mkdir(SAVE_DIR)

-    YAML_CORRESPONDANCES_IN = CONFIG['PATHS'].get('CORRESPONDANCES_IN', None)
-    YAML_CORRESPONDANCES_OUT = CONFIG['PATHS'].get('CORRESPONDANCES_OUT', None)
+    YAML_CORRESPONDANCES_IN = CONFIG["PATHS"].get("CORRESPONDANCES_IN", None)
+    YAML_CORRESPONDANCES_OUT = CONFIG["PATHS"].get("CORRESPONDANCES_OUT", None)
    if CORRESPONDANCES_IN is None:
        CORRESPONDANCES_IN = YAML_CORRESPONDANCES_IN
    if CORRESPONDANCES_OUT is None:
        CORRESPONDANCES_OUT = YAML_CORRESPONDANCES_OUT
    if YAML_CORRESPONDANCES_OUT is None and CORRESPONDANCES_OUT is None:
-        CORRESPONDANCES_OUT = '{path}/{sign_type}_correspondances.nc'
+        CORRESPONDANCES_OUT = "{path}/{sign_type}_correspondances.nc"

-    if 'CLASS' in CONFIG:
+    if "CLASS" in CONFIG:
        CLASS = getattr(
-            __import__(CONFIG['CLASS']['MODULE'], globals(), locals(), CONFIG['CLASS']['CLASS']),
-            CONFIG['CLASS']['CLASS'])
+            __import__(
+                CONFIG["CLASS"]["MODULE"], globals(), locals(), CONFIG["CLASS"]["CLASS"]
+            ),
+            CONFIG["CLASS"]["CLASS"],
+        )
    else:
        CLASS = None

-    NB_VIRTUAL_OBS_MAX_BY_SEGMENT = int(CONFIG.get('VIRTUAL_LENGTH_MAX', 0))
+    NB_VIRTUAL_OBS_MAX_BY_SEGMENT = int(CONFIG.get("VIRTUAL_LENGTH_MAX", 0))

-    if isinstance(CONFIG['PATHS']['FILES_PATTERN'], list):
+    if isinstance(CONFIG["PATHS"]["FILES_PATTERN"], list):
        DATASET_LIST = browse_dataset_in(
            data_dir=None,
            files_model=None,
-            files=CONFIG['PATHS']['FILES_PATTERN'],
-            date_regexp='.*c_([0-9]*?).[nz].*',
-            date_model='%Y%m%d')
+            files=CONFIG["PATHS"]["FILES_PATTERN"],
+            date_regexp=".*c_([0-9]*?).[nz].*",
+            date_model="%Y%m%d",
+        )
    else:
        DATASET_LIST = browse_dataset_in(
-            data_dir=dirname(CONFIG['PATHS']['FILES_PATTERN']),
-            files_model=basename(CONFIG['PATHS']['FILES_PATTERN']),
-            date_regexp='.*c_([0-9]*?).[nz].*',
-            date_model='%Y%m%d')
+            data_dir=dirname(CONFIG["PATHS"]["FILES_PATTERN"]),
+            files_model=basename(CONFIG["PATHS"]["FILES_PATTERN"]),
+            date_regexp=".*c_([0-9]*?).[nz].*",
+            date_model="%Y%m%d",
+        )

    if BLANK_PERIOD > 0:
        DATASET_LIST = DATASET_LIST[:-BLANK_PERIOD]
-        logger.info('Last %d files will be pop', BLANK_PERIOD)
+        logger.info("Last %d files will be pop", BLANK_PERIOD)

    START_TIME = dt.datetime.now()
-    logger.info('Start tracking on %d files', len(DATASET_LIST))
+    logger.info("Start tracking on %d files", len(DATASET_LIST))
+
+    NB_OBS_MIN = int(CONFIG.get("TRACK_DURATION_MIN", 14))
+    if NB_OBS_MIN > len(DATASET_LIST):
+        raise Exception(
+            "Input file number (%s) is shorter than TRACK_DURATION_MIN (%s)."
+            % (len(DATASET_LIST), NB_OBS_MIN)
+        )

    CORRESPONDANCES = Correspondances(
-        datasets=DATASET_LIST['filename'],
+        datasets=DATASET_LIST["filename"],
        virtual=NB_VIRTUAL_OBS_MAX_BY_SEGMENT,
        class_method=CLASS,
-        previous_correspondance=CORRESPONDANCES_IN)
+        previous_correspondance=CORRESPONDANCES_IN,
+    )

    CORRESPONDANCES.track()
-    logger.info('Track finish')
-    logger.info('Start merging')
+    logger.info("Track finish")
+    logger.info("Start merging")

    DATE_START, DATE_STOP = CORRESPONDANCES.period
-    DICT_COMPLETION = dict(date_start=DATE_START, date_stop=DATE_STOP, date_prod=START_TIME,
-                           path=SAVE_DIR, sign_type=CORRESPONDANCES.current_obs.sign_legend)
+    DICT_COMPLETION = dict(
+        date_start=DATE_START,
+        date_stop=DATE_STOP,
+        date_prod=START_TIME,
+        path=SAVE_DIR,
+        sign_type=CORRESPONDANCES.current_obs.sign_legend,
+    )

    CORRESPONDANCES.save(CORRESPONDANCES_OUT, DICT_COMPLETION)
    if SAVE_STOP:
        exit()

    # Merge correspondance, only do if we stop and store just after compute of correspondance
-    NB_OBS_MIN = int(CONFIG.get('TRACK_DURATION_MIN', 14))
    CORRESPONDANCES.prepare_merging()

-    logger.info('Longer track saved have %d obs', CORRESPONDANCES.nb_obs_by_tracks.max())
-    logger.info('The mean length is %d observations before filtering', CORRESPONDANCES.nb_obs_by_tracks.mean())
+    logger.info(
+        "Longer track saved have %d obs", CORRESPONDANCES.nb_obs_by_tracks.max()
+    )
+    logger.info(
+        "The mean length is %d observations before filtering",
+        CORRESPONDANCES.nb_obs_by_tracks.mean(),
+    )

-    CORRESPONDANCES.get_unused_data(raw_data=RAW).write_file(path=SAVE_DIR, filename='%(path)s/%(sign_type)s_untracked.nc', zarr_flag=ZARR)
+    CORRESPONDANCES.get_unused_data(raw_data=RAW).write_file(
+        path=SAVE_DIR, filename="%(path)s/%(sign_type)s_untracked.nc", zarr_flag=ZARR
+    )

    SHORT_CORRESPONDANCES = CORRESPONDANCES._copy()
    SHORT_CORRESPONDANCES.shorter_than(size_max=NB_OBS_MIN)
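# An illustrative YAML configuration exercising the keys read in the __main__
# block above. Paths and values are placeholders; only the key names
# (PATHS.FILES_PATTERN, PATHS.SAVE_DIR, VIRTUAL_LENGTH_MAX, TRACK_DURATION_MIN)
# come from the code, and safe_load stands in for the script's yaml_load import.
from yaml import safe_load

CONFIG = safe_load(
    """
PATHS:
  FILES_PATTERN: /data/grids/dt_eddies_*.nc
  SAVE_DIR: /data/tracking
VIRTUAL_LENGTH_MAX: 3
TRACK_DURATION_MIN: 14
"""
)
print(int(CONFIG.get("TRACK_DURATION_MIN", 14)))  # 14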
@@ -204,19 +248,28 @@ if __name__ == '__main__':

    # We flag obs
    if CORRESPONDANCES.virtual:
-        FINAL_EDDIES['virtual'][:] = FINAL_EDDIES['time'] == 0
-        FINAL_EDDIES.filled_by_interpolation(FINAL_EDDIES['virtual'] == 1)
-        SHORT_TRACK['virtual'][:] = SHORT_TRACK['time'] == 0
-        SHORT_TRACK.filled_by_interpolation(SHORT_TRACK['virtual'] == 1)
+        FINAL_EDDIES["virtual"][:] = FINAL_EDDIES["time"] == 0
+        FINAL_EDDIES.filled_by_interpolation(FINAL_EDDIES["virtual"] == 1)
+        SHORT_TRACK["virtual"][:] = SHORT_TRACK["time"] == 0
+        SHORT_TRACK.filled_by_interpolation(SHORT_TRACK["virtual"] == 1)

    # Total running time
    FULL_TIME = dt.datetime.now() - START_TIME
-    logger.info('Mean duration by loop : %s',
-                FULL_TIME / (len(DATASET_LIST) - 1))
-    logger.info('Duration : %s', FULL_TIME)
+    logger.info("Mean duration by loop : %s", FULL_TIME / (len(DATASET_LIST) - 1))
+    logger.info("Duration : %s", FULL_TIME)

-    logger.info('Longer track saved have %d obs', CORRESPONDANCES.nb_obs_by_tracks.max())
-    logger.info('The mean length is %d observations after filtering', CORRESPONDANCES.nb_obs_by_tracks.mean())
+    logger.info(
+        "Longer track saved have %d obs", CORRESPONDANCES.nb_obs_by_tracks.max()
+    )
+    logger.info(
+        "The mean length is %d observations after filtering",
+        CORRESPONDANCES.nb_obs_by_tracks.mean(),
+    )

    FINAL_EDDIES.write_file(path=SAVE_DIR, zarr_flag=ZARR)
-    SHORT_TRACK.write_file(filename='%(path)s/%(sign_type)s_track_too_short.nc', path=SAVE_DIR, zarr_flag=ZARR)
+    SHORT_TRACK.write_file(
+        filename="%(path)s/%(sign_type)s_track_too_short.nc",
+        path=SAVE_DIR,
+        zarr_flag=ZARR,
+    )
+
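# The output paths above rely on printf-style dict formatting; a quick
# illustration with placeholder values (the sign legend string is hypothetical).
template = "%(path)s/%(sign_type)s_track_too_short.nc"
print(template % dict(path="/data/tracking", sign_type="Anticyclonic"))
# /data/tracking/Anticyclonic_track_too_short.nc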