
Commit de89322

add message suggested in issue AntSimi#10

1 parent: 5a83790
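
Apart from mechanical black-style reformatting, the functional change in this commit is a guard added before tracking starts: the script now raises an explicit error when fewer input files are available than TRACK_DURATION_MIN requires, with the message suggested in issue AntSimi#10 (the NB_OBS_MIN computation also moves up so the check runs before any work is done). A minimal standalone sketch of that guard; the config value and file list here are hypothetical:

CONFIG = {"TRACK_DURATION_MIN": 14}                      # hypothetical config
DATASET_LIST = ["grid_%02d.nc" % i for i in range(10)]   # only 10 input files

NB_OBS_MIN = int(CONFIG.get("TRACK_DURATION_MIN", 14))
if NB_OBS_MIN > len(DATASET_LIST):
    raise Exception(
        "Input file number (%s) is shorter than TRACK_DURATION_MIN (%s)."
        % (len(DATASET_LIST), NB_OBS_MIN)
    )
# -> Exception: Input file number (10) is shorter than TRACK_DURATION_MIN (14).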

File tree

1 file changed (+149, -96 lines)

src/scripts/EddyTracking

Lines changed: 149 additions & 96 deletions
@@ -20,65 +20,72 @@ import datetime as dt
 logger = logging.getLogger("pet")
 
 
-def browse_dataset_in(data_dir, files_model, date_regexp, date_model,
-                      start_date=None, end_date=None, sub_sampling_step=1,
-                      files=None):
+def browse_dataset_in(
+    data_dir,
+    files_model,
+    date_regexp,
+    date_model,
+    start_date=None,
+    end_date=None,
+    sub_sampling_step=1,
+    files=None,
+):
     if files is not None:
-        pattern_regexp = re_compile('.*/' + date_regexp)
+        pattern_regexp = re_compile(".*/" + date_regexp)
         filenames = bytes_(files)
     else:
-        pattern_regexp = re_compile('.*/' + date_regexp)
+        pattern_regexp = re_compile(".*/" + date_regexp)
         full_path = join_path(data_dir, files_model)
-        logger.info('Search files : %s', full_path)
+        logger.info("Search files : %s", full_path)
         filenames = bytes_(glob(full_path))
 
-    dataset_list = empty(len(filenames),
-                         dtype=[('filename', 'S500'),
-                                ('date', 'datetime64[D]'),
-                                ])
-    dataset_list['filename'] = filenames
+    dataset_list = empty(
+        len(filenames), dtype=[("filename", "S500"), ("date", "datetime64[D]"),]
+    )
+    dataset_list["filename"] = filenames
 
-    logger.info('%s grids available', dataset_list.shape[0])
+    logger.info("%s grids available", dataset_list.shape[0])
     mode_attrs = False
-    if '(' not in date_regexp:
-        logger.debug('Attrs date : %s', date_regexp)
-        mode_attrs = date_regexp.strip().split(':')
+    if "(" not in date_regexp:
+        logger.debug("Attrs date : %s", date_regexp)
+        mode_attrs = date_regexp.strip().split(":")
     else:
-        logger.debug('Pattern date : %s', date_regexp)
+        logger.debug("Pattern date : %s", date_regexp)
 
     for item in dataset_list:
         str_date = None
         if mode_attrs:
-            with Dataset(item['filename'].decode("utf-8")) as h:
+            with Dataset(item["filename"].decode("utf-8")) as h:
                 if len(mode_attrs) == 1:
                     str_date = getattr(h, mode_attrs[0])
                 else:
                     str_date = getattr(h.variables[mode_attrs[0]], mode_attrs[1])
         else:
-            result = pattern_regexp.match(str(item['filename']))
+            result = pattern_regexp.match(str(item["filename"]))
             if result:
                 str_date = result.groups()[0]
 
         if str_date is not None:
-            item['date'] = datetime.strptime(str_date, date_model).date()
+            item["date"] = datetime.strptime(str_date, date_model).date()
 
-    dataset_list.sort(order=['date', 'filename'])
+    dataset_list.sort(order=["date", "filename"])
 
-    steps = unique(dataset_list['date'][1:] - dataset_list['date'][:-1])
+    steps = unique(dataset_list["date"][1:] - dataset_list["date"][:-1])
     if len(steps) > 1:
-        raise Exception('Several days steps in grid dataset %s' % steps)
+        raise Exception("Several days steps in grid dataset %s" % steps)
 
     if sub_sampling_step != 1:
-        logger.info('Grid subsampling %d', sub_sampling_step)
+        logger.info("Grid subsampling %d", sub_sampling_step)
         dataset_list = dataset_list[::sub_sampling_step]
 
     if start_date is not None or end_date is not None:
-        logger.info('Available grid from %s to %s',
-                    dataset_list[0]['date'],
-                    dataset_list[-1]['date'])
-        logger.info('Filtering grid by time %s, %s', start_date, end_date)
-        mask = (dataset_list['date'] >= start_date) * (
-            dataset_list['date'] <= end_date)
+        logger.info(
+            "Available grid from %s to %s",
+            dataset_list[0]["date"],
+            dataset_list[-1]["date"],
+        )
+        logger.info("Filtering grid by time %s, %s", start_date, end_date)
+        mask = (dataset_list["date"] >= start_date) * (dataset_list["date"] <= end_date)
 
         dataset_list = dataset_list[mask]
     return dataset_list
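
For context on the pattern branch above: browse_dataset_in extracts each file's date by matching date_regexp against the file name and parsing the captured group with date_model. A minimal sketch using the default pattern from this script; the file path is hypothetical:

import re
from datetime import datetime

filename = "/data/grids/nrt_global_c_20190101.nc"     # hypothetical path
pattern = re.compile(".*/" + ".*c_([0-9]*?).[nz].*")  # default used below
result = pattern.match(filename)
if result:
    print(datetime.strptime(result.groups()[0], "%Y%m%d").date())  # 2019-01-01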
@@ -88,111 +95,148 @@ def usage():
     """Usage
     """
     # Run using:
-    parser = EddyParser(
-        "Tool to use identification step to compute tracking")
-    parser.add_argument('yaml_file',
-                        help='Yaml file to configure py-eddy-tracker')
-    parser.add_argument('--correspondance_in',
-                        help='Filename of saved correspondance')
-    parser.add_argument('--correspondance_out',
-                        help='Filename to save correspondance')
-    parser.add_argument('--save_correspondance_and_stop',
-                        action='store_true',
-                        help='Stop tracking after correspondance computation,'
-                             ' merging can be done with EddyFinalTracking')
-    parser.add_argument('--zarr',
-                        action='store_true',
-                        help='Output will be wrote in zarr')
-    parser.add_argument('--unraw',
-                        action='store_true',
-                        help='Load unraw data')
-    parser.add_argument('--blank_period',
-                        type=int,
-                        default=0,
-                        help='Nb of detection which will not use at the end of the period')
+    parser = EddyParser("Tool to use identification step to compute tracking")
+    parser.add_argument("yaml_file", help="Yaml file to configure py-eddy-tracker")
+    parser.add_argument("--correspondance_in", help="Filename of saved correspondance")
+    parser.add_argument("--correspondance_out", help="Filename to save correspondance")
+    parser.add_argument(
+        "--save_correspondance_and_stop",
+        action="store_true",
+        help="Stop tracking after correspondance computation,"
+        " merging can be done with EddyFinalTracking",
+    )
+    parser.add_argument(
+        "--zarr", action="store_true", help="Output will be wrote in zarr"
+    )
+    parser.add_argument("--unraw", action="store_true", help="Load unraw data")
+    parser.add_argument(
+        "--blank_period",
+        type=int,
+        default=0,
+        help="Nb of detection which will not use at the end of the period",
+    )
     args = parser.parse_args()
 
     # Read yaml configuration file
-    with open(args.yaml_file, 'r') as stream:
+    with open(args.yaml_file, "r") as stream:
         config = yaml_load(stream)
     if args.correspondance_in is not None and not exists(args.correspondance_in):
         args.correspondance_in = None
-    return config, args.save_correspondance_and_stop, args.correspondance_in, args.correspondance_out,\
-        args.blank_period, args.zarr, not args.unraw
-
-
-if __name__ == '__main__':
-    CONFIG, SAVE_STOP, CORRESPONDANCES_IN, CORRESPONDANCES_OUT, BLANK_PERIOD, ZARR, RAW = usage()
+    return (
+        config,
+        args.save_correspondance_and_stop,
+        args.correspondance_in,
+        args.correspondance_out,
+        args.blank_period,
+        args.zarr,
+        not args.unraw,
+    )
+
+
+if __name__ == "__main__":
+    (
+        CONFIG,
+        SAVE_STOP,
+        CORRESPONDANCES_IN,
+        CORRESPONDANCES_OUT,
+        BLANK_PERIOD,
+        ZARR,
+        RAW,
+    ) = usage()
     # Create output directory
-    SAVE_DIR = CONFIG['PATHS'].get('SAVE_DIR', None)
+    SAVE_DIR = CONFIG["PATHS"].get("SAVE_DIR", None)
     if SAVE_DIR is not None and not exists(SAVE_DIR):
         mkdir(SAVE_DIR)
 
-    YAML_CORRESPONDANCES_IN = CONFIG['PATHS'].get('CORRESPONDANCES_IN', None)
-    YAML_CORRESPONDANCES_OUT = CONFIG['PATHS'].get('CORRESPONDANCES_OUT', None)
+    YAML_CORRESPONDANCES_IN = CONFIG["PATHS"].get("CORRESPONDANCES_IN", None)
+    YAML_CORRESPONDANCES_OUT = CONFIG["PATHS"].get("CORRESPONDANCES_OUT", None)
     if CORRESPONDANCES_IN is None:
         CORRESPONDANCES_IN = YAML_CORRESPONDANCES_IN
     if CORRESPONDANCES_OUT is None:
         CORRESPONDANCES_OUT = YAML_CORRESPONDANCES_OUT
     if YAML_CORRESPONDANCES_OUT is None and CORRESPONDANCES_OUT is None:
-        CORRESPONDANCES_OUT = '{path}/{sign_type}_correspondances.nc'
+        CORRESPONDANCES_OUT = "{path}/{sign_type}_correspondances.nc"
 
-    if 'CLASS' in CONFIG:
+    if "CLASS" in CONFIG:
         CLASS = getattr(
-            __import__(CONFIG['CLASS']['MODULE'], globals(), locals(), CONFIG['CLASS']['CLASS']),
-            CONFIG['CLASS']['CLASS'])
+            __import__(
+                CONFIG["CLASS"]["MODULE"], globals(), locals(), CONFIG["CLASS"]["CLASS"]
+            ),
+            CONFIG["CLASS"]["CLASS"],
+        )
     else:
         CLASS = None
 
-    NB_VIRTUAL_OBS_MAX_BY_SEGMENT = int(CONFIG.get('VIRTUAL_LENGTH_MAX', 0))
+    NB_VIRTUAL_OBS_MAX_BY_SEGMENT = int(CONFIG.get("VIRTUAL_LENGTH_MAX", 0))
 
-    if isinstance(CONFIG['PATHS']['FILES_PATTERN'], list):
+    if isinstance(CONFIG["PATHS"]["FILES_PATTERN"], list):
         DATASET_LIST = browse_dataset_in(
             data_dir=None,
             files_model=None,
-            files=CONFIG['PATHS']['FILES_PATTERN'],
-            date_regexp='.*c_([0-9]*?).[nz].*',
-            date_model='%Y%m%d')
+            files=CONFIG["PATHS"]["FILES_PATTERN"],
+            date_regexp=".*c_([0-9]*?).[nz].*",
+            date_model="%Y%m%d",
+        )
     else:
         DATASET_LIST = browse_dataset_in(
-            data_dir=dirname(CONFIG['PATHS']['FILES_PATTERN']),
-            files_model=basename(CONFIG['PATHS']['FILES_PATTERN']),
-            date_regexp='.*c_([0-9]*?).[nz].*',
-            date_model='%Y%m%d')
+            data_dir=dirname(CONFIG["PATHS"]["FILES_PATTERN"]),
+            files_model=basename(CONFIG["PATHS"]["FILES_PATTERN"]),
+            date_regexp=".*c_([0-9]*?).[nz].*",
+            date_model="%Y%m%d",
+        )
 
     if BLANK_PERIOD > 0:
         DATASET_LIST = DATASET_LIST[:-BLANK_PERIOD]
-        logger.info('Last %d files will be pop', BLANK_PERIOD)
+        logger.info("Last %d files will be pop", BLANK_PERIOD)
 
     START_TIME = dt.datetime.now()
-    logger.info('Start tracking on %d files', len(DATASET_LIST))
+    logger.info("Start tracking on %d files", len(DATASET_LIST))
+
+    NB_OBS_MIN = int(CONFIG.get("TRACK_DURATION_MIN", 14))
+    if NB_OBS_MIN > len(DATASET_LIST):
+        raise Exception(
+            "Input file number (%s) is shorter than TRACK_DURATION_MIN (%s)."
+            % (len(DATASET_LIST), NB_OBS_MIN)
+        )
 
     CORRESPONDANCES = Correspondances(
-        datasets=DATASET_LIST['filename'],
+        datasets=DATASET_LIST["filename"],
         virtual=NB_VIRTUAL_OBS_MAX_BY_SEGMENT,
         class_method=CLASS,
-        previous_correspondance=CORRESPONDANCES_IN)
+        previous_correspondance=CORRESPONDANCES_IN,
+    )
 
     CORRESPONDANCES.track()
-    logger.info('Track finish')
-    logger.info('Start merging')
+    logger.info("Track finish")
+    logger.info("Start merging")
 
     DATE_START, DATE_STOP = CORRESPONDANCES.period
-    DICT_COMPLETION = dict(date_start=DATE_START, date_stop=DATE_STOP, date_prod=START_TIME,
-                           path=SAVE_DIR, sign_type=CORRESPONDANCES.current_obs.sign_legend)
+    DICT_COMPLETION = dict(
+        date_start=DATE_START,
+        date_stop=DATE_STOP,
+        date_prod=START_TIME,
+        path=SAVE_DIR,
+        sign_type=CORRESPONDANCES.current_obs.sign_legend,
+    )
 
     CORRESPONDANCES.save(CORRESPONDANCES_OUT, DICT_COMPLETION)
     if SAVE_STOP:
         exit()
 
     # Merge correspondance, only do if we stop and store just after compute of correspondance
-    NB_OBS_MIN = int(CONFIG.get('TRACK_DURATION_MIN', 14))
     CORRESPONDANCES.prepare_merging()
 
-    logger.info('Longer track saved have %d obs', CORRESPONDANCES.nb_obs_by_tracks.max())
-    logger.info('The mean length is %d observations before filtering', CORRESPONDANCES.nb_obs_by_tracks.mean())
+    logger.info(
+        "Longer track saved have %d obs", CORRESPONDANCES.nb_obs_by_tracks.max()
+    )
+    logger.info(
+        "The mean length is %d observations before filtering",
+        CORRESPONDANCES.nb_obs_by_tracks.mean(),
+    )
 
-    CORRESPONDANCES.get_unused_data(raw_data=RAW).write_file(path=SAVE_DIR, filename='%(path)s/%(sign_type)s_untracked.nc', zarr_flag=ZARR)
+    CORRESPONDANCES.get_unused_data(raw_data=RAW).write_file(
+        path=SAVE_DIR, filename="%(path)s/%(sign_type)s_untracked.nc", zarr_flag=ZARR
+    )
 
     SHORT_CORRESPONDANCES = CORRESPONDANCES._copy()
     SHORT_CORRESPONDANCES.shorter_than(size_max=NB_OBS_MIN)
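
The configuration keys read in this block can be summarized with a hypothetical example, written here as the Python dict that yaml_load would return; every path and value is illustrative, only the key names come from the script:

CONFIG = {
    "PATHS": {
        # glob pattern, or a list of files, of identification outputs
        "FILES_PATTERN": "/data/ident/Anticyclonic_c_*.nc",
        "SAVE_DIR": "/data/tracking",
        # optional: "CORRESPONDANCES_IN": ..., "CORRESPONDANCES_OUT": ...,
    },
    "VIRTUAL_LENGTH_MAX": 3,    # max virtual observations per segment
    "TRACK_DURATION_MIN": 14,   # minimum track length, in observations
    # optional: "CLASS": {"MODULE": ..., "CLASS": ...} for a custom class
}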
@@ -204,19 +248,28 @@ if __name__ == '__main__':
 
     # We flag obs
     if CORRESPONDANCES.virtual:
-        FINAL_EDDIES['virtual'][:] = FINAL_EDDIES['time'] == 0
-        FINAL_EDDIES.filled_by_interpolation(FINAL_EDDIES['virtual'] == 1)
-        SHORT_TRACK['virtual'][:] = SHORT_TRACK['time'] == 0
-        SHORT_TRACK.filled_by_interpolation(SHORT_TRACK['virtual'] == 1)
+        FINAL_EDDIES["virtual"][:] = FINAL_EDDIES["time"] == 0
+        FINAL_EDDIES.filled_by_interpolation(FINAL_EDDIES["virtual"] == 1)
+        SHORT_TRACK["virtual"][:] = SHORT_TRACK["time"] == 0
+        SHORT_TRACK.filled_by_interpolation(SHORT_TRACK["virtual"] == 1)
 
     # Total running time
     FULL_TIME = dt.datetime.now() - START_TIME
-    logger.info('Mean duration by loop : %s',
-                FULL_TIME / (len(DATASET_LIST) - 1))
-    logger.info('Duration : %s', FULL_TIME)
+    logger.info("Mean duration by loop : %s", FULL_TIME / (len(DATASET_LIST) - 1))
+    logger.info("Duration : %s", FULL_TIME)
 
-    logger.info('Longer track saved have %d obs', CORRESPONDANCES.nb_obs_by_tracks.max())
-    logger.info('The mean length is %d observations after filtering', CORRESPONDANCES.nb_obs_by_tracks.mean())
+    logger.info(
+        "Longer track saved have %d obs", CORRESPONDANCES.nb_obs_by_tracks.max()
+    )
+    logger.info(
+        "The mean length is %d observations after filtering",
+        CORRESPONDANCES.nb_obs_by_tracks.mean(),
+    )
 
     FINAL_EDDIES.write_file(path=SAVE_DIR, zarr_flag=ZARR)
-    SHORT_TRACK.write_file(filename='%(path)s/%(sign_type)s_track_too_short.nc', path=SAVE_DIR, zarr_flag=ZARR)
+    SHORT_TRACK.write_file(
+        filename="%(path)s/%(sign_type)s_track_too_short.nc",
+        path=SAVE_DIR,
+        zarr_flag=ZARR,
+    )
+
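
A side note on the write_file calls: the filename arguments are old-style %-format templates with named fields, presumably filled by the library from the save path and the dataset's sign legend. A quick illustration of the formatting; both values are hypothetical:

template = "%(path)s/%(sign_type)s_track_too_short.nc"
print(template % {"path": "/data/tracking", "sign_type": "Anticyclonic"})
# -> /data/tracking/Anticyclonic_track_too_short.nc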
