
Commit 38f7bf2

correct issues with datetime after merge
1 parent 3147088 commit 38f7bf2

File tree

3 files changed: +288 -12 lines changed

src/py_eddy_tracker/appli/eddies.py

Lines changed: 14 additions & 11 deletions
@@ -12,7 +12,9 @@
 from re import compile as re_compile
 
 from netCDF4 import Dataset
+from datetime import datetime
 from numpy import bincount, bytes_, empty, in1d, unique
+from numpy import dtype as npdtype
 from yaml import safe_load
 
 from .. import EddyParser
@@ -241,18 +243,18 @@ def browse_dataset_in(
         len(filenames),
         dtype=[
             ("filename", "S500"),
-            ("date", "datetime64[D]"),
+            ("datetime", npdtype('M8[us]')),
         ],
     )
     dataset_list["filename"] = filenames
 
     logger.info("%s grids available", dataset_list.shape[0])
     mode_attrs = False
     if "(" not in date_regexp:
-        logger.debug("Attrs date : %s", date_regexp)
+        logger.debug("Attrs datetime : %s", date_regexp)
         mode_attrs = date_regexp.strip().split(":")
     else:
-        logger.debug("Pattern date : %s", date_regexp)
+        logger.debug("Pattern datetime : %s", date_regexp)
 
     for item in dataset_list:
         str_date = None
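
Why the field type changed (a standalone sketch with an invented timestamp, not repository code): "datetime64[D]" stores day resolution only, so the hour captured by the new "%Y%m%d%H" date_model further down would be truncated, while "M8[us]" keeps it.

from datetime import datetime
from numpy import dtype as npdtype, empty

parsed = datetime.strptime("2019010106", "%Y%m%d%H")  # hypothetical stamp, 06:00

old = empty(1, dtype=[("date", "datetime64[D]")])
old["date"][0] = parsed.date()  # old scheme: day resolution only
print(old["date"][0])           # 2019-01-01 -> the 06:00 component is lost

new = empty(1, dtype=[("datetime", npdtype("M8[us]"))])
new["datetime"][0] = parsed     # new scheme: microsecond resolution
print(new["datetime"][0])       # 2019-01-01T06:00:00.000000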
@@ -268,13 +270,14 @@ def browse_dataset_in(
                 str_date = result.groups()[0]
 
         if str_date is not None:
-            item["date"] = datetime.strptime(str_date, date_model).date()
+            item["datetime"] = datetime.strptime(str_date, date_model)
 
-    dataset_list.sort(order=["date", "filename"])
+    dataset_list.sort(order=["datetime", "filename"])
 
-    steps = unique(dataset_list["date"][1:] - dataset_list["date"][:-1])
+    steps = unique(dataset_list["datetime"][1:] - dataset_list["datetime"][:-1])
     if len(steps) > 1:
-        raise Exception("Several days steps in grid dataset %s" % steps)
+        print("Several days steps in grid dataset %s" % steps)
+        #raise Exception("Several days steps in grid dataset %s" % steps)
 
     if sub_sampling_step != 1:
         logger.info("Grid subsampling %d", sub_sampling_step)
@@ -283,11 +286,11 @@
     if start_date is not None or end_date is not None:
         logger.info(
             "Available grid from %s to %s",
-            dataset_list[0]["date"],
-            dataset_list[-1]["date"],
+            dataset_list[0]["datetime"],
+            dataset_list[-1]["datetime"],
         )
         logger.info("Filtering grid by time %s, %s", start_date, end_date)
-        mask = (dataset_list["date"] >= start_date) * (dataset_list["date"] <= end_date)
+        mask = (dataset_list["datetime"] >= start_date) * (dataset_list["datetime"] <= end_date)
 
         dataset_list = dataset_list[mask]
     return dataset_list
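
A side note on the rewritten mask (standalone sketch, invented data): on boolean arrays * acts as an elementwise AND, and a datetime64 field compares directly against Python datetime objects, so start_date and end_date need no conversion.

from datetime import datetime
from numpy import array

t = array(["2019-01-01T00", "2019-01-02T00", "2019-01-03T00"], dtype="M8[us]")
mask = (t >= datetime(2019, 1, 1, 12)) * (t <= datetime(2019, 1, 2, 12))
print(mask)  # [False  True False]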
@@ -304,7 +307,7 @@ def track(
     correspondances_only=False,
     **kw_c,
 ):
-    kw = dict(date_regexp=".*_([0-9]*?).[nz].*", date_model="%Y%m%d")
+    kw = dict(date_regexp=".*_([0-9]*?).[nz].*", date_model="%Y%m%d%H")
     if isinstance(pattern, list):
         kw.update(dict(data_dir=None, files_model=None, files=pattern))
     else:

src/py_eddy_tracker/dataset/grid.py

Lines changed: 0 additions & 1 deletion
@@ -1039,7 +1039,6 @@ def get_amplitude(
     data,
     anticyclonic_search=True,
     level=None,
-    interval=None,
     grid_height='sla',
     mle=1,
     **kwargs

src/scripts/EddyTracking

Lines changed: 274 additions & 0 deletions
@@ -0,0 +1,274 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+Track eddies with identification files produced with EddyIdentification
+"""
+from py_eddy_tracker import EddyParser
+from yaml import load as yaml_load
+from py_eddy_tracker.tracking import Correspondances
+from os.path import exists, dirname, basename
+from os import mkdir
+from re import compile as re_compile
+from os.path import join as join_path
+from numpy import bytes_, empty, unique
+from numpy import dtype as npdtype
+from netCDF4 import Dataset
+from datetime import datetime
+from glob import glob
+import logging
+
+
+logger = logging.getLogger("pet")
+
+
+def browse_dataset_in(
+    data_dir,
+    files_model,
+    date_regexp,
+    date_model,
+    start_date=None,
+    end_date=None,
+    sub_sampling_step=1,
+    files=None,
+):
+    pattern_regexp = re_compile(".*/" + date_regexp)
+    if files is not None:
+        filenames = bytes_(files)
+    else:
+        full_path = join_path(data_dir, files_model)
+        logger.info("Search files : %s", full_path)
+        filenames = bytes_(glob(full_path))
+
+    dataset_list = empty(
+        len(filenames), dtype=[("filename", "S500"), ("datetime", npdtype('M8[us]'))]
+    )
+    dataset_list["filename"] = filenames
+
+    logger.info("%s grids available", dataset_list.shape[0])
+    mode_attrs = False
+    if "(" not in date_regexp:
+        logger.debug("Attrs datetime : %s", date_regexp)
+        mode_attrs = date_regexp.strip().split(":")
+    else:
+        logger.debug("Pattern datetime : %s", date_regexp)
+
+    for item in dataset_list:
+        str_date = None
+        if mode_attrs:
+            with Dataset(item["filename"].decode("utf-8")) as h:
+                if len(mode_attrs) == 1:
+                    str_date = getattr(h, mode_attrs[0])
+                else:
+                    str_date = getattr(h.variables[mode_attrs[0]], mode_attrs[1])
+        else:
+            result = pattern_regexp.match(str(item["filename"]))
+            if result:
+                str_date = result.groups()[0]
+
+        if str_date is not None:
+            # item["date"] = datetime.strptime(str_date, date_model).date()
+            item["datetime"] = datetime.strptime(str_date, date_model)
+
+    dataset_list.sort(order=["datetime", "filename"])
+
+    steps = unique(dataset_list["datetime"][1:] - dataset_list["datetime"][:-1])
+    if len(steps) > 1:
+        print("Several days steps in grid dataset %s" % steps)
+        #raise Exception("Several days steps in grid dataset %s" % steps)
+
+    if sub_sampling_step != 1:
+        logger.info("Grid subsampling %d", sub_sampling_step)
+        dataset_list = dataset_list[::sub_sampling_step]
+
+    if start_date is not None or end_date is not None:
+        logger.info(
+            "Available grid from %s to %s",
+            dataset_list[0]["datetime"],
+            dataset_list[-1]["datetime"],
+        )
+        logger.info("Filtering grid by time %s, %s", start_date, end_date)
+        mask = (dataset_list["datetime"] >= start_date) * (dataset_list["datetime"] <= end_date)
+
+        dataset_list = dataset_list[mask]
+    return dataset_list
+
+
+def usage():
+    """Parse command-line arguments and the YAML configuration."""
+    parser = EddyParser("Tool to use identification step to compute tracking")
+    parser.add_argument("yaml_file", help="Yaml file to configure py-eddy-tracker")
+    parser.add_argument("--correspondance_in", help="Filename of saved correspondance")
+    parser.add_argument("--correspondance_out", help="Filename to save correspondance")
+    parser.add_argument(
+        "--save_correspondance_and_stop",
+        action="store_true",
+        help="Stop tracking after correspondance computation,"
+        " merging can be done with EddyFinalTracking",
+    )
+    parser.add_argument(
+        "--zarr", action="store_true", help="Output will be written in zarr"
+    )
+    parser.add_argument("--unraw", action="store_true", help="Load unraw data")
+    parser.add_argument(
+        "--blank_period",
+        type=int,
+        default=0,
+        help="Number of detections which will not be used at the end of the period",
+    )
+    args = parser.parse_args()
+
+    # Read yaml configuration file
+    with open(args.yaml_file, "r") as stream:
+        config = yaml_load(stream)
+    if args.correspondance_in is not None and not exists(args.correspondance_in):
+        args.correspondance_in = None
+    return (
+        config,
+        args.save_correspondance_and_stop,
+        args.correspondance_in,
+        args.correspondance_out,
+        args.blank_period,
+        args.zarr,
+        not args.unraw,
+    )
+
+
+if __name__ == "__main__":
+    (
+        CONFIG,
+        SAVE_STOP,
+        CORRESPONDANCES_IN,
+        CORRESPONDANCES_OUT,
+        BLANK_PERIOD,
+        ZARR,
+        RAW,
+    ) = usage()
+    # Create output directory
+    SAVE_DIR = CONFIG["PATHS"].get("SAVE_DIR", None)
+    if SAVE_DIR is not None and not exists(SAVE_DIR):
+        mkdir(SAVE_DIR)
+
+    YAML_CORRESPONDANCES_OUT = CONFIG["PATHS"].get("CORRESPONDANCES_OUT", None)
+    if CORRESPONDANCES_IN is None:
+        CORRESPONDANCES_IN = CONFIG["PATHS"].get("CORRESPONDANCES_IN", None)
+    if CORRESPONDANCES_OUT is None:
+        CORRESPONDANCES_OUT = YAML_CORRESPONDANCES_OUT
+    if YAML_CORRESPONDANCES_OUT is None and CORRESPONDANCES_OUT is None:
+        CORRESPONDANCES_OUT = "{path}/{sign_type}_correspondances.nc"
+
+    CLASS = None
+    if "CLASS" in CONFIG:
+        CLASSNAME = CONFIG["CLASS"]["CLASS"]
+        CLASS = getattr(
+            __import__(CONFIG["CLASS"]["MODULE"], globals(), locals(), CLASSNAME),
+            CLASSNAME,
+        )
+
+    NB_VIRTUAL_OBS_MAX_BY_SEGMENT = int(CONFIG.get("VIRTUAL_LENGTH_MAX", 0))
+
+    if isinstance(CONFIG["PATHS"]["FILES_PATTERN"], list):
+        DATASET_LIST = browse_dataset_in(
+            data_dir=None,
+            files_model=None,
+            files=CONFIG["PATHS"]["FILES_PATTERN"],
+            date_regexp=".*_([0-9]*?).[nz].*",
+            date_model="%Y%m%d%H",
+        )
+    else:
+        DATASET_LIST = browse_dataset_in(
+            data_dir=dirname(CONFIG["PATHS"]["FILES_PATTERN"]),
+            files_model=basename(CONFIG["PATHS"]["FILES_PATTERN"]),
+            date_regexp=".*_([0-9]*?).[nz].*",
+            date_model="%Y%m%d%H",
+        )
+
+    if BLANK_PERIOD > 0:
+        DATASET_LIST = DATASET_LIST[:-BLANK_PERIOD]
+        logger.info("Last %d files will be popped", BLANK_PERIOD)
+
+    START_TIME = datetime.now()
+    logger.info("Start tracking on %d files", len(DATASET_LIST))
+
+    NB_OBS_MIN = int(CONFIG.get("TRACK_DURATION_MIN", 14))
+    if NB_OBS_MIN > len(DATASET_LIST):
+        raise Exception(
+            "Number of input files (%s) is smaller than TRACK_DURATION_MIN (%s)."
+            % (len(DATASET_LIST), NB_OBS_MIN)
+        )
+
+    CORRESPONDANCES = Correspondances(
+        datasets=DATASET_LIST["filename"],
+        virtual=NB_VIRTUAL_OBS_MAX_BY_SEGMENT,
+        class_method=CLASS,
+        previous_correspondance=CORRESPONDANCES_IN,
+    )
+    CORRESPONDANCES.track()
+    logger.info("Track finished")
+
+    logger.info("Start merging")
+    DATE_START, DATE_STOP = CORRESPONDANCES.period
+    DICT_COMPLETION = dict(
+        date_start=DATE_START,
+        date_stop=DATE_STOP,
+        date_prod=START_TIME,
+        path=SAVE_DIR,
+        sign_type=CORRESPONDANCES.current_obs.sign_legend,
+    )
+
+    CORRESPONDANCES.save(CORRESPONDANCES_OUT, DICT_COMPLETION)
+    if SAVE_STOP:
+        exit()
+
+    # Merge correspondances (skipped when we stop and store just after their computation)
+    CORRESPONDANCES.prepare_merging()
+
+    logger.info(
+        "Longest track saved has %d obs", CORRESPONDANCES.nb_obs_by_tracks.max()
+    )
+    logger.info(
+        "The mean length is %d observations before filtering",
+        CORRESPONDANCES.nb_obs_by_tracks.mean(),
+    )
+
+    CORRESPONDANCES.get_unused_data(raw_data=RAW).write_file(
+        path=SAVE_DIR, filename="%(path)s/%(sign_type)s_untracked.nc", zarr_flag=ZARR
+    )
+
+    SHORT_CORRESPONDANCES = CORRESPONDANCES._copy()
+    SHORT_CORRESPONDANCES.shorter_than(size_max=NB_OBS_MIN)
+
+    CORRESPONDANCES.longer_than(size_min=NB_OBS_MIN)
+
+    FINAL_EDDIES = CORRESPONDANCES.merge(raw_data=RAW)
+    SHORT_TRACK = SHORT_CORRESPONDANCES.merge(raw_data=RAW)
+
+    # Flag virtual observations (time == 0) and fill them by interpolation
+    if CORRESPONDANCES.virtual:
+        FINAL_EDDIES["virtual"][:] = FINAL_EDDIES["time"] == 0
+        FINAL_EDDIES.filled_by_interpolation(FINAL_EDDIES["virtual"] == 1)
+        SHORT_TRACK["virtual"][:] = SHORT_TRACK["time"] == 0
+        SHORT_TRACK.filled_by_interpolation(SHORT_TRACK["virtual"] == 1)
+
+    # Total running time
+    FULL_TIME = datetime.now() - START_TIME
+    logger.info("Mean duration by loop : %s", FULL_TIME / (len(DATASET_LIST) - 1))
+    logger.info("Duration : %s", FULL_TIME)
+
+    logger.info(
+        "Longest track saved has %d obs", CORRESPONDANCES.nb_obs_by_tracks.max()
+    )
+    logger.info(
+        "The mean length is %d observations after filtering",
+        CORRESPONDANCES.nb_obs_by_tracks.mean(),
+    )
+
+    FINAL_EDDIES.write_file(path=SAVE_DIR, zarr_flag=ZARR)
+    SHORT_TRACK.write_file(
+        filename="%(path)s/%(sign_type)s_track_too_short.nc",
+        path=SAVE_DIR,
+        zarr_flag=ZARR,
+    )
