@@ -8,9 +8,78 @@ from yaml import load as yaml_load
8
8
from py_eddy_tracker .tracking import Correspondances
9
9
from os .path import exists , dirname , basename
10
10
from os import mkdir
11
+ from re import compile as re_compile
12
+ from os .path import join as join_path
13
+ from numpy import bytes_ , empty , unique
14
+ from netCDF4 import Dataset
15
+ from datetime import datetime
16
+ from glob import glob
11
17
import logging
12
18
import datetime as dt
13
- from py_eddy_tracker .grid import browse_dataset_in
19
+
20
+
21
def browse_dataset_in(data_dir, files_model, date_regexp, date_model,
                      start_date=None, end_date=None, sub_sampling_step=1,
                      files=None):
    """Build a date-sorted table of grid files.

    The files are either given explicitly through ``files`` or found by
    globbing ``files_model`` inside ``data_dir``.  The date of each file is
    read in one of two ways, selected by the content of ``date_regexp``:

    * if it contains ``(``, it is a regular expression (prefixed with
      ``.*/``) matched against the file path; its first group is the date
      string;
    * otherwise it names a netCDF attribute, either ``"attr"`` (global
      attribute) or ``"variable:attr"`` (attribute of a variable).

    Args:
        data_dir: directory joined with ``files_model`` when ``files`` is
            None.
        files_model: glob pattern of the grid files when ``files`` is None.
        date_regexp: regular expression or attribute specification, see
            above.
        date_model: ``strptime`` format used to parse the date string.
        start_date: optional lower bound (inclusive) on the file date.
        end_date: optional upper bound (inclusive) on the file date.
        sub_sampling_step: keep one file out of ``sub_sampling_step``
            (applied after sorting, before the date filtering).
        files: optional explicit sequence of file paths; when given,
            ``data_dir`` and ``files_model`` are not used.

    Returns:
        A numpy structured array with the fields ``filename`` (``S500``)
        and ``date`` (``datetime64[D]``), sorted by date then filename.

    Raises:
        Exception: when the sorted dates are not evenly spaced.
    """
    # Compiled in every mode, exactly as before: the pattern is only used
    # when the date is extracted from the file path.
    pattern_regexp = re_compile('.*/' + date_regexp)
    if files is not None:
        filenames = bytes_(files)
    else:
        full_path = join_path(data_dir, files_model)
        logging.info('Search files : %s', full_path)
        filenames = bytes_(glob(full_path))

    dataset_list = empty(len(filenames),
                         dtype=[('filename', 'S500'),
                                ('date', 'datetime64[D]'),
                                ])
    dataset_list['filename'] = filenames

    logging.info('%s grids available', dataset_list.shape[0])
    mode_attrs = False
    if '(' not in date_regexp:
        logging.debug('Attrs date : %s', date_regexp)
        mode_attrs = date_regexp.strip().split(':')
    else:
        logging.debug('Pattern date : %s', date_regexp)

    for item in dataset_list:
        # Decode once: str() on a bytes value yields "b'...'" with
        # Python 3, which breaks patterns anchored at the end of the name.
        filename = item['filename'].decode('utf-8')
        str_date = None
        if mode_attrs:
            with Dataset(filename) as h:
                if len(mode_attrs) == 1:
                    str_date = getattr(h, mode_attrs[0])
                else:
                    str_date = getattr(h.variables[mode_attrs[0]],
                                       mode_attrs[1])
        else:
            result = pattern_regexp.match(filename)
            if result:
                str_date = result.groups()[0]

        if str_date is not None:
            item['date'] = datetime.strptime(str_date, date_model).date()
        else:
            # NOTE(review): the 'date' field of this entry keeps whatever
            # numpy.empty left in memory; confirm whether such files
            # should be dropped instead.
            logging.warning('No date found for %s', filename)

    dataset_list.sort(order=['date', 'filename'])

    # A regular time series has a single distinct step between
    # consecutive dates.
    steps = unique(dataset_list['date'][1:] - dataset_list['date'][:-1])
    if len(steps) > 1:
        raise Exception('Several days steps in grid dataset %s' % steps)

    if sub_sampling_step != 1:
        logging.info('Grid subsampling %d', sub_sampling_step)
        dataset_list = dataset_list[::sub_sampling_step]

    if start_date is not None or end_date is not None:
        if dataset_list.shape[0]:
            logging.info('Available grid from %s to %s',
                         dataset_list[0]['date'],
                         dataset_list[-1]['date'])
        logging.info('Filtering grid by time %s, %s', start_date, end_date)
        # Apply each bound only when it is provided, so that a single
        # bound never compares dates against None.
        if start_date is not None:
            dataset_list = dataset_list[dataset_list['date'] >= start_date]
        if end_date is not None:
            dataset_list = dataset_list[dataset_list['date'] <= end_date]
    return dataset_list
14
83
15
84
16
85
def usage ():
0 commit comments