@@ -8,9 +8,78 @@ from yaml import load as yaml_load
88from py_eddy_tracker .tracking import Correspondances
99from os .path import exists , dirname , basename
1010from os import mkdir
11+ from re import compile as re_compile
12+ from os .path import join as join_path
13+ from numpy import bytes_ , empty , unique
14+ from netCDF4 import Dataset
15+ from datetime import datetime
16+ from glob import glob
1117import logging
1218import datetime as dt
13- from py_eddy_tracker .grid import browse_dataset_in
19+
20+
def browse_dataset_in(data_dir, files_model, date_regexp, date_model,
                      start_date=None, end_date=None, sub_sampling_step=1,
                      files=None):
    """Build a date-sorted catalogue of grid files.

    The files are either given explicitly through ``files`` or found by
    globbing ``join_path(data_dir, files_model)``. A date is attached to
    each file, the catalogue is sorted by date then filename, checked for a
    constant time step, optionally sub-sampled and optionally restricted to
    a time window.

    Parameters
    ----------
    data_dir, files_model :
        Directory and glob pattern, joined and globbed when ``files`` is
        None. Ignored otherwise.
    date_regexp : str
        If it contains ``'('`` it is used as a regular expression (prefixed
        with ``'.*/'``) matched against the file path; the first group is
        the date string. Otherwise it is read as ``"attr"`` (global
        attribute of the netCDF file) or ``"variable:attr"`` (attribute of
        a variable) holding the date string.
    date_model : str
        ``datetime.strptime`` format used to parse the date string.
    start_date, end_date :
        Optional inclusive bounds compared against the ``datetime64[D]``
        dates. Each bound is applied only when it is not None.
    sub_sampling_step : int
        Stride applied to the sorted catalogue (1 keeps everything).
    files : sequence of str, optional
        Explicit list of files; bypasses the glob search.

    Returns
    -------
    numpy.ndarray
        Structured array with fields ``filename`` (``S500``) and ``date``
        (``datetime64[D]``). Files for which no date could be extracted
        carry ``NaT``.

    Raises
    ------
    Exception
        If consecutive dates (before sub-sampling and filtering) are not
        separated by a single constant step.
    """
    if files is not None:
        filenames = bytes_(files)
    else:
        full_path = join_path(data_dir, files_model)
        logging.info('Search files : %s', full_path)
        filenames = bytes_(glob(full_path))

    dataset_list = empty(len(filenames),
                         dtype=[('filename', 'S500'),
                                ('date', 'datetime64[D]'),
                                ])
    dataset_list['filename'] = filenames
    # empty() leaves the memory uninitialised: start from NaT so that files
    # without a usable date are identifiable instead of carrying garbage.
    dataset_list['date'] = 'NaT'

    logging.info('%s grids available', dataset_list.shape[0])
    attr_path = None
    pattern_regexp = None
    if '(' not in date_regexp:
        logging.debug('Attrs date : %s', date_regexp)
        attr_path = date_regexp.strip().split(':')
    else:
        logging.debug('Pattern date : %s', date_regexp)
        pattern_regexp = re_compile('.*/' + date_regexp)

    for item in dataset_list:
        # Decode once: str() on bytes would yield "b'...'" under Python 3
        # and break patterns anchored at the end of the path.
        filename = item['filename'].decode('utf-8')
        if attr_path is not None:
            with Dataset(filename) as h:
                if len(attr_path) == 1:
                    str_date = getattr(h, attr_path[0])
                else:
                    str_date = getattr(h.variables[attr_path[0]],
                                       attr_path[1])
        else:
            result = pattern_regexp.match(filename)
            str_date = result.group(1) if result else None

        if str_date is None:
            logging.warning('No date found for %s', filename)
            continue
        item['date'] = datetime.strptime(str_date, date_model).date()

    dataset_list.sort(order=['date', 'filename'])

    # The time step must be constant over the whole catalogue.
    steps = unique(dataset_list['date'][1:] - dataset_list['date'][:-1])
    if len(steps) > 1:
        raise Exception('Several days steps in grid dataset %s' % steps)

    if sub_sampling_step != 1:
        logging.info('Grid subsampling %d', sub_sampling_step)
        dataset_list = dataset_list[::sub_sampling_step]

    if start_date is not None or end_date is not None:
        if len(dataset_list):
            logging.info('Available grid from %s to %s',
                         dataset_list[0]['date'],
                         dataset_list[-1]['date'])
        logging.info('Filtering grid by time %s, %s', start_date, end_date)
        # Apply each bound independently so a single-sided window works.
        if start_date is not None:
            dataset_list = dataset_list[dataset_list['date'] >= start_date]
        if end_date is not None:
            dataset_list = dataset_list[dataset_list['date'] <= end_date]
    return dataset_list
1483
1584
1685def usage ():
0 commit comments