diff --git a/examples/datarmor/detection_gigatl1.py b/examples/datarmor/detection_gigatl1.py new file mode 100644 index 00000000..2fec2b85 --- /dev/null +++ b/examples/datarmor/detection_gigatl1.py @@ -0,0 +1,234 @@ +from datetime import datetime, timedelta +from py_eddy_tracker import start_logger +from py_eddy_tracker.dataset.grid import UnRegularGridDataset +from numpy import zeros, arange, float +from netCDF4 import Dataset + +# for plotting +from matplotlib import pyplot as plt + +import sys,os,shutil + + +class RomsDataset(UnRegularGridDataset): + + __slots__ = list() + + def init_speed_coef(self, uname, vname): + # xi_u and eta_v must be specified because this dimension are not use in lon/lat + print(self.indexs) + u = self.grid(uname, indexs=self.indexs) + v = self.grid(vname, indexs=self.indexs) + print('u.shape',u.shape) + + u = self.rho_2d(u.T).T + v = self.rho_2d(v) + self._speed_norm = (v ** 2 + u ** 2) ** .5 + + + @staticmethod + def rho_2d(x): + """Transformation to have u or v on same grid than h + """ + M, Lp = x.shape + new_x = zeros((M + 1, Lp)) + new_x[1:-1] = .5 * (x[:-1] + x[1:]) + new_x[0] = new_x[1] + new_x[-1] = new_x[-2] + return new_x + + @staticmethod + def psi2rho(var_psi): + """Transformation to have vrt on same grid than h + """ + M, L = var_psi.shape + Mp=M+1; Lp=L+1 + Mm=M-1; Lm=L-1 + var_rho = zeros((Mp, Lp)) + var_rho[1:M,1:L]=0.25*(var_psi[0:Mm,0:Lm]+var_psi[0:Mm,1:L]+var_psi[1:M,0:Lm]+var_psi[1:M,1:L]) + var_rho[0,:]=var_rho[1,:] + var_rho[Mp-1,:]=var_rho[M-1,:] + var_rho[:,0]=var_rho[:,1] + var_rho[:,Lp-1]=var_rho[:,L-1] + return var_rho + +########################## + +varname = sys.argv[1] + +print("varname id %s", varname) + +#varname = 'zeta' + +simul = 'gigatl6' + +########################## + +if __name__ == '__main__': + start_logger().setLevel('DEBUG') + + + + #### + # create case-specific folder + folder = varname + '/' + sys.argv[2] + '/' + + try: + os.mkdir(folder) + except OSError: + print ("Directory %s already exists" % folder) + + + # copy script in folder + shutil.copy('detection_gigatl6.py',folder) + + + + #### + # define simulation + + if 'gigatl1' in simul: + folder_simulation = '~/megatl/GIGATL1/GIGATL1_1h/SURF/final/' + elif 'gigatl6' in simul: + folder_simulation = './HIS/' + + # Pick a depth/isopycnal + + if varname == 'zeta': + s_rho = -1 + depth = 0 + elif varname == 'ow': + #isopycnal + filename = './iso/gigatl6_1h_isopycnal_section.01440.nc' + nc = Dataset(filename,'r') + isopycnals = nc.variables['isopycnal'][:] + nc.close() + + s_rho = 1 # + depth = isopycnals[s_rho] + + + + # Time loop + #for time in range(34800, 34801): + for time in range(1440, 6000): + + + # hourly data + #dtfile = 1 + # 12-hourly + dtfile = 12 + + tfile = 5*24//dtfile + + ########### + realyear_origin = datetime(2004,1,15) + date = realyear_origin + timedelta(days=float(time)*dtfile/24.) + + infiletime = time%tfile + filetime = time - time%tfile + + + # Using times: + #filename = './GIGATL6/gigatl6_1h_horizontal_section.' + '{0:05}'.format(filetime) + '.nc' + + if varname =='ow': + #filename = './GIGATL6/gigatl6_1h_horizontal_section.' + '{0:05}'.format(filetime) + '.nc' + filename = './iso/gigatl6_1h_isopycnal_section.' + '{0:05}'.format(filetime) + '.nc' + + + elif varname == 'zeta': + + + # or using dates + date1 = realyear_origin + timedelta(days=float(filetime)*dtfile/24.) 
+ date2 = date1 + timedelta(days=float(4.5)) + + if 'gigatl1' in simul: + filedate = '{0:04}'.format(date1.year)+'-'+\ + '{0:02}'.format(date1.month) + '-'+\ + '{0:02}'.format(date1.day) + + filename = folder_simulation + 'gigatl1_surf.' + filedate + '.nc' + gridname = '~/megatl/GIGATL1/gigatl1_grd.nc' + + elif 'gigatl6' in simul: + filedate = '{0:04}'.format(date1.year)+'-'+\ + '{0:02}'.format(date1.month) + '-'+\ + '{0:02}'.format(date1.day)+ '-'+\ + '{0:04}'.format(date2.year)+'-'+\ + '{0:02}'.format(date2.month) + '-' +\ + '{0:02}'.format(date2.day) + + print(filedate) + + filename = './HIS/GIGATL6_1h_inst_surf_' + filedate + '.nc' + gridname = filename + + # Identification + if varname=='zeta' and 'gigatl6' in simul: + lon_name, lat_name = 'nav_lon_rho', 'nav_lat_rho' + elif varname=='ow': + lon_name, lat_name = 'lon', 'lat' + else: + lon_name, lat_name = 'lon_rho', 'lat_rho' + + # domain grid points: x1, x2, y1, y2 + x1, x2 = 400, 1200 + y1, y2 = 400, 1200 + + x1, x2 = 0, 1600 + y1, y2 = 0, 2000 + + h = RomsDataset(filename, lon_name, lat_name, gridname = gridname, + indexs=dict(time=infiletime,time_counter=infiletime, + eta_rho=slice(y1, y2), + xi_rho=slice(x1, x2), + eta_v=slice(y1, y2-1), + xi_u=slice(x1, x2-1), + y_rho=slice(y1, y2), + x_rho=slice(x1, x2), + y_v=slice(y1, y2-1), + y_u=slice(y1, y2), + x_u=slice(x1, x2-1), + x_v=slice(x1, x2), + s_rho=s_rho) + ) + + + + # Identification + if varname=='zeta': + z_min = -2 ; z_max = 1.5; step = 0.02 + elif varname=='ow': + # ow is multiplied by 1e10 + z_min = -1; z_max = -0.01; step = 0.01 + + + a, c = h.eddy_identification(varname, 'u', 'v', date, z_min = z_min, z_max = z_max, step = step, pixel_limit=(10, 2000), shape_error=40, force_height_unit='m',force_speed_unit='m/s',vorticity_name='vrt') + + + #### + + filename = 'gigatl6_1h_' + varname + '_'+ '{0:04}'.format(depth) + '_' + + with Dataset(date.strftime(folder + 'Anticyclonic_' + filename + '%Y%m%d%H.nc'), 'w') as h: + a.to_netcdf(h) + with Dataset(date.strftime(folder + 'Cyclonic_' + filename + '%Y%m%d%H.nc'), 'w') as h: + c.to_netcdf(h) + + # PLOT + + + fig = plt.figure(figsize=(15,7)) + ax = fig.add_axes([.03,.03,.94,.94]) + ax.set_title('Eddies detected -- Cyclonic(red) and Anticyclonic(blue)') + #ax.set_ylim(-75,75) + ax.set_xlim(250,360) + ax.set_aspect('equal') + a.display(ax, color='b', linewidth=.5) + c.display(ax, color='r', linewidth=.5) + ax.grid() + fig.savefig(folder + 'eddies_' + date.strftime( filename + '%Y%m%d%H') +'.png') + + diff --git a/examples/datarmor/detection_gigatl6.py b/examples/datarmor/detection_gigatl6.py new file mode 100644 index 00000000..54b108e5 --- /dev/null +++ b/examples/datarmor/detection_gigatl6.py @@ -0,0 +1,211 @@ +from datetime import datetime, timedelta +from py_eddy_tracker import start_logger +from py_eddy_tracker.dataset.grid import UnRegularGridDataset +from numpy import zeros, arange, float +from netCDF4 import Dataset + +# for plotting +from matplotlib import pyplot as plt + +import sys,os,shutil + + +class RomsDataset(UnRegularGridDataset): + + __slots__ = list() + + def init_speed_coef(self, uname, vname): + # xi_u and eta_v must be specified because this dimension are not use in lon/lat + print(self.indexs) + u = self.grid(uname, indexs=self.indexs) + v = self.grid(vname, indexs=self.indexs) + print('u.shape',u.shape) + + u = self.rho_2d(u.T).T + v = self.rho_2d(v) + self._speed_norm = (v ** 2 + u ** 2) ** .5 + + + @staticmethod + def rho_2d(x): + """Transformation to have u or v on same grid than h + """ + M, Lp = x.shape 
+ new_x = zeros((M + 1, Lp)) + new_x[1:-1] = .5 * (x[:-1] + x[1:]) + new_x[0] = new_x[1] + new_x[-1] = new_x[-2] + return new_x + + @staticmethod + def psi2rho(var_psi): + """Transformation to have vrt on same grid than h + """ + M, L = var_psi.shape + Mp=M+1; Lp=L+1 + Mm=M-1; Lm=L-1 + var_rho = zeros((Mp, Lp)) + var_rho[1:M,1:L]=0.25*(var_psi[0:Mm,0:Lm]+var_psi[0:Mm,1:L]+var_psi[1:M,0:Lm]+var_psi[1:M,1:L]) + var_rho[0,:]=var_rho[1,:] + var_rho[Mp-1,:]=var_rho[M-1,:] + var_rho[:,0]=var_rho[:,1] + var_rho[:,Lp-1]=var_rho[:,L-1] + return var_rho + +########################## + +varname = sys.argv[1] + +print("varname id %s", varname) + +#varname = 'zeta' + +########################## + +if __name__ == '__main__': + start_logger().setLevel('DEBUG') + + + + #### + # create case-specific folder + folder = varname + '/' + sys.argv[2] + '/' + + try: + os.mkdir(folder) + except OSError: + print ("Directory %s already exists" % folder) + + + # copy script in folder + shutil.copy('detection_gigatl6.py',folder) + + + # Pick a depth/isopycnal + + if varname == 'zeta': + s_rho = -1 + depth = 0 + elif varname == 'ow': + #isopycnal + grid_name = './iso/gigatl6_1h_isopycnal_section.01440.nc' + nc = Dataset(grid_name,'r') + isopycnals = nc.variables['isopycnal'][:] + nc.close() + + s_rho = 1 # + depth = isopycnals[s_rho] + + + + # Time loop + #for time in range(34800, 34801): + for time in range(1440, 6000): + + + # hourly data + #dtfile = 1 + # 12-hourly + dtfile = 12 + + tfile = 5*24//dtfile + + ########### + realyear_origin = datetime(2004,1,15) + date = realyear_origin + timedelta(days=float(time)*dtfile/24.) + + infiletime = time%tfile + filetime = time - time%tfile + + + # Using times: + #grid_name = './GIGATL6/gigatl6_1h_horizontal_section.' + '{0:05}'.format(filetime) + '.nc' + + if varname =='ow': + #grid_name = './GIGATL6/gigatl6_1h_horizontal_section.' + '{0:05}'.format(filetime) + '.nc' + grid_name = './iso/gigatl6_1h_isopycnal_section.' + '{0:05}'.format(filetime) + '.nc' + + + elif varname == 'zeta': + + # or using dates + date1 = realyear_origin + timedelta(days=float(filetime)*dtfile/24.) 
+ date2 = date1 + timedelta(days=float(4.5)) + + filedate = '{0:04}'.format(date1.year)+'-'+\ + '{0:02}'.format(date1.month) + '-'+\ + '{0:02}'.format(date1.day)+ '-'+\ + '{0:04}'.format(date2.year)+'-'+\ + '{0:02}'.format(date2.month) + '-' +\ + '{0:02}'.format(date2.day) + + print(filedate) + + grid_name = './HIS/GIGATL6_1h_inst_surf_' + filedate + '.nc' + + + # Identification + if varname=='zeta': + lon_name, lat_name = 'nav_lon_rho', 'nav_lat_rho' + elif varname=='ow': + lon_name, lat_name = 'lon', 'lat' + + # domain grid points: x1, x2, y1, y2 + x1, x2 = 400, 1200 + y1, y2 = 400, 1200 + + x1, x2 = 0, 1600 + y1, y2 = 0, 2000 + + h = RomsDataset(grid_name, lon_name, lat_name, + indexs=dict(time=infiletime,time_counter=infiletime, + eta_rho=slice(y1, y2), + xi_rho=slice(x1, x2), + eta_v=slice(y1, y2-1), + xi_u=slice(x1, x2-1), + y_rho=slice(y1, y2), + x_rho=slice(x1, x2), + y_v=slice(y1, y2-1), + y_u=slice(y1, y2), + x_u=slice(x1, x2-1), + x_v=slice(x1, x2), + s_rho=s_rho) + ) + + + + # Identification + if varname=='zeta': + z_min = -2 ; z_max = 1.5; step = 0.02 + elif varname=='ow': + # ow is multiplied by 1e10 + z_min = -1; z_max = -0.01; step = 0.01 + + + a, c = h.eddy_identification(varname, 'u', 'v', date, z_min = z_min, z_max = z_max, step = step, pixel_limit=(10, 2000), shape_error=40, force_height_unit='m',force_speed_unit='m/s',vorticity_name='vrt') + + + #### + + filename = 'gigatl6_1h_' + varname + '_'+ '{0:04}'.format(depth) + '_' + + with Dataset(date.strftime(folder + 'Anticyclonic_' + filename + '%Y%m%d%H.nc'), 'w') as h: + a.to_netcdf(h) + with Dataset(date.strftime(folder + 'Cyclonic_' + filename + '%Y%m%d%H.nc'), 'w') as h: + c.to_netcdf(h) + + # PLOT + + + fig = plt.figure(figsize=(15,7)) + ax = fig.add_axes([.03,.03,.94,.94]) + ax.set_title('Eddies detected -- Cyclonic(red) and Anticyclonic(blue)') + #ax.set_ylim(-75,75) + ax.set_xlim(250,360) + ax.set_aspect('equal') + a.display(ax, color='b', linewidth=.5) + c.display(ax, color='r', linewidth=.5) + ax.grid() + fig.savefig(folder + 'eddies_' + date.strftime( filename + '%Y%m%d%H') +'.png') + + diff --git a/examples/datarmor/run_detection_gigatl6.pbs b/examples/datarmor/run_detection_gigatl6.pbs new file mode 100755 index 00000000..b969254d --- /dev/null +++ b/examples/datarmor/run_detection_gigatl6.pbs @@ -0,0 +1,47 @@ +#!/bin/bash +#PBS -q omp +#PBS -l ncpus=2 +#PBS -l mem=65g +#PBS -l walltime=24:00:00 +# COMMENT +# Tina ODAKA 06.03.2017 +# example for using datarmor with 'queue omp' in a personalised way +# By default, if you use 'omp' it use 1 node with 2 openmp threads +# here we show you example of it using 10 openmp threads (specify ncpus=10) +# and it use 10g of memory in this example (specifly mem=10g) +# and you can run max for 30 minutes with walltime setting +# To optimised multi thread, you need to place a command 'omplace' before your 'exe' +# Here, in this example, we suppose that your code 'exe' is compiled with +# intel compiler. 
+# for the users who use the gnu compiler, please use omp.10g.gnu.pbs
+#
+# cd to the directory from which you submitted your job
+
+cd $PBS_O_WORKDIR
+qstat -f $PBS_JOBID
+
+#####################
+
+
+
+source /usr/share/Modules/3.2.10/init/bash
+module load anaconda-py3.6/4.7.12
+
+#####################
+
+#export PYTHONPATH=$HOME/Python_Modules_p3
+#setenv PYTHONPATH $HOME/Python_Modules_p3
+source activate eddy-track
+
+
+
+#####################
+run='run00'
+variable='ow'
+
+mkdir $variable/$run
+
+python detection_gigatl6.py $variable $run >& $variable/$run/output_detection_$run
+
+
+
diff --git a/src/py_eddy_tracker/__init__.py b/src/py_eddy_tracker/__init__.py
index 46946e77..c1ec4a38 100644
--- a/src/py_eddy_tracker/__init__.py
+++ b/src/py_eddy_tracker/__init__.py
@@ -113,7 +113,7 @@ def parse_args(self, *args, **kwargs):
         attr_name="time",
         nc_name="time",
         old_nc_name=["j1"],
-        nc_type="int32",
+        nc_type="float32",
         nc_dims=("obs",),
         nc_attr=dict(
             standard_name="time",
diff --git a/src/py_eddy_tracker/appli/eddies.py b/src/py_eddy_tracker/appli/eddies.py
index d5a727f9..e1b195ff 100644
--- a/src/py_eddy_tracker/appli/eddies.py
+++ b/src/py_eddy_tracker/appli/eddies.py
@@ -12,10 +12,12 @@
 from re import compile as re_compile

 from netCDF4 import Dataset
+from datetime import datetime
 from numpy import bincount, bytes_, empty, in1d, unique
+from numpy import dtype as npdtype
 from yaml import safe_load

-from .. import EddyParser
+from .. import EddyParser, identify_time
 from ..observations.observation import EddiesObservations, reverse_index
 from ..observations.tracking import TrackEddiesObservations
 from ..tracking import Correspondances
@@ -163,7 +165,12 @@ def eddies_tracking():
     parser.add_argument(
         "--zarr", action="store_true", help="Output will be wrote in zarr"
     )
-    parser.add_argument("--unraw", action="store_true", help="Load unraw data")
+    parser.add_argument(
+        "--unraw",
+        action="store_true",
+        help="Load unraw data, use only for netcdf. "
+ "If unraw is active, netcdf is loaded without apply scalefactor and add_offset.", + ) parser.add_argument( "--blank_period", type=int, @@ -241,7 +248,7 @@ def browse_dataset_in( len(filenames), dtype=[ ("filename", "S500"), - ("date", "datetime64[D]"), + ("datetime", npdtype('M8[us]')), ], ) dataset_list["filename"] = filenames @@ -249,10 +256,10 @@ def browse_dataset_in( logger.info("%s grids available", dataset_list.shape[0]) mode_attrs = False if "(" not in date_regexp: - logger.debug("Attrs date : %s", date_regexp) + logger.debug("Attrs datetime : %s", date_regexp) mode_attrs = date_regexp.strip().split(":") else: - logger.debug("Pattern date : %s", date_regexp) + logger.debug("Pattern datetime : %s", date_regexp) for item in dataset_list: str_date = None @@ -268,13 +275,14 @@ def browse_dataset_in( str_date = result.groups()[0] if str_date is not None: - item["date"] = datetime.strptime(str_date, date_model).date() + item["datetime"] = datetime.strptime(str_date, date_model) - dataset_list.sort(order=["date", "filename"]) + dataset_list.sort(order=["datetime", "filename"]) - steps = unique(dataset_list["date"][1:] - dataset_list["date"][:-1]) + steps = unique(dataset_list["datetime"][1:] - dataset_list["datetime"][:-1]) if len(steps) > 1: - raise Exception("Several days steps in grid dataset %s" % steps) + print("Several days steps in grid dataset %s" % steps) + #raise Exception("Several days steps in grid dataset %s" % steps) if sub_sampling_step != 1: logger.info("Grid subsampling %d", sub_sampling_step) @@ -283,11 +291,11 @@ def browse_dataset_in( if start_date is not None or end_date is not None: logger.info( "Available grid from %s to %s", - dataset_list[0]["date"], - dataset_list[-1]["date"], + dataset_list[0]["datetime"], + dataset_list[-1]["datetime"], ) logger.info("Filtering grid by time %s, %s", start_date, end_date) - mask = (dataset_list["date"] >= start_date) * (dataset_list["date"] <= end_date) + mask = (dataset_list["datetime"] >= start_date) * (dataset_list["datetime"] <= end_date) dataset_list = dataset_list[mask] return dataset_list @@ -304,7 +312,7 @@ def track( correspondances_only=False, **kw_c, ): - kw = dict(date_regexp=".*_([0-9]*?).[nz].*", date_model="%Y%m%d") + kw = dict(date_regexp=".*_([0-9]*?).[nz].*", date_model="%Y%m%d%H") if isinstance(pattern, list): kw.update(dict(data_dir=None, files_model=None, files=pattern)) else: @@ -351,11 +359,13 @@ def track( short_c = c._copy() short_c.shorter_than(size_max=nb_obs_min) - c.longer_than(size_min=nb_obs_min) - - long_track = c.merge(raw_data=raw) short_track = short_c.merge(raw_data=raw) + if c.longer_than(size_min=nb_obs_min) is False: + long_track = short_track.empty_dataset() + else: + long_track = c.merge(raw_data=raw) + # We flag obs if c.virtual: long_track["virtual"][:] = long_track["time"] == 0 diff --git a/src/py_eddy_tracker/dataset/grid.py b/src/py_eddy_tracker/dataset/grid.py index 11227475..41a638f1 100644 --- a/src/py_eddy_tracker/dataset/grid.py +++ b/src/py_eddy_tracker/dataset/grid.py @@ -39,6 +39,9 @@ pi, radians, round_, + exp, + nanstd, + mean as np_mean, sin, sinc, where, @@ -252,6 +255,7 @@ class GridDataset(object): "y_dim", "coordinates", "filename", + "gridname", "dimensions", "indexs", "variables_description", @@ -267,12 +271,13 @@ class GridDataset(object): N = 1 def __init__( - self, filename, x_name, y_name, centered=None, indexs=None, unset=False + self, filename, x_name, y_name, gridname=None, centered=None, indexs=None, unset=False ): """ :param str filename: Filename to load 
         :param str x_name: Name of longitude coordinates
         :param str y_name: Name of latitude coordinates
+        :param str gridname: Name of file containing lon,lat (filename is used if not defined)
         :param bool,None centered: Allow to know how coordinates could be used with pixel
         :param dict indexs: A dictionary that sets indexes to use for non-coordinate dimensions
         :param bool unset: Set to True to create an empty grid object without file
@@ -289,6 +294,10 @@ def __init__(
         self.centered = centered
         self.contours = None
         self.filename = filename
+        if gridname is not None:
+            self.gridname = gridname
+        else:
+            self.gridname = filename
         self.coordinates = x_name, y_name
         self.vars = dict()
         self.indexs = dict() if indexs is None else indexs
@@ -381,7 +390,8 @@ def load(self):
         Get coordinates and setup coordinates function
         """
         x_name, y_name = self.coordinates
-        with Dataset(self.filename) as h:
+        with Dataset(self.gridname) as h:
+
             self.x_dim = h.variables[x_name].dimensions
             self.y_dim = h.variables[y_name].dimensions

@@ -591,6 +601,8 @@ def eddy_identification(
         uname,
         vname,
         date,
+        z_min=None,
+        z_max=None,
         step=0.005,
         shape_error=55,
         sampling=50,
@@ -599,6 +611,10 @@
         precision=None,
         force_height_unit=None,
         force_speed_unit=None,
+        vorticity_name='vrt',
+        mle=1,
+        filtering=None,
+        filtering_scale=30,
         **kwargs,
     ):
         """
@@ -609,6 +625,8 @@
         :param str vname: Grid name of v speed component
         :param datetime.datetime date: Date to be stored in object to date data
         :param float,int step: Height between two layers in m
+        :param float z_min: Minimum value for eddy detection
+        :param float z_max: Maximum value for eddy detection
         :param float,int shape_error: Maximal error allowed for outermost contour in %
         :param int sampling: Number of points to store contours and speed profile
         :param str sampling_method: Method to resample, 'uniform' or 'visvalingam'
@@ -638,7 +658,11 @@
             self.units(grid_height) if force_height_unit is None else force_height_unit
         )
         units = UnitRegistry()
-        in_h_unit = units.parse_expression(h_units)
+        if grid_height in ['ow']:
+            in_h_unit = None
+        else:
+            in_h_unit = units.parse_expression(h_units)
+
         if in_h_unit is not None:
             factor, _ = in_h_unit.to("m").to_tuple()
             logger.info(
@@ -649,36 +673,62 @@
         if precision is not None:
             precision /= factor

-        # Get ssh grid
-        data = self.grid(grid_height).astype("f8")
+        # Get ssh or ow
+        if grid_height in ['ow']:
+            data = 1e10 * self.grid(grid_height).astype("f8")
+        else:
+            data = self.grid(grid_height).astype("f8")
+
+        if grid_height in ['ow']:
+            # Get vorticity as an additional field (to identify cyc/acyc)
+            vrt = self.grid(vorticity_name, indexs=self.indexs).astype('f8')
+            if vorticity_name == 'vrt': vrt = self.psi2rho(vrt)
+
+
         # In case of a reduced mask
         if len(data.mask.shape) == 0 and not data.mask:
             data.mask = zeros(data.shape, dtype="bool")
-        # we remove noisy data
+        data.mask[isnan(data)] = 1
+
+        if filtering is not None and grid_height in ['zeta']:
+            if filtering == 'highpass':
+                data = data - gaussian_filter(data, filtering_scale)
+            elif filtering == 'lowpass':
+                data[:] = gaussian_filter(data, filtering_scale)
+
+
+        # we remove noisy information
         if precision is not None:
             data = (data / precision).round() * precision
-        # Compute levels for ssh
-        z_min, z_max = data.min(), data.max()
-        d_z = z_max - z_min
-        data_tmp =
data[~data.mask] - epsilon = 0.001 # in % - z_min_p, z_max_p = ( - percentile(data_tmp, epsilon), - percentile(data_tmp, 100 - epsilon), - ) - d_zp = z_max_p - z_min_p - if d_z / d_zp > 2: - logger.warning( - "Maybe some extrema are present zmin %f (m) and zmax %f (m) will be replace by %f and %f", - z_min, - z_max, - z_min_p, - z_max_p, + + # Compute levels for ssh/okubo + if z_min is None or z_max is None: + if grid_height in ['ow']: + #z_min, z_max = -0.3 * 1e-9, -0.2 * 1e-9 + z_min, z_max = -0.3 * nanstd(data),-0.2 * nanstd(data) + else: + z_min, z_max = data.min(), data.max() + d_z = z_max - z_min + data_tmp = data[~data.mask] + epsilon = 0.001 # in % + z_min_p, z_max_p = ( + percentile(data_tmp, epsilon), + percentile(data_tmp, 100 - epsilon), ) - z_min, z_max = z_min_p, z_max_p - - levels = arange(z_min - z_min % step, z_max - z_max % step + 2 * step, step) - + d_zp = z_max_p - z_min_p + if d_z / d_zp > 2: + logger.warning( + "Maybe some extrema are present zmin %f (m) and zmax %f (m) will be replace by %f and %f", + z_min, + z_max, + z_min_p, + z_max_p, + ) + z_min, z_max = z_min_p, z_max_p + + logger.warning('z_min, z_max, step are: %f, %f, %f.',z_min, z_max, step) #debug + levels = arange(z_min - z_min % step, z_max - z_max % step + step, step) + print('levels used are:',levels) + # Get x and y values x, y = self.x_c, self.y_c @@ -705,7 +755,11 @@ def eddy_identification( a_and_c = list() for anticyclonic_search in [True, False]: eddies = list() - iterator = 1 if anticyclonic_search else -1 + if grid_height in ['ow']: + iterator = -1 + else: + iterator = 1 if anticyclonic_search else -1 + # Loop over each collection for coll_ind, coll in enumerate(self.contours.iter(step=iterator)): @@ -747,11 +801,18 @@ def eddy_identification( contour.reject = 2 continue - # Test of the rotating sense: cyclone or anticyclone - if has_value( + # Test to know cyclone or anticyclone + if grid_height in ['ow']: + #acyc_not_cyc = vrt[i_x, i_y] <= 0 + if has_value( + vrt, i_x_in, i_y_in, 0, below=not anticyclonic_search + ): + continue + else: + if has_value( data, i_x_in, i_y_in, cvalues, below=anticyclonic_search ): - continue + continue # Test the number of pixels within the outermost contour # FIXME : Maybe limit max must be replaced with a maximum of surface @@ -772,7 +833,8 @@ def eddy_identification( data, anticyclonic_search=anticyclonic_search, level=self.contours.levels[corrected_coll_index], - interval=step, + interval=step, grid_height=grid_height, + mle=mle, **kwargs, ) # If we have a valid amplitude @@ -994,7 +1056,14 @@ def _gaussian_filter(data, sigma, mode="reflect"): @staticmethod def get_amplitude( - contour, contour_height, data, anticyclonic_search=True, level=None, **kwargs + contour, + contour_height, + data, + anticyclonic_search=True, + level=None, + grid_height='sla', + mle=1, + **kwargs ): # Instantiate Amplitude object amp = Amplitude( @@ -1004,12 +1073,21 @@ def get_amplitude( contour_height=contour_height, # All grid data=data, + # nb of allowed extrema + mle=mle, **kwargs, ) - if anticyclonic_search: - reset_centroid = amp.all_pixels_above_h0(level) + + if grid_height in ['ow']: + if anticyclonic_search: + reset_centroid = amp.all_pixels_below_h0(level,grid_height='ow') + else: + reset_centroid = amp.all_pixels_below_h0(level,grid_height='ow') else: - reset_centroid = amp.all_pixels_below_h0(level) + if anticyclonic_search: + reset_centroid = amp.all_pixels_above_h0(level) + else: + reset_centroid = amp.all_pixels_below_h0(level) return reset_centroid, amp @@ -2257,13 
+2335,26 @@ def from_netcdf_cube(cls, filename, x_name, y_name, t_name, heigth=None): @classmethod def from_netcdf_list(cls, filenames, t, x_name, y_name, indexs=None, heigth=None): new = cls() - for i, t in enumerate(t): - d = RegularGridDataset(filenames[i], x_name, y_name, indexs=indexs) + for i, _t in enumerate(t): + filename = filenames[i] + logger.debug(f"load file {i:02d}/{len(t)} t={_t} : {filename}") + d = RegularGridDataset(filename, x_name, y_name, indexs=indexs) if heigth is not None: d.add_uv(heigth) - new.datasets.append((t, d)) + new.datasets.append((_t, d)) return new + def shift_files(self, t, filename, heigth=None, **rgd_kwargs): + """Add next file to the list and remove the oldest""" + + self.datasets = self.datasets[1:] + + d = RegularGridDataset(filename, **rgd_kwargs) + if heigth is not None: + d.add_uv(heigth) + self.datasets.append((t, d)) + logger.debug(f"shift and adding i={len(self.datasets)} t={t} : {filename}") + def interp(self, grid_name, t, lons, lats, method="bilinear"): """ Compute z over lons, lats @@ -2423,6 +2514,7 @@ def advect( else: mask_particule += isnan(x) + isnan(y) while True: + logger.debug(f"advect : t={t}") if (backward and t <= t1) or (not backward and t >= t1): t0, u0, v0, m0 = t1, u1, v1, m1 t1, d1 = generator.__next__() @@ -2449,25 +2541,21 @@ def advect( yield t, x, y def get_next_time_step(self, t_init): - first = True for i, (t, dataset) in enumerate(self.datasets): if t < t_init: continue - if first: - first = False - yield self.datasets[i - 1] + + logger.debug(f"i={i}, t={t}, dataset={dataset}") yield t, dataset def get_previous_time_step(self, t_init): - first = True i = len(self.datasets) for t, dataset in reversed(self.datasets): i -= 1 if t > t_init: continue - if first: - first = False - yield self.datasets[i + 1] + + logger.debug(f"i={i}, t={t}, dataset={dataset}") yield t, dataset @@ -2541,6 +2629,11 @@ def get_uv_quad(i0, j0, u, v, m, nb_x=0): i1, j1 = i0 + 1, j0 + 1 if nb_x != 0: i1 %= nb_x + i_max, j_max = m.shape + + if i1 >= i_max or j1 >= j_max: + return True, nan, nan, nan, nan, nan, nan, nan, nan + if m[i0, j0] or m[i0, j1] or m[i1, j0] or m[i1, j1]: return True, nan, nan, nan, nan, nan, nan, nan, nan # Extract value for u and v diff --git a/src/py_eddy_tracker/eddy_feature.py b/src/py_eddy_tracker/eddy_feature.py index 037beb35..5c4760b3 100644 --- a/src/py_eddy_tracker/eddy_feature.py +++ b/src/py_eddy_tracker/eddy_feature.py @@ -65,7 +65,7 @@ def __init__( :param float contour_height: :param array data: :param float interval: - :param int mle: maximum number of local maxima in contour + :param int mle: maximum number of local extrema in contour :param int nb_step_min: number of intervals to consider an eddy :param int nb_step_to_be_mle: number of intervals to be considered as an another maxima """ @@ -113,7 +113,7 @@ def within_amplitude_limits(self): """Need update""" return self.interval_min <= self.amplitude - def all_pixels_below_h0(self, level): + def all_pixels_below_h0(self, level, grid_height='sla'): """ Check CSS11 criterion 1: The SSH values of all of the pixels are below (above) a given SSH threshold for cyclonic (anticyclonic) @@ -148,8 +148,10 @@ def all_pixels_below_h0(self, level): (level - self.grid_extract.data[lmi_i, lmi_j]) >= self.interval_min_secondary ).sum() - if nb_real_extrema > self.mle: - return False + + if grid_height!='ow': + if nb_real_extrema > self.mle: + return False index = self.grid_extract.data[lmi_i, lmi_j].argmin() i, j = lmi_i[index], lmi_j[index] self.amplitude = 
abs(self.grid_extract.data[i, j] - self.h_0) diff --git a/src/scripts/EddyTracking b/src/scripts/EddyTracking new file mode 100644 index 00000000..ddaeae71 --- /dev/null +++ b/src/scripts/EddyTracking @@ -0,0 +1,274 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" +Track eddy with Identification file produce with EddyIdentification +""" +from py_eddy_tracker import EddyParser +from yaml import load as yaml_load +from py_eddy_tracker.tracking import Correspondances +from os.path import exists, dirname, basename +from os import mkdir +from re import compile as re_compile +from os.path import join as join_path +from numpy import bytes_, empty, unique +from numpy import dtype as npdtype +from netCDF4 import Dataset +from datetime import datetime +from glob import glob +import logging + + +logger = logging.getLogger("pet") + + +def browse_dataset_in( + data_dir, + files_model, + date_regexp, + date_model, + start_date=None, + end_date=None, + sub_sampling_step=1, + files=None, +): + pattern_regexp = re_compile(".*/" + date_regexp) + if files is not None: + filenames = bytes_(files) + else: + full_path = join_path(data_dir, files_model) + logger.info("Search files : %s", full_path) + filenames = bytes_(glob(full_path)) + + dataset_list = empty( + len(filenames), dtype=[("filename", "S500"), ("datetime", npdtype('M8[us]')),] + ) + dataset_list["filename"] = filenames + + logger.info("%s grids available", dataset_list.shape[0]) + mode_attrs = False + if "(" not in date_regexp: + logger.debug("Attrs datetime : %s", date_regexp) + mode_attrs = date_regexp.strip().split(":") + else: + logger.debug("Pattern datetime : %s", date_regexp) + + for item in dataset_list: + str_date = None + if mode_attrs: + with Dataset(item["filename"].decode("utf-8")) as h: + if len(mode_attrs) == 1: + str_date = getattr(h, mode_attrs[0]) + else: + str_date = getattr(h.variables[mode_attrs[0]], mode_attrs[1]) + else: + result = pattern_regexp.match(str(item["filename"])) + if result: + str_date = result.groups()[0] + + if str_date is not None: + #try: + # item["date"] = datetime.strptime(str_date, date_model).date() + item["datetime"] = datetime.strptime(str_date, date_model) + + dataset_list.sort(order=["datetime", "filename"]) + + steps = unique(dataset_list["datetime"][1:] - dataset_list["datetime"][:-1]) + if len(steps) > 1: + print("Several days steps in grid dataset %s" % steps) + #raise Exception("Several days steps in grid dataset %s" % steps) + + if sub_sampling_step != 1: + logger.info("Grid subsampling %d", sub_sampling_step) + dataset_list = dataset_list[::sub_sampling_step] + + if start_date is not None or end_date is not None: + logger.info( + "Available grid from %s to %s", + dataset_list[0]["datetime"], + dataset_list[-1]["datetime"], + ) + logger.info("Filtering grid by time %s, %s", start_date, end_date) + mask = (dataset_list["datetime"] >= start_date) * (dataset_list["datetime"] <= end_date) + + dataset_list = dataset_list[mask] + return dataset_list + + +def usage(): + """Usage + """ + # Run using: + parser = EddyParser("Tool to use identification step to compute tracking") + parser.add_argument("yaml_file", help="Yaml file to configure py-eddy-tracker") + parser.add_argument("--correspondance_in", help="Filename of saved correspondance") + parser.add_argument("--correspondance_out", help="Filename to save correspondance") + parser.add_argument( + "--save_correspondance_and_stop", + action="store_true", + help="Stop tracking after correspondance computation," + " merging can be done with 
EddyFinalTracking", + ) + parser.add_argument( + "--zarr", action="store_true", help="Output will be wrote in zarr" + ) + parser.add_argument("--unraw", action="store_true", help="Load unraw data") + parser.add_argument( + "--blank_period", + type=int, + default=0, + help="Nb of detection which will not use at the end of the period", + ) + args = parser.parse_args() + + # Read yaml configuration file + with open(args.yaml_file, "r") as stream: + config = yaml_load(stream) + if args.correspondance_in is not None and not exists(args.correspondance_in): + args.correspondance_in = None + return ( + config, + args.save_correspondance_and_stop, + args.correspondance_in, + args.correspondance_out, + args.blank_period, + args.zarr, + not args.unraw, + ) + + +if __name__ == "__main__": + ( + CONFIG, + SAVE_STOP, + CORRESPONDANCES_IN, + CORRESPONDANCES_OUT, + BLANK_PERIOD, + ZARR, + RAW, + ) = usage() + # Create output directory + SAVE_DIR = CONFIG["PATHS"].get("SAVE_DIR", None) + if SAVE_DIR is not None and not exists(SAVE_DIR): + mkdir(SAVE_DIR) + + YAML_CORRESPONDANCES_OUT = CONFIG["PATHS"].get("CORRESPONDANCES_OUT", None) + if CORRESPONDANCES_IN is None: + CORRESPONDANCES_IN = CONFIG["PATHS"].get("CORRESPONDANCES_IN", None) + if CORRESPONDANCES_OUT is None: + CORRESPONDANCES_OUT = YAML_CORRESPONDANCES_OUT + if YAML_CORRESPONDANCES_OUT is None and CORRESPONDANCES_OUT is None: + CORRESPONDANCES_OUT = "{path}/{sign_type}_correspondances.nc" + + CLASS = None + if "CLASS" in CONFIG: + CLASSNAME = CONFIG["CLASS"]["CLASS"] + CLASS = getattr( + __import__(CONFIG["CLASS"]["MODULE"], globals(), locals(), CLASSNAME), + CLASSNAME, + ) + + NB_VIRTUAL_OBS_MAX_BY_SEGMENT = int(CONFIG.get("VIRTUAL_LENGTH_MAX", 0)) + + if isinstance(CONFIG["PATHS"]["FILES_PATTERN"], list): + DATASET_LIST = browse_dataset_in( + data_dir=None, + files_model=None, + files=CONFIG["PATHS"]["FILES_PATTERN"], + date_regexp=".*_([0-9]*?).[nz].*", + date_model="%Y%m%d%H", + ) + else: + DATASET_LIST = browse_dataset_in( + data_dir=dirname(CONFIG["PATHS"]["FILES_PATTERN"]), + files_model=basename(CONFIG["PATHS"]["FILES_PATTERN"]), + date_regexp=".*_([0-9]*?).[nz].*", + date_model="%Y%m%d%H", + ) + + if BLANK_PERIOD > 0: + DATASET_LIST = DATASET_LIST[:-BLANK_PERIOD] + logger.info("Last %d files will be pop", BLANK_PERIOD) + + START_TIME = datetime.now() + logger.info("Start tracking on %d files", len(DATASET_LIST)) + + NB_OBS_MIN = int(CONFIG.get("TRACK_DURATION_MIN", 14)) + if NB_OBS_MIN > len(DATASET_LIST): + raise Exception( + "Input file number (%s) is shorter than TRACK_DURATION_MIN (%s)." 
+ % (len(DATASET_LIST), NB_OBS_MIN) + ) + + CORRESPONDANCES = Correspondances( + datasets=DATASET_LIST["filename"], + virtual=NB_VIRTUAL_OBS_MAX_BY_SEGMENT, + class_method=CLASS, + previous_correspondance=CORRESPONDANCES_IN, + ) + CORRESPONDANCES.track() + logger.info("Track finish") + + logger.info("Start merging") + DATE_START, DATE_STOP = CORRESPONDANCES.period + DICT_COMPLETION = dict( + date_start=DATE_START, + date_stop=DATE_STOP, + date_prod=START_TIME, + path=SAVE_DIR, + sign_type=CORRESPONDANCES.current_obs.sign_legend, + ) + + CORRESPONDANCES.save(CORRESPONDANCES_OUT, DICT_COMPLETION) + if SAVE_STOP: + exit() + + # Merge correspondance, only do if we stop and store just after compute of correspondance + CORRESPONDANCES.prepare_merging() + + logger.info( + "Longer track saved have %d obs", CORRESPONDANCES.nb_obs_by_tracks.max() + ) + logger.info( + "The mean length is %d observations before filtering", + CORRESPONDANCES.nb_obs_by_tracks.mean(), + ) + + CORRESPONDANCES.get_unused_data(raw_data=RAW).write_file( + path=SAVE_DIR, filename="%(path)s/%(sign_type)s_untracked.nc", zarr_flag=ZARR + ) + + SHORT_CORRESPONDANCES = CORRESPONDANCES._copy() + SHORT_CORRESPONDANCES.shorter_than(size_max=NB_OBS_MIN) + + CORRESPONDANCES.longer_than(size_min=NB_OBS_MIN) + + FINAL_EDDIES = CORRESPONDANCES.merge(raw_data=RAW) + SHORT_TRACK = SHORT_CORRESPONDANCES.merge(raw_data=RAW) + + # We flag obs + if CORRESPONDANCES.virtual: + FINAL_EDDIES["virtual"][:] = FINAL_EDDIES["time"] == 0 + FINAL_EDDIES.filled_by_interpolation(FINAL_EDDIES["virtual"] == 1) + SHORT_TRACK["virtual"][:] = SHORT_TRACK["time"] == 0 + SHORT_TRACK.filled_by_interpolation(SHORT_TRACK["virtual"] == 1) + + # Total running time + FULL_TIME = datetime.now() - START_TIME + logger.info("Mean duration by loop : %s", FULL_TIME / (len(DATASET_LIST) - 1)) + logger.info("Duration : %s", FULL_TIME) + + logger.info( + "Longer track saved have %d obs", CORRESPONDANCES.nb_obs_by_tracks.max() + ) + logger.info( + "The mean length is %d observations after filtering", + CORRESPONDANCES.nb_obs_by_tracks.mean(), + ) + + FINAL_EDDIES.write_file(path=SAVE_DIR, zarr_flag=ZARR) + SHORT_TRACK.write_file( + filename="%(path)s/%(sign_type)s_track_too_short.nc", + path=SAVE_DIR, + zarr_flag=ZARR, + ) +
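Note on how the detection and tracking pieces above fit together: the detection examples write hourly-stamped files such as Anticyclonic_gigatl6_1h_zeta_0000_<YYYYMMDDHH>.nc, and both track() and the EddyTracking script now parse those names with date_model "%Y%m%d%H" and the default date_regexp ".*_([0-9]*?).[nz].*". The minimal sketch below is illustrative only (the file name and path are hypothetical, not produced by this patch); it just shows that the default pattern recovers the hourly time stamp:

    from datetime import datetime
    from re import compile as re_compile

    date_regexp = ".*_([0-9]*?).[nz].*"   # default used by track() and EddyTracking
    date_model = "%Y%m%d%H"               # hourly stamp written by the detection scripts

    # hypothetical detection output (varname='zeta', run 'run00', depth 0000)
    filename = "zeta/run00/Anticyclonic_gigatl6_1h_zeta_0000_2004011512.nc"

    # same pattern construction as browse_dataset_in: ".*/" + date_regexp
    match = re_compile(".*/" + date_regexp).match(filename)
    assert match is not None
    print(datetime.strptime(match.groups()[0], date_model))  # 2004-01-15 12:00:00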