diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 80def41c..87b5d870 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -12,9 +12,12 @@ Changed
 ^^^^^^^
 Fixed
 ^^^^^
+- Fix ``GridCollection.get_next_time_step`` & ``get_previous_time_step``: the first yield accessed
+  ``self.datasets[i - 1]`` (or ``[i + 1]``) even when ``i == 0``, so the index went out of range unless the dataset list held extra files.
 Added
 ^^^^^
+
 [3.4.0] - 2021-03-29
 --------------------
 Changed
diff --git a/examples/16_network/pet_follow_particle.py b/examples/16_network/pet_follow_particle.py
index e5451daa..a2e72d5a 100644
--- a/examples/16_network/pet_follow_particle.py
+++ b/examples/16_network/pet_follow_particle.py
@@ -16,7 +16,6 @@
 from py_eddy_tracker.dataset.grid import GridCollection
 from py_eddy_tracker.observations.groups import particle_candidate
 from py_eddy_tracker.observations.network import NetworkObservations
-from py_eddy_tracker.poly import group_obs
 
 start_logger().setLevel("ERROR")
@@ -128,12 +127,6 @@ def update(frame):
 # ^^^^^^^^^^^^^^^^^^
 step = 1 / 60.0
-x, y = meshgrid(arange(24, 36, step), arange(31, 36, step))
-x0, y0 = x.reshape(-1), y.reshape(-1)
-# Pre-order to speed up
-_, i = group_obs(x0, y0, 1, 360)
-x0, y0 = x0[i], y0[i]
-
 t_start, t_end = n.period
 dt = 14
@@ -141,12 +134,12 @@ def update(frame):
 # Forward run
 i_target_f, pct_target_f = -ones(shape, dtype="i4"), zeros(shape, dtype="i1")
 for t in range(t_start, t_end - dt):
-    particle_candidate(x0, y0, c, n, t, i_target_f, pct_target_f, n_days=dt)
+    particle_candidate(c, n, step, t, i_target_f, pct_target_f, n_days=dt)
 
 # Backward run
 i_target_b, pct_target_b = -ones(shape, dtype="i4"), zeros(shape, dtype="i1")
 for t in range(t_start + dt, t_end):
-    particle_candidate(x0, y0, c, n, t, i_target_b, pct_target_b, n_days=-dt)
+    particle_candidate(c, n, step, t, i_target_b, pct_target_b, n_days=-dt)
 
 # %%
 fig = plt.figure(figsize=(10, 10))
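For context, a minimal sketch of the new `particle_candidate` call convention used in the example above: the mesh of seed particles is now built internally from `step` instead of being precomputed with `meshgrid`/`group_obs`. File paths and the variable names `longitude`, `latitude` and `adt` are hypothetical placeholders:

    from numpy import arange, ones, zeros
    from py_eddy_tracker.dataset.grid import GridCollection
    from py_eddy_tracker.observations.groups import particle_candidate
    from py_eddy_tracker.observations.network import NetworkObservations

    n = NetworkObservations.load_file("network.nc")  # hypothetical input file
    t_start, t_end = n.period
    dates = arange(t_start, t_end + 1)
    c = GridCollection.from_netcdf_list(
        [f"/tmp/uv_{t}.nc" for t in dates],  # hypothetical daily velocity files
        dates, "longitude", "latitude", heigth="adt",
    )

    step, dt = 1 / 60.0, 14
    shape = (n.obs.size, 2)
    i_target, pct_target = -ones(shape, dtype="i4"), zeros(shape, dtype="i1")
    for t in range(t_start, t_end - dt):
        # the particle mesh is now created inside particle_candidate
        particle_candidate(c, n, step, t, i_target, pct_target, n_days=dt)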
diff --git a/requirements.txt b/requirements.txt
index 9539c555..097e786a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
 matplotlib
 netCDF4
-numba
+numba>=0.53
 numpy
 opencv-python
 pint
diff --git a/src/py_eddy_tracker/__init__.py b/src/py_eddy_tracker/__init__.py
index d5db40f6..5cf0d59a 100644
--- a/src/py_eddy_tracker/__init__.py
+++ b/src/py_eddy_tracker/__init__.py
@@ -32,9 +32,7 @@ def start_logger():
-    FORMAT_LOG = (
-        "%(levelname)-8s %(asctime)s %(module)s.%(funcName)s :\n\t\t\t\t\t%(message)s"
-    )
+    FORMAT_LOG = "%(levelname)-8s %(asctime)s %(module)s.%(funcName)s :\n\t%(message)s"
     logger = logging.getLogger("pet")
     if len(logger.handlers) == 0:
         # set up logging to CONSOLE
diff --git a/src/py_eddy_tracker/dataset/grid.py b/src/py_eddy_tracker/dataset/grid.py
index a2237c61..6c8e332f 100644
--- a/src/py_eddy_tracker/dataset/grid.py
+++ b/src/py_eddy_tracker/dataset/grid.py
@@ -2260,11 +2260,13 @@ def from_netcdf_cube(cls, filename, x_name, y_name, t_name, heigth=None):
     @classmethod
     def from_netcdf_list(cls, filenames, t, x_name, y_name, indexs=None, heigth=None):
         new = cls()
-        for i, t in enumerate(t):
-            d = RegularGridDataset(filenames[i], x_name, y_name, indexs=indexs)
+        for i, _t in enumerate(t):
+            filename = filenames[i]
+            logger.debug(f"load file {i:02d}/{len(t)} t={_t} : {filename}")
+            d = RegularGridDataset(filename, x_name, y_name, indexs=indexs)
             if heigth is not None:
                 d.add_uv(heigth)
-            new.datasets.append((t, d))
+            new.datasets.append((_t, d))
         return new
 
     def shift_files(self, t, filename, heigth=None, **rgd_kwargs):
@@ -2276,6 +2278,7 @@ def shift_files(self, t, filename, heigth=None, **rgd_kwargs):
         if heigth is not None:
             d.add_uv(heigth)
         self.datasets.append((t, d))
+        logger.debug(f"shift files and add i={len(self.datasets)} t={t} : {filename}")
 
     def interp(self, grid_name, t, lons, lats, method="bilinear"):
         """
@@ -2436,6 +2439,7 @@ def advect(
         else:
             mask_particule += isnan(x) + isnan(y)
         while True:
+            logger.debug(f"advect : t={t}")
             if (backward and t <= t1) or (not backward and t >= t1):
                 t0, u0, v0, m0 = t1, u1, v1, m1
                 t1, d1 = generator.__next__()
@@ -2462,25 +2466,21 @@ def advect(
         yield t, x, y
 
     def get_next_time_step(self, t_init):
-        first = True
         for i, (t, dataset) in enumerate(self.datasets):
             if t < t_init:
                 continue
-            if first:
-                first = False
-                yield self.datasets[i - 1]
+
+            logger.debug(f"i={i}, t={t}, dataset={dataset}")
             yield t, dataset
 
     def get_previous_time_step(self, t_init):
-        first = True
         i = len(self.datasets)
         for t, dataset in reversed(self.datasets):
             i -= 1
             if t > t_init:
                 continue
-            if first:
-                first = False
-                yield self.datasets[i + 1]
+
+            logger.debug(f"i={i}, t={t}, dataset={dataset}")
             yield t, dataset
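A standalone sketch of the corrected generator semantics in `get_next_time_step` / `get_previous_time_step`: iteration now starts exactly at the boundary dataset, with no `i - 1` / `i + 1` access that can fall outside the list. Dummy strings stand in for real grid datasets here:

    from py_eddy_tracker.dataset.grid import GridCollection

    c = GridCollection()
    c.datasets = [(t, f"grid_{t}") for t in range(20500, 20511)]  # dummy entries

    # yields (t, dataset) for every t >= t_init, in ascending order
    assert next(c.get_next_time_step(20505))[0] == 20505
    # yields (t, dataset) for every t <= t_init, in descending order
    assert next(c.get_previous_time_step(20505))[0] == 20505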
diff --git a/src/py_eddy_tracker/observations/groups.py b/src/py_eddy_tracker/observations/groups.py
index e77c81fe..64a81a36 100644
--- a/src/py_eddy_tracker/observations/groups.py
+++ b/src/py_eddy_tracker/observations/groups.py
@@ -87,11 +87,10 @@ def advect(x, y, c, t0, n_days):
     return t, x, y
 
 
-def particle_candidate(x, y, c, eddies, t_start, i_target, pct, **kwargs):
+def particle_candidate(c, eddies, step_mesh, t_start, i_target, pct, **kwargs):
     """Select particles within eddies, advect them, return target observation and associated percentages
 
-    :param np.array(float) x: longitude of particles
-    :param np.array(float) y: latitude of particles
     :param `~py_eddy_tracker.dataset.grid.GridCollection` c: GridCollection with speed for particles
     :param GroupEddiesObservations eddies: GroupEddiesObservations considered
+    :param float step_mesh: step in degrees of the particle mesh built inside each eddy
     :param int t_start: julian day of the advection
@@ -102,24 +101,26 @@
     """
     # Obs from initial time
     m_start = eddies.time == t_start
-
     e = eddies.extract_with_mask(m_start)
+    # to be able to get global index
     translate_start = where(m_start)[0]
-    # Identify particle in eddies (only in core)
-    i_start = e.contains(x, y, intern=True)
-    m = i_start != -1
-    x, y, i_start = x[m], y[m], i_start[m]
-    # Advect
+    x, y, i_start = e.create_particles(step_mesh)
+
+    # Advection
     t_end, x, y = advect(x, y, c, t_start, **kwargs)
+
+    # eddies at last date
     m_end = eddies.time == t_end / 86400
     e_end = eddies.extract_with_mask(m_end)
+
+    # to be able to get global index
     translate_end = where(m_end)[0]
+
+    # Identify eddies for each alive particle (in core and extern)
     i_end = e_end.contains(x, y)
+
+    # compute matrix and fill target array
     get_matrix(i_start, i_end, translate_start, translate_end, i_target, pct)
diff --git a/src/py_eddy_tracker/observations/network.py b/src/py_eddy_tracker/observations/network.py
index a8b1ebc0..cb6d3986 100644
--- a/src/py_eddy_tracker/observations/network.py
+++ b/src/py_eddy_tracker/observations/network.py
@@ -3,11 +3,10 @@
 Class to create network of observations
 """
 import logging
+import time
 from glob import glob
 
-import zarr
 from numba import njit
-from numba import types as nb_types
 from numpy import (
     arange,
     array,
@@ -16,7 +15,6 @@
     concatenate,
     empty,
     in1d,
-    meshgrid,
     ones,
     uint16,
     uint32,
@@ -27,7 +25,7 @@
 from ..dataset.grid import GridCollection
 from ..generic import build_index, wrap_longitude
-from ..poly import bbox_intersection, group_obs, vertice_overlap
+from ..poly import bbox_intersection, vertice_overlap
 from .groups import GroupEddiesObservations, get_missing_indices, particle_candidate
 from .observation import EddiesObservations
 from .tracking import TrackEddiesObservations, track_loess_filter, track_median_filter
@@ -1261,15 +1259,14 @@ def extract_segment(self, segments, absolute=False):
             mask[i] = False
         return self.extract_with_mask(mask)
 
-    def extract_with_period(self, period):
+    def get_mask_with_period(self, period):
         """
-        Extract within a time period
+        Obtain a mask of the observations within a time period
 
         :param (int,int) period: two dates to define the period, must be specified from 1/1/1950
-        :return: Return all eddy trajectories in period
-        :rtype: NetworkObservations
+        :return: mask of the observations within the period
+        :rtype: np.array(bool)
 
-        .. minigallery:: py_eddy_tracker.NetworkObservations.extract_with_period
         """
         dataset_period = self.period
         p_min, p_max = period
@@ -1283,7 +1280,57 @@
             mask *= self.time <= p_max
         elif p_max < 0:
             mask *= self.time <= (dataset_period[1] + p_max)
-        return self.extract_with_mask(mask)
+        return mask
+
+    def extract_with_period(self, period):
+        """
+        Extract within a time period
+
+        :param (int,int) period: two dates to define the period, must be specified from 1/1/1950
+        :return: all eddy trajectories in the period
+        :rtype: NetworkObservations
+
+        .. minigallery:: py_eddy_tracker.NetworkObservations.extract_with_period
+        """
+        return self.extract_with_mask(self.get_mask_with_period(period))
+
+    def extract_light_with_mask(self, mask):
+        """Extract data with a mask, but keep only the variables used for coherence, i.e. self.array_variables
+
+        :param mask: mask used to extract
+        :type mask: np.array(bool)
+        :return: new observations with only the wanted variables
+        :rtype: self
+        """
+        if isinstance(mask, slice):
+            nb_obs = mask.stop - mask.start
+        else:
+            nb_obs = mask.sum()
+
+        # only time & contour_lon/lat_e/s
+        variables = ["time"] + self.array_variables
+        new = self.__class__(
+            size=nb_obs,
+            track_extra_variables=[],
+            track_array_variables=self.track_array_variables,
+            array_variables=self.array_variables,
+            only_variables=variables,
+            raw_data=self.raw_data,
+        )
+        new.sign_type = self.sign_type
+        if nb_obs == 0:
+            logger.warning("Empty dataset will be created")
+        else:
+            logger.info(
+                f"{nb_obs} observations will be extracted ({nb_obs / self.shape[0]:.3%})"
+            )
+
+        for field in variables:
+            logger.debug("Copy of field %s ...", field)
+            new.obs[field] = self.obs[field][mask]
+        return new
 
     def extract_with_mask(self, mask):
         """
@@ -1302,7 +1349,7 @@
         if nb_obs == 0:
             logger.warning("Empty dataset will be created")
         else:
-            logger.info(
+            logger.debug(
                 f"{nb_obs} observations will be extracted ({nb_obs / self.shape[0]:.3%})"
            )
         for field in self.obs.dtype.descr:
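The split above makes period selection composable: `get_mask_with_period` returns a boolean mask that can be combined with other criteria before extraction, and `extract_light_with_mask` keeps only `time` plus the contour variables needed for coherence. A short sketch, where `n` is a `NetworkObservations` and the period bounds are arbitrary examples:

    # boolean mask over observations, reusable with extra criteria
    mask = n.get_mask_with_period((20500, 20560))
    mask &= n.track == 1  # hypothetical additional criterion

    sub = n.extract_with_mask(mask)          # full extraction, all variables
    light = n.extract_light_with_mask(mask)  # only time + contour variables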
@@ -1357,28 +1404,20 @@ def analysis_coherence(
 
         return network_clean, res
 
-    def segment_coherence(
-        self,
-        date_function,
-        uv_params,
-        advection_mode="both",
-        dt_advect=14,
-        step_mesh=1.0 / 50,
-        output_name=None,
+    def segment_coherence_backward(
+        self, date_function, uv_params, n_days=14, step_mesh=1.0 / 50, output_name=None,
     ):
         """
-        Percentage of particules and their targets after forward or/and backward advection from a specific eddy.
+        Percentage of particles and their targets after backward advection from a specific eddy.
 
         :param callable date_function: python function, takes as param `int` (julian day) and return
             data filename associated to the date (see note)
         :param dict uv_params: dict of parameters used by
             :py:meth:`~py_eddy_tracker.dataset.grid.GridCollection.from_netcdf_list`
-        :param str advection_mode: "backward", "forward" or "both"
-        :param int dt_advect: days for advection
+        :param int n_days: days for advection
         :param float step_mesh: step for particule mesh in degrees
-        :param str output_name: if not None, name of file saved in zarr. Else, data will not be saved
-        :return: list of 2 or 4 array (depending if forward, backward or both) with segment matchs, and percents
+        :return: observation matches and percents
 
         .. note:: the param `date_function` should be something like :
 
             .. code-block:: python
 
                 def date2file(julian_day):
                     date = datetime.timedelta(days=julian_day) + datetime.datetime(
                         1950, 1, 1
                     )
 
-                    return f"/tmp/dt_global_allsat_phy_l4_{date.strftime('%Y%m%d')}.nc"
-
+                    return f"/tmp/dt_global_{date.strftime('%Y%m%d')}.nc"
         """
-        if advection_mode in ["both", "forward"]:
-            itf_final = -ones((self.obs.size, 2), dtype="i4")
-            ptf_final = zeros((self.obs.size, 2), dtype="i1")
-
-        if advection_mode in ["both", "backward"]:
-            itb_final = -ones((self.obs.size, 2), dtype="i4")
-            ptb_final = zeros((self.obs.size, 2), dtype="i1")
+        itb_final = -ones((self.obs.size, 2), dtype="i4")
+        ptb_final = zeros((self.obs.size, 2), dtype="i1")
 
-        for slice_track, b0, _ in self.iter_on(self.track):
-            if b0 == 0:
-                continue
+        t_start, t_end = self.period
 
-            sub_networks = self.network(b0)
+        dates = arange(t_start, t_start + n_days + 1)
+        first_files = [date_function(x) for x in dates]
 
-            # find extremum to create a mesh of particles
-            lon = sub_networks.contour_lon_s
-            lonMin = lon.min() - 0.1
-            lonMax = lon.max() + 0.1
+        c = GridCollection.from_netcdf_list(first_files, dates, **uv_params)
+        first = True
+        range_start = t_start + n_days
+        range_end = t_end + 1
 
-            lat = sub_networks.contour_lat_s
-            latMin = lat.min() - 0.1
-            latMax = lat.max() + 0.1
+        for _t in range(t_start + n_days, t_end + 1):
+            _timestamp = time.time()
+            t_shift = _t
 
-            x0, y0 = meshgrid(
-                arange(lonMin, lonMax, step_mesh), arange(latMin, latMax, step_mesh)
-            )
-            x0, y0 = x0.reshape(-1), y0.reshape(-1)
-            _, i = group_obs(x0, y0, 1, 360)
-            x0, y0 = x0[i], y0[i]
-
-            t_start, t_end = sub_networks.period
-            shape = (sub_networks.obs.size, 2)
-
-            if advection_mode in ["both", "forward"]:
-
-                # first dates to load.
-                dates = arange(t_start - 1, t_start + dt_advect + 2)
-                # files associated with dates
-                first_files = [date_function(x) for x in dates]
-
-                c = GridCollection.from_netcdf_list(first_files, dates, **uv_params)
-
-                i_target_f = -ones(shape, dtype="i4")
-                pct_target_f = zeros(shape, dtype="i1")
-
-                for _t in range(t_start, t_end - dt_advect + 1):
-                    t_shift = _t + dt_advect + 2
-
-                    # add next date to GridCollection and delete last date
-                    c.shift_files(t_shift, date_function(int(t_shift)), **uv_params)
-                    particle_candidate(
-                        x0,
-                        y0,
-                        c,
-                        sub_networks,
-                        _t,
-                        i_target_f,
-                        pct_target_f,
-                        n_days=dt_advect,
-                    )
-
-                itf_final[slice_track] = i_target_f
-                ptf_final[slice_track] = pct_target_f
+            # skip first shift, because already included
+            if first:
+                first = False
+            else:
+                # add next date to GridCollection and delete last date
+                c.shift_files(t_shift, date_function(int(t_shift)), **uv_params)
+            particle_candidate(
+                c, self, step_mesh, _t, itb_final, ptb_final, n_days=-n_days
+            )
+            logger.info(
+                f"coherence {_t} / {range_end - 1}"
+                f" ({(_t - range_start) / (range_end - range_start - 1):.1%})"
+                f" : {time.time() - _timestamp:5.2f}s"
+            )
+
+        return itb_final, ptb_final
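A usage sketch for the backward run, reusing the `date2file` pattern given in the docstring; the file template and the `uv_params` contents are placeholders:

    import datetime

    def date2file(julian_day):
        date = datetime.timedelta(days=julian_day) + datetime.datetime(1950, 1, 1)
        return f"/tmp/dt_global_{date.strftime('%Y%m%d')}.nc"  # placeholder template

    uv_params = dict(x_name="longitude", y_name="latitude", heigth="adt")  # assumed names
    i_target_b, pct_target_b = n.segment_coherence_backward(
        date2file, uv_params, n_days=14, step_mesh=1.0 / 50
    )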
 
-            if advection_mode in ["both", "backward"]:
-
-                # first dates to load.
-                dates = arange(t_start - 1, t_start + dt_advect + 2)
-                # files associated with dates
-                first_files = [date_function(x) for x in dates]
-
-                c = GridCollection.from_netcdf_list(first_files, dates, **uv_params)
-
-                i_target_b = -ones(shape, dtype="i4")
-                pct_target_b = zeros(shape, dtype="i1")
-
-                for _t in range(t_start + dt_advect + 1, t_end + 1):
-                    t_shift = _t + 1
-
-                    # add next date to GridCollection and delete last date
-                    c.shift_files(t_shift, date_function(int(t_shift)), **uv_params)
-                    particle_candidate(
-                        x0,
-                        y0,
-                        c,
-                        sub_networks,
-                        _t,
-                        i_target_b,
-                        pct_target_b,
-                        n_days=-dt_advect,
-                    )
-
-                itb_final[slice_track] = i_target_b
-                ptb_final[slice_track] = pct_target_b
+    def segment_coherence_forward(
+        self, date_function, uv_params, n_days=14, step_mesh=1.0 / 50,
+    ):
+        """
+        Percentage of particles and their targets after forward advection from a specific eddy.
+
+        :param callable date_function: python function, takes as param `int` (julian day) and return
+            data filename associated to the date (see note)
+        :param dict uv_params: dict of parameters used by
+            :py:meth:`~py_eddy_tracker.dataset.grid.GridCollection.from_netcdf_list`
+        :param int n_days: days for advection
+        :param float step_mesh: step for particle mesh in degrees
+        :return: observation matches and percents
+
+        .. note:: the param `date_function` should be something like :
+
+            .. code-block:: python
+
+                def date2file(julian_day):
+                    date = datetime.timedelta(days=julian_day) + datetime.datetime(
+                        1950, 1, 1
+                    )
+
+                    return f"/tmp/dt_global_{date.strftime('%Y%m%d')}.nc"
+        """
+        itf_final = -ones((self.obs.size, 2), dtype="i4")
+        ptf_final = zeros((self.obs.size, 2), dtype="i1")
 
-        if output_name is not None:
-            zg = zarr.open(output_name, "w")
+        t_start, t_end = self.period
 
-            # zarr compression parameters
-            params_seg = dict()
-            params_pct = dict()
+        dates = arange(t_start, t_start + n_days + 1)
+        first_files = [date_function(x) for x in dates]
 
-        res = []
-        if advection_mode in ["forward", "both"]:
-            res = res + [itf_final, ptf_final]
-            if output_name is not None:
-                zg.array("i_target_forward", itf_final, **params_seg)
-                zg.array("pct_target_forward", ptf_final, **params_pct)
+        c = GridCollection.from_netcdf_list(first_files, dates, **uv_params)
+        first = True
+        range_start = t_start
+        range_end = t_end - n_days + 1
 
-        if advection_mode in ["backward", "both"]:
-            res = res + [itb_final, ptb_final]
-            if output_name is not None:
-                zg.array("i_target_backward", itb_final, **params_seg)
-                zg.array("pct_target_backward", ptb_final, **params_pct)
+        for _t in range(range_start, range_end):
+            _timestamp = time.time()
+            t_shift = _t + n_days
 
-        return res
+            # skip first shift, because already included
+            if first:
+                first = False
+            else:
+                # add next date to GridCollection and delete last date
+                c.shift_files(t_shift, date_function(int(t_shift)), **uv_params)
+            particle_candidate(
+                c, self, step_mesh, _t, itf_final, ptf_final, n_days=n_days
+            )
+            logger.info(
+                f"coherence {_t} / {range_end - 1}"
+                f" ({(_t - range_start) / (range_end - range_start - 1):.1%})"
+                f" : {time.time() - _timestamp:5.2f}s"
+            )
+
+        return itf_final, ptf_final
 
 
 class Network:
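The forward counterpart mirrors the backward call; both fill one `(n.obs.size, 2)` pair of arrays, which appear to hold up to two target observation indices per observation and the percentage of particles that reached them (`date2file` and `uv_params` as in the previous sketch):

    i_target_f, pct_target_f = n.segment_coherence_forward(
        date2file, uv_params, n_days=14, step_mesh=1.0 / 50
    )
    # -1 in i_target_f means no target was found for that slot
    print(i_target_f[:5], pct_target_f[:5])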
diff --git a/src/py_eddy_tracker/observations/observation.py b/src/py_eddy_tracker/observations/observation.py
index 0be29fe7..d969f800 100644
--- a/src/py_eddy_tracker/observations/observation.py
+++ b/src/py_eddy_tracker/observations/observation.py
@@ -69,6 +69,7 @@
     poly_indexs,
     reduce_size,
     vertice_overlap,
+    winding_number_poly,
 )
 
 logger = logging.getLogger("pet")
@@ -719,6 +720,7 @@ def load_file(cls, filename, **kwargs):
         zarr_file = filename_.endswith(end)
     else:
         zarr_file = False
+    logger.info(f"loading file '{filename}'")
     if zarr_file:
         return cls.load_from_zarr(filename, **kwargs)
     else:
@@ -2273,6 +2275,19 @@ def nb_days(self):
         """
         return self.period[1] - self.period[0] + 1
 
+    def create_particles(self, step, intern=True):
+        """Create particles only inside the speed contour, to avoid building overly large
+        numpy arrays only to be masked.
+
+        :param step: step for particles
+        :type step: float
+        :param bool intern: If true, use the speed contour instead of the effective contour
+        :return: lon, lat and indices of particles
+        :rtype: tuple(np.array)
+        """
+        xname, yname = self.intern(intern)
+        return _create_meshed_particles(self[xname], self[yname], step)
+
 
 @njit(cache=True)
 def grid_count_(grid, i, j):
@@ -2429,6 +2444,24 @@ def grid_stat(x_c, y_c, grid, x, y, result, circular=False, method="mean"):
             result[elt] = v_max
 
 
+@njit(cache=True)
+def _create_meshed_particles(lons, lats, step):
+    x_out, y_out, i_out = list(), list(), list()
+    for i, (lon, lat) in enumerate(zip(lons, lats)):
+        lon_min, lon_max = lon.min(), lon.max()
+        lat_min, lat_max = lat.min(), lat.max()
+        # snap bounds onto the particle mesh and pad the upper side
+        lon_min -= lon_min % step
+        lon_max -= lon_max % step - step * 2
+        lat_min -= lat_min % step
+        lat_max -= lat_max % step - step * 2
+
+        # keep only mesh points that fall inside the contour
+        for x in arange(lon_min, lon_max, step):
+            for y in arange(lat_min, lat_max, step):
+                if winding_number_poly(x, y, create_vertice(*reduce_size(lon, lat))):
+                    x_out.append(x), y_out.append(y), i_out.append(i)
+    return array(x_out), array(y_out), array(i_out)
+
+
 class VirtualEddiesObservations(EddiesObservations):
     """Class to work with virtual obs"""
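To make the bound snapping in `_create_meshed_particles` concrete: contour extrema are aligned on multiples of `step`, the upper bounds are padded by roughly one extra step, and mesh points are kept only when they fall inside the contour. A plain-numpy illustration for one dummy contour, where `matplotlib.path.Path` stands in for `winding_number_poly`:

    from numpy import arange, array, cos, linspace, pi, sin
    from matplotlib.path import Path

    step = 1.0 / 50
    theta = linspace(0, 2 * pi, 50)
    lon, lat = 5 + 0.3 * cos(theta), 40 + 0.2 * sin(theta)  # dummy circular contour

    # snap bounds onto the step grid, pad the upper bound
    lon_min = lon.min() - lon.min() % step
    lon_max = lon.max() - (lon.max() % step - step * 2)
    lat_min = lat.min() - lat.min() % step
    lat_max = lat.max() - (lat.max() % step - step * 2)

    inside = Path(array((lon, lat)).T)
    particles = [
        (x, y)
        for x in arange(lon_min, lon_max, step)
        for y in arange(lat_min, lat_max, step)
        if inside.contains_point((x, y))
    ]
    print(f"{len(particles)} particles seeded inside the contour")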