diff --git a/CHANGELOG.rst b/CHANGELOG.rst index c6ab4cac..110c6081 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -15,6 +15,10 @@ Changed New identifications are produced with this type, old files could still be loaded. If you use old identifications for tracking use the `--unraw` option to unpack old times and store data with the new format. - Now amplitude is stored with .1 mm of precision (instead of 1 mm), same advice as for time. +- expose more parameters to users for bash tools build_network & divide_network +- add warning when loading a file created from a previous version of py-eddy-tracker. + + Fixed ^^^^^ @@ -22,6 +26,9 @@ Fixed - Fix bug in convolution(filter), lowest rows was replace by zeros in convolution computation. Important impact for tiny kernel - Fix method of sampling before contour fitting +- Fix bug when loading dataset in zarr format, not all variables were correctly loaded +- Fix bug when zarr dataset has same size for number of observations and contour size +- Fix bug when tracking, previous_virtual_obs was not always loaded Added ^^^^^ diff --git a/examples/02_eddy_identification/pet_eddy_detection_ACC.py b/examples/02_eddy_identification/pet_eddy_detection_ACC.py index c799a45e..e6c5e381 100644 --- a/examples/02_eddy_identification/pet_eddy_detection_ACC.py +++ b/examples/02_eddy_identification/pet_eddy_detection_ACC.py @@ -65,8 +65,7 @@ def set_fancy_labels(fig, ticklabelsize=14, labelsize=14, labelweight="semibold" y_name="latitude", # Manual area subset indexs=dict( - latitude=slice(100 - margin, 220 + margin), - longitude=slice(0, 230 + margin), + latitude=slice(100 - margin, 220 + margin), longitude=slice(0, 230 + margin), ), ) g_raw = RegularGridDataset(**kw_data) @@ -188,16 +187,10 @@ def set_fancy_labels(fig, ticklabelsize=14, labelsize=14, labelweight="semibold" ax.set_ylabel("With filter") ax.plot( - a_[field][i_a] * factor, - a[field][j_a] * factor, - "r.", - label="Anticyclonic", + a_[field][i_a] * factor, a[field][j_a] * factor, "r.", label="Anticyclonic", ) ax.plot( - c_[field][i_c] * factor, - c[field][j_c] * factor, - "b.", - label="Cyclonic", + c_[field][i_c] * factor, c[field][j_c] * factor, "b.", label="Cyclonic", ) ax.set_aspect("equal"), ax.grid() ax.plot((0, 1000), (0, 1000), "g") diff --git a/examples/16_network/pet_replay_segmentation.py b/examples/16_network/pet_replay_segmentation.py index 757854d5..d6b4568b 100644 --- a/examples/16_network/pet_replay_segmentation.py +++ b/examples/16_network/pet_replay_segmentation.py @@ -149,13 +149,7 @@ def get_obs(dataset): n_.median_filter(15, "time", "latitude") kw["s"] = (n_.radius_e * 1e-3) ** 2 / 30 ** 2 * 20 m = n_.scatter_timeline( - ax, - "shape_error_e", - vmin=14, - vmax=70, - **kw, - yfield="lon", - method="all", + ax, "shape_error_e", vmin=14, vmax=70, **kw, yfield="lon", method="all", ) ax.set_ylabel("Longitude") cb = update_axes(ax, m["scatter"]) diff --git a/src/py_eddy_tracker/__init__.py b/src/py_eddy_tracker/__init__.py index f3ecec84..275bb795 100644 --- a/src/py_eddy_tracker/__init__.py +++ b/src/py_eddy_tracker/__init__.py @@ -422,20 +422,14 @@ def identify_time(str_date): nc_name="previous_cost", nc_type="float32", nc_dims=("obs",), - nc_attr=dict( - long_name="Previous cost for previous observation", - comment="", - ), + nc_attr=dict(long_name="Previous cost for previous observation", comment="",), ), next_cost=dict( attr_name=None, nc_name="next_cost", nc_type="float32", nc_dims=("obs",), - nc_attr=dict( - long_name="Next cost for next observation", - comment="", - ), + nc_attr=dict(long_name="Next cost for next observation", comment="",), ), n=dict( attr_name=None, @@ -646,8 +640,7 @@ def identify_time(str_date): nc_type="f4", nc_dims=("obs",), nc_attr=dict( - long_name="Log base 10 background chlorophyll", - units="Log(Chl/[mg/m^3])", + long_name="Log base 10 background chlorophyll", units="Log(Chl/[mg/m^3])", ), ), year=dict( diff --git a/src/py_eddy_tracker/appli/eddies.py b/src/py_eddy_tracker/appli/eddies.py index df4e7d43..4809fddf 100644 --- a/src/py_eddy_tracker/appli/eddies.py +++ b/src/py_eddy_tracker/appli/eddies.py @@ -243,8 +243,7 @@ def browse_dataset_in( filenames = bytes_(glob(full_path)) dataset_list = empty( - len(filenames), - dtype=[("filename", "S500"), ("date", "datetime64[s]")], + len(filenames), dtype=[("filename", "S500"), ("date", "datetime64[s]")], ) dataset_list["filename"] = filenames @@ -372,8 +371,7 @@ def track( logger.info("Longer track saved have %d obs", c.nb_obs_by_tracks.max()) logger.info( - "The mean length is %d observations for long track", - c.nb_obs_by_tracks.mean(), + "The mean length is %d observations for long track", c.nb_obs_by_tracks.mean(), ) long_track.write_file(**kw_write) @@ -383,14 +381,7 @@ def track( def get_group( - dataset1, - dataset2, - index1, - index2, - score, - invalid=2, - low=10, - high=60, + dataset1, dataset2, index1, index2, score, invalid=2, low=10, high=60, ): group1, group2 = dict(), dict() m_valid = (score * 100) >= invalid @@ -499,8 +490,7 @@ def get_values(v, dataset): ] labels = dict( - high=f"{high:0.0f} <= high", - low=f"{invalid:0.0f} <= low < {low:0.0f}", + high=f"{high:0.0f} <= high", low=f"{invalid:0.0f} <= low < {low:0.0f}", ) keys = [labels.get(key, key) for key in list(gr_ref.values())[0].keys()] diff --git a/src/py_eddy_tracker/appli/network.py b/src/py_eddy_tracker/appli/network.py index 5c4cdcaf..e9baa7be 100644 --- a/src/py_eddy_tracker/appli/network.py +++ b/src/py_eddy_tracker/appli/network.py @@ -21,6 +21,20 @@ def build_network(): parser.add_argument( "--window", "-w", type=int, help="Half time window to search eddy", default=1 ) + + parser.add_argument( + "--min-overlap", + "-p", + type=float, + help="minimum overlap area to associate observations", + default=0.2, + ) + parser.add_argument( + "--minimal-area", + action="store_true", + help="If True, use intersection/little polygon, else intersection/union", + ) + parser.contour_intern_arg() parser.memory_arg() @@ -32,7 +46,9 @@ def build_network(): intern=args.intern, memory=args.memory, ) - group = n.group_observations(minimal_area=True) + group = n.group_observations( + min_overlap=args.min_overlap, minimal_area=args.minimal_area + ) n.build_dataset(group).write_file(filename=args.out) @@ -44,6 +60,18 @@ def divide_network(): parser.add_argument( "--window", "-w", type=int, help="Half time window to search eddy", default=1 ) + parser.add_argument( + "--min-overlap", + "-p", + type=float, + help="minimum overlap area to associate observations", + default=0.2, + ) + parser.add_argument( + "--minimal-area", + action="store_true", + help="If True, use intersection/little polygon, else intersection/union", + ) args = parser.parse_args() contour_name = TrackEddiesObservations.intern(args.intern, public_label=True) e = TrackEddiesObservations.load_file( @@ -52,7 +80,12 @@ def divide_network(): ) n = NetworkObservations.from_split_network( TrackEddiesObservations.load_file(args.input, raw_data=True), - e.split_network(intern=args.intern, window=args.window), + e.split_network( + intern=args.intern, + window=args.window, + min_overlap=args.min_overlap, + minimal_area=args.minimal_area, + ), ) n.write_file(filename=args.out) @@ -76,9 +109,7 @@ def subset_network(): help="Remove short dead end, first is for minimal obs number and second for minimal segment time to keep", ) parser.add_argument( - "--remove_trash", - action="store_true", - help="Remove trash (network id == 0)", + "--remove_trash", action="store_true", help="Remove trash (network id == 0)", ) parser.add_argument( "-p", diff --git a/src/py_eddy_tracker/dataset/grid.py b/src/py_eddy_tracker/dataset/grid.py index 5b884b68..091d2016 100644 --- a/src/py_eddy_tracker/dataset/grid.py +++ b/src/py_eddy_tracker/dataset/grid.py @@ -858,13 +858,11 @@ def eddy_identification( xy_i = uniform_resample( inner_contour.lon, inner_contour.lat, - num_fac=presampling_multiplier - ) - xy_e = uniform_resample( - contour.lon, - contour.lat, num_fac=presampling_multiplier, ) + xy_e = uniform_resample( + contour.lon, contour.lat, num_fac=presampling_multiplier, + ) xy_s = uniform_resample( speed_contour.lon, speed_contour.lat, diff --git a/src/py_eddy_tracker/eddy_feature.py b/src/py_eddy_tracker/eddy_feature.py index d2616957..3640b306 100644 --- a/src/py_eddy_tracker/eddy_feature.py +++ b/src/py_eddy_tracker/eddy_feature.py @@ -433,8 +433,8 @@ def __init__(self, x, y, z, levels, wrap_x=False, keep_unclose=False): closed_contours = 0 # Count level and contour for i, collection in enumerate(self.contours.collections): - collection.get_nearest_path_bbox_contain_pt = ( - lambda x, y, i=i: self.get_index_nearest_path_bbox_contain_pt(i, x, y) + collection.get_nearest_path_bbox_contain_pt = lambda x, y, i=i: self.get_index_nearest_path_bbox_contain_pt( + i, x, y ) nb_level += 1 diff --git a/src/py_eddy_tracker/observations/network.py b/src/py_eddy_tracker/observations/network.py index 8f592056..1c078bf8 100644 --- a/src/py_eddy_tracker/observations/network.py +++ b/src/py_eddy_tracker/observations/network.py @@ -1301,7 +1301,7 @@ def extract_with_period(self, period): return self.extract_with_mask(self.get_mask_with_period(period)) - def extract_light_with_mask(self, mask): + def extract_light_with_mask(self, mask, track_extra_variables=[]): """extract data with mask, but only with variables used for coherence, aka self.array_variables :param mask: mask used to extract @@ -1319,7 +1319,7 @@ def extract_light_with_mask(self, mask): variables = ["time"] + self.array_variables new = self.__class__( size=nb_obs, - track_extra_variables=[], + track_extra_variables=track_extra_variables, track_array_variables=self.track_array_variables, array_variables=self.array_variables, only_variables=variables, @@ -1333,9 +1333,22 @@ def extract_light_with_mask(self, mask): f"{nb_obs} observations will be extracted ({nb_obs / self.shape[0]:.3%})" ) - for field in variables: + for field in variables + track_extra_variables: logger.debug("Copy of field %s ...", field) new.obs[field] = self.obs[field][mask] + + if ( + "previous_obs" in track_extra_variables + and "next_obs" in track_extra_variables + ): + # n & p must be re-index + n, p = self.next_obs[mask], self.previous_obs[mask] + # we add 2 for -1 index return index -1 + translate = -ones(len(self) + 1, dtype="i4") + translate[:-1][mask] = arange(nb_obs) + new.next_obs[:] = translate[n] + new.previous_obs[:] = translate[p] + return new def extract_with_mask(self, mask): @@ -1495,7 +1508,8 @@ def date2file(julian_day): t_start, t_end = int(self.period[0]), int(self.period[1]) - dates = arange(t_start, t_start + n_days + 1) + # dates = arange(t_start, t_start + n_days + 1) + dates = arange(t_start, min(t_start + n_days + 1, t_end + 1)) first_files = [date_function(x) for x in dates] c = GridCollection.from_netcdf_list(first_files, dates, **uv_params) @@ -1570,12 +1584,8 @@ def date2file(julian_day): ptf_final = zeros((self.obs.size, 2), dtype="i1") t_start, t_end = int(self.period[0]), int(self.period[1]) - # if begin is not None and begin > t_start: - # t_start = begin - # if end is not None and end < t_end: - # t_end = end - dates = arange(t_start, t_start + n_days + 1) + dates = arange(t_start, min(t_start + n_days + 1, t_end + 1)) first_files = [date_function(x) for x in dates] c = GridCollection.from_netcdf_list(first_files, dates, **uv_params) @@ -1699,7 +1709,23 @@ def group_translator(nb, duos): apply_replace(translate, gr_i, gr_j) return translate - def group_observations(self, **kwargs): + def group_observations(self, min_overlap=0.2, minimal_area=False): + """Store every interaction between identifications + + Parameters + ---------- + minimal_area : bool, optional + If True, function will compute intersection/little polygon, else intersection/union, by default False + + min_overlap : float, optional + minimum overlap area to associate observations, by default 0.2 + + Returns + ------- + TrackEddiesObservations + netcdf with interactions + """ + results, nb_obs = list(), list() # To display print only in INFO display_iteration = logger.getEffectiveLevel() == logging.INFO @@ -1713,7 +1739,12 @@ def group_observations(self, **kwargs): for j in range(i + 1, min(self.window + i + 1, self.nb_input)): xj, yj = self.buffer.load_contour(self.filenames[j]) ii, ij = bbox_intersection(xi, yi, xj, yj) - m = vertice_overlap(xi[ii], yi[ii], xj[ij], yj[ij], **kwargs) > 0.2 + m = ( + vertice_overlap( + xi[ii], yi[ii], xj[ij], yj[ij], minimal_area=minimal_area + ) + > min_overlap + ) results.append((i, j, ii[m], ij[m])) if display_iteration: print() diff --git a/src/py_eddy_tracker/observations/observation.py b/src/py_eddy_tracker/observations/observation.py index 56e0f67d..3543caa7 100644 --- a/src/py_eddy_tracker/observations/observation.py +++ b/src/py_eddy_tracker/observations/observation.py @@ -8,6 +8,7 @@ from tarfile import ExFileObject from tokenize import TokenError +import packaging import zarr from matplotlib.cm import get_cmap from matplotlib.collections import PolyCollection @@ -74,6 +75,29 @@ logger = logging.getLogger("pet") +# keep only major and minor version number +_software_version_reduced = packaging.version.Version( + "{v.major}.{v.minor}".format(v=packaging.version.parse(__version__)) +) + + +def _check_versions(version): + """Check if version of py_eddy_tracker used to create the file is compatible with software version + + if not, warn user with both versions + + :param version: string version of software used to create the file. If None, version was not provided + :type version: str, None + """ + + file_version = packaging.version.parse(version) if version is not None else None + if file_version is None or file_version < _software_version_reduced: + logger.warning( + "File was created with py-eddy-tracker version '%s' but software version is '%s'", + file_version, + _software_version_reduced, + ) + @njit(cache=True, fastmath=True) def shifted_ellipsoid_degrees_mask2(lon0, lat0, lon1, lat1, minor=1.5, major=1.5): @@ -687,10 +711,13 @@ def zarr_dimension(filename): h = filename else: h = zarr.open(filename) + dims = list() for varname in h: - dims.extend(list(getattr(h, varname).shape)) - return set(dims) + shape = getattr(h, varname).shape + if len(shape) > len(dims): + dims = shape + return dims @classmethod def load_file(cls, filename, **kwargs): @@ -702,11 +729,7 @@ def load_file(cls, filename, **kwargs): .. code-block:: python kwargs_latlon_300 = dict( - include_vars=[ - "longitude", - "latitude", - ], - indexs=dict(obs=slice(0, 300)), + include_vars=["longitude", "latitude",], indexs=dict(obs=slice(0, 300)), ) small_dataset = TrackEddiesObservations.load_file( filename, **kwargs_latlon_300 @@ -754,20 +777,19 @@ def load_from_zarr( :return type: class """ # FIXME - array_dim = -1 if isinstance(filename, zarr.storage.MutableMapping): h_zarr = filename else: if not isinstance(filename, str): filename = filename.astype(str) h_zarr = zarr.open(filename) + + _check_versions(h_zarr.attrs.get("framework_version", None)) var_list = cls.build_var_list(list(h_zarr.keys()), remove_vars, include_vars) nb_obs = getattr(h_zarr, var_list[0]).shape[0] - dims = list(cls.zarr_dimension(filename)) - if len(dims) == 2 and nb_obs in dims: - # FIXME must be investigated, in zarr no dimensions name (or could be add in attr) - array_dim = dims[1] if nb_obs == dims[0] else dims[0] + track_array_variables = h_zarr.attrs["track_array_variables"] + if indexs is not None and "obs" in indexs: sl = indexs["obs"] sl = slice(sl.start, min(sl.stop, nb_obs)) @@ -781,28 +803,33 @@ def load_from_zarr( logger.debug("%d observations will be load", nb_obs) kwargs = dict() - if array_dim in dims: - kwargs["track_array_variables"] = array_dim - kwargs["array_variables"] = list() - for variable in var_list: - if array_dim in h_zarr[variable].shape: - var_inv = VAR_DESCR_inv[variable] - kwargs["array_variables"].append(var_inv) - array_variables = kwargs.get("array_variables", list()) - kwargs["track_extra_variables"] = [] + kwargs["track_array_variables"] = h_zarr.attrs.get( + "track_array_variables", track_array_variables + ) + + array_variables = list() + for variable in var_list: + if len(h_zarr[variable].shape) > 1: + var_inv = VAR_DESCR_inv[variable] + array_variables.append(var_inv) + kwargs["array_variables"] = array_variables + track_extra_variables = [] + for variable in var_list: var_inv = VAR_DESCR_inv[variable] if var_inv not in cls.ELEMENTS and var_inv not in array_variables: - kwargs["track_extra_variables"].append(var_inv) + track_extra_variables.append(var_inv) + kwargs["track_extra_variables"] = track_extra_variables kwargs["raw_data"] = raw_data kwargs["only_variables"] = ( None if include_vars is None else [VAR_DESCR_inv[i] for i in include_vars] ) kwargs.update(class_kwargs) eddies = cls(size=nb_obs, **kwargs) - for variable in var_list: + + for i_var, variable in enumerate(var_list): var_inv = VAR_DESCR_inv[variable] - logger.debug("%s will be loaded", variable) + logger.debug("%s will be loaded (%d/%d)", variable, i_var, len(var_list)) # find unit factor input_unit = h_zarr[variable].attrs.get("unit", None) if input_unit is None: @@ -858,6 +885,7 @@ def copy_data_to_zarr( i_start = 0 if i_stop is None: i_stop = handler_zarr.shape[0] + for i in range(i_start, i_stop, buffer_size): sl_in = slice(i, min(i + buffer_size, i_stop)) data = handler_zarr[sl_in] @@ -868,6 +896,7 @@ def copy_data_to_zarr( data -= add_offset if scale_factor is not None: data /= scale_factor + sl_out = slice(i - i_start, i - i_start + buffer_size) handler_eddies[sl_out] = data @@ -901,6 +930,8 @@ def load_from_netcdf( else: args, kwargs = (filename,), dict() with Dataset(*args, **kwargs) as h_nc: + _check_versions(getattr(h_nc, "framework_version", None)) + var_list = cls.build_var_list( list(h_nc.variables.keys()), remove_vars, include_vars ) @@ -1032,6 +1063,7 @@ def from_zarr(cls, handler): eddies.obs[variable] = handler.variables[variable][:] else: eddies.obs[VAR_DESCR_inv[variable]] = handler.variables[variable][:] + eddies.sign_type = handler.rotation_type return eddies @classmethod @@ -1050,6 +1082,7 @@ def from_netcdf(cls, handler): eddies.obs[variable] = handler.variables[variable][:] else: eddies.obs[VAR_DESCR_inv[variable]] = handler.variables[variable][:] + eddies.sign_type = handler.rotation_type return eddies def propagate( @@ -1977,11 +2010,7 @@ def bins_stat(self, xname, bins=None, yname=None, method=None, mask=None): def format_label(self, label): t0, t1 = self.period - return label.format( - t0=t0, - t1=t1, - nb_obs=len(self), - ) + return label.format(t0=t0, t1=t1, nb_obs=len(self),) def display(self, ax, ref=None, extern_only=False, intern_only=False, **kwargs): """Plot the speed and effective (dashed) contour of the eddies @@ -2352,14 +2381,7 @@ def grid_count_pixel_in( x_, y_ = reduce_size(x_, y_) v = create_vertice(x_, y_) (x_start, x_stop), (y_start, y_stop) = bbox_indice_regular( - v, - x_bounds, - y_bounds, - xstep, - ystep, - N, - is_circular, - x_size, + v, x_bounds, y_bounds, xstep, ystep, N, is_circular, x_size, ) i, j = get_pixel_in_regular(v, x_c, y_c, x_start, x_stop, y_start, y_stop) grid_count_(grid, i, j) diff --git a/src/py_eddy_tracker/observations/tracking.py b/src/py_eddy_tracker/observations/tracking.py index 2914df6b..6612c6d5 100644 --- a/src/py_eddy_tracker/observations/tracking.py +++ b/src/py_eddy_tracker/observations/tracking.py @@ -657,12 +657,12 @@ def split_network(self, intern=True, **kwargs): def set_tracks(self, x, y, ids, window, **kwargs): """ - Will split one group (network) in segments + Split one group (network) in segments :param array x: coordinates of group :param array y: coordinates of group :param ndarray ids: several fields like time, group, ... - :param int windows: number of days where observations could missed + :param int window: number of days where observations could missed """ time_index = build_index((ids["time"]).astype("i4")) nb = x.shape[0] @@ -714,8 +714,8 @@ def get_previous_obs( time_e, time_ref, window, - min_overlap=0.01, - **kwargs, + min_overlap=0.2, + minimal_area=False, ): """Backward association of observations to the segments""" time_cur = int_(ids["time"][i_current]) @@ -731,7 +731,9 @@ def get_previous_obs( if len(ii) == 0: continue c = zeros(len(xj)) - c[ij] = vertice_overlap(xi[ii], yi[ii], xj[ij], yj[ij], **kwargs) + c[ij] = vertice_overlap( + xi[ii], yi[ii], xj[ij], yj[ij], minimal_area=minimal_area + ) # We remove low overlap c[c < min_overlap] = 0 # We get index of maximal overlap @@ -754,8 +756,8 @@ def get_next_obs( time_e, time_ref, window, - min_overlap=0.01, - **kwargs, + min_overlap=0.2, + minimal_area=False, ): """Forward association of observations to the segments""" time_max = time_e.shape[0] - 1 @@ -774,7 +776,9 @@ def get_next_obs( if len(ii) == 0: continue c = zeros(len(xj)) - c[ij] = vertice_overlap(xi[ii], yi[ii], xj[ij], yj[ij], **kwargs) + c[ij] = vertice_overlap( + xi[ii], yi[ii], xj[ij], yj[ij], minimal_area=minimal_area + ) # We remove low overlap c[c < min_overlap] = 0 # We get index of maximal overlap diff --git a/src/py_eddy_tracker/tracking.py b/src/py_eddy_tracker/tracking.py index 577496ff..7543a4d3 100644 --- a/src/py_eddy_tracker/tracking.py +++ b/src/py_eddy_tracker/tracking.py @@ -350,6 +350,9 @@ def load_state(self): self.virtual_obs = VirtualEddiesObservations.from_netcdf( general_handler.groups["LastVirtualObs"] ) + self.previous_virtual_obs = VirtualEddiesObservations.from_netcdf( + general_handler.groups["LastPreviousVirtualObs"] + ) # Load and last previous virtual obs to be merge with current => will be previous2_obs # TODO : Need to rethink this line ?? self.current_obs = self.current_obs.merge(