Merged

Fixes #121

7 changes: 7 additions & 0 deletions CHANGELOG.rst
@@ -15,13 +15,20 @@ Changed
 New identifications are produced with this type; old files can still be loaded.
 If you use old identifications for tracking, use the `--unraw` option to unpack old times and store data in the new format.
 - Amplitude is now stored with 0.1 mm precision (instead of 1 mm); the same advice as for time applies.
+- Expose more parameters to users for the command-line tools `build_network` & `divide_network`
+- Add a warning when loading a file created with a previous version of py-eddy-tracker.


 Fixed
 ^^^^^

 - Fix bug in convolution (filter): the lowest rows were replaced by zeros in the convolution computation.
   This had an important impact for tiny kernels.
+- Fix method of sampling before contour fitting
+- Fix bug when loading a zarr dataset: not all variables were correctly loaded
+- Fix bug when a zarr dataset has the same size for the number of observations and the contour size
+- Fix bug in tracking: previous_virtual_obs was not always loaded

 Added
 ^^^^^
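Note on the convolution fix above: the artifact described comes from zero-filled borders. Below is a minimal, generic scipy illustration of that kind of boundary effect, not py-eddy-tracker's actual filter code.

```python
# Minimal illustration of the boundary artifact described in the changelog;
# generic scipy example, not py-eddy-tracker's filter implementation.
import numpy as np
from scipy import ndimage

field = np.ones((5, 5))
kernel = np.ones((3, 3)) / 9.0

# Zero padding pulls the edge rows/columns toward zero...
zero_padded = ndimage.convolve(field, kernel, mode="constant", cval=0.0)
# ...while replicating the nearest value keeps them intact.
replicated = ndimage.convolve(field, kernel, mode="nearest")

print(zero_padded[0])  # ~[0.44, 0.67, 0.67, 0.67, 0.44] -> damped edge row
print(replicated[0])   # [1. 1. 1. 1. 1.]
```

For a tiny kernel the damaged band is only one or two rows wide, which is exactly why the bug was easy to miss yet important near grid edges.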
13 changes: 3 additions & 10 deletions examples/02_eddy_identification/pet_eddy_detection_ACC.py
@@ -65,8 +65,7 @@ def set_fancy_labels(fig, ticklabelsize=14, labelsize=14, labelweight="semibold"
     y_name="latitude",
     # Manual area subset
     indexs=dict(
-        latitude=slice(100 - margin, 220 + margin),
-        longitude=slice(0, 230 + margin),
+        latitude=slice(100 - margin, 220 + margin), longitude=slice(0, 230 + margin),
     ),
 )
 g_raw = RegularGridDataset(**kw_data)
@@ -188,16 +187,10 @@ def set_fancy_labels(fig, ticklabelsize=14, labelsize=14, labelweight="semibold"
 ax.set_ylabel("With filter")

 ax.plot(
-    a_[field][i_a] * factor,
-    a[field][j_a] * factor,
-    "r.",
-    label="Anticyclonic",
+    a_[field][i_a] * factor, a[field][j_a] * factor, "r.", label="Anticyclonic",
 )
 ax.plot(
-    c_[field][i_c] * factor,
-    c[field][j_c] * factor,
-    "b.",
-    label="Cyclonic",
+    c_[field][i_c] * factor, c[field][j_c] * factor, "b.", label="Cyclonic",
 )
 ax.set_aspect("equal"), ax.grid()
 ax.plot((0, 1000), (0, 1000), "g")
8 changes: 1 addition & 7 deletions examples/16_network/pet_replay_segmentation.py
@@ -149,13 +149,7 @@ def get_obs(dataset):
 n_.median_filter(15, "time", "latitude")
 kw["s"] = (n_.radius_e * 1e-3) ** 2 / 30 ** 2 * 20
 m = n_.scatter_timeline(
-    ax,
-    "shape_error_e",
-    vmin=14,
-    vmax=70,
-    **kw,
-    yfield="lon",
-    method="all",
+    ax, "shape_error_e", vmin=14, vmax=70, **kw, yfield="lon", method="all",
 )
 ax.set_ylabel("Longitude")
 cb = update_axes(ax, m["scatter"])
13 changes: 3 additions & 10 deletions src/py_eddy_tracker/__init__.py
@@ -422,20 +422,14 @@ def identify_time(str_date):
         nc_name="previous_cost",
         nc_type="float32",
         nc_dims=("obs",),
-        nc_attr=dict(
-            long_name="Previous cost for previous observation",
-            comment="",
-        ),
+        nc_attr=dict(long_name="Previous cost for previous observation", comment="",),
     ),
     next_cost=dict(
         attr_name=None,
         nc_name="next_cost",
         nc_type="float32",
         nc_dims=("obs",),
-        nc_attr=dict(
-            long_name="Next cost for next observation",
-            comment="",
-        ),
+        nc_attr=dict(long_name="Next cost for next observation", comment="",),
     ),
     n=dict(
         attr_name=None,
@@ -646,8 +640,7 @@ def identify_time(str_date):
         nc_type="f4",
         nc_dims=("obs",),
         nc_attr=dict(
-            long_name="Log base 10 background chlorophyll",
-            units="Log(Chl/[mg/m^3])",
+            long_name="Log base 10 background chlorophyll", units="Log(Chl/[mg/m^3])",
         ),
     ),
     year=dict(
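The entries touched above are declarative variable specs (netCDF name, dtype, dimensions, attributes). As a hedged sketch of how such a spec could be consumed with the netCDF4 library — the project's actual writer is not part of this diff, and the file name is made up:

```python
# Illustration only: feeding one of the variable specs above into netCDF4.
import netCDF4

spec = dict(
    nc_name="previous_cost",
    nc_type="float32",
    nc_dims=("obs",),
    nc_attr=dict(long_name="Previous cost for previous observation", comment=""),
)

with netCDF4.Dataset("example.nc", "w") as ds:
    ds.createDimension("obs", 10)
    var = ds.createVariable(spec["nc_name"], spec["nc_type"], spec["nc_dims"])
    var.setncatts(spec["nc_attr"])  # attach long_name / comment attributes
```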
18 changes: 4 additions & 14 deletions src/py_eddy_tracker/appli/eddies.py
@@ -243,8 +243,7 @@ def browse_dataset_in(
     filenames = bytes_(glob(full_path))

     dataset_list = empty(
-        len(filenames),
-        dtype=[("filename", "S500"), ("date", "datetime64[s]")],
+        len(filenames), dtype=[("filename", "S500"), ("date", "datetime64[s]")],
     )
     dataset_list["filename"] = filenames

@@ -372,8 +371,7 @@ def track(

logger.info("Longer track saved have %d obs", c.nb_obs_by_tracks.max())
logger.info(
"The mean length is %d observations for long track",
c.nb_obs_by_tracks.mean(),
"The mean length is %d observations for long track", c.nb_obs_by_tracks.mean(),
)

long_track.write_file(**kw_write)
@@ -383,14 +381,7 @@


 def get_group(
-    dataset1,
-    dataset2,
-    index1,
-    index2,
-    score,
-    invalid=2,
-    low=10,
-    high=60,
+    dataset1, dataset2, index1, index2, score, invalid=2, low=10, high=60,
 ):
     group1, group2 = dict(), dict()
     m_valid = (score * 100) >= invalid
@@ -499,8 +490,7 @@ def get_values(v, dataset):
     ]

     labels = dict(
-        high=f"{high:0.0f} <= high",
-        low=f"{invalid:0.0f} <= low < {low:0.0f}",
+        high=f"{high:0.0f} <= high", low=f"{invalid:0.0f} <= low < {low:0.0f}",
     )

     keys = [labels.get(key, key) for key in list(gr_ref.values())[0].keys()]
41 changes: 36 additions & 5 deletions src/py_eddy_tracker/appli/network.py
@@ -21,6 +21,20 @@ def build_network():
     parser.add_argument(
         "--window", "-w", type=int, help="Half time window to search eddy", default=1
     )
+
+    parser.add_argument(
+        "--min-overlap",
+        "-p",
+        type=float,
+        help="Minimum overlap area to associate observations",
+        default=0.2,
+    )
+    parser.add_argument(
+        "--minimal-area",
+        action="store_true",
+        help="If True, use intersection/smaller polygon, else intersection/union",
+    )
+
     parser.contour_intern_arg()

     parser.memory_arg()
@@ -32,7 +46,9 @@ def build_network():
         intern=args.intern,
         memory=args.memory,
     )
-    group = n.group_observations(minimal_area=True)
+    group = n.group_observations(
+        min_overlap=args.min_overlap, minimal_area=args.minimal_area
+    )
     n.build_dataset(group).write_file(filename=args.out)


@@ -44,6 +60,18 @@ def divide_network():
     parser.add_argument(
         "--window", "-w", type=int, help="Half time window to search eddy", default=1
     )
+    parser.add_argument(
+        "--min-overlap",
+        "-p",
+        type=float,
+        help="Minimum overlap area to associate observations",
+        default=0.2,
+    )
+    parser.add_argument(
+        "--minimal-area",
+        action="store_true",
+        help="If True, use intersection/smaller polygon, else intersection/union",
+    )
     args = parser.parse_args()
     contour_name = TrackEddiesObservations.intern(args.intern, public_label=True)
     e = TrackEddiesObservations.load_file(
@@ -52,7 +80,12 @@
     )
     n = NetworkObservations.from_split_network(
         TrackEddiesObservations.load_file(args.input, raw_data=True),
-        e.split_network(intern=args.intern, window=args.window),
+        e.split_network(
+            intern=args.intern,
+            window=args.window,
+            min_overlap=args.min_overlap,
+            minimal_area=args.minimal_area,
+        ),
     )
     n.write_file(filename=args.out)

@@ -76,9 +109,7 @@ def subset_network():
help="Remove short dead end, first is for minimal obs number and second for minimal segment time to keep",
)
parser.add_argument(
"--remove_trash",
action="store_true",
help="Remove trash (network id == 0)",
"--remove_trash", action="store_true", help="Remove trash (network id == 0)",
)
parser.add_argument(
"-p",
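For reference, a hedged sketch of driving the new options through the Python API. Only the `group_observations` / `build_dataset` / `write_file` calls are taken from this diff; the `Network` class, its import path and constructor arguments, and the file names are assumptions.

```python
# Hypothetical equivalent of: build_network --min-overlap 0.4 ...
from py_eddy_tracker.observations.network import Network  # assumed import path

n = Network("eddies_*.nc", window=5, intern=False, memory=False)
# Associate two observations when intersection/union exceeds 0.4 ...
group = n.group_observations(min_overlap=0.4, minimal_area=False)
# ... or pass minimal_area=True to use intersection/smaller polygon instead.
n.build_dataset(group).write_file(filename="network.nc")
```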
8 changes: 3 additions & 5 deletions src/py_eddy_tracker/dataset/grid.py
@@ -858,13 +858,11 @@ def eddy_identification(
             xy_i = uniform_resample(
                 inner_contour.lon,
                 inner_contour.lat,
                 num_fac=presampling_multiplier
             )
-            xy_e = uniform_resample(
-                contour.lon,
-                contour.lat,
-                num_fac=presampling_multiplier,
-            )
+            xy_e = uniform_resample(
+                contour.lon, contour.lat, num_fac=presampling_multiplier,
+            )
             xy_s = uniform_resample(
                 speed_contour.lon,
                 speed_contour.lat,
4 changes: 2 additions & 2 deletions src/py_eddy_tracker/eddy_feature.py
@@ -433,8 +433,8 @@ def __init__(self, x, y, z, levels, wrap_x=False, keep_unclose=False):
         closed_contours = 0
         # Count level and contour
         for i, collection in enumerate(self.contours.collections):
-            collection.get_nearest_path_bbox_contain_pt = (
-                lambda x, y, i=i: self.get_index_nearest_path_bbox_contain_pt(i, x, y)
+            collection.get_nearest_path_bbox_contain_pt = lambda x, y, i=i: self.get_index_nearest_path_bbox_contain_pt(
+                i, x, y
             )
             nb_level += 1

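The `i=i` default argument kept in the reformatted lambda above matters because Python closures bind loop variables late. A standalone sketch of the pitfall (not repository code):

```python
# Late binding: every lambda sees the final value of i after the loop ends.
fns_late = [lambda: i for i in range(3)]
print([f() for f in fns_late])   # [2, 2, 2]

# Default argument: i is evaluated and bound at definition time.
fns_bound = [lambda i=i: i for i in range(3)]
print([f() for f in fns_bound])  # [0, 1, 2]
```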
53 changes: 42 additions & 11 deletions src/py_eddy_tracker/observations/network.py
@@ -1301,7 +1301,7 @@ def extract_with_period(self, period):

         return self.extract_with_mask(self.get_mask_with_period(period))

-    def extract_light_with_mask(self, mask):
+    def extract_light_with_mask(self, mask, track_extra_variables=[]):
         """extract data with mask, but only with variables used for coherence, aka self.array_variables

         :param mask: mask used to extract
@@ -1319,7 +1319,7 @@ def extract_light_with_mask(self, mask):
         variables = ["time"] + self.array_variables
         new = self.__class__(
             size=nb_obs,
-            track_extra_variables=[],
+            track_extra_variables=track_extra_variables,
             track_array_variables=self.track_array_variables,
             array_variables=self.array_variables,
             only_variables=variables,
@@ -1333,9 +1333,22 @@ def extract_light_with_mask(self, mask):
f"{nb_obs} observations will be extracted ({nb_obs / self.shape[0]:.3%})"
)

for field in variables:
for field in variables + track_extra_variables:
logger.debug("Copy of field %s ...", field)
new.obs[field] = self.obs[field][mask]

if (
"previous_obs" in track_extra_variables
and "next_obs" in track_extra_variables
):
# n & p must be re-index
n, p = self.next_obs[mask], self.previous_obs[mask]
# we add 2 for -1 index return index -1
translate = -ones(len(self) + 1, dtype="i4")
translate[:-1][mask] = arange(nb_obs)
new.next_obs[:] = translate[n]
new.previous_obs[:] = translate[p]

return new

def extract_with_mask(self, mask):
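The re-indexing added above relies on a translate array with a sentinel slot. A standalone sketch with assumed data (not repository code):

```python
# After masking, old indices must be mapped to new ones, and the sentinel -1
# ("no neighbour") must survive the mapping.
from numpy import arange, array, ones

next_obs = array([1, 2, -1, 4, -1])            # links between 5 observations
mask = array([True, False, True, True, True])  # observation 1 is dropped
nb_obs = mask.sum()

translate = -ones(len(next_obs) + 1, dtype="i4")  # extra slot: translate[-1] == -1
translate[:-1][mask] = arange(nb_obs)             # old index -> new index

print(translate[next_obs[mask]])  # [-1 -1  3 -1]: link to dropped obs becomes -1
```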
@@ -1495,7 +1508,8 @@ def date2file(julian_day):

         t_start, t_end = int(self.period[0]), int(self.period[1])

-        dates = arange(t_start, t_start + n_days + 1)
+        # dates = arange(t_start, t_start + n_days + 1)
+        dates = arange(t_start, min(t_start + n_days + 1, t_end + 1))
         first_files = [date_function(x) for x in dates]

         c = GridCollection.from_netcdf_list(first_files, dates, **uv_params)
@@ -1570,12 +1584,8 @@ def date2file(julian_day):
         ptf_final = zeros((self.obs.size, 2), dtype="i1")

         t_start, t_end = int(self.period[0]), int(self.period[1])
-        # if begin is not None and begin > t_start:
-        #     t_start = begin
-        # if end is not None and end < t_end:
-        #     t_end = end

-        dates = arange(t_start, t_start + n_days + 1)
+        dates = arange(t_start, min(t_start + n_days + 1, t_end + 1))
         first_files = [date_function(x) for x in dates]

         c = GridCollection.from_netcdf_list(first_files, dates, **uv_params)
@@ -1699,7 +1709,23 @@ def group_translator(nb, duos):
                 apply_replace(translate, gr_i, gr_j)
         return translate

-    def group_observations(self, **kwargs):
+    def group_observations(self, min_overlap=0.2, minimal_area=False):
+        """Store every interaction between identifications
+
+        Parameters
+        ----------
+        minimal_area : bool, optional
+            If True, compute intersection/smaller polygon, else intersection/union, by default False
+        min_overlap : float, optional
+            Minimum overlap area to associate observations, by default 0.2
+
+        Returns
+        -------
+        TrackEddiesObservations
+            netcdf with interactions
+        """
+
         results, nb_obs = list(), list()
         # To display print only in INFO
         display_iteration = logger.getEffectiveLevel() == logging.INFO
@@ -1713,7 +1739,12 @@ def group_observations(self, **kwargs):
             for j in range(i + 1, min(self.window + i + 1, self.nb_input)):
                 xj, yj = self.buffer.load_contour(self.filenames[j])
                 ii, ij = bbox_intersection(xi, yi, xj, yj)
-                m = vertice_overlap(xi[ii], yi[ii], xj[ij], yj[ij], **kwargs) > 0.2
+                m = (
+                    vertice_overlap(
+                        xi[ii], yi[ii], xj[ij], yj[ij], minimal_area=minimal_area
+                    )
+                    > min_overlap
+                )
                 results.append((i, j, ii[m], ij[m]))
         if display_iteration:
             print()
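The two ratios selected by `minimal_area` behave differently when a small contour sits inside a big one. A generic shapely illustration of the distinction (not the project's `vertice_overlap` implementation):

```python
# Compare intersection/union with intersection/smaller-polygon.
from shapely.geometry import box

p1 = box(0.0, 0.0, 2.0, 2.0)  # area 4.0
p2 = box(1.0, 0.0, 2.0, 2.0)  # area 2.0, fully inside p1's right half

inter = p1.intersection(p2).area               # 2.0
ratio_union = inter / p1.union(p2).area        # 2.0 / 4.0 = 0.5
ratio_minimal = inter / min(p1.area, p2.area)  # 2.0 / 2.0 = 1.0

# With min_overlap=0.2 both ratios associate these observations, but
# minimal_area=True is the more permissive choice when a small eddy
# lies inside a larger contour.
print(ratio_union, ratio_minimal)
```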