Skip to content

Commit eebbf58

Browse files
committed
Add examples to display group dependence on a parameter
1 parent 7ce7bf1 commit eebbf58

File tree

4 files changed

+288
-4
lines changed

4 files changed

+288
-4
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
88
## [Unreleased]
99

1010
### Added
11+
- Add a method to merge several index types for eddy observations
1112
- Acces at dataset variable like attribute, and lifetime/age are available for all observations
1213
- Add **EddyInfos** application to get general information about eddies dataset
1314
- Add method to inspect contour rejection (which are not in eddies)
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
"""
2+
Groups distribution
3+
===================
4+
5+
"""
6+
import py_eddy_tracker_sample
7+
from matplotlib import pyplot as plt
8+
from numpy import arange, ones, percentile
9+
10+
from py_eddy_tracker.observations.tracking import TrackEddiesObservations
11+
12+
# %%
# Load an experimental med atlas over a period of 26 years (1993-2019)
atlas_path = py_eddy_tracker_sample.get_path(
    "eddies_med_adt_allsat_dt2018/Anticyclonic.zarr"
)
a = TrackEddiesObservations.load_file(atlas_path)
17+
18+
# %%
19+
# Group distribution
20+
# Build one selector per lifetime class: each maps a dataset to a boolean
# mask of the observations whose lifetime falls in [start, end).
groups = {}
bins_time = [10, 20, 30, 60, 90, 180, 360, 100000]
for start, end in zip(bins_time, bins_time[1:]):

    def _in_lifetime_range(dataset, start=start, end=end):
        # Bounds are bound as defaults to freeze them per iteration
        return (dataset.lifetime >= start) * (dataset.lifetime < end)

    groups[f"lifetime_{start}_{end}"] = _in_lifetime_range
# Percentile levels (0, 5, ..., 100) used to build field bins below
bins_percentile = arange(0, 100.0001, 5)
27+
28+
29+
# %%
30+
# Function to build stats
31+
def stats_compilation(dataset, groups, field, bins, filter=None):
    """
    Compute the histogram of *field* for the whole dataset and for each group.

    :param dataset: observations collection exposing ``bins_stat`` and ``merge_indexs``
    :param dict groups: mapping group name -> selection (callable, mask, index, slice)
    :param str field: variable to compute stats on
    :param array bins: bin edges used for every histogram
    :param filter: optional global selection combined with each group selection
    :return: dict with keys ``ref`` (whole-dataset counts), ``y`` (per-group counts),
        ``x`` (bin centers) and ``bins`` (the edges passed in)
    """
    _, reference = dataset.bins_stat(field, bins=bins, mask=filter)
    per_group = dict()
    for name, selection in groups.items():
        # Combine the global filter with this group's own selection
        merged = dataset.merge_indexs(filter, selection)
        x, per_group[name] = dataset.bins_stat(field, bins=bins, mask=merged)
    return dict(ref=reference, y=per_group, x=x, bins=bins)
38+
39+
40+
def plot_stats(ax, bins, x, y, ref, box=False, cmap=None, percentiles=None, **kw):
    """
    Draw stacked per-group percentages on *ax*, filling downward from 100 %.

    :param ax: matplotlib axes to draw on
    :param array bins: bin edges used to compute the stats
    :param array x: bin centers
    :param dict y: per-group counts, one array per group
    :param array ref: whole-dataset counts, used to normalize each bin to 100 %
    :param bool box: if True draw stepped (box) profiles instead of smooth ones
    :param str cmap: optional colormap name used to color the groups
    :param dict percentiles: optional line style; if set, a vertical line is
        drawn at every bin edge
    :param kw: extra style forwarded to ``fill_between``
    """
    top = ones(x.shape) * 100.0
    scale = ref / 100.0
    if box:
        # Duplicate abscissa so each bin is drawn as a flat step
        x = arange(bins.shape[0]).repeat(2)[1:-1]
    if cmap is not None:
        cmap = plt.get_cmap(cmap)
        nb_groups = len(y)
    previous = top
    # Stack groups from the last key to the first
    for i, name in enumerate(tuple(y.keys())[::-1]):
        current = previous - y[name] / scale
        if box:
            coords = (previous.repeat(2), current.repeat(2))
        else:
            coords = (previous, current)
        if cmap is not None:
            kw["color"] = cmap(1 - i / (nb_groups - 1))
        ax.fill_between(x, *coords, label=name, **kw)
        previous = current
    if percentiles:
        for edge in bins:
            ax.axvline(edge, **percentiles)
57+
58+
59+
# %%
# Speed radius by track period
stats = stats_compilation(
    a, groups, "radius_s", percentile(a.radius_s, bins_percentile)
)
fig = plt.figure()
ax = fig.add_subplot(111)
plot_stats(ax, **stats, cmap="magma", percentiles=dict(color="gray", ls="-.", lw=0.4))
ax.set_xlabel("Speed radius (m)")
ax.set_ylabel("% of class")
ax.set_ylim(0, 100)
ax.grid()
ax.legend()

# %%
# Amplitude by track period
stats = stats_compilation(
    a, groups, "amplitude", percentile(a.amplitude, bins_percentile)
)
fig = plt.figure()
ax = fig.add_subplot(111)
plot_stats(ax, **stats, cmap="magma")
ax.set_xlabel("Amplitude (m)")
ax.set_ylabel("% of class")
ax.set_ylim(0, 100)
ax.grid()
ax.legend()
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"metadata": {
7+
"collapsed": false
8+
},
9+
"outputs": [],
10+
"source": [
11+
"%matplotlib inline"
12+
]
13+
},
14+
{
15+
"cell_type": "markdown",
16+
"metadata": {},
17+
"source": [
18+
"\n# Groups distribution\n"
19+
]
20+
},
21+
{
22+
"cell_type": "code",
23+
"execution_count": null,
24+
"metadata": {
25+
"collapsed": false
26+
},
27+
"outputs": [],
28+
"source": [
29+
"import py_eddy_tracker_sample\nfrom matplotlib import pyplot as plt\nfrom numpy import arange, ones, percentile\n\nfrom py_eddy_tracker.observations.tracking import TrackEddiesObservations"
30+
]
31+
},
32+
{
33+
"cell_type": "markdown",
34+
"metadata": {},
35+
"source": [
36+
"Load an experimental med atlas over a period of 26 years (1993-2019)\n\n"
37+
]
38+
},
39+
{
40+
"cell_type": "code",
41+
"execution_count": null,
42+
"metadata": {
43+
"collapsed": false
44+
},
45+
"outputs": [],
46+
"source": [
47+
"a = TrackEddiesObservations.load_file(\n py_eddy_tracker_sample.get_path(\"eddies_med_adt_allsat_dt2018/Anticyclonic.zarr\")\n)"
48+
]
49+
},
50+
{
51+
"cell_type": "markdown",
52+
"metadata": {},
53+
"source": [
54+
"Group distribution\n\n"
55+
]
56+
},
57+
{
58+
"cell_type": "code",
59+
"execution_count": null,
60+
"metadata": {
61+
"collapsed": false
62+
},
63+
"outputs": [],
64+
"source": [
65+
"groups = dict()\nbins_time = [10, 20, 30, 60, 90, 180, 360, 100000]\nfor t0, t1 in zip(bins_time[:-1], bins_time[1:]):\n groups[f\"lifetime_{t0}_{t1}\"] = lambda dataset, t0=t0, t1=t1: (\n dataset.lifetime >= t0\n ) * (dataset.lifetime < t1)\nbins_percentile = arange(0, 100.0001, 5)"
66+
]
67+
},
68+
{
69+
"cell_type": "markdown",
70+
"metadata": {},
71+
"source": [
72+
"Function to build stats\n\n"
73+
]
74+
},
75+
{
76+
"cell_type": "code",
77+
"execution_count": null,
78+
"metadata": {
79+
"collapsed": false
80+
},
81+
"outputs": [],
82+
"source": [
83+
"def stats_compilation(dataset, groups, field, bins, filter=None):\n datas = dict(ref=dataset.bins_stat(field, bins=bins, mask=filter)[1], y=dict())\n for k, index in groups.items():\n i = dataset.merge_indexs(filter, index)\n x, datas[\"y\"][k] = dataset.bins_stat(field, bins=bins, mask=i)\n datas[\"x\"], datas[\"bins\"] = x, bins\n return datas\n\n\ndef plot_stats(ax, bins, x, y, ref, box=False, cmap=None, percentiles=None, **kw):\n base, ref = ones(x.shape) * 100.0, ref / 100.0\n x = arange(bins.shape[0]).repeat(2)[1:-1] if box else x\n y0 = base\n if cmap is not None:\n cmap, nb_groups = plt.get_cmap(cmap), len(y)\n keys = tuple(y.keys())\n for i, k in enumerate(keys[::-1]):\n y1 = y0 - y[k] / ref\n args = (y0.repeat(2), y1.repeat(2)) if box else (y0, y1)\n if cmap is not None:\n kw[\"color\"] = cmap(1 - i / (nb_groups - 1))\n ax.fill_between(x, *args, label=k, **kw)\n y0 = y1\n if percentiles:\n for b in bins:\n ax.axvline(b, **percentiles)"
84+
]
85+
},
86+
{
87+
"cell_type": "markdown",
88+
"metadata": {},
89+
"source": [
90+
"Speed radius by track period\n\n"
91+
]
92+
},
93+
{
94+
"cell_type": "code",
95+
"execution_count": null,
96+
"metadata": {
97+
"collapsed": false
98+
},
99+
"outputs": [],
100+
"source": [
101+
"stats = stats_compilation(\n a, groups, \"radius_s\", percentile(a.radius_s, bins_percentile)\n)\nfig = plt.figure()\nax = fig.add_subplot(111)\nplot_stats(ax, **stats, cmap=\"magma\", percentiles=dict(color=\"gray\", ls=\"-.\", lw=0.4))\nax.set_xlabel(\"Speed radius (m)\"), ax.set_ylabel(\"% of class\"), ax.set_ylim(0, 100)\nax.grid(), ax.legend()"
102+
]
103+
},
104+
{
105+
"cell_type": "markdown",
106+
"metadata": {},
107+
"source": [
108+
"Amplitude by track period\n\n"
109+
]
110+
},
111+
{
112+
"cell_type": "code",
113+
"execution_count": null,
114+
"metadata": {
115+
"collapsed": false
116+
},
117+
"outputs": [],
118+
"source": [
119+
"stats = stats_compilation(\n a, groups, \"amplitude\", percentile(a.amplitude, bins_percentile)\n)\nfig = plt.figure()\nax = fig.add_subplot(111)\nplot_stats(ax, **stats, cmap=\"magma\")\nax.set_xlabel(\"Amplitude (m)\"), ax.set_ylabel(\"% of class\"), ax.set_ylim(0, 100)\nax.grid(), ax.legend()"
120+
]
121+
}
122+
],
123+
"metadata": {
124+
"kernelspec": {
125+
"display_name": "Python 3",
126+
"language": "python",
127+
"name": "python3"
128+
},
129+
"language_info": {
130+
"codemirror_mode": {
131+
"name": "ipython",
132+
"version": 3
133+
},
134+
"file_extension": ".py",
135+
"mimetype": "text/x-python",
136+
"name": "python",
137+
"nbconvert_exporter": "python",
138+
"pygments_lexer": "ipython3",
139+
"version": "3.7.7"
140+
}
141+
},
142+
"nbformat": 4,
143+
"nbformat_minor": 0
144+
}

src/py_eddy_tracker/observations/observation.py

Lines changed: 64 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
floor,
2929
histogram,
3030
histogram2d,
31+
in1d,
3132
isnan,
3233
linspace,
3334
ma,
@@ -1685,6 +1686,66 @@ def filled(
16851686
c.norm = Normalize(vmin=vmin, vmax=vmax)
16861687
return c
16871688

1689+
def merge_indexs(self, filter, index=None):
    """
    Compute the intersection of two selections, evaluating callables first.

    :param callable,None,slice,array[int],array[bool] filter: first selection
    :param callable,None,slice,array[int],array[bool] index: second selection

    :return: Return applicable object to numpy.array
    :rtype: slice, index, mask
    """
    # If filter is a function we apply it on the dataset to get a selection
    if callable(filter):
        filter = filter(self)
    # Normalize index (a recursive call resolves a callable index)
    if index is not None:
        index = self.merge_indexs(index)
    # Trivial cases: nothing to merge
    if index is None and filter is None:
        return slice(None)
    if index is None:
        return filter
    if filter is None:
        return index
    if isinstance(index, slice):
        # Boolean mask of the items selected by the slice `index`
        select = zeros(len(self), dtype="bool")
        select[index] = True
        if isinstance(filter, slice):
            # slice & slice -> intersection mask.
            # BUGFIX: previous code cleared one `reject` array with both
            # slices, which produced the UNION of the two selections.
            select_filter = zeros(len(self), dtype="bool")
            select_filter[filter] = True
            return select * select_filter
        # Mask case
        elif filter.dtype == bool:
            return select * filter
        # index case
        else:
            return filter[select[filter]]
    # mask case
    elif index.dtype == bool:
        if isinstance(filter, slice):
            select = zeros(len(self), dtype="bool")
            select[filter] = True
            return select * index
        # Mask case
        elif filter.dtype == bool:
            return filter * index
        # index case
        else:
            return filter[index[filter]]
    # index case
    else:
        if isinstance(filter, slice):
            select = zeros(len(self), dtype="bool")
            select[filter] = True
            return index[select[index]]
        # Mask case
        elif filter.dtype == bool:
            return index[filter[index]]
        # index case
        else:
            return index[in1d(index, filter)]
1748+
16881749
def bins_stat(self, xname, bins=None, yname=None, method=None, mask=None):
16891750
"""
16901751
:param str,array xname: variable to compute stats on
@@ -1698,16 +1759,15 @@ def bins_stat(self, xname, bins=None, yname=None, method=None, mask=None):
16981759
.. minigallery:: py_eddy_tracker.EddiesObservations.bins_stat
16991760
"""
17001761
v = self[xname] if isinstance(xname, str) else xname
1701-
if mask is not None:
1702-
v = v[mask]
1762+
mask = self.merge_indexs(mask)
1763+
v = v[mask]
17031764
if bins is None:
17041765
bins = arange(v.min(), v.max() + 2)
17051766
y, x = hist_numba(v, bins=bins)
17061767
x = (x[1:] + x[:-1]) / 2
17071768
if method == "mean":
17081769
y_v = self[yname] if isinstance(yname, str) else yname
1709-
if mask is not None:
1710-
y_v = y_v[mask]
1770+
y_v = y_v[mask]
17111771
y_, _ = histogram(v, bins=bins, weights=y_v)
17121772
y = y_ / y
17131773
return x, y

0 commit comments

Comments
 (0)