Skip to content

Commit 1f3d119

Browse files
committed
Example about storage and access
1 parent c626528 commit 1f3d119

File tree

5 files changed

+329
-0
lines changed

5 files changed

+329
-0
lines changed

TODOLIST.md

Whitespace-only changes.
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
General features
2+
================
3+
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
"""
2+
How data is stored
3+
==================
4+
5+
General information about eddies storage.
6+
7+
All eddies files share the same structure, with more or fewer fields and a specific way of ordering.
8+
9+
There are 3 classes of files:
10+
11+
- Eddies collections, which contain a list of eddies without links between observations
12+
- Track eddies collections, which manage eddies when they are merged into trajectories
13+
(the track field allows each track to be separated)
14+
- Network eddies collections, which manage eddies when they are merged into networks
15+
(the track/segment fields allow observations to be separated)
16+
"""
17+
18+
import py_eddy_tracker_sample
19+
20+
from py_eddy_tracker.data import get_path, get_remote_sample
21+
from py_eddy_tracker.observations.network import NetworkObservations
22+
from py_eddy_tracker.observations.observation import EddiesObservations, Table
23+
from py_eddy_tracker.observations.tracking import TrackEddiesObservations
24+
25+
# %%
26+
# Eddies can be stored in 2 formats with the same structure:
27+
#
28+
# - zarr (https://zarr.readthedocs.io/en/stable/), which allows efficient IO,...
29+
# - NetCDF4 (https://unidata.github.io/netcdf4-python/), well-known format
30+
#
31+
# Each field is stored in a column, each row corresponds to 1 observation,
32+
# array fields like contour/profile are 2D columns.
33+
34+
# %%
35+
# Eddies files (zarr or netcdf) can be loaded with the `load_file` method:
36+
eddies_collections = EddiesObservations.load_file(get_path("Cyclonic_20160515.nc"))
37+
eddies_collections.field_table()
38+
# offset and scale_factor are used only when data is stored in zarr or netCDF4
39+
40+
# %%
41+
# Field access
42+
# ------------
43+
eddies_collections.amplitude
44+
45+
# %%
46+
# Data matrix is a numpy ndarray
47+
eddies_collections.obs
48+
# %%
49+
eddies_collections.obs.dtype
50+
51+
52+
# %%
53+
# Contour storage
54+
# ---------------
55+
# Contours are stored with the same fixed size for all eddies; each contour is resampled with an algorithm before being stored in the object
56+
57+
58+
# %%
59+
# Tracks
60+
# ------
61+
# Tracks add several fields, such as:
62+
#
63+
# - track : ID which identifies the path
64+
# - observation_flag : True if it's an observation used to fill a missing detection
65+
# - observation_number : Age of eddies
66+
# - cost_association : result of the cost function used to associate the observation with an eddy path
67+
eddies_tracks = TrackEddiesObservations.load_file(
68+
py_eddy_tracker_sample.get_path("eddies_med_adt_allsat_dt2018/Cyclonic.zarr")
69+
)
70+
# In this example some fields (like effective_contour_longitude, ...) are removed in order to save time for doc building
71+
eddies_tracks.field_table()
72+
73+
# %%
74+
# Network
75+
# -------
76+
# Network files use some specific fields:
77+
#
78+
# - track : ID of network (ID 0 is for lonely eddies/trash)
79+
# - segment : ID of path in network (from 0 to N)
80+
# - previous_obs : Index of the previous observation in the full dataset, -1 if there is no previous observation
81+
# - next_obs : Index of the next observation in the full dataset, -1 if there is no next observation
82+
# - previous_cost : Result of cost_function (1 good <> 0 bad) with previous observation
83+
# - next_cost : Result of cost_function (1 good <> 0 bad) with next observation
84+
eddies_network = NetworkObservations.load_file(
85+
get_remote_sample(
86+
"eddies_med_adt_allsat_dt2018_err70_filt500_order1/Anticyclonic_network.nc"
87+
)
88+
)
89+
eddies_network.field_table()
90+
91+
# %%
92+
sl = slice(70, 100)
93+
Table(
94+
eddies_network.network(651).obs[sl][
95+
[
96+
"time",
97+
"track",
98+
"segment",
99+
"previous_obs",
100+
"previous_cost",
101+
"next_obs",
102+
"next_cost",
103+
]
104+
]
105+
)
Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"metadata": {
7+
"collapsed": false
8+
},
9+
"outputs": [],
10+
"source": [
11+
"%matplotlib inline"
12+
]
13+
},
14+
{
15+
"cell_type": "markdown",
16+
"metadata": {},
17+
"source": [
18+
"\n# How data is stored\n\nGeneral information about eddies storage.\n\nAll eddies files have same structure with more or less field and a way of ordering.\n\nThere are 3 class of files:\n\n- Eddies collections which contains a list of eddies without link between observations\n- Track eddies collections which manage eddies when there are merged in trajectory\n (track field allow to separate each track)\n- Network eddies collections which manage eddies when there are merged in network\n (track/segment field allow to separate observations)\n"
19+
]
20+
},
21+
{
22+
"cell_type": "code",
23+
"execution_count": null,
24+
"metadata": {
25+
"collapsed": false
26+
},
27+
"outputs": [],
28+
"source": [
29+
"import py_eddy_tracker_sample\n\nfrom py_eddy_tracker.data import get_path, get_remote_sample\nfrom py_eddy_tracker.observations.network import NetworkObservations\nfrom py_eddy_tracker.observations.observation import EddiesObservations, Table\nfrom py_eddy_tracker.observations.tracking import TrackEddiesObservations"
30+
]
31+
},
32+
{
33+
"cell_type": "markdown",
34+
"metadata": {},
35+
"source": [
36+
"Eddies could be store in 2 formats with same structures:\n\n- zarr (https://zarr.readthedocs.io/en/stable/), which allow efficiency in IO,...\n- NetCDF4 (https://unidata.github.io/netcdf4-python/), well-known format\n\nEach field are stored in column, each row corresponds at 1 observation,\narray field like contour/profile are 2D column.\n\n"
37+
]
38+
},
39+
{
40+
"cell_type": "markdown",
41+
"metadata": {},
42+
"source": [
43+
"Eddies files (zarr or netcdf) could be loaded with `load_file` method:\n\n"
44+
]
45+
},
46+
{
47+
"cell_type": "code",
48+
"execution_count": null,
49+
"metadata": {
50+
"collapsed": false
51+
},
52+
"outputs": [],
53+
"source": [
54+
"eddies_collections = EddiesObservations.load_file(get_path(\"Cyclonic_20160515.nc\"))\neddies_collections.field_table()\n# offset and scale_factor are used only when data is stored in zarr or netCDF4"
55+
]
56+
},
57+
{
58+
"cell_type": "markdown",
59+
"metadata": {},
60+
"source": [
61+
"## Field access\n\n"
62+
]
63+
},
64+
{
65+
"cell_type": "code",
66+
"execution_count": null,
67+
"metadata": {
68+
"collapsed": false
69+
},
70+
"outputs": [],
71+
"source": [
72+
"eddies_collections.amplitude"
73+
]
74+
},
75+
{
76+
"cell_type": "markdown",
77+
"metadata": {},
78+
"source": [
79+
"Data matrix is a numpy ndarray\n\n"
80+
]
81+
},
82+
{
83+
"cell_type": "code",
84+
"execution_count": null,
85+
"metadata": {
86+
"collapsed": false
87+
},
88+
"outputs": [],
89+
"source": [
90+
"eddies_collections.obs"
91+
]
92+
},
93+
{
94+
"cell_type": "code",
95+
"execution_count": null,
96+
"metadata": {
97+
"collapsed": false
98+
},
99+
"outputs": [],
100+
"source": [
101+
"eddies_collections.obs.dtype"
102+
]
103+
},
104+
{
105+
"cell_type": "markdown",
106+
"metadata": {},
107+
"source": [
108+
"## Contour storage\nContour are stored to fixed size for all, contour are resample with an algorithm before to be store in object\n\n"
109+
]
110+
},
111+
{
112+
"cell_type": "markdown",
113+
"metadata": {},
114+
"source": [
115+
"## Tracks\nTracks add several field like:\n\n- track : ID which allow to identify path\n- observation_flag : True if it's an observation to filled a missing detection\n- observation_number : Age of eddies\n- cost_association : result of cost function which allow to associate the observation with eddy path\n\n"
116+
]
117+
},
118+
{
119+
"cell_type": "code",
120+
"execution_count": null,
121+
"metadata": {
122+
"collapsed": false
123+
},
124+
"outputs": [],
125+
"source": [
126+
"eddies_tracks = TrackEddiesObservations.load_file(\n py_eddy_tracker_sample.get_path(\"eddies_med_adt_allsat_dt2018/Cyclonic.zarr\")\n)\n# In this example some field are removed like effective_contour_longitude, ... in order to save time for doc building\neddies_tracks.field_table()"
127+
]
128+
},
129+
{
130+
"cell_type": "markdown",
131+
"metadata": {},
132+
"source": [
133+
"## Network\nNetwork files use some specific field:\n\n- track : ID of network (ID 0 are for lonely eddies/trash)\n- segment : ID of path in network (from 0 to N)\n- previous_obs : Index of the previous observation in the full dataset, if -1 there are no previous observation\n- next_obs : Index of the next observation in the full dataset, if -1 there are no next observation\n- previous_cost : Result of cost_function (1 good <> 0 bad) with previous observation\n- next_cost : Result of cost_function (1 good <> 0 bad) with next observation\n\n"
134+
]
135+
},
136+
{
137+
"cell_type": "code",
138+
"execution_count": null,
139+
"metadata": {
140+
"collapsed": false
141+
},
142+
"outputs": [],
143+
"source": [
144+
"eddies_network = NetworkObservations.load_file(\n get_remote_sample(\n \"eddies_med_adt_allsat_dt2018_err70_filt500_order1/Anticyclonic_network.nc\"\n )\n)\neddies_network.field_table()"
145+
]
146+
},
147+
{
148+
"cell_type": "code",
149+
"execution_count": null,
150+
"metadata": {
151+
"collapsed": false
152+
},
153+
"outputs": [],
154+
"source": [
155+
"sl = slice(70, 100)\nTable(\n eddies_network.network(651).obs[sl][\n [\n \"time\",\n \"track\",\n \"segment\",\n \"previous_obs\",\n \"previous_cost\",\n \"next_obs\",\n \"next_cost\",\n ]\n ]\n)"
156+
]
157+
}
158+
],
159+
"metadata": {
160+
"kernelspec": {
161+
"display_name": "Python 3",
162+
"language": "python",
163+
"name": "python3"
164+
},
165+
"language_info": {
166+
"codemirror_mode": {
167+
"name": "ipython",
168+
"version": 3
169+
},
170+
"file_extension": ".py",
171+
"mimetype": "text/x-python",
172+
"name": "python",
173+
"nbconvert_exporter": "python",
174+
"pygments_lexer": "ipython3",
175+
"version": "3.7.7"
176+
}
177+
},
178+
"nbformat": 4,
179+
"nbformat_minor": 0
180+
}

src/py_eddy_tracker/observations/observation.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,28 @@ def shifted_ellipsoid_degrees_mask2(lon0, lat0, lon1, lat1, minor=1.5, major=1.5
109109
return m
110110

111111

112+
class Table(object):
    """
    Thin wrapper which renders tabular values as an HTML table.

    The class only stores the values; the rendering is done by
    ``_repr_html_``, the hook used by rich front-ends (IPython/Jupyter
    style display) to get an HTML representation of an object.

    :param values: rows to display, either an iterable of row iterables
        or a numpy array; for a structured array the field names are
        emitted as a first (header) row
    """

    def __init__(self, values):
        self.values = values

    def _repr_html_(self):
        """
        Build the HTML markup of the table, one ``<tr>`` per row.

        Every cell is HTML-escaped, so values containing ``<``, ``>`` or
        ``&`` are displayed literally instead of being parsed as markup.

        :return: HTML markup of the table
        :rtype: str
        """
        # Function-scope import: keeps the class usable without touching
        # the module import section
        from html import escape

        def html_row(cells):
            # Format one row; quote=False because cells are element text,
            # not attribute values
            cells_html = "\n".join(
                f"<td>{escape(f'{cell}', quote=False)}</td >" for cell in cells
            )
            return f"<tr>{cells_html}</tr>"

        rows = []
        # Only structured arrays carry field names: dtype.names is None for
        # a plain ndarray, which must not be iterated as a header
        if isinstance(self.values, ndarray) and self.values.dtype.names is not None:
            rows.append(html_row(self.values.dtype.names))
        rows.extend(html_row(row) for row in self.values)
        body = "\n".join(rows)
        return (
            '<font size="2">'
            '<table class="docutils align-default">'
            f"{body}"
            "</table>"
            "</font>"
        )
132+
133+
112134
class EddiesObservations(object):
113135
"""
114136
Class to store eddy observations.
@@ -259,6 +281,25 @@ def box_display(value):
259281
"""Return value evenly spaced with few numbers"""
260282
return "".join([f"{v_:10.2f}" for v_ in value])
261283

284+
def field_table(self):
    """
    Build a description table of the fields available in this object.

    Field names are taken from ``self.obs.dtype.names`` and listed in
    sorted order; each one is looked up in ``VAR_DESCR``. The first row
    is a header, then one row per field with its name and unit, long
    name, scale factor and offset (empty string when not defined).

    :return: table ready to be displayed
    :rtype: Table
    """
    header = ("Name(Unit)", "Long name", "Scale factor", "Offset")
    table_rows = [header]
    for field in sorted(self.obs.dtype.names):
        infos = VAR_DESCR[field]
        # Fall back on the field name when no netCDF name is declared
        label = infos.get("nc_name", field).capitalize()
        attrs = infos["nc_attr"]
        units = attrs.get("units", "")
        long_name = attrs.get("long_name", "").capitalize()
        scale_factor = infos.get("scale_factor", "")
        offset = infos.get("add_offset", "")
        table_rows.append((f"{label} ({units})", long_name, scale_factor, offset))
    return Table(table_rows)
302+
262303
def __repr__(self):
263304
"""
264305
Return general informations on dataset as strings.

0 commit comments

Comments
 (0)