Skip to content

Commit fcb43db

Browse files
committed
Add variable descriptions and loading features so that data from other
sources can be read (in this case the Frenger (2018) data)
1 parent 0ba3db4 commit fcb43db

File tree

3 files changed

+114
-8
lines changed

3 files changed

+114
-8
lines changed

src/py_eddy_tracker/__init__.py

Lines changed: 71 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ def parse_args(self, *args, **kwargs):
163163
attr_name='lon',
164164
compute_type='float64',
165165
nc_name='longitude',
166-
old_nc_name=['lon'],
166+
old_nc_name=['lon', 'Lon'],
167167
nc_type='float32',
168168
nc_dims=('obs',),
169169
nc_attr=dict(
@@ -178,7 +178,7 @@ def parse_args(self, *args, **kwargs):
178178
attr_name='lat',
179179
compute_type='float64',
180180
nc_name='latitude',
181-
old_nc_name=['lat'],
181+
old_nc_name=['lat', 'Lat'],
182182
nc_type='float32',
183183
nc_dims=('obs',),
184184
nc_attr=dict(
@@ -296,7 +296,7 @@ def parse_args(self, *args, **kwargs):
296296
radius_e=dict(
297297
attr_name='radius_e',
298298
nc_name='effective_radius',
299-
old_nc_name=['radius_e'],
299+
old_nc_name=['radius_e', 'Dia'],
300300
nc_type='float32',
301301
output_type='u2',
302302
scale_factor=50.,
@@ -324,6 +324,7 @@ def parse_args(self, *args, **kwargs):
324324
track=dict(
325325
attr_name=None,
326326
nc_name='track',
327+
old_nc_name=['Eddy_id'],
327328
nc_type='uint32',
328329
nc_dims=('obs',),
329330
nc_attr=dict(
@@ -346,7 +347,7 @@ def parse_args(self, *args, **kwargs):
346347
n=dict(
347348
attr_name=None,
348349
nc_name='observation_number',
349-
old_nc_name=['n'],
350+
old_nc_name=['n', 'Eddy_tsp'],
350351
nc_type='uint16',
351352
nc_dims=('obs',),
352353
nc_attr=dict(
@@ -481,6 +482,72 @@ def parse_args(self, *args, **kwargs):
481482
units='m',
482483
)
483484
),
485+
chl=dict(
486+
attr_name=None,
487+
nc_name='chl',
488+
old_nc_name=['Chl'],
489+
nc_type='f4',
490+
nc_dims=('obs',),
491+
nc_attr=dict(
492+
longname='Log base 10 chlorophyll',
493+
units='Log(Chl/[mg/m^3])',
494+
)
495+
),
496+
dchl=dict(
497+
attr_name=None,
498+
nc_name='dchl',
499+
old_nc_name=['dChl'],
500+
nc_type='f4',
501+
nc_dims=('obs',),
502+
nc_attr=dict(
503+
longname='Log base 10 chlorophyll anomaly (Chl minus Chl_bg)',
504+
units='Log(Chl/[mg/m^3])',
505+
)
506+
),
507+
chl_bg=dict(
508+
attr_name=None,
509+
nc_name='chl_bg',
510+
old_nc_name=['Chl_bg'],
511+
nc_type='f4',
512+
nc_dims=('obs',),
513+
nc_attr=dict(
514+
longname='Log base 10 background chlorophyll',
515+
units='Log(Chl/[mg/m^3])',
516+
)
517+
),
518+
year=dict(
519+
attr_name=None,
520+
nc_name='year',
521+
old_nc_name=['Year'],
522+
nc_type='u2',
523+
nc_dims=('obs',),
524+
nc_attr=dict(
525+
longname='Year',
526+
units='year',
527+
)
528+
),
529+
month=dict(
530+
attr_name=None,
531+
nc_name='month',
532+
old_nc_name=['Month'],
533+
nc_type='u1',
534+
nc_dims=('obs',),
535+
nc_attr=dict(
536+
longname='Month',
537+
units='month',
538+
)
539+
),
540+
day=dict(
541+
attr_name=None,
542+
nc_name='day',
543+
old_nc_name=['Day'],
544+
nc_type='u1',
545+
nc_dims=('obs',),
546+
nc_attr=dict(
547+
longname='Day',
548+
units='day',
549+
)
550+
),
484551
nb_contour_selected=dict(
485552
attr_name=None,
486553
nc_name='num_contours',

src/py_eddy_tracker/observations/observation.py

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@
4949
from datetime import datetime
5050
from numba import njit, types as numba_types
5151
from Polygon import Polygon
52+
from pint import UnitRegistry
53+
from pint.errors import UndefinedUnitError
54+
from pint.compat.tokenize import TokenError
5255

5356

5457
@njit(cache=True, fastmath=True)
@@ -210,6 +213,12 @@ def __getitem__(self, attr):
210213
return self.observations[attr]
211214
raise KeyError("%s unknown" % attr)
212215

216+
@classmethod
def obs_dimension(cls, handler):
    """Return the name of the observation dimension found in ``handler``.

    The candidate names are tested in a fixed order and the first one
    present in ``handler.dimensions`` is returned.

    :param handler: object exposing a ``dimensions`` mapping keyed by
        dimension name (the callers pass an opened netCDF dataset)
    :return: name of the first matching dimension
    :rtype: str
    :raises KeyError: if none of the candidate names is a dimension of
        ``handler``
    """
    candidates = ('obs', 'Nobs', 'observation', 'i')
    for candidate in candidates:
        # Membership test on the mapping itself, no need for .keys()
        if candidate in handler.dimensions:
            return candidate
    # Fail explicitly instead of returning None, which would otherwise
    # surface later as an uninformative "KeyError: None" in the callers.
    raise KeyError(
        "No observation dimension found, expected one of: %s"
        % ", ".join(candidates)
    )
221+
213222
@property
214223
def dtype(self):
215224
"""Return dtype to build numpy array
@@ -357,7 +366,8 @@ def load_from_netcdf(cls, filename, raw_data=False):
357366
if not isinstance(filename, str):
358367
filename = filename.astype(str)
359368
with Dataset(filename) as h_nc:
360-
nb_obs = len(h_nc.dimensions["obs"])
369+
nb_obs = len(h_nc.dimensions[cls.obs_dimension(h_nc)])
370+
logging.debug('%d observations will be load', nb_obs)
361371
kwargs = dict()
362372
if array_dim in h_nc.dimensions:
363373
kwargs["track_array_variables"] = len(h_nc.dimensions[array_dim])
@@ -382,7 +392,32 @@ def load_from_netcdf(cls, filename, raw_data=False):
382392
continue
383393
# Patch
384394
h_nc.variables[variable].set_auto_maskandscale(not raw_data)
385-
eddies.obs[var_inv] = h_nc.variables[variable][:]
395+
logging.debug('Up load %s variable%s', variable, ', with raw mode' if raw_data else '')
396+
# find unit factor
397+
factor = 1
398+
if not raw_data:
399+
input_unit = getattr(h_nc.variables[variable], 'unit', None)
400+
output_unit = VAR_DESCR[var_inv]['nc_attr'].get('units', None)
401+
if output_unit is not None and input_unit is not None and output_unit != input_unit:
402+
units = UnitRegistry()
403+
try:
404+
input_unit = units.parse_expression(input_unit, case_sensitive=False)
405+
output_unit = units.parse_expression(output_unit, case_sensitive=False)
406+
except UndefinedUnitError:
407+
input_unit = None
408+
except TokenError:
409+
input_unit = None
410+
if input_unit is not None:
411+
factor = input_unit.to(output_unit).to_tuple()[0]
412+
# If we are able to find a conversion
413+
if factor != 1:
414+
logging.info('%s will be multiply by %f to take care of units', variable, factor)
415+
if factor != 1:
416+
eddies.obs[var_inv] = h_nc.variables[variable][:] * factor
417+
else:
418+
eddies.obs[var_inv] = h_nc.variables[variable][:]
419+
420+
386421
for variable in h_nc.variables:
387422
var_inv = VAR_DESCR_inv[variable]
388423
if var_inv == "type_cyc":
@@ -396,7 +431,7 @@ def load_from_netcdf(cls, filename, raw_data=False):
396431

397432
@classmethod
398433
def from_netcdf(cls, handler):
399-
nb_obs = len(handler.dimensions["obs"])
434+
nb_obs = len(handler.dimensions[cls.obs_dimension(handler)])
400435
kwargs = dict()
401436
if hasattr(handler, "track_array_variables"):
402437
kwargs["track_array_variables"] = handler.track_array_variables

src/scripts/EddySubSetter

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,17 @@ def id_parser():
2323
help='Coordinates of bounding to extract'
2424
)
2525
parser.add_argument('-i', '--ids', nargs='+', type=int, help='List of tracks which will be extract')
26+
parser.add_argument('-n', '--no_raw_mode', action='store_true',
27+
help='Uncompress all data, could be create a memory error for huge file, but is safer for extern file of py eddy tracker')
2628
return parser
2729

2830

2931
if __name__ == '__main__':
3032
args = id_parser().parse_args()
3133

32-
dataset = TrackEddiesObservations.load_from_netcdf(args.filename, raw_data=True)
34+
# Original dataset
35+
dataset = TrackEddiesObservations.load_from_netcdf(args.filename, raw_data=False if args.no_raw_mode else True)
36+
3337
if args.ids is not None:
3438
dataset = dataset.extract_ids(args.ids)
3539
if args.period is not None:

0 commit comments

Comments
 (0)