Skip to content

Commit f63f2f6

Browse files
committed
Extract by default unused eddies from tracking
1 parent 82ca026 commit f63f2f6

File tree

4 files changed

+182
-71
lines changed

4 files changed

+182
-71
lines changed

src/py_eddy_tracker/observations.py

Lines changed: 44 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -429,7 +429,7 @@ def cost_function(records_in, records_out, distance):
429429

430430
def circle_mask(self, other, radius=100):
431431
"""Return a mask of available link"""
432-
return self.distance(other) < radius
432+
return (self.distance(other).T < radius).T
433433

434434
def shifted_ellipsoid_degrees_mask(self, other, minor=1.5, major=1.5):
435435
# c = (major ** 2 - minor ** 2) ** .5 + major
@@ -711,6 +711,49 @@ def create_variable(handler_nc, kwargs_variable, attr_variable,
711711
except ValueError:
712712
logging.warning('Data is empty')
713713

714+
def write_netcdf(self, path='./', filename='%(path)s/%(sign_type)s.nc'):
715+
"""Write a netcdf with eddy obs
716+
"""
717+
eddy_size = len(self.observations)
718+
sign_type = 'Cyclonic' if self.sign_type == -1 else 'Anticyclonic'
719+
filename = filename % dict(path=path, sign_type=sign_type, prod_time=datetime.now().strftime('%Y%m%d'))
720+
logging.info('Store in %s', filename)
721+
with Dataset(filename, 'w', format='NETCDF4') as h_nc:
722+
logging.info('Create file %s', filename)
723+
# Create dimensions
724+
logging.debug('Create Dimensions "Nobs" : %d', eddy_size)
725+
h_nc.createDimension('Nobs', eddy_size)
726+
if self.track_array_variables != 0:
727+
h_nc.createDimension('NbSample', self.track_array_variables)
728+
# Iter on variables to create:
729+
for field in self.observations.dtype.descr:
730+
name = field[0]
731+
logging.debug('Create Variable %s', VAR_DESCR[name]['nc_name'])
732+
self.create_variable(
733+
h_nc,
734+
dict(varname=VAR_DESCR[name]['nc_name'],
735+
datatype=VAR_DESCR[name]['output_type'],
736+
dimensions=VAR_DESCR[name]['nc_dims']),
737+
VAR_DESCR[name]['nc_attr'],
738+
self.observations[name],
739+
scale_factor=VAR_DESCR[name].get('scale_factor', None),
740+
add_offset=VAR_DESCR[name].get('add_offset', None)
741+
)
742+
743+
# Add cyclonic information
744+
self.create_variable(
745+
h_nc,
746+
dict(varname=VAR_DESCR['type_cyc']['nc_name'],
747+
datatype=VAR_DESCR['type_cyc']['nc_type'],
748+
dimensions=VAR_DESCR['type_cyc']['nc_dims']),
749+
VAR_DESCR['type_cyc']['nc_attr'],
750+
self.sign_type)
751+
# Global attr
752+
self.set_global_attr_netcdf(h_nc)
753+
754+
def set_global_attr_netcdf(self, h_nc):
755+
pass
756+
714757

715758
class VirtualEddiesObservations(EddiesObservations):
716759
"""Class to work with virtual obs
@@ -774,46 +817,6 @@ def elements(self):
774817
elements.extend(['track', 'n', 'virtual'])
775818
return elements
776819

777-
def write_netcdf(self, path='./', filename='%(path)s/%(sign_type)s.nc'):
778-
"""Write a netcdf with eddy obs
779-
"""
780-
eddy_size = len(self.observations)
781-
sign_type = 'Cyclonic' if self.sign_type == -1 else 'Anticyclonic'
782-
filename = filename % dict(path=path, sign_type=sign_type, prod_time=datetime.now().strftime('%Y%m%d'))
783-
logging.info('Store in %s', filename)
784-
with Dataset(filename, 'w', format='NETCDF4') as h_nc:
785-
logging.info('Create file %s', filename)
786-
# Create dimensions
787-
logging.debug('Create Dimensions "Nobs" : %d', eddy_size)
788-
h_nc.createDimension('Nobs', eddy_size)
789-
if self.track_array_variables != 0:
790-
h_nc.createDimension('NbSample', self.track_array_variables)
791-
# Iter on variables to create:
792-
for field in self.observations.dtype.descr:
793-
name = field[0]
794-
logging.debug('Create Variable %s', VAR_DESCR[name]['nc_name'])
795-
self.create_variable(
796-
h_nc,
797-
dict(varname=VAR_DESCR[name]['nc_name'],
798-
datatype=VAR_DESCR[name]['output_type'],
799-
dimensions=VAR_DESCR[name]['nc_dims']),
800-
VAR_DESCR[name]['nc_attr'],
801-
self.observations[name],
802-
scale_factor=VAR_DESCR[name].get('scale_factor', None),
803-
add_offset=VAR_DESCR[name].get('add_offset', None)
804-
)
805-
806-
# Add cyclonic information
807-
self.create_variable(
808-
h_nc,
809-
dict(varname=VAR_DESCR['type_cyc']['nc_name'],
810-
datatype=VAR_DESCR['type_cyc']['nc_type'],
811-
dimensions=VAR_DESCR['type_cyc']['nc_dims']),
812-
VAR_DESCR['type_cyc']['nc_attr'],
813-
self.sign_type)
814-
# Global attr
815-
self.set_global_attr_netcdf(h_nc)
816-
817820
def set_global_attr_netcdf(self, h_nc):
818821
"""Set global attr
819822
"""

src/py_eddy_tracker/tracking.py

Lines changed: 117 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
from matplotlib.dates import julian2num, num2date
3131

3232
from py_eddy_tracker.observations import EddiesObservations, VirtualEddiesObservations, TrackEddiesObservations
33-
from numpy import bool_, array, arange, ones, setdiff1d, zeros, uint16, where, empty, isin
33+
from numpy import bool_, array, arange, ones, setdiff1d, zeros, uint16, where, empty, isin, unique, concatenate
3434
from netCDF4 import Dataset
3535
import logging
3636

@@ -96,6 +96,21 @@ def __init__(self, datasets, virtual=0, class_method=None, previous_correspondan
9696
self.nb_obs = 0
9797
self.eddies = None
9898

99+
def _copy(self):
100+
new = self.__class__(
101+
datasets=self.datasets,
102+
virtual=self.nb_virtual,
103+
class_method=self.class_method,
104+
previous_correspondance=self.filename_previous_correspondance)
105+
for i in self:
106+
new.append(i)
107+
new.current_id = self.current_id
108+
new.nb_link_max = self.nb_link_max
109+
new.nb_obs = self.nb_obs
110+
new.prepare_merging()
111+
logging.debug('Copy done')
112+
return new
113+
99114
def reset_dataset_cache(self):
100115
self.previous2_obs = None
101116
self.previous_obs = None
@@ -317,7 +332,7 @@ def track(self):
317332
# We begin with second file, first one is in previous
318333
for file_name in self.datasets[first_dataset:]:
319334
self.swap_dataset(file_name)
320-
logging.debug('%s match with previous state', file_name)
335+
logging.info('%s match with previous state', file_name)
321336
logging.debug('%d obs to match', len(self.current_obs))
322337

323338
nb_real_obs = len(self.previous_obs)
@@ -391,6 +406,7 @@ def save(self, filename, dict_completion=None):
391406
self.previous_virtual_obs.to_netcdf(group)
392407
h_nc.module = self.class_method.__module__
393408
h_nc.classname = self.class_method.__qualname__
409+
logging.info('Create correspondance file done')
394410

395411
def load_compatible(self, filename):
396412
if filename is None:
@@ -456,7 +472,45 @@ def prepare_merging(self):
456472
logging.info('%d tracks identified', self.current_id)
457473
logging.info('%d observations will be join', self.nb_obs)
458474

459-
def merge(self, until=-1, size_min=None):
475+
def longer_than(self, size_min):
476+
"""Remove from correspondance table all association for shorter eddies than size_min
477+
"""
478+
# Identify eddies longer than
479+
i_keep_track = where(self.nb_obs_by_tracks >= size_min)[0]
480+
# Reduce array
481+
self.nb_obs_by_tracks = self.nb_obs_by_tracks[i_keep_track]
482+
self.i_current_by_tracks = self.nb_obs_by_tracks.cumsum() - self.nb_obs_by_tracks
483+
self.nb_obs = self.nb_obs_by_tracks.sum()
484+
# Give the last id used
485+
self.current_id = self.nb_obs_by_tracks.shape[0]
486+
translate = empty(i_keep_track.max() + 1, dtype='u4')
487+
translate[i_keep_track] = arange(self.current_id)
488+
for i, correspondance in enumerate(self):
489+
m_keep = isin(correspondance['id'], i_keep_track)
490+
self[i] = correspondance[m_keep]
491+
self[i]['id'] = translate[self[i]['id']]
492+
logging.debug('Select longer than %d done', size_min)
493+
494+
def shorter_than(self, size_max):
495+
"""Remove from correspondance table all association for longer eddies than size_max
496+
"""
497+
# Identify eddies longer than
498+
i_keep_track = where(self.nb_obs_by_tracks < size_max)[0]
499+
# Reduce array
500+
self.nb_obs_by_tracks = self.nb_obs_by_tracks[i_keep_track]
501+
self.i_current_by_tracks = self.nb_obs_by_tracks.cumsum() - self.nb_obs_by_tracks
502+
self.nb_obs = self.nb_obs_by_tracks.sum()
503+
# Give the last id used
504+
self.current_id = self.nb_obs_by_tracks.shape[0]
505+
translate = empty(i_keep_track.max() + 1, dtype='u4')
506+
translate[i_keep_track] = arange(self.current_id)
507+
for i, correspondance in enumerate(self):
508+
m_keep = isin(correspondance['id'], i_keep_track)
509+
self[i] = correspondance[m_keep]
510+
self[i]['id'] = translate[self[i]['id']]
511+
logging.debug('Select shorter than %d done', size_max)
512+
513+
def merge(self, until=-1):
460514
"""Merge all the correspondance in one array with all fields
461515
"""
462516
# Start loading identification again to save in the finals tracks
@@ -466,14 +520,6 @@ def merge(self, until=-1, size_min=None):
466520

467521
# Start create netcdf to agglomerate all eddy
468522
logging.debug('We will create an array (size %d)', self.nb_obs)
469-
i_keep_track = slice(None)
470-
if size_min is not None:
471-
i_keep_track = where(self.nb_obs_by_tracks >= size_min)
472-
self.nb_obs_by_tracks = self.nb_obs_by_tracks[i_keep_track]
473-
self.i_current_by_tracks[i_keep_track] = self.nb_obs_by_tracks.cumsum() - self.nb_obs_by_tracks
474-
self.nb_obs = self.nb_obs_by_tracks.sum()
475-
# ??
476-
self.current_id = self.nb_obs_by_tracks.shape[0]
477523
eddies = TrackEddiesObservations(
478524
size=self.nb_obs,
479525
track_extra_variables=self.current_obs.track_extra_variables,
@@ -484,11 +530,9 @@ def merge(self, until=-1, size_min=None):
484530
# in u2 (which are limited to 65535)
485531
logging.debug('Compute global index array (N)')
486532
eddies['n'][:] = uint16(
487-
arange(self.nb_obs, dtype='u4') - self.i_current_by_tracks[i_keep_track].repeat(self.nb_obs_by_tracks))
533+
arange(self.nb_obs, dtype='u4') - self.i_current_by_tracks.repeat(self.nb_obs_by_tracks))
488534
logging.debug('Compute global track array')
489535
eddies['track'][:] = arange(self.current_id).repeat(self.nb_obs_by_tracks)
490-
if size_min is not None:
491-
eddies['track'][:] += 1
492536

493537
# Set type of eddy with first file
494538
eddies.sign_type = self.current_obs.sign_type
@@ -506,19 +550,15 @@ def merge(self, until=-1, size_min=None):
506550
self.swap_dataset(file_name)
507551
# We select the list of id which are involve in the correspondance
508552
i_id = self[i]['id']
509-
if size_min is not None:
510-
m_id = isin(i_id, i_keep_track)
511-
i_id= i_id[m_id]
512-
else:
513-
m_id = slice(None)
514-
# Index where we will write in the final object
553+
# Index where we will write in the final object
554+
print(i_id.max())
515555
index_final = self.i_current_by_tracks[i_id]
516556

517557
# First obs of eddies
518558
m_first_obs = ~first_obs_save_in_tracks[i_id]
519559
if m_first_obs.any():
520560
# Index in the previous file
521-
index_in = self[i]['in'][m_id][m_first_obs]
561+
index_in = self[i]['in'][m_first_obs]
522562
# Copy all variable
523563
for field in fields:
524564
var = field[0]
@@ -532,15 +572,15 @@ def merge(self, until=-1, size_min=None):
532572
if self.virtual:
533573
# If the flag virtual in correspondance is active,
534574
# the previous is virtual
535-
m_virtual = self[i]['virtual'][m_id]
575+
m_virtual = self[i]['virtual']
536576
if m_virtual.any():
537577
# Incrementing index
538-
self.i_current_by_tracks[i_id[m_virtual]] += self[i]['virtual_length'][m_id][m_virtual]
578+
self.i_current_by_tracks[i_id[m_virtual]] += self[i]['virtual_length'][m_virtual]
539579
# Get new index
540580
index_final = self.i_current_by_tracks[i_id]
541581

542582
# Index in the current file
543-
index_current = self[i]['out'][m_id]
583+
index_current = self[i]['out']
544584

545585
# Copy all variable
546586
for field in fields:
@@ -551,3 +591,56 @@ def merge(self, until=-1, size_min=None):
551591
self.i_current_by_tracks[i_id] += 1
552592
self.previous_obs = self.current_obs
553593
return eddies
594+
595+
def get_unused_data(self):
596+
"""
597+
Add in track object all the observations which aren't selected
598+
Returns: Unused Eddies
599+
600+
"""
601+
self.reset_dataset_cache()
602+
self.swap_dataset(self.datasets[0])
603+
604+
nb_dataset = len(self.datasets)
605+
# Get the number of obs unused
606+
nb_obs = 0
607+
list_mask= list()
608+
has_virtual = 'virtual' in self[0].dtype.names
609+
for i, filename in enumerate(self.datasets):
610+
last_dataset = i == (nb_dataset - 1)
611+
first_dataset = i == 0
612+
if has_virtual:
613+
if not last_dataset:
614+
m_in = ~self[i]['virtual']
615+
if not first_dataset:
616+
m_out = ~self[i - 1]['virtual']
617+
else:
618+
m_in, m_out = slice(None), slice(None)
619+
if i == 0:
620+
eddies_used = self[i]['in'][m_in]
621+
elif last_dataset:
622+
eddies_used = self[i - 1]['out'][m_out]
623+
else:
624+
eddies_used = unique(concatenate((self[i - 1]['out'][m_out], self[i]['in'][m_in])))
625+
with Dataset(filename) as h:
626+
nb_obs_day = len(h.dimensions['Nobs'])
627+
m = ones(nb_obs_day, dtype='bool')
628+
m[eddies_used] = False
629+
list_mask.append(m)
630+
nb_obs += m.sum()
631+
632+
eddies = EddiesObservations(
633+
size=nb_obs,
634+
track_extra_variables=self.current_obs.track_extra_variables,
635+
track_array_variables=self.current_obs.track_array_variables,
636+
array_variables=self.current_obs.array_variables)
637+
j = 0
638+
for i, dataset in enumerate(self.datasets):
639+
current_obs = self.class_method.load_from_netcdf(dataset)
640+
if i ==0:
641+
eddies.sign_type = current_obs.sign_type
642+
unused_obs = current_obs.observations[list_mask[i]]
643+
nb = unused_obs.shape[0]
644+
eddies.observations[j:j + nb] = unused_obs
645+
j += nb
646+
return eddies

src/py_eddy_tracker/tracking_objects.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,8 @@ def set_global_attr_netcdf(self, h_nc):
162162
h_nc.llcrnrlat = self.grd.latmin
163163
h_nc.urcrnrlat = self.grd.latmax
164164

165-
def create_variable(self, handler_nc, kwargs_variable,
165+
@staticmethod
166+
def create_variable(handler_nc, kwargs_variable,
166167
attr_variable, data, scale_factor=None, add_offset=None):
167168
var = handler_nc.createVariable(
168169
zlib=True,

0 commit comments

Comments
 (0)