Skip to content

Commit 9d7b956

Browse files
committed
Speed up correspondance I/O
1 parent f63f2f6 commit 9d7b956

File tree

3 files changed

+21
-12
lines changed

3 files changed

+21
-12
lines changed

src/py_eddy_tracker/observations.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -367,7 +367,7 @@ def load_from_netcdf(cls, filename):
367367
] = h_nc.variables[variable][:]
368368
eddies.sign_type = h_nc.variables['cyc'][0]
369369
if eddies.sign_type == 0:
370-
logging.info('File come from another algorithm')
370+
logging.debug('File come from another algorithm of identification')
371371
eddies.sign_type = -1
372372
return eddies
373373

src/py_eddy_tracker/tracking.py

Lines changed: 18 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,7 @@
3030
from matplotlib.dates import julian2num, num2date
3131

3232
from py_eddy_tracker.observations import EddiesObservations, VirtualEddiesObservations, TrackEddiesObservations
33-
from numpy import bool_, array, arange, ones, setdiff1d, zeros, uint16, where, empty, isin, unique, concatenate
33+
from numpy import bool_, array, arange, ones, setdiff1d, zeros, uint16, where, empty, isin, unique, concatenate, ma
3434
from netCDF4 import Dataset
3535
import logging
3636

@@ -377,6 +377,7 @@ def save(self, filename, dict_completion=None):
377377
zlib=True, complevel=1,
378378
varname='nb_link', datatype='u2', dimensions='Nstep')
379379

380+
datas = dict()
380381
for name, dtype in self.correspondance_dtype:
381382
if dtype is bool_:
382383
dtype = 'u1'
@@ -390,12 +391,18 @@ def save(self, filename, dict_completion=None):
390391
dimensions=('Nstep', 'Nlink'),
391392
**kwargs_cv
392393
)
394+
datas[name] = ma.empty((nb_step, self.nb_link_max),dtype=dtype)
395+
datas[name].mask = datas[name] == datas[name]
393396

394397
for i, correspondance in enumerate(self):
398+
logging.debug('correspondance %d', i)
395399
nb_elt = correspondance.shape[0]
396400
var_nb_link[i] = nb_elt
397401
for name, _ in self.correspondance_dtype:
398-
h_nc.variables[name][i, :nb_elt] = correspondance[name]
402+
datas[name][i, :nb_elt] = correspondance[name]
403+
for name, data in datas.items():
404+
h_nc.variables[name][:] = data
405+
399406
h_nc.virtual_use = str(self.virtual)
400407
h_nc.virtual_max_segment = self.nb_virtual
401408
h_nc.last_current_id = self.current_id
@@ -425,8 +432,10 @@ def load_compatible(self, filename):
425432
def load(cls, filename):
426433
logging.info('Try load %s', filename)
427434
with Dataset(filename, 'r', format='NETCDF4') as h_nc:
428-
datasets = list(h_nc.variables['FileIn'][:])
429-
datasets.append(h_nc.variables['FileOut'][-1])
435+
datas = {varname: data[:] for varname, data in h_nc.variables.items()}
436+
437+
datasets = list(datas['FileIn'])
438+
datasets.append(datas['FileOut'][-1])
430439

431440
if hasattr(h_nc, 'module'):
432441
class_method= getattr(__import__(h_nc.module, globals(), locals(), h_nc.classname), h_nc.classname)
@@ -435,19 +444,19 @@ def load(cls, filename):
435444
obj = cls(datasets, h_nc.virtual_max_segment, class_method=class_method)
436445

437446
id_max = 0
438-
for i, nb_elt in enumerate(h_nc.variables['nb_link'][:]):
447+
for i, nb_elt in enumerate(datas['nb_link'][:]):
439448
logging.debug(
440449
'Link between %s and %s',
441-
h_nc.variables['FileIn'][i],
442-
h_nc.variables['FileOut'][i])
443-
correspondance = array(h_nc.variables['in'][i, :nb_elt],
450+
datas['FileIn'][i],
451+
datas['FileOut'][i])
452+
correspondance = array(datas['in'][i, :nb_elt],
444453
dtype=obj.correspondance_dtype)
445454
for name, _ in obj.correspondance_dtype:
446455
if name == 'in':
447456
continue
448457
if name == 'virtual_length':
449458
correspondance[name] = 255
450-
correspondance[name] = h_nc.variables[name][i, :nb_elt]
459+
correspondance[name] = datas[name][i, :nb_elt]
451460
id_max = max(id_max, correspondance['id'].max())
452461
obj.append(correspondance)
453462
obj.current_id = id_max + 1
@@ -551,7 +560,6 @@ def merge(self, until=-1):
551560
# We select the list of id which are involve in the correspondance
552561
i_id = self[i]['id']
553562
# Index where we will write in the final object
554-
print(i_id.max())
555563
index_final = self.i_current_by_tracks[i_id]
556564

557565
# First obs of eddies

src/scripts/EddyFinalTracking

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,8 @@ if __name__ == '__main__':
5151

5252
logging.info('The longest tracks have %d observations', CORRESPONDANCES.nb_obs_by_tracks.max())
5353
logging.info('The mean length is %d observations before filtering', CORRESPONDANCES.nb_obs_by_tracks.mean())
54-
FINAL_EDDIES = CORRESPONDANCES.merge(size_min=CONFIG.nb_obs_min)
54+
CORRESPONDANCES.longer_than(size_min=CONFIG.nb_obs_min)
55+
FINAL_EDDIES = CORRESPONDANCES.merge()
5556

5657
# We flag obs
5758
if CORRESPONDANCES.virtual:

0 commit comments

Comments
 (0)