Skip to content

Commit 5c05cc6

Browse files
committed
read netcdf file in memory
1 parent fead948 commit 5c05cc6

File tree

2 files changed

+36
-5
lines changed

2 files changed

+36
-5
lines changed
Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,27 @@
11
from os import path
2-
2+
import requests
3+
import io
4+
import tarfile
5+
import lzma
36

47
def get_path(name):
    """Return the path of ``name`` inside the directory holding this module.

    :param str name: file name (or relative path) to locate
    :return: ``name`` joined to the directory of the current module file
    :rtype: str
    """
    module_dir = path.dirname(__file__)
    return path.join(module_dir, name)
9+
10+
11+
def get_remote_sample(path, root="/home/toto/dev/py-eddy-tracker-sample-id"):
    """Return the members of a sample ``.tar.xz`` archive as in-memory files.

    The archive ``{root}/{path}.tar.xz`` is read from disk, decompressed
    once, and opened as a tar stream kept in memory.

    :param str path: archive name relative to ``root``, without the
        ``.tar.xz`` suffix
    :param str root: directory holding the sample archives; the default is
        the location that used to be hard-coded in this function
    :return: one file object per extractable member, in archive order, each
        carrying a ``filename`` attribute set to the member name
    :rtype: list
    """
    # NOTE(review): the remote download is disabled; the code this replaces
    # had the following fetch commented out in favour of a local checkout:
    #   requests.get(
    #       f"https://github.com/AntSimi/py-eddy-tracker-sample-id/raw/master/{path}.tar.xz"
    #   ).content
    # The ``path`` argument shadows ``os.path`` inside this function, so the
    # location is built with plain string formatting.
    archive = f"{root}/{path}.tar.xz"
    with open(archive, "rb") as handle:
        compressed = handle.read()

    # The tar module could read the xz stream itself (mode="r:xz"), but it
    # would then decompress again for each extractfile call; decompressing
    # once up front avoids that.
    tar = tarfile.open(mode="r", fileobj=io.BytesIO(lzma.decompress(compressed)))
    # The tar object is deliberately left open: the returned members read
    # lazily from the in-memory buffer.
    files_content = []
    for item in tar:
        member = tar.extractfile(item)
        if member is None:
            # Directories and other non-file entries have no content.
            continue
        member.filename = item.name
        files_content.append(member)
    return files_content

src/py_eddy_tracker/observations/observation.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
from pint import UnitRegistry
5959
from pint.errors import UndefinedUnitError
6060
from tokenize import TokenError
61+
from tarfile import ExFileObject
6162
from matplotlib.path import Path as BasePath
6263
from .. import VAR_DESCR, VAR_DESCR_inv
6364
from ..generic import (
@@ -431,8 +432,11 @@ def zarr_dimension(filename):
431432

432433
@classmethod
def load_file(cls, filename, **kwargs):
    """Load observations, choosing the reader from the file suffix.

    Names ending in ``.zarr`` go to ``load_from_zarr``; everything else goes
    to ``load_from_netcdf``.

    :param filename: a ``str`` or ``bytes`` name, or a tarfile
        ``ExFileObject`` whose ``filename`` attribute holds the name
    :param kwargs: forwarded unchanged to the selected loader
    :return: whatever the selected loader returns
    """
    # For an in-memory tar member the suffix test uses its ``filename``
    # attribute, but the object itself is what gets passed to the loader.
    if isinstance(filename, ExFileObject):
        name = filename.filename
    else:
        name = filename
    # ``endswith`` needs a suffix of the same type as the name.
    if isinstance(name, bytes):
        zarr_suffix = b".zarr"
    else:
        zarr_suffix = ".zarr"
    loader = cls.load_from_zarr if name.endswith(zarr_suffix) else cls.load_from_netcdf
    return loader(filename, **kwargs)
@@ -539,9 +543,14 @@ def load_from_netcdf(
539543
cls, filename, raw_data=False, remove_vars=None, include_vars=None
540544
):
541545
array_dim = "NbSample"
542-
if not isinstance(filename, str):
546+
if isinstance(filename, bytes):
543547
filename = filename.astype(str)
544-
with Dataset(filename) as h_nc:
548+
if isinstance(filename, ExFileObject):
549+
filename.seek(0)
550+
args, kwargs = ("in-mem-file",), dict(memory=filename.read())
551+
else:
552+
args, kwargs = (filename,), dict()
553+
with Dataset(*args, **kwargs) as h_nc:
545554
var_list = list(h_nc.variables.keys())
546555
if include_vars is not None:
547556
var_list = [i for i in var_list if i in include_vars]

0 commit comments

Comments
 (0)