|  | 
| 3 | 3 | Applications on detection and tracking files | 
| 4 | 4 | """ | 
| 5 | 5 | import argparse | 
|  | 6 | +import logging | 
|  | 7 | +from datetime import datetime | 
|  | 8 | +from glob import glob | 
|  | 9 | +from os import mkdir | 
|  | 10 | +from os.path import basename, dirname, exists | 
|  | 11 | +from os.path import join as join_path | 
|  | 12 | +from re import compile as re_compile | 
| 6 | 13 | 
 | 
| 7 | 14 | from netCDF4 import Dataset | 
|  | 15 | +from numpy import bytes_, empty, unique | 
|  | 16 | +from yaml import safe_load | 
| 8 | 17 | 
 | 
| 9 | 18 | from .. import EddyParser | 
| 10 | 19 | from ..observations.observation import EddiesObservations | 
| 11 | 20 | from ..observations.tracking import TrackEddiesObservations | 
|  | 21 | +from ..tracking import Correspondances | 
|  | 22 | + | 
|  | 23 | +logger = logging.getLogger("pet") | 
| 12 | 24 | 
 | 
| 13 | 25 | 
 | 
| 14 | 26 | def eddies_add_circle(): | 
| @@ -41,24 +53,22 @@ def merge_eddies(): | 
| 41 | 53 |     parser.add_argument( | 
| 42 | 54 |         "--include_var", nargs="+", type=str, help="use only listed variable" | 
| 43 | 55 |     ) | 
|  | 56 | +    parser.memory_arg() | 
| 44 | 57 |     args = parser.parse_args() | 
| 45 | 58 | 
 | 
| 46 | 59 |     if args.include_var is None: | 
| 47 | 60 |         with Dataset(args.filename[0]) as h: | 
| 48 | 61 |             args.include_var = h.variables.keys() | 
| 49 | 62 | 
 | 
| 50 |  | -    obs = TrackEddiesObservations.load_file( | 
| 51 |  | -        args.filename[0], raw_data=True, include_vars=args.include_var | 
| 52 |  | -    ) | 
| 53 |  | -    if args.add_rotation_variable: | 
| 54 |  | -        obs = obs.add_rotation_type() | 
| 55 |  | -    for filename in args.filename[1:]: | 
| 56 |  | -        other = TrackEddiesObservations.load_file( | 
|  | 63 | +    obs = list() | 
|  | 64 | +    for filename in args.filename: | 
|  | 65 | +        e = TrackEddiesObservations.load_file( | 
| 57 | 66 |             filename, raw_data=True, include_vars=args.include_var | 
| 58 | 67 |         ) | 
| 59 | 68 |         if args.add_rotation_variable: | 
| 60 |  | -            other = other.add_rotation_type() | 
| 61 |  | -        obs = obs.merge(other) | 
|  | 69 | +            e = e.add_rotation_type() | 
|  | 70 | +        obs.append(e) | 
|  | 71 | +    obs = TrackEddiesObservations.concatenate(obs) | 
| 62 | 72 |     obs.write_file(filename=args.out) | 
| 63 | 73 | 
 | 
| 64 | 74 | 
 | 
| @@ -141,3 +151,229 @@ def display_infos(): | 
| 141 | 151 |             ) | 
| 142 | 152 |             e = e.extract_with_area(area) | 
| 143 | 153 |         print(e) | 
|  | 154 | + | 
|  | 155 | + | 
def eddies_tracking():
    """CLI entry point: run the tracking step on identification files.

    Reads a yaml configuration file, optionally resolves a user-provided
    observation class, creates the output folder and delegates the actual
    work to :func:`track`.
    """
    parser = EddyParser("Tool to use identification step to compute tracking")
    parser.add_argument("yaml_file", help="Yaml file to configure py-eddy-tracker")
    parser.add_argument("--correspondance_in", help="Filename of saved correspondance")
    parser.add_argument("--correspondance_out", help="Filename to save correspondance")
    parser.add_argument(
        "--save_correspondance_and_stop",
        action="store_true",
        help="Stop tracking after correspondance computation,"
        " merging can be done with EddyFinalTracking",
    )
    parser.add_argument(
        "--zarr", action="store_true", help="Output will be written in zarr"
    )
    parser.add_argument("--unraw", action="store_true", help="Load unraw data")
    parser.add_argument(
        "--blank_period",
        type=int,
        default=0,
        help="Nb of detections which will not be used at the end of the period",
    )
    parser.memory_arg()
    args = parser.parse_args()

    # Read yaml configuration file
    with open(args.yaml_file, "r") as stream:
        config = safe_load(stream)

    if "CLASS" in config:
        # Resolve a custom observation class: import its module, then fetch
        # the class by name. fromlist must be a sequence of names — passing
        # a bare string makes __import__ iterate it char by char when the
        # module is a package.
        classname = config["CLASS"]["CLASS"]
        obs_class = dict(
            class_method=getattr(
                __import__(
                    config["CLASS"]["MODULE"], globals(), locals(), [classname]
                ),
                classname,
            ),
            class_kw=config["CLASS"].get("OPTIONS", dict()),
        )
    else:
        obs_class = dict()

    # Command-line options take precedence over the yaml configuration.
    c_in, c_out = args.correspondance_in, args.correspondance_out
    if c_in is None:
        c_in = config["PATHS"].get("CORRESPONDANCES_IN", None)
    y_c_out = config["PATHS"].get(
        "CORRESPONDANCES_OUT", "{path}/{sign_type}_correspondances.nc"
    )
    if c_out is None:
        c_out = y_c_out

    # Create output folder if necessary
    save_dir = config["PATHS"].get("SAVE_DIR", None)
    if save_dir is not None and not exists(save_dir):
        mkdir(save_dir)

    track(
        pattern=config["PATHS"]["FILES_PATTERN"],
        output_dir=save_dir,
        c_out=c_out,
        **obs_class,
        virtual=int(config.get("VIRTUAL_LENGTH_MAX", 0)),
        previous_correspondance=c_in,
        memory=args.memory,
        correspondances_only=args.save_correspondance_and_stop,
        raw=not args.unraw,
        zarr=args.zarr,
        nb_obs_min=int(config.get("TRACK_DURATION_MIN", 10)),
        blank_period=args.blank_period,
    )
|  | 224 | + | 
|  | 225 | + | 
def browse_dataset_in(
    data_dir,
    files_model,
    date_regexp,
    date_model,
    start_date=None,
    end_date=None,
    sub_sampling_step=1,
    files=None,
):
    """Build a date-sorted (filename, date) record array of input grids.

    :param str data_dir: folder searched with `files_model`
        (ignored when `files` is given)
    :param str files_model: glob pattern joined to `data_dir`
    :param str date_regexp: regexp with one group capturing the date in the
        filename; when it contains no group, it is read as a netCDF
        attribute name ("attr" or "variable:attr")
    :param str date_model: strptime format used to parse the date string
    :param start_date: optional lower bound on the date; may be given alone
    :param end_date: optional upper bound on the date; may be given alone
    :param int sub_sampling_step: keep one file every N
    :param list files: explicit file list, bypassing the glob search
    :return: record array with "filename" and "date" fields, sorted by date
    :raises Exception: when the time step between grids is not constant
    """
    pattern_regexp = re_compile(".*/" + date_regexp)
    if files is not None:
        filenames = bytes_(files)
    else:
        full_path = join_path(data_dir, files_model)
        logger.info("Search files : %s", full_path)
        filenames = bytes_(glob(full_path))

    dataset_list = empty(
        len(filenames),
        dtype=[
            ("filename", "S500"),
            ("date", "datetime64[D]"),
        ],
    )
    dataset_list["filename"] = filenames

    logger.info("%s grids available", dataset_list.shape[0])
    mode_attrs = False
    if "(" not in date_regexp:
        # No capture group: the date is stored in the netCDF file itself,
        # as a global attribute ("attr") or a variable one ("var:attr").
        logger.debug("Attrs date : %s", date_regexp)
        mode_attrs = date_regexp.strip().split(":")
    else:
        logger.debug("Pattern date : %s", date_regexp)

    for item in dataset_list:
        str_date = None
        if mode_attrs:
            with Dataset(item["filename"].decode("utf-8")) as h:
                if len(mode_attrs) == 1:
                    str_date = getattr(h, mode_attrs[0])
                else:
                    str_date = getattr(h.variables[mode_attrs[0]], mode_attrs[1])
        else:
            result = pattern_regexp.match(str(item["filename"]))
            if result:
                str_date = result.groups()[0]

        if str_date is not None:
            item["date"] = datetime.strptime(str_date, date_model).date()

    dataset_list.sort(order=["date", "filename"])

    # All grids must be evenly spaced in time.
    steps = unique(dataset_list["date"][1:] - dataset_list["date"][:-1])
    if len(steps) > 1:
        raise Exception("Several days steps in grid dataset %s" % steps)

    if sub_sampling_step != 1:
        logger.info("Grid subsampling %d", sub_sampling_step)
        dataset_list = dataset_list[::sub_sampling_step]

    if start_date is not None or end_date is not None:
        logger.info(
            "Available grid from %s to %s",
            dataset_list[0]["date"],
            dataset_list[-1]["date"],
        )
        logger.info("Filtering grid by time %s, %s", start_date, end_date)
        # BUGFIX: the mask previously compared against both bounds even when
        # one of them was None, which raised on the datetime64 comparison.
        # Default a missing bound to the dataset extremity (list is sorted).
        if start_date is None:
            start_date = dataset_list["date"][0]
        if end_date is None:
            end_date = dataset_list["date"][-1]
        mask = (dataset_list["date"] >= start_date) & (
            dataset_list["date"] <= end_date
        )
        dataset_list = dataset_list[mask]
    return dataset_list
|  | 298 | + | 
|  | 299 | + | 
def track(
    pattern,
    output_dir,
    c_out,
    nb_obs_min=10,
    raw=True,
    zarr=False,
    blank_period=0,
    correspondances_only=False,
    **kw_c,
):
    """Run the full tracking pipeline over a set of identification files.

    Browse the input grids, compute and save correspondances, then (unless
    `correspondances_only`) merge observations and write long, short and
    untracked collections to `output_dir`.
    """
    # Locate the input grids, either from an explicit list or a glob pattern.
    browse_kw = dict(date_regexp=".*_([0-9]*?).[nz].*", date_model="%Y%m%d")
    if isinstance(pattern, list):
        browse_kw.update(dict(data_dir=None, files_model=None, files=pattern))
    else:
        browse_kw.update(
            dict(data_dir=dirname(pattern), files_model=basename(pattern))
        )
    datasets = browse_dataset_in(**browse_kw)
    if blank_period > 0:
        # Drop the trailing detections that must stay unused.
        datasets = datasets[:-blank_period]
        logger.info("Last %d files will be pop", blank_period)

    if nb_obs_min > len(datasets):
        raise Exception(
            "Input file number (%s) is shorter than TRACK_DURATION_MIN (%s)."
            % (len(datasets), nb_obs_min)
        )

    corr = Correspondances(datasets=datasets["filename"], **kw_c)
    corr.track()
    logger.info("Track finish")

    # Save the correspondances with their production metadata.
    t0, t1 = corr.period
    save_attrs = dict(
        date_start=t0,
        date_stop=t1,
        date_prod=datetime.now(),
        path=output_dir,
        sign_type=corr.current_obs.sign_legend,
    )
    corr.save(c_out, save_attrs)
    if correspondances_only:
        return

    logger.info("Start merging")
    corr.prepare_merging()
    logger.info("Longer track saved have %d obs", corr.nb_obs_by_tracks.max())
    logger.info(
        "The mean length is %d observations for all tracks",
        corr.nb_obs_by_tracks.mean(),
    )

    write_kw = dict(path=output_dir, zarr_flag=zarr)
    corr.get_unused_data(raw_data=raw).write_file(
        filename="%(path)s/%(sign_type)s_untracked.nc", **write_kw
    )

    # Split tracks around the minimum-duration threshold.
    short_corr = corr._copy()
    short_corr.shorter_than(size_max=nb_obs_min)
    corr.longer_than(size_min=nb_obs_min)

    long_track = corr.merge(raw_data=raw)
    short_track = short_corr.merge(raw_data=raw)

    if corr.virtual:
        # Virtual observations carry time == 0: flag then interpolate them,
        # long tracks first to keep the original processing order.
        for collection in (long_track, short_track):
            collection["virtual"][:] = collection["time"] == 0
            collection.filled_by_interpolation(collection["virtual"] == 1)

    logger.info("Longer track saved have %d obs", corr.nb_obs_by_tracks.max())
    logger.info(
        "The mean length is %d observations for long track",
        corr.nb_obs_by_tracks.mean(),
    )

    long_track.write_file(**write_kw)
    short_track.write_file(
        filename="%(path)s/%(sign_type)s_track_too_short.nc", **write_kw
    )
0 commit comments