+import warnings
 from collections import defaultdict
+from datetime import datetime as dt

 import numpy as np
 import xarray as xr
+from _echopype_version import version as ECHOPYPE_VERSION

 from .set_groups_base import DEFAULT_CHUNK_SIZE, SetGroupsBase, set_encodings


 class SetGroupsEK60(SetGroupsBase):
     """Class for saving groups to netcdf or zarr from EK60 data files."""

+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        self.old_ping_time = None
+        # correct duplicate ping_time
+        for ch in self.parser_obj.config_datagram["transceivers"].keys():
+            ping_time = self.parser_obj.ping_time[ch]
+            _, unique_idx = np.unique(ping_time, return_index=True)
+            duplicates = np.invert(np.isin(np.arange(len(ping_time)), unique_idx))
+            if duplicates.any():
+                if self.old_ping_time is None:
+                    if (
+                        len({arr.shape for arr in self.parser_obj.ping_time.values()})
+                        == 1
+                        and np.unique(
+                            np.stack(self.parser_obj.ping_time.values()), axis=0
+                        ).shape[0]
+                        == 1
+                    ):
+                        self.old_ping_time = self.parser_obj.ping_time[ch]
+                    else:
+                        ping_times = [
+                            xr.DataArray(arr, dims="ping_time")
+                            for arr in self.parser_obj.ping_time.values()
+                        ]
+                        self.old_ping_time = xr.concat(ping_times, dim="ping_time")
+
+                backscatter_r = self.parser_obj.ping_data_dict["power"][ch]
+                # indexes of duplicates including the originals
+                # (if there are 2 times that are the same, both will be included)
+                (all_duplicates_idx,) = np.where(
+                    np.isin(ping_time, ping_time[duplicates][0])
+                )
+                if np.array_equal(
+                    backscatter_r[all_duplicates_idx[0]],
+                    backscatter_r[all_duplicates_idx[1]],
+                ):
+                    warnings.warn(
+                        "duplicate pings with identical values detected; the duplicate pings will be removed"  # noqa
+                    )
+                    for v in self.parser_obj.ping_data_dict.values():
+                        if v[ch] is None or len(v[ch]) == 0:
+                            continue
+                        if isinstance(v[ch], np.ndarray):
+                            v[ch] = v[ch][unique_idx]
+                        else:
+                            v[ch] = [v[ch][i] for i in unique_idx]
+                    self.parser_obj.ping_time[ch] = self.parser_obj.ping_time[ch][
+                        unique_idx
+                    ]
+                else:
+                    warnings.warn(
+                        "duplicate ping times detected; the duplicate times will be incremented by 1 nanosecond and remain in the ping_time coordinate. The original ping times will be preserved in the Provenance group"  # noqa
+                    )
+
+                    deltas = duplicates * np.timedelta64(1, "ns")
+                    new_ping_time = ping_time + deltas
+                    self.parser_obj.ping_time[ch] = new_ping_time
+
+    def set_provenance(self) -> xr.Dataset:
+        """Set the Provenance group."""
+        # Collect variables
+        prov_dict = {
+            "conversion_software_name": "echopype",
+            "conversion_software_version": ECHOPYPE_VERSION,
+            "conversion_time": dt.utcnow().isoformat(timespec="seconds")
+            + "Z",  # use UTC time
+            "src_filenames": self.input_file,
+            "duplicate_ping_times": 1 if self.old_ping_time is not None else 0,
+        }
+        # Save
+        if self.old_ping_time is not None:
+            ds = xr.Dataset(data_vars={"old_ping_time": self.old_ping_time})
+        else:
+            ds = xr.Dataset()
+        ds = ds.assign_attrs(prov_dict)
+        return ds
+
     def set_env(self) -> xr.Dataset:
         """Set the Environment group."""
         ch_ids = list(self.parser_obj.config_datagram["transceivers"].keys())
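The core of this change is the duplicate-timestamp handling added in `__init__`. As a review aid, here is a minimal standalone sketch of the fallback branch (duplicate pings that carry different backscatter values, so they are nudged apart rather than dropped); the function name `bump_duplicate_ping_times` and the sample timestamps are illustrative only, not part of the PR.

```python
import numpy as np


def bump_duplicate_ping_times(ping_time):
    """Illustrative sketch: shift repeated ping times forward by 1 ns.

    Mirrors the fallback branch in SetGroupsEK60.__init__ that is taken
    when duplicate timestamps carry different backscatter values and so
    cannot simply be dropped.
    """
    # indices of the first occurrence of every unique timestamp
    _, unique_idx = np.unique(ping_time, return_index=True)
    # True for every entry that repeats an earlier timestamp
    duplicates = np.invert(np.isin(np.arange(len(ping_time)), unique_idx))
    if not duplicates.any():
        return ping_time
    # add 1 ns to each repeated entry, leaving first occurrences untouched
    return ping_time + duplicates * np.timedelta64(1, "ns")


# hypothetical example: two identical pings followed by a distinct one
pt = np.array(
    ["2017-06-20T01:00:00.0", "2017-06-20T01:00:00.0", "2017-06-20T01:00:01.0"],
    dtype="datetime64[ns]",
)
print(bump_duplicate_ping_times(pt))
# the second entry becomes 2017-06-20T01:00:00.000000001
```

In the PR itself the original, unmodified timestamps are kept as `old_ping_time` and written to the Provenance group by `set_provenance`, so the adjustment remains traceable.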