Skip to content

Commit 33c96c3

Browse files
authored
Correct duplicate ping_times during EK60 conversion (fixes #235) (#433)
* Add fix for duplicate ping_time
* Remove dead code
* Increment by 1ns instead of 1ms
* Add duplicate ping time warning and store in provenance
* Autoformatter
* Add test for duplicate ping times file
* Add qc method to remove duplicate pings
* Drop entire pings with duplicate values
* Store entire original ping_time and add attribute when there are duplicate ping_times
* Remove duplicate ping_time qc method, it will be added back in future PR
* Change EchoData combine ping_time reversal attribute to netCDF encodable value
* Remove unused import
* Update tests
* Clarify duplicate ping_time removal and drop warnings
1 parent 164344a commit 33c96c3

File tree

4 files changed

+94
-1
lines changed

4 files changed

+94
-1
lines changed

echopype/convert/set_groups_ek60.py

+81
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,95 @@
1+
import warnings
12
from collections import defaultdict
3+
from datetime import datetime as dt
24

35
import numpy as np
46
import xarray as xr
7+
from _echopype_version import version as ECHOPYPE_VERSION
58

69
from .set_groups_base import DEFAULT_CHUNK_SIZE, SetGroupsBase, set_encodings
710

811

912
class SetGroupsEK60(SetGroupsBase):
1013
"""Class for saving groups to netcdf or zarr from EK60 data files."""
1114

15+
def __init__(self, *args, **kwargs):
    """Initialize the group setter and correct duplicate ``ping_time`` values.

    All positional and keyword arguments are forwarded unchanged to the
    parent initializer. Afterwards every transceiver channel listed in
    ``self.parser_obj.config_datagram["transceivers"]`` is scanned for
    repeated ping times:

    * If the power data of the first two pings sharing the first duplicated
      timestamp are identical, the repeats are dropped from every entry of
      ``self.parser_obj.ping_data_dict`` and from the channel's ping times.
    * Otherwise every repeat is shifted forward by 1 nanosecond.

    A warning is emitted in both cases. The first time a channel with
    duplicates is found, the uncorrected ping times are kept in
    ``self.old_ping_time`` (``None`` when no channel has duplicates).
    """
    super().__init__(*args, **kwargs)

    self.old_ping_time = None
    # correct duplicate ping_time
    for ch in self.parser_obj.config_datagram["transceivers"].keys():
        ping_time = self.parser_obj.ping_time[ch]
        # unique_idx holds the index of the first occurrence of each distinct
        # time; every index NOT in it is therefore a repeat.
        _, unique_idx = np.unique(ping_time, return_index=True)
        duplicates = np.invert(np.isin(np.arange(len(ping_time)), unique_idx))
        if not duplicates.any():
            continue

        if self.old_ping_time is None:
            # Materialise the dict view once: np.stack requires a real
            # sequence (list/tuple) and rejects dict views in recent NumPy.
            all_ping_times = list(self.parser_obj.ping_time.values())
            same_shape = len({arr.shape for arr in all_ping_times}) == 1
            if (
                same_shape
                and np.unique(np.stack(all_ping_times), axis=0).shape[0] == 1
            ):
                # every channel shares the same ping times: store one copy
                self.old_ping_time = self.parser_obj.ping_time[ch]
            else:
                # channels differ: store all of them end to end
                self.old_ping_time = xr.concat(
                    [xr.DataArray(arr, dims="ping_time") for arr in all_ping_times],
                    dim="ping_time",
                )

        backscatter_r = self.parser_obj.ping_data_dict["power"][ch]
        # indexes of duplicates including the originals
        # (if there are 2 times that are the same, both will be included)
        # NOTE(review): only the FIRST duplicated timestamp is inspected; the
        # outcome of that single comparison decides how every duplicate in
        # this channel is handled -- confirm this is intended.
        (all_duplicates_idx,) = np.where(
            np.isin(ping_time, ping_time[duplicates][0])
        )
        if np.array_equal(
            backscatter_r[all_duplicates_idx[0]],
            backscatter_r[all_duplicates_idx[1]],
        ):
            warnings.warn(
                "duplicate pings with identical values detected; the duplicate pings will be removed"  # noqa
            )
            for v in self.parser_obj.ping_data_dict.values():
                if v[ch] is None or len(v[ch]) == 0:
                    continue
                if isinstance(v[ch], np.ndarray):
                    v[ch] = v[ch][unique_idx]
                else:
                    v[ch] = [v[ch][i] for i in unique_idx]
            self.parser_obj.ping_time[ch] = self.parser_obj.ping_time[ch][
                unique_idx
            ]
        else:
            warnings.warn(
                "duplicate ping times detected; the duplicate times will be incremented by 1 nanosecond and remain in the ping_time coordinate. The original ping times will be preserved in the Provenance group"  # noqa
            )

            # NOTE(review): every repeat receives the same +1 ns offset, so a
            # timestamp occurring three or more times would still leave equal
            # values afterwards -- verify whether such input can occur.
            deltas = duplicates * np.timedelta64(1, "ns")
            new_ping_time = ping_time + deltas
            self.parser_obj.ping_time[ch] = new_ping_time
73+
74+
def set_provenance(self) -> xr.Dataset:
    """Set the Provenance group.

    Returns
    -------
    xr.Dataset
        Dataset whose attributes record the conversion software name and
        version, the UTC conversion time, the source file name(s) and a
        ``duplicate_ping_times`` flag (1 when ``self.old_ping_time`` is set,
        otherwise 0). When ``self.old_ping_time`` is set it is also stored
        as the ``old_ping_time`` data variable; otherwise the dataset has no
        variables.
    """
    # Local import: ``datetime.utcnow()`` is deprecated since Python 3.12, so
    # an aware "now" is taken in UTC and formatted to the same string.
    from datetime import timezone

    has_duplicates = self.old_ping_time is not None

    # Collect variables
    prov_dict = {
        "conversion_software_name": "echopype",
        "conversion_software_version": ECHOPYPE_VERSION,
        # use UTC time, second resolution, with a trailing "Z"
        "conversion_time": dt.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
        "src_filenames": self.input_file,
        # stored as an int (not a bool) so the attribute stays a plain number
        "duplicate_ping_times": 1 if has_duplicates else 0,
    }
    # Save
    if has_duplicates:
        ds = xr.Dataset(data_vars={"old_ping_time": self.old_ping_time})
    else:
        ds = xr.Dataset()
    ds = ds.assign_attrs(prov_dict)
    return ds
92+
1293
def set_env(self) -> xr.Dataset:
1394
"""Set the Environment group."""
1495
ch_ids = list(self.parser_obj.config_datagram["transceivers"].keys())

echopype/echodata/combine.py

+3
Original file line numberDiff line numberDiff line change
@@ -251,12 +251,15 @@ def combine_echodata(echodatas: List[EchoData], combine_attrs="override") -> Ech
251251
# save ping time before reversal correction
252252
if old_ping_time is not None:
253253
result.provenance["old_ping_time"] = old_ping_time
254+
result.provenance.attrs["reversed_ping_times"] = 1
254255
# save location time before reversal correction
255256
if old_location_time is not None:
256257
result.provenance["old_location_time"] = old_location_time
258+
result.provenance.attrs["reversed_ping_times"] = 1
257259
# save mru time before reversal correction
258260
if old_mru_time is not None:
259261
result.provenance["old_mru_time"] = old_mru_time
262+
result.provenance.attrs["reversed_ping_times"] = 1
260263
# TODO: possible parameter to disable original attributes and original ping_time storage
261264
# in provenance group?
262265
# save attrs from before combination

echopype/tests/convert/test_convert_ek60.py

+9
Original file line numberDiff line numberDiff line change
@@ -66,3 +66,12 @@ def test_convert_ek60_echoview_raw():
6666
echodata.beam.backscatter_r.isel(frequency=fidx, ping_time=slice(None, 10), range_bin=slice(1, None)),
6767
atol=9e-6, rtol=atol
6868
)
69+
70+
def test_convert_ek60_duplicate_ping_times():
    """Convert a file with duplicate ping times.

    The conversion must flag the duplicates in the Provenance attributes and
    keep the uncorrected ping times as the ``old_ping_time`` variable.
    """

    raw_path = ek60_path / "ooi" / "CE02SHBP-MJ01C-07-ZPLSCB101_OOI-D20191201-T000000.raw"
    ed = open_raw(raw_path, "EK60")

    # The attribute is written for every converted file (0 or 1), so checking
    # only for its presence would pass even if no duplicate was detected.
    assert "duplicate_ping_times" in ed.provenance.attrs
    assert ed.provenance.attrs["duplicate_ping_times"] == 1
    assert "old_ping_time" in ed.provenance

echopype/tests/echodata/test_echodata_combine.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ def test_attr_storage():
150150
group_attrs = combined.provenance[f"{group}_attrs"]
151151
for i, ed in enumerate(eds):
152152
for attr, value in getattr(ed, group).attrs.items():
153-
assert group_attrs.isel(echodata_filename=i).sel({f"{group}_attr_key": attr}).data[()] == value
153+
assert str(group_attrs.isel(echodata_filename=i).sel({f"{group}_attr_key": attr}).data[()]) == str(value)
154154

155155
# check selection by echodata_filename
156156
for file in ek60_test_data:

0 commit comments

Comments
 (0)